/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #include "tm-constrs.h"
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)			\
  ((mode) == QImode ? 0				\
   : (mode) == HImode ? 1			\
   : (mode) == SImode ? 2			\
   : (mode) == DImode ? 3			\
   : 4)
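
/* For example, MODE_INDEX (QImode) == 0 and MODE_INDEX (SImode) == 2, while
   any mode not listed above (a floating or vector mode, say) falls into the
   final "other" slot, index 4.  This matches the five-element
   {QI, HI, SI, DI, other} arrays used for the multiply and divide costs in
   the processor cost tables below.  */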
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
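
/* Under the assumption above, COSTS_N_INSNS (1) == 4 and
   COSTS_N_BYTES (2) == 4, so a 2-byte add in the size-cost table scores the
   same as a one-instruction latency does in the speed-oriented tables; the
   two kinds of tables therefore live on a common scale.  */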
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
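
/* Each stringop_algs initializer below is read as
   {algorithm for unknown size, {{max_size, algorithm}, ...}}: for a block of
   known size the first pair whose max_size is not smaller than the block is
   used, and a max_size of -1 terminates the list and covers all larger
   blocks (see struct stringop_algs in i386.h).  DUMMY_STRINGOP_ALGS simply
   fills a table slot that is never consulted.  */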
static const
struct processor_costs size_cost = {	/* costs for tuning for size */
  COSTS_N_BYTES (2),	/* cost of an add instruction */
  COSTS_N_BYTES (3),	/* cost of a lea instruction */
  COSTS_N_BYTES (2),	/* variable shift costs */
  COSTS_N_BYTES (3),	/* constant shift costs */
  {COSTS_N_BYTES (3),	/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),	/* HI */
   COSTS_N_BYTES (3),	/* SI */
   COSTS_N_BYTES (3),	/* DI */
   COSTS_N_BYTES (5)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),	/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),	/* HI */
   COSTS_N_BYTES (3),	/* SI */
   COSTS_N_BYTES (3),	/* DI */
   COSTS_N_BYTES (5)},	/* other */
  COSTS_N_BYTES (3),	/* cost of movsx */
  COSTS_N_BYTES (3),	/* cost of movzx */
  2,			/* cost for loading QImode using movzbl */
  {2, 2, 2},		/* cost of loading integer registers in QImode,
			   HImode and SImode.  Relative to reg-reg move (2).  */
  {2, 2, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 2},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {2, 2, 2},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  3,			/* cost of moving MMX register */
  {3, 3},		/* cost of loading MMX registers
			   in SImode and DImode */
  {3, 3},		/* cost of storing MMX registers
			   in SImode and DImode */
  3,			/* cost of moving SSE register */
  {3, 3, 3},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {3, 3, 3},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
  COSTS_N_BYTES (2),	/* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),	/* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),	/* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),	/* cost of FABS instruction.  */
  COSTS_N_BYTES (2),	/* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),	/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (1),	/* cost of a lea instruction */
  COSTS_N_INSNS (3),	/* variable shift costs */
  COSTS_N_INSNS (2),	/* constant shift costs */
  {COSTS_N_INSNS (6),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (6),	/* HI */
   COSTS_N_INSNS (6),	/* SI */
   COSTS_N_INSNS (6),	/* DI */
   COSTS_N_INSNS (6)},	/* other */
  COSTS_N_INSNS (1),	/* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),	/* HI */
   COSTS_N_INSNS (23),	/* SI */
   COSTS_N_INSNS (23),	/* DI */
   COSTS_N_INSNS (23)},	/* other */
  COSTS_N_INSNS (3),	/* cost of movsx */
  COSTS_N_INSNS (2),	/* cost of movzx */
  15,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers in QImode,
			   HImode and SImode.  Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {8, 8, 8},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {8, 8, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {4, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 8, 16},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 8, 16},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
  COSTS_N_INSNS (23),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (24),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),	/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
};
static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (1),	/* cost of a lea instruction */
  COSTS_N_INSNS (3),	/* variable shift costs */
  COSTS_N_INSNS (2),	/* constant shift costs */
  {COSTS_N_INSNS (12),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (12),	/* HI */
   COSTS_N_INSNS (12),	/* SI */
   COSTS_N_INSNS (12),	/* DI */
   COSTS_N_INSNS (12)},	/* other */
  1,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),	/* HI */
   COSTS_N_INSNS (40),	/* SI */
   COSTS_N_INSNS (40),	/* DI */
   COSTS_N_INSNS (40)},	/* other */
  COSTS_N_INSNS (3),	/* cost of movsx */
  COSTS_N_INSNS (2),	/* cost of movzx */
  15,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers in QImode,
			   HImode and SImode.  Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {8, 8, 8},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {8, 8, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {4, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 8, 16},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 8, 16},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
  COSTS_N_INSNS (8),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),	/* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
};
static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (1),	/* cost of a lea instruction */
  COSTS_N_INSNS (4),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (11),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),	/* HI */
   COSTS_N_INSNS (11),	/* SI */
   COSTS_N_INSNS (11),	/* DI */
   COSTS_N_INSNS (11)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),	/* HI */
   COSTS_N_INSNS (25),	/* SI */
   COSTS_N_INSNS (25),	/* DI */
   COSTS_N_INSNS (25)},	/* other */
  COSTS_N_INSNS (3),	/* cost of movsx */
  COSTS_N_INSNS (2),	/* cost of movzx */
  8,			/* "large" insn */
  6,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers in QImode,
			   HImode and SImode.  Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  8,			/* cost of moving MMX register */
  {8, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {8, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 8, 16},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 8, 16},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
  COSTS_N_INSNS (3),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),	/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
};
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (1),	/* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (4),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),	/* HI */
   COSTS_N_INSNS (4),	/* SI */
   COSTS_N_INSNS (4),	/* DI */
   COSTS_N_INSNS (4)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),	/* HI */
   COSTS_N_INSNS (17),	/* SI */
   COSTS_N_INSNS (17),	/* DI */
   COSTS_N_INSNS (17)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  8,			/* "large" insn */
  2,			/* cost for loading QImode using movzbl */
  {4, 4, 4},		/* cost of loading integer registers in QImode,
			   HImode and SImode.  Relative to reg-reg move (2).  */
  {2, 2, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {2, 2},		/* cost of loading MMX registers
			   in SImode and DImode */
  {2, 2},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {2, 2, 8},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {2, 2, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  32,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  COSTS_N_INSNS (3),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),	/* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks the inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has a more expensive startup time on
     the CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (1),	/* cost of a lea instruction */
  COSTS_N_INSNS (2),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (3),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),	/* HI */
   COSTS_N_INSNS (7),	/* SI */
   COSTS_N_INSNS (7),	/* DI */
   COSTS_N_INSNS (7)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),	/* HI */
   COSTS_N_INSNS (39),	/* SI */
   COSTS_N_INSNS (39),	/* DI */
   COSTS_N_INSNS (39)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  8,			/* "large" insn */
  1,			/* cost for loading QImode using movzbl */
  {1, 1, 1},		/* cost of loading integer registers in QImode,
			   HImode and SImode.  Relative to reg-reg move (2).  */
  {1, 1, 1},		/* cost of storing integer registers */
  1,			/* cost of reg,reg fld/fst */
  {1, 1, 1},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 6, 6},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */

  1,			/* cost of moving MMX register */
  {1, 1},		/* cost of loading MMX registers
			   in SImode and DImode */
  {1, 1},		/* cost of storing MMX registers
			   in SImode and DImode */
  1,			/* cost of moving SSE register */
  {1, 1, 1},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {1, 1, 1},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  1,			/* MMX or SSE register to integer */
  32,			/* size of prefetch block */
  1,			/* number of parallel prefetches */
  COSTS_N_INSNS (6),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),	/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (2),	/* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (3),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),	/* HI */
   COSTS_N_INSNS (3),	/* SI */
   COSTS_N_INSNS (3),	/* DI */
   COSTS_N_INSNS (3)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),	/* HI */
   COSTS_N_INSNS (18),	/* SI */
   COSTS_N_INSNS (18),	/* DI */
   COSTS_N_INSNS (18)},	/* other */
  COSTS_N_INSNS (2),	/* cost of movsx */
  COSTS_N_INSNS (2),	/* cost of movzx */
  8,			/* "large" insn */
  3,			/* cost for loading QImode using movzbl */
  {4, 5, 4},		/* cost of loading integer registers in QImode,
			   HImode and SImode.  Relative to reg-reg move (2).  */
  {2, 3, 2},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {6, 6, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 4},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {2, 2},		/* cost of loading MMX registers
			   in SImode and DImode */
  {2, 2},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {2, 2, 8},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {2, 2, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  6,			/* MMX or SSE register to integer */
  32,			/* size of prefetch block */
  1,			/* number of parallel prefetches */
  COSTS_N_INSNS (2),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),	/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (2),	/* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (5),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (5),	/* HI */
   COSTS_N_INSNS (5),	/* SI */
   COSTS_N_INSNS (5),	/* DI */
   COSTS_N_INSNS (5)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),	/* HI */
   COSTS_N_INSNS (42),	/* SI */
   COSTS_N_INSNS (74),	/* DI */
   COSTS_N_INSNS (74)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  8,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {3, 4, 3},		/* cost of loading integer registers in QImode,
			   HImode and SImode.  Relative to reg-reg move (2).  */
  {3, 4, 3},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {4, 4, 12},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {6, 6, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {4, 4},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 4},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 4, 6},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 4, 5},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  5,			/* MMX or SSE register to integer */
  64,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  COSTS_N_INSNS (4),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),	/* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with the REP prefix (relative to
     loops) than K8 does.  Alignment becomes important after 8 bytes for
     memcpy and 128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (2),	/* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (3),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),	/* HI */
   COSTS_N_INSNS (3),	/* SI */
   COSTS_N_INSNS (4),	/* DI */
   COSTS_N_INSNS (5)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),	/* HI */
   COSTS_N_INSNS (42),	/* SI */
   COSTS_N_INSNS (74),	/* DI */
   COSTS_N_INSNS (74)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  8,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {3, 4, 3},		/* cost of loading integer registers in QImode,
			   HImode and SImode.  Relative to reg-reg move (2).  */
  {3, 4, 3},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {4, 4, 12},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {6, 6, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {3, 3},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 4},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 3, 6},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 4, 5},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  5,			/* MMX or SSE register to integer */
  64,			/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,			/* number of parallel prefetches */
  COSTS_N_INSNS (4),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),	/* cost of FSQRT instruction.  */
  /* K8 has an optimized REP instruction for medium-sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
};
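
/* Note: each memcpy/memset member of processor_costs holds two stringop_algs
   variants, as in the K8 table above; the string-operation expanders pick
   between them depending on whether 32-bit or 64-bit code is being generated
   (TARGET_64BIT), which is why a table can give rep_prefix_4_byte advice for
   -m32 and rep_prefix_8_byte advice for -m64.  */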
static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (2),	/* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (3),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),	/* HI */
   COSTS_N_INSNS (3),	/* SI */
   COSTS_N_INSNS (4),	/* DI */
   COSTS_N_INSNS (5)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),	/* HI */
   COSTS_N_INSNS (51),	/* SI */
   COSTS_N_INSNS (83),	/* DI */
   COSTS_N_INSNS (83)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  8,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {3, 4, 3},		/* cost of loading integer registers in QImode,
			   HImode and SImode.  Relative to reg-reg move (2).  */
  {3, 4, 3},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {4, 4, 12},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {6, 6, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {3, 3},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 4},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 4, 3},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 4, 5},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
			/* MOVD reg64, xmmreg	Double	FSTORE 4
			   MOVD reg32, xmmreg	Double	FSTORE 4
			   MOVD reg64, xmmreg	Double	FADD 3
			   MOVD reg32, xmmreg	Double	FADD 3 */
  64,			/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,			/* number of parallel prefetches */
  COSTS_N_INSNS (4),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),	/* cost of FSQRT instruction.  */

  /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
};
static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (3),	/* cost of a lea instruction */
  COSTS_N_INSNS (4),	/* variable shift costs */
  COSTS_N_INSNS (4),	/* constant shift costs */
  {COSTS_N_INSNS (15),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (15),	/* HI */
   COSTS_N_INSNS (15),	/* SI */
   COSTS_N_INSNS (15),	/* DI */
   COSTS_N_INSNS (15)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),	/* HI */
   COSTS_N_INSNS (56),	/* SI */
   COSTS_N_INSNS (56),	/* DI */
   COSTS_N_INSNS (56)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  16,			/* "large" insn */
  2,			/* cost for loading QImode using movzbl */
  {4, 5, 4},		/* cost of loading integer registers in QImode,
			   HImode and SImode.  Relative to reg-reg move (2).  */
  {2, 3, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {2, 2},		/* cost of loading MMX registers
			   in SImode and DImode */
  {2, 2},		/* cost of storing MMX registers
			   in SImode and DImode */
  12,			/* cost of moving SSE register */
  {12, 12, 12},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {2, 2, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  10,			/* MMX or SSE register to integer */
  64,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  COSTS_N_INSNS (5),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),	/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
};
static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (1),	/* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (10),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (10),	/* HI */
   COSTS_N_INSNS (10),	/* SI */
   COSTS_N_INSNS (10),	/* DI */
   COSTS_N_INSNS (10)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),	/* HI */
   COSTS_N_INSNS (66),	/* SI */
   COSTS_N_INSNS (66),	/* DI */
   COSTS_N_INSNS (66)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  16,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {4, 4, 4},		/* cost of loading integer registers in QImode,
			   HImode and SImode.  Relative to reg-reg move (2).  */
  {4, 4, 4},		/* cost of storing integer registers */
  3,			/* cost of reg,reg fld/fst */
  {12, 12, 12},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 4},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  6,			/* cost of moving MMX register */
  {12, 12},		/* cost of loading MMX registers
			   in SImode and DImode */
  {12, 12},		/* cost of storing MMX registers
			   in SImode and DImode */
  6,			/* cost of moving SSE register */
  {12, 12, 12},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {12, 12, 12},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  8,			/* MMX or SSE register to integer */
  128,			/* size of prefetch block */
  8,			/* number of parallel prefetches */
  COSTS_N_INSNS (6),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),	/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
	      {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}}
};
static const
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,/* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (3),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),	/* HI */
   COSTS_N_INSNS (3),	/* SI */
   COSTS_N_INSNS (3),	/* DI */
   COSTS_N_INSNS (3)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (22),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (22),	/* HI */
   COSTS_N_INSNS (22),	/* SI */
   COSTS_N_INSNS (22),	/* DI */
   COSTS_N_INSNS (22)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  8,			/* "large" insn */
  2,			/* cost for loading QImode using movzbl */
  {6, 6, 6},		/* cost of loading integer registers in QImode,
			   HImode and SImode.  Relative to reg-reg move (2).  */
  {4, 4, 4},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {6, 6, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 4},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {6, 6},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 4},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {6, 6, 6},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 4, 4},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  2,			/* MMX or SSE register to integer */
  128,			/* size of prefetch block */
  8,			/* number of parallel prefetches */
  COSTS_N_INSNS (3),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (32),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (58),	/* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}}
};
/* Generic64 should produce code tuned for Nocona and K8.  */
static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles or more.  With
     this cost, however, our current implementation of synth_mult results in
     use of unnecessary temporary registers causing regression on several
     SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,/* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (3),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),	/* HI */
   COSTS_N_INSNS (3),	/* SI */
   COSTS_N_INSNS (4),	/* DI */
   COSTS_N_INSNS (2)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),	/* HI */
   COSTS_N_INSNS (42),	/* SI */
   COSTS_N_INSNS (74),	/* DI */
   COSTS_N_INSNS (74)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  8,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {4, 4, 4},		/* cost of loading integer registers in QImode,
			   HImode and SImode.  Relative to reg-reg move (2).  */
  {4, 4, 4},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {12, 12, 12},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {6, 6, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {8, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {8, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {8, 8, 8},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {8, 8, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  5,			/* MMX or SSE register to integer */
  64,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
     value is increased to the perhaps more appropriate value of 5.  */
  COSTS_N_INSNS (8),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),	/* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
};
/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona
   and K8.  */
static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,/* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (3),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),	/* HI */
   COSTS_N_INSNS (3),	/* SI */
   COSTS_N_INSNS (4),	/* DI */
   COSTS_N_INSNS (2)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),	/* HI */
   COSTS_N_INSNS (42),	/* SI */
   COSTS_N_INSNS (74),	/* DI */
   COSTS_N_INSNS (74)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  8,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {4, 4, 4},		/* cost of loading integer registers in QImode,
			   HImode and SImode.  Relative to reg-reg move (2).  */
  {4, 4, 4},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {12, 12, 12},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {6, 6, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {8, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {8, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {8, 8, 8},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {8, 8, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  5,			/* MMX or SSE register to integer */
  64,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  COSTS_N_INSNS (8),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),	/* cost of FSQRT instruction.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
};
const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
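
/* Each entry of the tuning/arch tables below is a mask over the PROCESSOR_*
   values above; a flag applies to the CPU selected by -mtune (or -march for
   the arch table) when that CPU's bit is set.  For instance, with
   -mtune=core2 an entry containing m_CORE2 enables the corresponding
   X86_TUNE_* feature, since option handling reduces each entry against the
   bit for the selected processor (1 << ix86_tune).  */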
/* Feature tests against the various tunings.  */
unsigned int ix86_tune_features[X86_TUNE_LAST] = {
  /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
     negatively, so enabling it for Generic64 seems like a good code-size
     tradeoff.  We can't enable it for 32-bit generic because it does not
     work well with PPro-based chips.  */
  m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,

  /* X86_TUNE_PUSH_MEMORY */
  m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_ZERO_EXTEND_WITH_AND */

  /* X86_TUNE_USE_BIT_TEST */

  /* X86_TUNE_UNROLL_STRLEN */
  m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,

  /* X86_TUNE_DEEP_BRANCH_PREDICTION */
  m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in the P4 based
     on simulation results, but after the P4 shipped no performance benefit
     was observed from branch hints, and they increase code size.  As a
     result, icc never generates branch hints.  */

  /* X86_TUNE_DOUBLE_WITH_ADD */

  /* X86_TUNE_USE_SAHF */
  m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32,
  /* | m_GENERIC | m_ATHLON_K8 ? */

  /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
     partial dependencies.  */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
  | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,

  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
     register stalls on the Generic32 compilation setting as well.  However,
     in the current implementation the partial register stalls are not
     eliminated very well - they can be introduced via subregs synthesized
     by combine and can happen in caller/callee saving sequences.  Because
     this option pays back little on PPro-based chips and is in conflict
     with the partial reg dependencies used by Athlon/P4-based chips, it is
     better to leave it off for generic32 for now.  */

  /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
  m_CORE2 | m_GENERIC,

  /* X86_TUNE_USE_HIMODE_FIOP */
  m_386 | m_486 | m_K6_GEODE,

  /* X86_TUNE_USE_SIMODE_FIOP */
  ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),

  /* X86_TUNE_USE_MOV0 */

  /* X86_TUNE_USE_CLTD */
  ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),

  /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */

  /* X86_TUNE_SPLIT_LONG_MOVES */

  /* X86_TUNE_READ_MODIFY_WRITE */

  /* X86_TUNE_READ_MODIFY */

  /* X86_TUNE_PROMOTE_QIMODE */
  m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
  | m_GENERIC /* | m_PENT4 ? */,

  /* X86_TUNE_FAST_PREFIX */
  ~(m_PENT | m_486 | m_386),

  /* X86_TUNE_SINGLE_STRINGOP */
  m_386 | m_PENT4 | m_NOCONA,

  /* X86_TUNE_QIMODE_MATH */

  /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
     register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL this option
     might be considered for Generic32 if our scheme for avoiding partial
     stalls was more effective.  */

  /* X86_TUNE_PROMOTE_QI_REGS */

  /* X86_TUNE_PROMOTE_HI_REGS */

  /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop.  */
  m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_ADD_ESP_8 */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
  | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SUB_ESP_4 */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SUB_ESP_8 */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
  | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
     for DFmode copies */
  ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
    | m_GENERIC | m_GEODE),

  /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
  m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
     conflict here in between PPro/Pentium4-based chips that treat 128-bit
     SSE registers as single units versus K8-based chips that divide SSE
     registers into two 64-bit halves.  This knob promotes all store
     destinations to be 128-bit to allow register renaming on 128-bit SSE
     units, but usually results in one extra microop on 64-bit SSE units.
     Experimental results show that disabling this option on P4 brings over
     20% SPECfp regression, while enabling it on K8 brings roughly 2.4%
     regression that can be partly masked by careful scheduling of moves.  */
  m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,

  /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */

  /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
     are resolved on SSE register parts instead of whole registers, so we may
     maintain just the lower part of scalar values in the proper format,
     leaving the upper part undefined.  */

  /* X86_TUNE_SSE_TYPELESS_STORES */
  m_ATHLON_K8_AMDFAM10,

  /* X86_TUNE_SSE_LOAD0_BY_PXOR */
  m_PPRO | m_PENT4 | m_NOCONA,

  /* X86_TUNE_MEMORY_MISMATCH_STALL */
  m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_PROLOGUE_USING_MOVE */
  m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,

  /* X86_TUNE_EPILOGUE_USING_MOVE */
  m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SHIFT1 */

  /* X86_TUNE_USE_FFREEP */
  m_ATHLON_K8_AMDFAM10,

  /* X86_TUNE_INTER_UNIT_MOVES */
  ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),

  /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
     than 4 branch instructions in the 16 byte window.  */
  m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SCHEDULE */
  m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,

  /* X86_TUNE_USE_BT */
  m_ATHLON_K8_AMDFAM10,

  /* X86_TUNE_USE_INCDEC */
  ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC),

  /* X86_TUNE_PAD_RETURNS */
  m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,

  /* X86_TUNE_EXT_80387_CONSTANTS */
  m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
};
/* Feature tests against the various architecture variations.  */
unsigned int ix86_arch_features[X86_ARCH_LAST] = {
  /* X86_ARCH_CMOVE */
  m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA,

  /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */

  /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for Pentium.  */

  /* X86_ARCH_XADD: Exchange and add was added for 80486.  */

  /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
};
static const unsigned int x86_accumulate_outgoing_args
  = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;

static const unsigned int x86_arch_always_fancy_math_387
  = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
    | m_NOCONA | m_CORE2 | m_GENERIC;

static enum stringop_alg stringop_alg = no_stringop;

/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* flags, fpsr, fpcr, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
};

/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};

/* The "default" register map used in 64bit mode.  */

int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to understand
   that it should say that a variable lives in %st(0) (when asked via an
   `=' command) if we said it was in DWARF regno 11, but SDB still prints
   garbage when asked for the value of the variable in question (via a
   `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;
rtx ix86_compare_emitted = NULL_RTX;

/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
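
/* For the 64-bit ABI this works out to 6 * 8 + 8 * 16 == 176 bytes
   (REGPARM_MAX integer registers of UNITS_PER_WORD bytes plus
   SSE_REGPARM_MAX 16-byte SSE registers): the size of the register save
   area into which a varargs prologue spills incoming argument registers.  */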
/* Define the structure for the machine field in struct function.  */

struct stack_local_entry GTY(())
{
  unsigned short mode;
  struct stack_local_entry *next;
};
/* Structure describing stack frame layout.
   Stack grows downward:

   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER

   [va_arg registers]  (
			> to_allocate	      <- FRAME_POINTER
  */
struct ix86_frame
{
  HOST_WIDE_INT frame;
  int outgoing_arguments_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
/* Code model option.  */
enum cmodel ix86_cmodel;
enum asm_dialect ix86_asm_dialect = ASM_ATT;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which CPU we are scheduling for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* True if the SSE prefetch instruction is not a NOP.  */
int x86_prefetch_sse;

/* True if cmpxchg16b is supported.  */

/* ix86_regparm_string as a number */
static int ix86_regparm;

/* -mstackrealign option */
extern int ix86_force_align_arg_pointer;
static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;

/* Variables which are this size or smaller are put in the data/bss
   or ldata/lbss sections.  */

int ix86_section_threshold = 65536;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
1466 static bool ix86_handle_option (size_t, const char *, int);
1467 static void output_pic_addr_const (FILE *, rtx, int);
1468 static void put_condition_code (enum rtx_code, enum machine_mode,
1470 static const char *get_some_local_dynamic_name (void);
1471 static int get_some_local_dynamic_name_1 (rtx *, void *);
1472 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1473 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1475 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1476 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1478 static rtx get_thread_pointer (int);
1479 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1480 static void get_pc_thunk_name (char [32], unsigned int);
1481 static rtx gen_push (rtx);
1482 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1483 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1484 static struct machine_function * ix86_init_machine_status (void);
1485 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1486 static int ix86_nsaved_regs (void);
1487 static void ix86_emit_save_regs (void);
1488 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1489 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1490 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1491 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1492 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1493 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1494 static int ix86_issue_rate (void);
1495 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1496 static int ia32_multipass_dfa_lookahead (void);
1497 static void ix86_init_mmx_sse_builtins (void);
1498 static rtx x86_this_parameter (tree);
1499 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1500 				 HOST_WIDE_INT, tree);
1501 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1502 static void x86_file_start (void);
1503 static void ix86_reorg (void);
1504 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx *);
1505 static tree ix86_build_builtin_va_list (void);
1506 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1508 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1509 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1510 static bool ix86_vector_mode_supported_p (enum machine_mode);
1512 static int ix86_address_cost (rtx);
1513 static bool ix86_cannot_force_const_mem (rtx);
1514 static rtx ix86_delegitimize_address (rtx);
1516 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1518 struct builtin_description;
1519 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1521 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1523 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1524 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1525 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1526 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1527 static rtx safe_vector_operand (rtx, enum machine_mode);
1528 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1529 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1530 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1531 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1532 static int ix86_fp_comparison_cost (enum rtx_code code);
1533 static unsigned int ix86_select_alt_pic_regnum (void);
1534 static int ix86_save_reg (unsigned int, int);
1535 static void ix86_compute_frame_layout (struct ix86_frame *);
1536 static int ix86_comp_type_attributes (tree, tree);
1537 static int ix86_function_regparm (tree, tree);
1538 const struct attribute_spec ix86_attribute_table[];
1539 static bool ix86_function_ok_for_sibcall (tree, tree);
1540 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1541 static int ix86_value_regno (enum machine_mode, tree, tree);
1542 static bool contains_128bit_aligned_vector_p (tree);
1543 static rtx ix86_struct_value_rtx (tree, int);
1544 static bool ix86_ms_bitfield_layout_p (tree);
1545 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1546 static int extended_reg_mentioned_1 (rtx *, void *);
1547 static bool ix86_rtx_costs (rtx, int, int, int *);
1548 static int min_insn_size (rtx);
1549 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1550 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1551 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1553 static void ix86_init_builtins (void);
1554 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1555 static tree ix86_builtin_vectorized_function (enum built_in_function, tree, tree);
1556 static tree ix86_builtin_conversion (enum tree_code, tree);
1557 static const char *ix86_mangle_fundamental_type (tree);
1558 static tree ix86_stack_protect_fail (void);
1559 static rtx ix86_internal_arg_pointer (void);
1560 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1561 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1564 /* This function is only used on Solaris.  */
1565 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1568 /* Register class used for passing given 64bit part of the argument.
1569    These represent classes as documented by the PS ABI, with the exception
1570    of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1571    use SF or DFmode move instead of DImode to avoid reformatting penalties.
1573    Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1574    whenever possible (upper half does contain padding).  */
1576 enum x86_64_reg_class
1579     X86_64_INTEGER_CLASS,
1580     X86_64_INTEGERSI_CLASS,
1587     X86_64_COMPLEX_X87_CLASS,
1590 static const char * const x86_64_reg_class_name[] = {
1591     "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1592     "sseup", "x87", "x87up", "cplx87", "no"
1595 #define MAX_CLASSES 4
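/* Worked example (editor's sketch, not part of the ABI text): under the
   classification below, a hypothetical

     struct s { int i; double d; };

   occupies two eightbytes.  The first eightbyte holds only the int plus
   padding, so it is classified X86_64_INTEGERSI_CLASS and passed in a
   general register using an SImode move; the second holds the double and
   is classified X86_64_SSEDF_CLASS, so it travels in an SSE register via
   a DFmode move.  */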
1597 /* Table of constants used by fldpi, fldln2, etc....  */
1598 static REAL_VALUE_TYPE ext_80387_constants_table[5];
1599 static bool ext_80387_constants_init = 0;
1600 static void init_ext_80387_constants (void);
1601 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1602 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1603 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1604 static section *x86_64_elf_select_section (tree decl, int reloc,
1605 					   unsigned HOST_WIDE_INT align)
1608 /* Initialize the GCC target structure. */
1609 #undef TARGET_ATTRIBUTE_TABLE
1610 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1611 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1612 # undef TARGET_MERGE_DECL_ATTRIBUTES
1613 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1616 #undef TARGET_COMP_TYPE_ATTRIBUTES
1617 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1619 #undef TARGET_INIT_BUILTINS
1620 #define TARGET_INIT_BUILTINS ix86_init_builtins
1621 #undef TARGET_EXPAND_BUILTIN
1622 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1624 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1625 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
1626 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
1627 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion
1629 #undef TARGET_ASM_FUNCTION_EPILOGUE
1630 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1632 #undef TARGET_ENCODE_SECTION_INFO
1633 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1634 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1636 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1639 #undef TARGET_ASM_OPEN_PAREN
1640 #define TARGET_ASM_OPEN_PAREN ""
1641 #undef TARGET_ASM_CLOSE_PAREN
1642 #define TARGET_ASM_CLOSE_PAREN ""
1644 #undef TARGET_ASM_ALIGNED_HI_OP
1645 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1646 #undef TARGET_ASM_ALIGNED_SI_OP
1647 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1649 #undef TARGET_ASM_ALIGNED_DI_OP
1650 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1653 #undef TARGET_ASM_UNALIGNED_HI_OP
1654 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1655 #undef TARGET_ASM_UNALIGNED_SI_OP
1656 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1657 #undef TARGET_ASM_UNALIGNED_DI_OP
1658 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1660 #undef TARGET_SCHED_ADJUST_COST
1661 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1662 #undef TARGET_SCHED_ISSUE_RATE
1663 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1664 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1665 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1666 ia32_multipass_dfa_lookahead
1668 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1669 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1672 #undef TARGET_HAVE_TLS
1673 #define TARGET_HAVE_TLS true
1675 #undef TARGET_CANNOT_FORCE_CONST_MEM
1676 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1677 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1678 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1680 #undef TARGET_DELEGITIMIZE_ADDRESS
1681 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1683 #undef TARGET_MS_BITFIELD_LAYOUT_P
1684 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1687 #undef TARGET_BINDS_LOCAL_P
1688 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1691 #undef TARGET_ASM_OUTPUT_MI_THUNK
1692 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1693 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1694 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1696 #undef TARGET_ASM_FILE_START
1697 #define TARGET_ASM_FILE_START x86_file_start
1699 #undef TARGET_DEFAULT_TARGET_FLAGS
1700 #define TARGET_DEFAULT_TARGET_FLAGS \
1702 | TARGET_64BIT_DEFAULT \
1703 | TARGET_SUBTARGET_DEFAULT \
1704 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1706 #undef TARGET_HANDLE_OPTION
1707 #define TARGET_HANDLE_OPTION ix86_handle_option
1709 #undef TARGET_RTX_COSTS
1710 #define TARGET_RTX_COSTS ix86_rtx_costs
1711 #undef TARGET_ADDRESS_COST
1712 #define TARGET_ADDRESS_COST ix86_address_cost
1714 #undef TARGET_FIXED_CONDITION_CODE_REGS
1715 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1716 #undef TARGET_CC_MODES_COMPATIBLE
1717 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1719 #undef TARGET_MACHINE_DEPENDENT_REORG
1720 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1722 #undef TARGET_BUILD_BUILTIN_VA_LIST
1723 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1725 #undef TARGET_MD_ASM_CLOBBERS
1726 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1728 #undef TARGET_PROMOTE_PROTOTYPES
1729 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1730 #undef TARGET_STRUCT_VALUE_RTX
1731 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1732 #undef TARGET_SETUP_INCOMING_VARARGS
1733 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1734 #undef TARGET_MUST_PASS_IN_STACK
1735 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1736 #undef TARGET_PASS_BY_REFERENCE
1737 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1738 #undef TARGET_INTERNAL_ARG_POINTER
1739 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1740 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1741 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1743 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1744 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1746 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1747 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1749 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1750 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1753 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1754 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1757 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1758 #undef TARGET_INSERT_ATTRIBUTES
1759 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1762 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1763 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1765 #undef TARGET_STACK_PROTECT_FAIL
1766 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1768 #undef TARGET_FUNCTION_VALUE
1769 #define TARGET_FUNCTION_VALUE ix86_function_value
1771 struct gcc_target targetm = TARGET_INITIALIZER;
1774 /* The svr4 ABI for the i386 says that records and unions are returned
1776 #ifndef DEFAULT_PCC_STRUCT_RETURN
1777 #define DEFAULT_PCC_STRUCT_RETURN 1
1780 /* Implement TARGET_HANDLE_OPTION.  */
1783 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1790 	  target_flags &= ~MASK_3DNOW_A;
1791 	  target_flags_explicit |= MASK_3DNOW_A;
1798 	  target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1799 	  target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1806 	  target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSE4A);
1807 	  target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSE4A;
1814 	  target_flags &= ~(MASK_SSE3 | MASK_SSE4A);
1815 	  target_flags_explicit |= MASK_SSE3 | MASK_SSE4A;
1822 	  target_flags &= ~MASK_SSE4A;
1823 	  target_flags_explicit |= MASK_SSE4A;
1832 /* Sometimes certain combinations of command options do not make
1833 sense on a particular target machine. You can define a macro
1834 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1835 defined, is executed once just after all the command options have
1838 Don't use this macro to turn on various extra optimizations for
1839 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
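/* Illustrative example (editor's note, assuming the usual driver setup):
   compiling with

     gcc -c foo.c -march=pentium3

   makes override_options below pick PROCESSOR_PENTIUMPRO from
   processor_alias_table, default the tuning to the same processor, and
   turn on MASK_MMX and MASK_SSE unless the user passed an explicit
   -mno-mmx or -mno-sse (tracked through target_flags_explicit).  */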
1842 override_options (void)
1845   int ix86_tune_defaulted = 0;
1846   unsigned int ix86_arch_mask, ix86_tune_mask;
1848   /* Comes from final.c -- no real reason to change it.  */
1849 #define MAX_CODE_ALIGN 16
1853       const struct processor_costs *cost;	/* Processor costs */
1854       const int target_enable;			/* Target flags to enable.  */
1855       const int target_disable;			/* Target flags to disable.  */
1856       const int align_loop;			/* Default alignments.  */
1857       const int align_loop_max_skip;
1858       const int align_jump;
1859       const int align_jump_max_skip;
1860       const int align_func;
1862   const processor_target_table[PROCESSOR_max] =
1864       {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1865       {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1866       {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1867       {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1868       {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1869       {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1870       {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1871       {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1872       {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1873       {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1874       {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1875       {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1876       {&generic64_cost, 0, 0, 16, 7, 16, 7, 16},
1877       {&amdfam10_cost, 0, 0, 32, 7, 32, 7, 32}
1880   static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1883       const char *const name;		/* processor name or nickname.  */
1884       const enum processor_type processor;
1885       const enum pta_flags
1891 	PTA_PREFETCH_SSE = 16,
1902     const processor_alias_table[] =
1904       {"i386", PROCESSOR_I386, 0},
1905       {"i486", PROCESSOR_I486, 0},
1906       {"i586", PROCESSOR_PENTIUM, 0},
1907       {"pentium", PROCESSOR_PENTIUM, 0},
1908       {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1909       {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1910       {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1911       {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1912       {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1913       {"i686", PROCESSOR_PENTIUMPRO, 0},
1914       {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1915       {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1916       {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1917       {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1918       {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1919       {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_MMX | PTA_PREFETCH_SSE},
1921       {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_MMX | PTA_PREFETCH_SSE},
1923       {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_MMX | PTA_PREFETCH_SSE},
1925       {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT | PTA_MMX | PTA_PREFETCH_SSE | PTA_CX16},
1927       {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_64BIT | PTA_MMX | PTA_PREFETCH_SSE | PTA_CX16},
1930       {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1932       {"k6", PROCESSOR_K6, PTA_MMX},
1933       {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1934       {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1935       {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1937       {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_3DNOW_A},
1939       {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
1941       {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
1943       {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
1945       {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT | PTA_SSE | PTA_SSE2},
1947       {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1949       {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1951       {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1953       {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1955       {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT | PTA_3DNOW_A | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT | PTA_ABM | PTA_SSE4A | PTA_CX16},
1959       {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch.  */ },
1960       {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch.  */ },
1963   int const pta_size = ARRAY_SIZE (processor_alias_table);
1965 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1966   SUBTARGET_OVERRIDE_OPTIONS;
1969 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1970   SUBSUBTARGET_OVERRIDE_OPTIONS;
1973   /* -fPIC is the default for x86_64.  */
1974   if (TARGET_MACHO && TARGET_64BIT)
1977   /* Set the default values for switches whose default depends on TARGET_64BIT
1978      in case they weren't overwritten by command line options.  */
1981       /* Mach-O doesn't support omitting the frame pointer for now.  */
1982       if (flag_omit_frame_pointer == 2)
1983 	flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1984       if (flag_asynchronous_unwind_tables == 2)
1985 	flag_asynchronous_unwind_tables = 1;
1986       if (flag_pcc_struct_return == 2)
1987 	flag_pcc_struct_return = 0;
1991       if (flag_omit_frame_pointer == 2)
1992 	flag_omit_frame_pointer = 0;
1993       if (flag_asynchronous_unwind_tables == 2)
1994 	flag_asynchronous_unwind_tables = 0;
1995       if (flag_pcc_struct_return == 2)
1996 	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1999   /* Need to check -mtune=generic first.  */
2000   if (ix86_tune_string)
2002       if (!strcmp (ix86_tune_string, "generic")
2003 	  || !strcmp (ix86_tune_string, "i686")
2004 	  /* As special support for cross compilers we read -mtune=native
2005 	     as -mtune=generic.  With native compilers we won't see the
2006 	     -mtune=native, as it was changed by the driver.  */
2007 	  || !strcmp (ix86_tune_string, "native"))
2010 	    ix86_tune_string = "generic64";
2012 	    ix86_tune_string = "generic32";
2014       else if (!strncmp (ix86_tune_string, "generic", 7))
2015 	error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2019       if (ix86_arch_string)
2020 	ix86_tune_string = ix86_arch_string;
2021       if (!ix86_tune_string)
2023 	  ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2024 	  ix86_tune_defaulted = 1;
2027       /* ix86_tune_string is set to ix86_arch_string or defaulted.  We
2028 	 need to use a sensible tune option.  */
2029       if (!strcmp (ix86_tune_string, "generic")
2030 	  || !strcmp (ix86_tune_string, "x86-64")
2031 	  || !strcmp (ix86_tune_string, "i686"))
2034 	    ix86_tune_string = "generic64";
2036 	    ix86_tune_string = "generic32";
2039   if (ix86_stringop_string)
2041       if (!strcmp (ix86_stringop_string, "rep_byte"))
2042 	stringop_alg = rep_prefix_1_byte;
2043       else if (!strcmp (ix86_stringop_string, "libcall"))
2044 	stringop_alg = libcall;
2045       else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2046 	stringop_alg = rep_prefix_4_byte;
2047       else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2048 	stringop_alg = rep_prefix_8_byte;
2049       else if (!strcmp (ix86_stringop_string, "byte_loop"))
2050 	stringop_alg = loop_1_byte;
2051       else if (!strcmp (ix86_stringop_string, "loop"))
2052 	stringop_alg = loop;
2053       else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2054 	stringop_alg = unrolled_loop;
2056 	error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2058   if (!strcmp (ix86_tune_string, "x86-64"))
2059     warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated.  Use -mtune=k8 or "
2060 	     "-mtune=generic instead as appropriate.");
2062   if (!ix86_arch_string)
2063     ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2064   if (!strcmp (ix86_arch_string, "generic"))
2065     error ("generic CPU can be used only for -mtune= switch");
2066   if (!strncmp (ix86_arch_string, "generic", 7))
2067     error ("bad value (%s) for -march= switch", ix86_arch_string);
2069   if (ix86_cmodel_string != 0)
2071       if (!strcmp (ix86_cmodel_string, "small"))
2072 	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2073       else if (!strcmp (ix86_cmodel_string, "medium"))
2074 	ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2075       else if (!strcmp (ix86_cmodel_string, "large"))
2076 	ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2078 	error ("code model %s does not support PIC mode", ix86_cmodel_string);
2079       else if (!strcmp (ix86_cmodel_string, "32"))
2080 	ix86_cmodel = CM_32;
2081       else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2082 	ix86_cmodel = CM_KERNEL;
2084 	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2088       ix86_cmodel = CM_32;
2090 	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2092   if (ix86_asm_string != 0)
2095 	  && !strcmp (ix86_asm_string, "intel"))
2096 	ix86_asm_dialect = ASM_INTEL;
2097       else if (!strcmp (ix86_asm_string, "att"))
2098 	ix86_asm_dialect = ASM_ATT;
2100 	error ("bad value (%s) for -masm= switch", ix86_asm_string);
2102   if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2103     error ("code model %qs not supported in the %s bit mode",
2104 	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2105   if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
2106     sorry ("%i-bit mode not compiled in",
2107 	   (target_flags & MASK_64BIT) ? 64 : 32);
2109   for (i = 0; i < pta_size; i++)
2110     if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2112 	ix86_arch = processor_alias_table[i].processor;
2113 	/* Default cpu tuning to the architecture.  */
2114 	ix86_tune = ix86_arch;
2115 	if (processor_alias_table[i].flags & PTA_MMX
2116 	    && !(target_flags_explicit & MASK_MMX))
2117 	  target_flags |= MASK_MMX;
2118 	if (processor_alias_table[i].flags & PTA_3DNOW
2119 	    && !(target_flags_explicit & MASK_3DNOW))
2120 	  target_flags |= MASK_3DNOW;
2121 	if (processor_alias_table[i].flags & PTA_3DNOW_A
2122 	    && !(target_flags_explicit & MASK_3DNOW_A))
2123 	  target_flags |= MASK_3DNOW_A;
2124 	if (processor_alias_table[i].flags & PTA_SSE
2125 	    && !(target_flags_explicit & MASK_SSE))
2126 	  target_flags |= MASK_SSE;
2127 	if (processor_alias_table[i].flags & PTA_SSE2
2128 	    && !(target_flags_explicit & MASK_SSE2))
2129 	  target_flags |= MASK_SSE2;
2130 	if (processor_alias_table[i].flags & PTA_SSE3
2131 	    && !(target_flags_explicit & MASK_SSE3))
2132 	  target_flags |= MASK_SSE3;
2133 	if (processor_alias_table[i].flags & PTA_SSSE3
2134 	    && !(target_flags_explicit & MASK_SSSE3))
2135 	  target_flags |= MASK_SSSE3;
2136 	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
2137 	  x86_prefetch_sse = true;
2138 	if (processor_alias_table[i].flags & PTA_CX16)
2139 	  x86_cmpxchg16b = true;
2140 	if (processor_alias_table[i].flags & PTA_POPCNT
2141 	    && !(target_flags_explicit & MASK_POPCNT))
2142 	  target_flags |= MASK_POPCNT;
2143 	if (processor_alias_table[i].flags & PTA_ABM
2144 	    && !(target_flags_explicit & MASK_ABM))
2145 	  target_flags |= MASK_ABM;
2146 	if (processor_alias_table[i].flags & PTA_SSE4A
2147 	    && !(target_flags_explicit & MASK_SSE4A))
2148 	  target_flags |= MASK_SSE4A;
2149 	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2150 	  error ("CPU you selected does not support x86-64 "
2156     error ("bad value (%s) for -march= switch", ix86_arch_string);
2158   ix86_arch_mask = 1u << ix86_arch;
2159   for (i = 0; i < X86_ARCH_LAST; ++i)
2160     ix86_arch_features[i] &= ix86_arch_mask;
2162   for (i = 0; i < pta_size; i++)
2163     if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2165 	ix86_tune = processor_alias_table[i].processor;
2166 	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2168 	    if (ix86_tune_defaulted)
2170 		ix86_tune_string = "x86-64";
2171 		for (i = 0; i < pta_size; i++)
2172 		  if (! strcmp (ix86_tune_string,
2173 				processor_alias_table[i].name))
2175 		ix86_tune = processor_alias_table[i].processor;
2178 	      error ("CPU you selected does not support x86-64 "
2181 	/* Intel CPUs have always interpreted SSE prefetch instructions as
2182 	   NOPs; so, we can enable SSE prefetch instructions even when
2183 	   -mtune (rather than -march) points us to a processor that has them.
2184 	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2185 	   higher processors.  */
2186 	if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
2187 	  x86_prefetch_sse = true;
2191     error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2193   ix86_tune_mask = 1u << ix86_tune;
2194   for (i = 0; i < X86_TUNE_LAST; ++i)
2195     ix86_tune_features[i] &= ix86_tune_mask;
2198     ix86_cost = &size_cost;
2200     ix86_cost = processor_target_table[ix86_tune].cost;
2201   target_flags |= processor_target_table[ix86_tune].target_enable;
2202   target_flags &= ~processor_target_table[ix86_tune].target_disable;
2204   /* Arrange to set up i386_stack_locals for all functions.  */
2205   init_machine_status = ix86_init_machine_status;
2207   /* Validate -mregparm= value.  */
2208   if (ix86_regparm_string)
2210       i = atoi (ix86_regparm_string);
2211       if (i < 0 || i > REGPARM_MAX)
2212 	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2218     ix86_regparm = REGPARM_MAX;
2220 /* If the user has provided any of the -malign-* options,
2221 warn and use that value only if -falign-* is not set.
2222 Remove this code in GCC 3.2 or later. */
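/* For example (editor's note): -malign-loops=4 is accepted here and, if
   -falign-loops was not given, translates to align_loops = 1 << 4, i.e. a
   16-byte loop alignment, exactly as -falign-loops=16 would request.  */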
2223   if (ix86_align_loops_string)
2225       warning (0, "-malign-loops is obsolete, use -falign-loops");
2226       if (align_loops == 0)
2228 	  i = atoi (ix86_align_loops_string);
2229 	  if (i < 0 || i > MAX_CODE_ALIGN)
2230 	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2232 	    align_loops = 1 << i;
2236   if (ix86_align_jumps_string)
2238       warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2239       if (align_jumps == 0)
2241 	  i = atoi (ix86_align_jumps_string);
2242 	  if (i < 0 || i > MAX_CODE_ALIGN)
2243 	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2245 	    align_jumps = 1 << i;
2249   if (ix86_align_funcs_string)
2251       warning (0, "-malign-functions is obsolete, use -falign-functions");
2252       if (align_functions == 0)
2254 	  i = atoi (ix86_align_funcs_string);
2255 	  if (i < 0 || i > MAX_CODE_ALIGN)
2256 	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2258 	    align_functions = 1 << i;
2262   /* Default align_* from the processor table.  */
2263   if (align_loops == 0)
2265       align_loops = processor_target_table[ix86_tune].align_loop;
2266       align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2268   if (align_jumps == 0)
2270       align_jumps = processor_target_table[ix86_tune].align_jump;
2271       align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2273   if (align_functions == 0)
2275       align_functions = processor_target_table[ix86_tune].align_func;
2278   /* Validate -mbranch-cost= value, or provide default.  */
2279   ix86_branch_cost = ix86_cost->branch_cost;
2280   if (ix86_branch_cost_string)
2282       i = atoi (ix86_branch_cost_string);
2284 	error ("-mbranch-cost=%d is not between 0 and 5", i);
2286 	ix86_branch_cost = i;
2288   if (ix86_section_threshold_string)
2290       i = atoi (ix86_section_threshold_string);
2292 	error ("-mlarge-data-threshold=%d is negative", i);
2294 	ix86_section_threshold = i;
2297   if (ix86_tls_dialect_string)
2299       if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2300 	ix86_tls_dialect = TLS_DIALECT_GNU;
2301       else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2302 	ix86_tls_dialect = TLS_DIALECT_GNU2;
2303       else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2304 	ix86_tls_dialect = TLS_DIALECT_SUN;
2306 	error ("bad value (%s) for -mtls-dialect= switch",
2307 	       ix86_tls_dialect_string);
2310   /* Keep nonleaf frame pointers.  */
2311   if (flag_omit_frame_pointer)
2312     target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2313   else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2314     flag_omit_frame_pointer = 1;
2316   /* If we're doing fast math, we don't care about comparison order
2317      wrt NaNs.  This lets us use a shorter comparison sequence.  */
2318   if (flag_finite_math_only)
2319     target_flags &= ~MASK_IEEE_FP;
2321   /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2322      since the insns won't need emulation.  */
2323   if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2324     target_flags &= ~MASK_NO_FANCY_MATH_387;
2326   /* Likewise, if the target doesn't have a 387, or we've specified
2327      software floating point, don't use 387 inline intrinsics.  */
2329     target_flags |= MASK_NO_FANCY_MATH_387;
2331   /* Turn on SSE3 builtins for -mssse3.  */
2333     target_flags |= MASK_SSE3;
2335   /* Turn on SSE3 builtins for -msse4a.  */
2337     target_flags |= MASK_SSE3;
2339   /* Turn on SSE2 builtins for -msse3.  */
2341     target_flags |= MASK_SSE2;
2343   /* Turn on SSE builtins for -msse2.  */
2345     target_flags |= MASK_SSE;
2347   /* Turn on MMX builtins for -msse.  */
2350       target_flags |= MASK_MMX & ~target_flags_explicit;
2351       x86_prefetch_sse = true;
2354   /* Turn on MMX builtins for 3Dnow.  */
2356     target_flags |= MASK_MMX;
2358   /* Turn on POPCNT builtins for -mabm.  */
2360     target_flags |= MASK_POPCNT;
2364       if (TARGET_ALIGN_DOUBLE)
2365 	error ("-malign-double makes no sense in the 64bit mode");
2367 	error ("-mrtd calling convention not supported in the 64bit mode");
2369       /* Enable by default the SSE and MMX builtins.  Do allow the user to
2370 	 explicitly disable any of these.  In particular, disabling SSE and
2371 	 MMX for kernel code is extremely useful.  */
2373 	|= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
2374 	    & ~target_flags_explicit);
2378       /* i386 ABI does not specify red zone.  It still makes sense to use it
2379 	 when the programmer takes care to keep the stack from being destroyed.  */
2380       if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2381 	target_flags |= MASK_NO_RED_ZONE;
2384 /* Validate -mpreferred-stack-boundary= value, or provide default.
2385 The default of 128 bits is for Pentium III's SSE __m128. We can't
2386 change it because of optimize_size. Otherwise, we can't mix object
2387 files compiled with -Os and -On. */
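/* For example (editor's note): -mpreferred-stack-boundary=4 yields
   ix86_preferred_stack_boundary = (1 << 4) * BITS_PER_UNIT = 128 bits,
   i.e. a 16-byte boundary; the smallest accepted value is 4 (16 bytes)
   in 64-bit mode and 2 (4 bytes) in 32-bit mode.  */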
2388   ix86_preferred_stack_boundary = 128;
2389   if (ix86_preferred_stack_boundary_string)
2391       i = atoi (ix86_preferred_stack_boundary_string);
2392       if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2393 	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2394 	       TARGET_64BIT ? 4 : 2);
2396 	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2399   /* Accept -msseregparm only if at least SSE support is enabled.  */
2400   if (TARGET_SSEREGPARM
2402     error ("-msseregparm used without SSE enabled");
2404   ix86_fpmath = TARGET_FPMATH_DEFAULT;
2405   if (ix86_fpmath_string != 0)
2407       if (! strcmp (ix86_fpmath_string, "387"))
2408 	ix86_fpmath = FPMATH_387;
2409       else if (! strcmp (ix86_fpmath_string, "sse"))
2413 	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
2414 	      ix86_fpmath = FPMATH_387;
2417 	    ix86_fpmath = FPMATH_SSE;
2419       else if (! strcmp (ix86_fpmath_string, "387,sse")
2420 	       || ! strcmp (ix86_fpmath_string, "sse,387"))
2424 	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
2425 	      ix86_fpmath = FPMATH_387;
2427 	  else if (!TARGET_80387)
2429 	      warning (0, "387 instruction set disabled, using SSE arithmetics");
2430 	      ix86_fpmath = FPMATH_SSE;
2433 	    ix86_fpmath = FPMATH_SSE | FPMATH_387;
2436 	error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2439   /* If the i387 is disabled, then do not return values in it.  */
2441     target_flags &= ~MASK_FLOAT_RETURNS;
2443   if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2444       && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2446     target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2448   /* ??? Unwind info is not correct around the CFG unless either a frame
2449      pointer is present or M_A_O_A is set.  Fixing this requires rewriting
2450      unwind info generation to be aware of the CFG and propagating states
2452   if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2453        || flag_exceptions || flag_non_call_exceptions)
2454       && flag_omit_frame_pointer
2455       && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2457       if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2458 	warning (0, "unwind tables currently require either a frame pointer "
2459 		 "or -maccumulate-outgoing-args for correctness");
2460       target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2463   /* For sane SSE instruction set generation we need fcomi instruction.
2464      It is safe to enable all CMOVE instructions.  */
2468   /* ??? Any idea why this is unconditionally disabled for 64-bit?  */
2470     TARGET_USE_SAHF = 0;
2472   /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
2475     ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2476     p = strchr (internal_label_prefix, 'X');
2477     internal_label_prefix_len = p - internal_label_prefix;
2481   /* When scheduling description is not available, disable scheduler pass
2482      so it won't slow down the compilation and make x87 code slower.  */
2483   if (!TARGET_SCHEDULE)
2484     flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2486   if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2487     set_param_value ("simultaneous-prefetches",
2488 		     ix86_cost->simultaneous_prefetches);
2489   if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2490     set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2493 /* switch to the appropriate section for output of DECL.
2494 DECL is either a `VAR_DECL' node or a constant of some sort.
2495 RELOC indicates whether forming the initial value of DECL requires
2496 link-time relocations. */
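/* Example (editor's sketch): with -mcmodel=medium and the default
   -mlarge-data-threshold of 65536, a writable initialized array such as

     static char big[1 << 17] = { 1 };

   exceeds ix86_section_threshold, so ix86_in_large_data_p accepts it and
   the switch below maps it to one of the ".ldata*" large-data sections
   (the exact name depends on categorize_decl_for_section) rather than the
   ordinary ".data" section.  */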
2499 x86_64_elf_select_section (tree decl, int reloc,
2500 			   unsigned HOST_WIDE_INT align)
2502   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2503       && ix86_in_large_data_p (decl))
2505       const char *sname = NULL;
2506       unsigned int flags = SECTION_WRITE;
2507       switch (categorize_decl_for_section (decl, reloc, flag_pic))
2512 	case SECCAT_DATA_REL:
2513 	  sname = ".ldata.rel";
2515 	case SECCAT_DATA_REL_LOCAL:
2516 	  sname = ".ldata.rel.local";
2518 	case SECCAT_DATA_REL_RO:
2519 	  sname = ".ldata.rel.ro";
2521 	case SECCAT_DATA_REL_RO_LOCAL:
2522 	  sname = ".ldata.rel.ro.local";
2526 	  flags |= SECTION_BSS;
2529 	case SECCAT_RODATA_MERGE_STR:
2530 	case SECCAT_RODATA_MERGE_STR_INIT:
2531 	case SECCAT_RODATA_MERGE_CONST:
2535 	case SECCAT_SRODATA:
2542 	  /* We don't split these for medium model.  Place them into
2543 	     default sections and hope for best.  */
2548       /* We might get called with string constants, but get_named_section
2549 	 doesn't like them as they are not DECLs.  Also, we need to set
2550 	 flags in that case.  */
2552 	return get_section (sname, flags, NULL);
2553       return get_named_section (decl, sname, reloc);
2556   return default_elf_select_section (decl, reloc, align);
2559 /* Build up a unique section name, expressed as a
2560 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2561 RELOC indicates whether the initial value of EXP requires
2562 link-time relocations. */
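/* Example (editor's sketch): for a one-only large BSS object named "cache",
   built without COMDAT group support, the code below picks the prefix
   ".gnu.linkonce.lb." and produces the section name ".gnu.linkonce.lb.cache";
   with COMDAT groups available the plain ".lbss." prefix is used instead.  */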
2565 x86_64_elf_unique_section (tree decl, int reloc)
2567   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2568       && ix86_in_large_data_p (decl))
2570       const char *prefix = NULL;
2571       /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
2572       bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2574       switch (categorize_decl_for_section (decl, reloc, flag_pic))
2577 	case SECCAT_DATA_REL:
2578 	case SECCAT_DATA_REL_LOCAL:
2579 	case SECCAT_DATA_REL_RO:
2580 	case SECCAT_DATA_REL_RO_LOCAL:
2581 	  prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2584 	  prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2587 	case SECCAT_RODATA_MERGE_STR:
2588 	case SECCAT_RODATA_MERGE_STR_INIT:
2589 	case SECCAT_RODATA_MERGE_CONST:
2590 	  prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2592 	case SECCAT_SRODATA:
2599 	  /* We don't split these for medium model.  Place them into
2600 	     default sections and hope for best.  */
2608 	  plen = strlen (prefix);
2610 	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2611 	  name = targetm.strip_name_encoding (name);
2612 	  nlen = strlen (name);
2614 	  string = alloca (nlen + plen + 1);
2615 	  memcpy (string, prefix, plen);
2616 	  memcpy (string + plen, name, nlen + 1);
2618 	  DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2622   default_unique_section (decl, reloc);
2625 #ifdef COMMON_ASM_OP
2626 /* This says how to output assembler code to declare an
2627    uninitialized external linkage data object.
2629    For medium model x86-64 we need to use .largecomm opcode for
2632 x86_elf_aligned_common (FILE *file,
2633 			const char *name, unsigned HOST_WIDE_INT size,
2636   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2637       && size > (unsigned int)ix86_section_threshold)
2638     fprintf (file, ".largecomm\t");
2640     fprintf (file, "%s", COMMON_ASM_OP);
2641   assemble_name (file, name);
2642   fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2643 	   size, align / BITS_PER_UNIT);
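/* Example output (editor's note): for a 100000-byte common symbol "buf"
   aligned to 256 bits under -mcmodel=medium this emits

     .largecomm	buf,100000,32

   while smaller objects keep the ordinary COMMON_ASM_OP (".comm") form.  */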
2646 /* Utility function for targets to use in implementing
2647 ASM_OUTPUT_ALIGNED_BSS. */
2650 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2651 			const char *name, unsigned HOST_WIDE_INT size,
2654   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2655       && size > (unsigned int)ix86_section_threshold)
2656     switch_to_section (get_named_section (decl, ".lbss", 0));
2658     switch_to_section (bss_section);
2659   ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2660 #ifdef ASM_DECLARE_OBJECT_NAME
2661   last_assemble_variable_decl = decl;
2662   ASM_DECLARE_OBJECT_NAME (file, name, decl);
2664   /* Standard thing is just output label for the object.  */
2665   ASM_OUTPUT_LABEL (file, name);
2666 #endif /* ASM_DECLARE_OBJECT_NAME */
2667   ASM_OUTPUT_SKIP (file, size ? size : 1);
2671 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2673   /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
2674      make the problem with not enough registers even worse.  */
2675 #ifdef INSN_SCHEDULING
2677     flag_schedule_insns = 0;
2681   /* The Darwin libraries never set errno, so we might as well
2682      avoid calling them when that's the only reason we would.  */
2683   flag_errno_math = 0;
2685   /* The default values of these switches depend on TARGET_64BIT, which
2686      is not known at this moment.  Mark these values with 2 and
2687      let the user override them.  In case there is no command line option
2688      specifying them, we will set the defaults in override_options.  */
2690       flag_omit_frame_pointer = 2;
2691       flag_pcc_struct_return = 2;
2692       flag_asynchronous_unwind_tables = 2;
2693 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2694   SUBTARGET_OPTIMIZATION_OPTIONS;
2698 /* Table of valid machine attributes. */
2699 const struct attribute_spec ix86_attribute_table[] =
2701   /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2702   /* Stdcall attribute says callee is responsible for popping arguments
2703      if they are not variable.  */
2704   { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute },
2705   /* Fastcall attribute says callee is responsible for popping arguments
2706      if they are not variable.  */
2707   { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute },
2708   /* Cdecl attribute says the callee is a normal C declaration */
2709   { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute },
2710   /* Regparm attribute specifies how many integer arguments are to be
2711      passed in registers.  */
2712   { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute },
2713   /* Sseregparm attribute says we are using x86_64 calling conventions
2714      for FP arguments.  */
2715   { "sseregparm", 0, 0, false, true, true,  ix86_handle_cconv_attribute },
2716   /* force_align_arg_pointer says this function realigns the stack at entry.  */
2717   { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2718     false, true,  true,  ix86_handle_cconv_attribute },
2719 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2720   { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2721   { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2722   { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
2724   { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2725   { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2726 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2727   SUBTARGET_ATTRIBUTE_TABLE,
2729   { NULL,        0, 0, false, false, false, NULL }
2732 /* Decide whether we can make a sibling call to a function. DECL is the
2733 declaration of the function being targeted by the call and EXP is the
2734 CALL_EXPR representing the call. */
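/* Example (editor's sketch): in 32-bit PIC code,

     extern int g (int);
     static int h (int x) { return x + 1; }
     int f  (int x) { return g (x); }
     int f2 (int x) { return h (x); }

   the tail call in f is rejected below because calling the global g may go
   through the PLT, which requires %ebx to be live; the tail call in f2 can
   still become a direct jump since h binds locally.  */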
2737 ix86_function_ok_for_sibcall (tree decl, tree exp)
2742   /* If we are generating position-independent code, we cannot sibcall
2743      optimize any indirect call, or a direct call to a global function,
2744      as the PLT requires %ebx be live.  */
2745   if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2752   func = TREE_TYPE (CALL_EXPR_FN (exp));
2753   if (POINTER_TYPE_P (func))
2754     func = TREE_TYPE (func);
2757   /* Check that the return value locations are the same.  For example,
2758      if we are returning floats on the 80387 register stack, we cannot
2759      make a sibcall from a function that doesn't return a float to a
2760      function that does or, conversely, from a function that does return
2761      a float to a function that doesn't; the necessary stack adjustment
2762      would not be executed.  This is also the place we notice
2763      differences in the return value ABI.  Note that it is ok for one
2764      of the functions to have void return type as long as the return
2765      value of the other is passed in a register.  */
2766   a = ix86_function_value (TREE_TYPE (exp), func, false);
2767   b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2769   if (STACK_REG_P (a) || STACK_REG_P (b))
2771       if (!rtx_equal_p (a, b))
2774   else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2776   else if (!rtx_equal_p (a, b))
2779   /* If this call is indirect, we'll need to be able to use a call-clobbered
2780      register for the address of the target function.  Make sure that all
2781      such registers are not used for passing parameters.  */
2782   if (!decl && !TARGET_64BIT)
2786       /* We're looking at the CALL_EXPR, we need the type of the function.  */
2787       type = CALL_EXPR_FN (exp);		/* pointer expression */
2788       type = TREE_TYPE (type);			/* pointer type */
2789       type = TREE_TYPE (type);			/* function type */
2791       if (ix86_function_regparm (type, NULL) >= 3)
2793 	  /* ??? Need to count the actual number of registers to be used,
2794 	     not the possible number of registers.  Fix later.  */
2799 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2800   /* Dllimport'd functions are also called indirectly.  */
2801   if (decl && DECL_DLLIMPORT_P (decl)
2802       && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2806   /* If we force-aligned the stack, then sibcalling would unalign the
2807      stack, which may break the called function.  */
2808   if (cfun->machine->force_align_arg_pointer)
2811   /* Otherwise okay.  That also includes certain types of indirect calls.  */
2815 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2816 calling convention attributes;
2817 arguments as in struct attribute_spec.handler. */
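/* Usage example (editor's note): the handler below accepts declarations
   such as

     int __attribute__((regparm (2))) add (int a, int b);
     int __attribute__((fastcall)) mul (int a, int b);

   and diagnoses invalid combinations, e.g. applying both fastcall and
   regparm to the same type yields "fastcall and regparm attributes are
   not compatible".  */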
2820 ix86_handle_cconv_attribute (tree *node, tree name,
2822 			     int flags ATTRIBUTE_UNUSED,
2825   if (TREE_CODE (*node) != FUNCTION_TYPE
2826       && TREE_CODE (*node) != METHOD_TYPE
2827       && TREE_CODE (*node) != FIELD_DECL
2828       && TREE_CODE (*node) != TYPE_DECL)
2830       warning (OPT_Wattributes, "%qs attribute only applies to functions",
2831 	       IDENTIFIER_POINTER (name));
2832       *no_add_attrs = true;
2836   /* Can combine regparm with all attributes but fastcall.  */
2837   if (is_attribute_p ("regparm", name))
2841       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2843 	  error ("fastcall and regparm attributes are not compatible");
2846       cst = TREE_VALUE (args);
2847       if (TREE_CODE (cst) != INTEGER_CST)
2849 	  warning (OPT_Wattributes,
2850 		   "%qs attribute requires an integer constant argument",
2851 		   IDENTIFIER_POINTER (name));
2852 	  *no_add_attrs = true;
2854       else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2856 	  warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2857 		   IDENTIFIER_POINTER (name), REGPARM_MAX);
2858 	  *no_add_attrs = true;
2862 	  && lookup_attribute (ix86_force_align_arg_pointer_string,
2863 			       TYPE_ATTRIBUTES (*node))
2864 	  && compare_tree_int (cst, REGPARM_MAX-1))
2866 	  error ("%s functions limited to %d register parameters",
2867 		 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2875       warning (OPT_Wattributes, "%qs attribute ignored",
2876 	       IDENTIFIER_POINTER (name));
2877       *no_add_attrs = true;
2881   /* Can combine fastcall with stdcall (redundant) and sseregparm.  */
2882   if (is_attribute_p ("fastcall", name))
2884       if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2886 	  error ("fastcall and cdecl attributes are not compatible");
2888       if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2890 	  error ("fastcall and stdcall attributes are not compatible");
2892       if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2894 	  error ("fastcall and regparm attributes are not compatible");
2898   /* Can combine stdcall with fastcall (redundant), regparm and
2900   else if (is_attribute_p ("stdcall", name))
2902       if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2904 	  error ("stdcall and cdecl attributes are not compatible");
2906       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2908 	  error ("stdcall and fastcall attributes are not compatible");
2912   /* Can combine cdecl with regparm and sseregparm.  */
2913   else if (is_attribute_p ("cdecl", name))
2915       if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2917 	  error ("stdcall and cdecl attributes are not compatible");
2919       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2921 	  error ("fastcall and cdecl attributes are not compatible");
2925   /* Can combine sseregparm with all attributes.  */
2930 /* Return 0 if the attributes for two types are incompatible, 1 if they
2931 are compatible, and 2 if they are nearly compatible (which causes a
2932 warning to be generated). */
2935 ix86_comp_type_attributes (tree type1, tree type2)
2937   /* Check for mismatch of non-default calling convention.  */
2938   const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2940   if (TREE_CODE (type1) != FUNCTION_TYPE)
2943   /* Check for mismatched fastcall/regparm types.  */
2944   if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2945        != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2946       || (ix86_function_regparm (type1, NULL)
2947 	  != ix86_function_regparm (type2, NULL)))
2950   /* Check for mismatched sseregparm types.  */
2951   if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2952       != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2955   /* Check for mismatched return types (cdecl vs stdcall).  */
2956   if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2957       != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2963 /* Return the regparm value for a function with the indicated TYPE and DECL.
2964 DECL may be NULL when calling function indirectly
2965 or considering a libcall. */
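/* Example (editor's note): for

     int __attribute__((regparm (2))) add (int a, int b);

   this returns 2, so the first two integer arguments are passed in
   registers (%eax and %edx in the usual regparm ordering) instead of on
   the stack; without the attribute the -mregparm default stored in
   ix86_regparm is returned.  */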
2968 ix86_function_regparm (tree type, tree decl)
2971   int regparm = ix86_regparm;
2972   bool user_convention = false;
2976       attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2979 	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2980 	  user_convention = true;
2983       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2986 	  user_convention = true;
2989   /* Use register calling convention for local functions when possible.  */
2990   if (!TARGET_64BIT && !user_convention && decl
2991       && flag_unit_at_a_time && !profile_flag)
2993       struct cgraph_local_info *i = cgraph_local_info (decl);
2996 	  int local_regparm, globals = 0, regno;
2998 	  /* Make sure no regparm register is taken by a global register
3000 	  for (local_regparm = 0; local_regparm < 3; local_regparm++)
3001 	    if (global_regs[local_regparm])
3003 	  /* We can't use regparm(3) for nested functions as these use
3004 	     static chain pointer in third argument.  */
3005 	  if (local_regparm == 3
3006 	      && decl_function_context (decl)
3007 	      && !DECL_NO_STATIC_CHAIN (decl))
3009 	  /* If the function realigns its stack pointer, the
3010 	     prologue will clobber %ecx.  If we've already
3011 	     generated code for the callee, the callee
3012 	     DECL_STRUCT_FUNCTION is gone, so we fall back to
3013 	     scanning the attributes for the self-realigning
3015 	  if ((DECL_STRUCT_FUNCTION (decl)
3016 	       && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
3017 	      || (!DECL_STRUCT_FUNCTION (decl)
3018 		  && lookup_attribute (ix86_force_align_arg_pointer_string,
3019 				       TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
3021 	  /* Each global register variable increases register pressure,
3022 	     so the more global reg vars there are, the smaller the regparm
3023 	     optimization's benefit, unless requested by the user explicitly.  */
3024 	  for (regno = 0; regno < 6; regno++)
3025 	    if (global_regs[regno])
3028 	    = globals < local_regparm ? local_regparm - globals : 0;
3030 	  if (local_regparm > regparm)
3031 	    regparm = local_regparm;
3038 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3039 DFmode (2) arguments in SSE registers for a function with the
3040 indicated TYPE and DECL. DECL may be NULL when calling function
3041 indirectly or considering a libcall. Otherwise return 0. */
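/* Example (editor's note, a sketch of the expected behaviour): a 32-bit
   function declared

     double __attribute__((sseregparm)) scale (double x, double y);

   compiled with -msse2 is expected to make this return 2, so both DFmode
   arguments arrive in SSE registers; with only -msse enabled the value 1
   restricts the treatment to SFmode arguments, and without SSE the
   attribute is diagnosed as an error below.  */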
3044 ix86_function_sseregparm (tree type, tree decl)
3046   /* Use SSE registers to pass SFmode and DFmode arguments if requested
3047      by the sseregparm attribute.  */
3048   if (TARGET_SSEREGPARM
3050 	  && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
3055 	    error ("Calling %qD with attribute sseregparm without "
3056 		   "SSE/SSE2 enabled", decl);
3058 	    error ("Calling %qT with attribute sseregparm without "
3059 		   "SSE/SSE2 enabled", type);
3066   /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3067      (and DFmode for SSE2) arguments in SSE registers,
3068      even for 32-bit targets.  */
3069   if (!TARGET_64BIT && decl
3070       && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3072       struct cgraph_local_info *i = cgraph_local_info (decl);
3074 	return TARGET_SSE2 ? 2 : 1;
3080 /* Return true if EAX is live at the start of the function. Used by
3081 ix86_expand_prologue to determine if we need special help before
3082 calling allocate_stack_worker. */
3085 ix86_eax_live_at_start_p (void)
3087   /* Cheat.  Don't bother working forward from ix86_function_regparm
3088      to the function type to whether an actual argument is located in
3089      eax.  Instead just look at cfg info, which is still close enough
3090      to correct at this point.  This gives false positives for broken
3091      functions that might use uninitialized data that happens to be
3092      allocated in eax, but who cares?  */
3093   return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
3096 /* Value is the number of bytes of arguments automatically
3097 popped when returning from a subroutine call.
3098 FUNDECL is the declaration node of the function (as a tree),
3099 FUNTYPE is the data type of the function (as a tree),
3100 or for a library call it is an identifier node for the subroutine name.
3101 SIZE is the number of bytes of arguments passed on the stack.
3103 On the 80386, the RTD insn may be used to pop them if the number
3104 of args is fixed, but if the number is variable then the caller
3105 must pop them all. RTD can't be used for library calls now
3106 because the library is compiled with the Unix compiler.
3107 Use of RTD is a selectable option, since it is incompatible with
3108 standard Unix calling sequences. If the option is not selected,
3109 the caller must always pop the args.
3111 The attribute stdcall is equivalent to RTD on a per module basis. */
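/* Example (editor's note): for a fixed-argument function declared

     int __attribute__((stdcall)) f (int a, int b);

   this returns 8, so the callee pops its two 4-byte arguments itself
   (a "ret $8"); a variadic or plain cdecl function returns 0 and leaves
   the cleanup to the caller.  */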
3114 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3116   int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3118   /* Cdecl functions override -mrtd, and never pop the stack.  */
3119   if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
3121     /* Stdcall and fastcall functions will pop the stack if not
3123     if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3124 	|| lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3128 	&& (TYPE_ARG_TYPES (funtype) == NULL_TREE
3129 	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
3130 		== void_type_node)))
3134   /* Lose any fake structure return argument if it is passed on the stack.  */
3135   if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3137       && !KEEP_AGGREGATE_RETURN_POINTER)
3139       int nregs = ix86_function_regparm (funtype, fundecl);
3142 	return GET_MODE_SIZE (Pmode);
3148 /* Argument support functions. */
3150 /* Return true when register may be used to pass function parameters. */
3152 ix86_function_arg_regno_p (int regno)
3158     return (regno < REGPARM_MAX
3159 	    || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3161     return (regno < REGPARM_MAX
3162 	    || (TARGET_MMX && MMX_REGNO_P (regno)
3163 		&& (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3164 	    || (TARGET_SSE && SSE_REGNO_P (regno)
3165 		&& (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3170       if (SSE_REGNO_P (regno) && TARGET_SSE)
3175       if (TARGET_SSE && SSE_REGNO_P (regno)
3176 	  && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3179   /* RAX is used as hidden argument to va_arg functions.  */
3182   for (i = 0; i < REGPARM_MAX; i++)
3183     if (regno == x86_64_int_parameter_registers[i])
3188 /* Return if we do not know how to pass TYPE solely in registers. */
3191 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
3193   if (must_pass_in_stack_var_size_or_pad (mode, type))
3196   /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
3197      The layout_type routine is crafty and tries to trick us into passing
3198      currently unsupported vector types on the stack by using TImode.  */
3199   return (!TARGET_64BIT && mode == TImode
3200 	  && type && TREE_CODE (type) != VECTOR_TYPE);
3203 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3204 for a call to a function whose data type is FNTYPE.
3205 For a library call, FNTYPE is 0. */
3208 init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
3209 		      tree fntype,	/* tree ptr for function decl */
3210 		      rtx libname,	/* SYMBOL_REF of library name or 0 */
3213   static CUMULATIVE_ARGS zero_cum;
3214   tree param, next_param;
3216   if (TARGET_DEBUG_ARG)
3218       fprintf (stderr, "\ninit_cumulative_args (");
3220 	fprintf (stderr, "fntype code = %s, ret code = %s",
3221 		 tree_code_name[(int) TREE_CODE (fntype)],
3222 		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
3224 	fprintf (stderr, "no fntype");
3227 	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
3232   /* Set up the number of registers to use for passing arguments.  */
3233   cum->nregs = ix86_regparm;
3235     cum->sse_nregs = SSE_REGPARM_MAX;
3237     cum->mmx_nregs = MMX_REGPARM_MAX;
3238   cum->warn_sse = true;
3239   cum->warn_mmx = true;
3240   cum->maybe_vaarg = false;
3242   /* Use ecx and edx registers if function has fastcall attribute,
3243      else look for regparm information.  */
3244   if (fntype && !TARGET_64BIT)
3246       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3252 	cum->nregs = ix86_function_regparm (fntype, fndecl);
3255   /* Set up the number of SSE registers used for passing SFmode
3256      and DFmode arguments.  Warn for mismatching ABI.  */
3257   cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3259   /* Determine if this function has variable arguments.  This is
3260      indicated by the last argument being 'void_type_node' if there
3261      are no variable arguments.  If there are variable arguments, then
3262      we won't pass anything in registers in 32-bit mode.  */
3264   if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
3266       for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
3267 	   param != 0; param = next_param)
3269 	  next_param = TREE_CHAIN (param);
3270 	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
3280 		  cum->float_in_sse = 0;
3282 	      cum->maybe_vaarg = true;
3286   if ((!fntype && !libname)
3287       || (fntype && !TYPE_ARG_TYPES (fntype)))
3288     cum->maybe_vaarg = true;
3290   if (TARGET_DEBUG_ARG)
3291     fprintf (stderr, ", nregs=%d )\n", cum->nregs);
/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
   But in the case of vector types, it is some vector mode.

   When we have only some of our vector isa extensions enabled, then there
   are some modes for which vector_mode_supported_p is false.  For these
   modes, the generic vector support in gcc will choose some non-vector mode
   in order to implement the type.  By computing the natural mode, we'll
   select the proper ABI location for the operand and not depend on whatever
   the middle-end decides to do with these vector types.  */

static enum machine_mode
type_natural_mode (tree type)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16)
	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
	  && TYPE_VECTOR_SUBPARTS (type) > 1)
	{
	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));

	  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
	    mode = MIN_MODE_VECTOR_FLOAT;
	  else
	    mode = MIN_MODE_VECTOR_INT;

	  /* Get the mode which has this inner mode and number of units.  */
	  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
		&& GET_MODE_INNER (mode) == innermode)
	      return mode;

	  gcc_unreachable ();
	}
    }

  return mode;
}
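/* Illustrative example (not taken from the original sources): a GNU C
   vector such as "int __attribute__ ((vector_size (16)))" has TYPE_MODE
   V4SImode when SSE is enabled but falls back to BLKmode when it is not.
   The search above still recovers V4SImode (four SImode units, 16 bytes),
   so the argument gets its proper ABI slot independently of -msse.  */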
/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
   this may not agree with the mode that the type system has chosen for the
   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */

static rtx
gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
		     unsigned int regno)
{
  rtx tmp;

  if (orig_mode != BLKmode)
    tmp = gen_rtx_REG (orig_mode, regno);
  else
    {
      tmp = gen_rtx_REG (mode, regno);
      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
    }

  return tmp;
}
/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
   of this code is to classify each 8bytes of incoming argument by the register
   class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
     MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS
      || class1 == X86_64_X87UP_CLASS
      || class1 == X86_64_COMPLEX_X87_CLASS
      || class2 == X86_64_X87_CLASS
      || class2 == X86_64_X87UP_CLASS
      || class2 == X86_64_COMPLEX_X87_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
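/* Worked example (illustrative only): for "struct { double d; long l; }"
   the first eightbyte classifies as SSEDF and the second as INTEGER, so
   no rule forces MEMORY and the struct travels in one SSE and one integer
   register.  A "long double" member instead yields X87/X87UP and rule #5
   pushes the whole aggregate onto the stack.  */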
/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies offset
   of the offset in bits modulo 256 to avoid overflow cases.

   See the x86-64 PS ABI for details.  */

static int
classify_argument (enum machine_mode mode, tree type,
		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
{
  HOST_WIDE_INT bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Variable sized entities are always passed/returned in memory.  */
  if (bytes < 0)
    return 0;

  if (mode != VOIDmode
      && targetm.calls.must_pass_in_stack (mode, type))
    return 0;

  if (type && AGGREGATE_TYPE_P (type))
    {
      int i;
      tree field;
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
      if (bytes > 16)
	return 0;

      for (i = 0; i < words; i++)
	classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
	 signalize memory class, so handle it as special case.  */
      if (!words)
	{
	  classes[0] = X86_64_NO_CLASS;
	  return 1;
	}

      /* Classify each field of record and merge classes.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	  /* And now merge the fields of structure.  */
	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  /* Bitfields are always classified as integer.  Handle them
		     early, since later code would consider them to be
		     misaligned integers.  */
		  if (DECL_BIT_FIELD (field))
		    {
		      for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
			   i < ((int_bit_position (field) + (bit_offset % 64))
				+ tree_low_cst (DECL_SIZE (field), 0)
				+ 63) / 8 / 8; i++)
			classes[i] =
			  merge_classes (X86_64_INTEGER_CLASS,
					 classes[i]);
		    }
		  else
		    {
		      num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					       TREE_TYPE (field), subclasses,
					       (int_bit_position (field)
						+ bit_offset) % 256);
		      if (!num)
			return 0;
		      for (i = 0; i < num; i++)
			{
			  int pos =
			    (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
			  classes[i + pos] =
			    merge_classes (subclasses[i], classes[i + pos]);
			}
		    }
		}
	    }
	  break;

	case ARRAY_TYPE:
	  /* Arrays are handled as small records.  */
	  {
	    int num;
	    num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
				     TREE_TYPE (type), subclasses, bit_offset);
	    if (!num)
	      return 0;

	    /* The partial classes are now full classes.  */
	    if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
	      subclasses[0] = X86_64_SSE_CLASS;
	    if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
	      subclasses[0] = X86_64_INTEGER_CLASS;

	    for (i = 0; i < words; i++)
	      classes[i] = subclasses[i % num];
	  }
	  break;

	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  /* Unions are similar to RECORD_TYPE but offset is always 0.  */
	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					   TREE_TYPE (field), subclasses,
					   bit_offset);
		  if (!num)
		    return 0;
		  for (i = 0; i < num; i++)
		    classes[i] = merge_classes (subclasses[i], classes[i]);
		}
	    }
	  break;

	default:
	  gcc_unreachable ();
	}

      /* Final merger cleanup.  */
      for (i = 0; i < words; i++)
	{
	  /* If one class is MEMORY, everything should be passed in
	     memory.  */
	  if (classes[i] == X86_64_MEMORY_CLASS)
	    return 0;

	  /* The X86_64_SSEUP_CLASS should be always preceded by
	     X86_64_SSE_CLASS.  */
	  if (classes[i] == X86_64_SSEUP_CLASS
	      && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
	    classes[i] = X86_64_SSE_CLASS;

	  /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
	  if (classes[i] == X86_64_X87UP_CLASS
	      && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
	    classes[i] = X86_64_SSE_CLASS;
	}
      return words;
    }

  /* Compute alignment needed.  We align all types to natural boundaries with
     exception of XFmode that is aligned to 64bits.  */
  if (mode != VOIDmode && mode != BLKmode)
    {
      int mode_alignment = GET_MODE_BITSIZE (mode);

      if (mode == XFmode)
	mode_alignment = 128;
      else if (mode == XCmode)
	mode_alignment = 256;
      if (COMPLEX_MODE_P (mode))
	mode_alignment /= 2;
      /* Misaligned fields are always returned in memory.  */
      if (bit_offset % mode_alignment)
	return 0;
    }

  /* for V1xx modes, just use the base mode */
  if (VECTOR_MODE_P (mode)
      && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
    mode = GET_MODE_INNER (mode);

  /* Classification of atomic types.  */
  switch (mode)
    {
    case SDmode:
    case DDmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case TDmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case DImode:
    case SImode:
    case HImode:
    case QImode:
      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
	classes[0] = X86_64_INTEGERSI_CLASS;
      else
	classes[0] = X86_64_INTEGER_CLASS;
      return 1;
    case CDImode:
    case TImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      return 2;
    case SFmode:
      if (!(bit_offset % 64))
	classes[0] = X86_64_SSESF_CLASS;
      else
	classes[0] = X86_64_SSE_CLASS;
      return 1;
    case DFmode:
      classes[0] = X86_64_SSEDF_CLASS;
      return 1;
    case XFmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      return 2;
    case TFmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case SCmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case DCmode:
      classes[0] = X86_64_SSEDF_CLASS;
      classes[1] = X86_64_SSEDF_CLASS;
      return 2;
    case XCmode:
      classes[0] = X86_64_COMPLEX_X87_CLASS;
      return 2;
    case TCmode:
      /* This modes is larger than 16 bytes.  */
      return 0;
    case V4SFmode:
    case V4SImode:
    case V16QImode:
    case V8HImode:
    case V2DFmode:
    case V2DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case V2SFmode:
    case V2SImode:
    case V4HImode:
    case V8QImode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case BLKmode:
    case VOIDmode:
      return 0;
    default:
      gcc_assert (VECTOR_MODE_P (mode));

      if (bytes > 16)
	return 0;

      gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);

      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
	classes[0] = X86_64_INTEGERSI_CLASS;
      else
	classes[0] = X86_64_INTEGER_CLASS;
      classes[1] = X86_64_INTEGER_CLASS;
      return 1 + (bytes > 8);
    }
}
/* Examine the argument and return set number of register required in each
   class.  Return 0 iff parameter should be passed in memory.  */

static int
examine_argument (enum machine_mode mode, tree type, int in_return,
		  int *int_nregs, int *sse_nregs)
{
  enum x86_64_reg_class class[MAX_CLASSES];
  int n = classify_argument (mode, type, class, 0);

  *int_nregs = 0;
  *sse_nregs = 0;
  if (!n)
    return 0;
  for (n--; n >= 0; n--)
    switch (class[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	(*int_nregs)++;
	break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	(*sse_nregs)++;
	break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
	break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
	if (!in_return)
	  return 0;
	break;
      case X86_64_COMPLEX_X87_CLASS:
	return in_return ? 2 : 0;
      case X86_64_MEMORY_CLASS:
	gcc_unreachable ();
      }
  return 1;
}
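/* Example (illustrative): for the two-eightbyte struct mentioned above,
   examine_argument yields *int_nregs == 1 and *sse_nregs == 1; the caller
   can then check whether enough registers of each kind remain and fall
   back to the stack otherwise.  */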
/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.  */

static rtx
construct_container (enum machine_mode mode, enum machine_mode orig_mode,
		     tree type, int in_return, int nintregs, int nsseregs,
		     const int *intreg, int sse_regno)
{
  /* The following variables hold the static issued_error state.  */
  static bool issued_sse_arg_error;
  static bool issued_sse_ret_error;
  static bool issued_x87_ret_error;

  enum machine_mode tmpmode;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class class[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, class, 0);
  if (TARGET_DEBUG_ARG)
    {
      if (!n)
	fprintf (stderr, "Memory class\n");
      else
	{
	  fprintf (stderr, "Classes:");
	  for (i = 0; i < n; i++)
	    fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
	  fprintf (stderr, "\n");
	}
    }
  if (!n)
    return NULL;
  if (!examine_argument (mode, type, in_return, &needed_intregs,
			 &needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
     some less clueful developer tries to use floating-point anyway.  */
  if (needed_sseregs && !TARGET_SSE)
    {
      if (in_return)
	{
	  if (!issued_sse_ret_error)
	    {
	      error ("SSE register return with SSE disabled");
	      issued_sse_ret_error = true;
	    }
	}
      else if (!issued_sse_arg_error)
	{
	  error ("SSE register argument with SSE disabled");
	  issued_sse_arg_error = true;
	}
      return NULL;
    }

  /* Likewise, error if the ABI requires us to return values in the
     x87 registers and the user specified -mno-80387.  */
  if (!TARGET_80387 && in_return)
    for (i = 0; i < n; i++)
      if (class[i] == X86_64_X87_CLASS
	  || class[i] == X86_64_X87UP_CLASS
	  || class[i] == X86_64_COMPLEX_X87_CLASS)
	{
	  if (!issued_x87_ret_error)
	    {
	      error ("x87 register return with x87 disabled");
	      issued_x87_ret_error = true;
	    }
	  return NULL;
	}

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (class[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
      case X86_64_X87_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	gcc_unreachable ();
      }
  if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
  if (n == 2
      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
  if (n == 2 && class[0] == X86_64_INTEGER_CLASS
      && class[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == TFmode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      switch (class[i])
	{
	case X86_64_NO_CLASS:
	  break;
	case X86_64_INTEGER_CLASS:
	case X86_64_INTEGERSI_CLASS:
	  /* Merge TImodes on aligned occasions here too.  */
	  if (i * 8 + 8 > bytes)
	    tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
	  else if (class[i] == X86_64_INTEGERSI_CLASS)
	    tmpmode = SImode;
	  else
	    tmpmode = DImode;
	  /* We've requested 24 bytes we don't have mode for.  Use DImode.  */
	  if (tmpmode == BLKmode)
	    tmpmode = DImode;
	  exp[nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					    gen_rtx_REG (tmpmode, *intreg),
					    GEN_INT (i * 8));
	  intreg++;
	  break;
	case X86_64_SSESF_CLASS:
	  exp[nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					    gen_rtx_REG (SFmode,
							 SSE_REGNO (sse_regno)),
					    GEN_INT (i * 8));
	  sse_regno++;
	  break;
	case X86_64_SSEDF_CLASS:
	  exp[nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					    gen_rtx_REG (DFmode,
							 SSE_REGNO (sse_regno)),
					    GEN_INT (i * 8));
	  sse_regno++;
	  break;
	case X86_64_SSE_CLASS:
	  if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
	    tmpmode = TImode;
	  else
	    tmpmode = DImode;
	  exp[nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					    gen_rtx_REG (tmpmode,
							 SSE_REGNO (sse_regno)),
					    GEN_INT (i * 8));
	  if (tmpmode == TImode)
	    i++;
	  sse_regno++;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* Empty aligned struct, union or class.  */
  if (nexps == 0)
    return NULL;

  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp[i];
  return ret;
}
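/* The PARALLEL built above records, piece by piece, where each eightbyte
   lives, e.g. (parallel [(expr_list (reg:DI di) (const_int 0))
		          (expr_list (reg:DF xmm0) (const_int 8))])
   for a mixed integer/double aggregate.  The shape shown is illustrative
   only; the exact registers depend on how many remain at the call site.  */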
/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

void
function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
		      tree type, int named)
{
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (type)
    mode = type_natural_mode (type);

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
	     "mode=%s, named=%d)\n\n",
	     words, cum->words, cum->nregs, cum->sse_nregs,
	     GET_MODE_NAME (mode), named);

  if (TARGET_64BIT)
    {
      int int_nregs, sse_nregs;
      if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
	cum->words += words;
      else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
	{
	  cum->nregs -= int_nregs;
	  cum->sse_nregs -= sse_nregs;
	  cum->regno += int_nregs;
	  cum->sse_regno += sse_nregs;
	}
      else
	cum->words += words;
    }
  else
    {
      switch (mode)
	{
	default:
	  break;

	case BLKmode:
	  if (bytes < 0)
	    break;
	  /* FALLTHRU */

	case DImode:
	case SImode:
	case HImode:
	case QImode:
	  cum->words += words;
	  cum->nregs -= words;
	  cum->regno += words;

	  if (cum->nregs <= 0)
	    {
	      cum->nregs = 0;
	      cum->regno = 0;
	    }
	  break;

	case DFmode:
	  if (cum->float_in_sse < 2)
	    break;
	case SFmode:
	  if (cum->float_in_sse < 1)
	    break;
	  /* FALLTHRU */

	case TImode:
	case V16QImode:
	case V8HImode:
	case V4SImode:
	case V2DImode:
	case V4SFmode:
	case V2DFmode:
	  if (!type || !AGGREGATE_TYPE_P (type))
	    {
	      cum->sse_words += words;
	      cum->sse_nregs -= 1;
	      cum->sse_regno += 1;
	      if (cum->sse_nregs <= 0)
		{
		  cum->sse_nregs = 0;
		  cum->sse_regno = 0;
		}
	    }
	  break;

	case V8QImode:
	case V4HImode:
	case V2SImode:
	case V2SFmode:
	  if (!type || !AGGREGATE_TYPE_P (type))
	    {
	      cum->mmx_words += words;
	      cum->mmx_nregs -= 1;
	      cum->mmx_regno += 1;
	      if (cum->mmx_nregs <= 0)
		{
		  cum->mmx_nregs = 0;
		  cum->mmx_regno = 0;
		}
	    }
	  break;
	}
    }
}
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

rtx
function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
	      tree type, int named)
{
  enum machine_mode mode = orig_mode;
  rtx ret = NULL_RTX;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  static bool warnedsse, warnedmmx;

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    mode = type_natural_mode (type);

  /* Handle a hidden AL argument containing number of registers for varargs
     x86-64 functions.  For i386 ABI just return constm1_rtx to avoid
     any AL settings.  */
  if (mode == VOIDmode)
    {
      if (TARGET_64BIT)
	return GEN_INT (cum->maybe_vaarg
			? (cum->sse_nregs < 0
			   ? SSE_REGPARM_MAX
			   : cum->sse_regno)
			: -1);
      else
	return constm1_rtx;
    }
  if (TARGET_64BIT)
    ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
			       cum->sse_nregs,
			       &x86_64_int_parameter_registers[cum->regno],
			       cum->sse_regno);
  else
    switch (mode)
      {
	/* For now, pass fp/complex values on the stack.  */
      default:
	break;

      case BLKmode:
	if (bytes < 0)
	  break;
	/* FALLTHRU */
      case DImode:
      case SImode:
      case HImode:
      case QImode:
	if (words <= cum->nregs)
	  {
	    int regno = cum->regno;

	    /* Fastcall allocates the first two DWORD (SImode) or
	       smaller arguments to ECX and EDX.  */
	    if (cum->fastcall)
	      {
		if (mode == BLKmode || mode == DImode)
		  break;

		/* ECX not EAX is the first allocated register.  */
		if (regno == 0)
		  regno = 2;
	      }
	    ret = gen_rtx_REG (mode, regno);
	  }
	break;
      case DFmode:
	if (cum->float_in_sse < 2)
	  break;
      case SFmode:
	if (cum->float_in_sse < 1)
	  break;
	/* FALLTHRU */
      case TImode:
      case V16QImode:
      case V8HImode:
      case V4SImode:
      case V2DImode:
      case V4SFmode:
      case V2DFmode:
	if (!type || !AGGREGATE_TYPE_P (type))
	  {
	    if (!TARGET_SSE && !warnedsse && cum->warn_sse)
	      {
		warnedsse = true;
		warning (0, "SSE vector argument without SSE enabled "
			 "changes the ABI");
	      }
	    if (cum->sse_nregs)
	      ret = gen_reg_or_parallel (mode, orig_mode,
					 cum->sse_regno + FIRST_SSE_REG);
	  }
	break;
      case V8QImode:
      case V4HImode:
      case V2SImode:
      case V2SFmode:
	if (!type || !AGGREGATE_TYPE_P (type))
	  {
	    if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
	      {
		warnedmmx = true;
		warning (0, "MMX vector argument without MMX enabled "
			 "changes the ABI");
	      }
	    if (cum->mmx_nregs)
	      ret = gen_reg_or_parallel (mode, orig_mode,
					 cum->mmx_regno + FIRST_MMX_REG);
	  }
	break;
      }

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

      if (ret)
	print_simple_rtl (stderr, ret);
      else
	fprintf (stderr, ", stack");

      fprintf (stderr, " )\n");
    }

  return ret;
}
/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.  */

static bool
ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			tree type, bool named ATTRIBUTE_UNUSED)
{
  if (!TARGET_64BIT)
    return 0;

  if (type && int_size_in_bytes (type) == -1)
    {
      if (TARGET_DEBUG_ARG)
	fprintf (stderr, "function_arg_pass_by_reference\n");
      return 1;
    }

  return 0;
}
/* Return true when TYPE should be 128bit aligned for 32bit argument passing
   ABI.  Only called if TARGET_SSE.  */

static bool
contains_128bit_aligned_vector_p (tree type)
{
  enum machine_mode mode = TYPE_MODE (type);
  if (SSE_REG_MODE_P (mode)
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    return true;
  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* Walk all the structure fields.  */
	    for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages passes arrays by value.  */
	  if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  return false;
}
/* Gives the alignment boundary, in bits, of an argument with the
   specified mode and type.  */

int
ix86_function_arg_boundary (enum machine_mode mode, tree type)
{
  int align;
  if (type)
    align = TYPE_ALIGN (type);
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  if (!TARGET_64BIT)
    {
      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
	 make an exception for SSE modes since these require 128bit
	 alignment.

	 The handling here differs from field_alignment.  ICC aligns MMX
	 arguments to 4 byte boundaries, while structure fields are aligned
	 to 8 byte boundaries.  */
      if (!TARGET_SSE)
	align = PARM_BOUNDARY;
      else if (!type)
	{
	  if (!SSE_REG_MODE_P (mode))
	    align = PARM_BOUNDARY;
	}
      else
	{
	  if (!contains_128bit_aligned_vector_p (type))
	    align = PARM_BOUNDARY;
	}
    }
  if (align > 128)
    align = 128;
  return align;
}
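/* Example (illustrative): on ia32 a "double" argument stays at the 32-bit
   PARM_BOUNDARY, while an __m128 argument, or a struct containing one, is
   bumped to 128 bits so the callee may use aligned SSE loads; the final
   clamp keeps the answer at or below 128.  */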
/* Return true if N is a possible register number of function value.  */

int
ix86_function_value_regno_p (int regno)
{
  if (TARGET_MACHO)
    {
      if (!TARGET_64BIT)
	{
	  return ((regno) == 0
		  || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
		  || ((regno) == FIRST_SSE_REG && TARGET_SSE));
	}
      return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
	      || ((regno) == FIRST_SSE_REG && TARGET_SSE)
	      || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
    }
  else
    {
      if (regno == 0
	  || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
	  || (regno == FIRST_SSE_REG && TARGET_SSE))
	return true;

      if (!TARGET_64BIT
	  && (regno == FIRST_MMX_REG && TARGET_MMX))
	return true;

      return false;
    }
}
/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */

rtx
ix86_function_value (tree valtype, tree fntype_or_decl,
		     bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode natmode = type_natural_mode (valtype);

  if (TARGET_64BIT)
    {
      rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
				     1, REGPARM_MAX, SSE_REGPARM_MAX,
				     x86_64_int_return_registers, 0);
      /* For zero sized structures, construct_container return NULL, but we
	 need to keep rest of compiler happy by returning meaningful value.  */
      if (!ret)
	ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
      return ret;
    }
  else
    {
      tree fn = NULL_TREE, fntype;
      if (fntype_or_decl
	  && DECL_P (fntype_or_decl))
	fn = fntype_or_decl;
      fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
      return gen_rtx_REG (TYPE_MODE (valtype),
			  ix86_value_regno (natmode, fn, fntype));
    }
}
/* Return true iff type is returned in memory.  */

int
ix86_return_in_memory (tree type)
{
  int needed_intregs, needed_sseregs, size;
  enum machine_mode mode = type_natural_mode (type);

  if (TARGET_64BIT)
    return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);

  if (mode == BLKmode)
    return 1;

  size = int_size_in_bytes (type);

  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
    return 0;

  if (VECTOR_MODE_P (mode) || mode == TImode)
    {
      /* User-created vectors small enough to fit in EAX.  */
      if (size < 8)
	return 0;

      /* MMX/3dNow values are returned in MM0,
	 except when it doesn't exits.  */
      if (size == 8)
	return (TARGET_MMX ? 0 : 1);

      /* SSE values are returned in XMM0, except when it doesn't exist.  */
      if (size == 16)
	return (TARGET_SSE ? 0 : 1);
    }

  if (mode == XFmode)
    return 0;

  if (size > 12)
    return 1;
  return 0;
}
/* When returning SSE vector types, we have a choice of either
     (1) being abi incompatible with a -march switch, or
     (2) generating an error.
   Given no good solution, I think the safest thing is one warning.
   The user won't be able to use -Werror, but....

   Choose the STRUCT_VALUE_RTX hook because that's (at present) only
   called in response to actually generating a caller or callee that
   uses such a type.  As opposed to RETURN_IN_MEMORY, which is called
   via aggregate_value_p for general type probing from tree-ssa.  */

static rtx
ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
{
  static bool warnedsse, warnedmmx;

  if (type)
    {
      /* Look at the return type of the function, not the function type.  */
      enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));

      if (!TARGET_SSE && !warnedsse)
	{
	  if (mode == TImode
	      || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
	    {
	      warnedsse = true;
	      warning (0, "SSE vector return without SSE enabled "
		       "changes the ABI");
	    }
	}

      if (!TARGET_MMX && !warnedmmx)
	{
	  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
	    {
	      warnedmmx = true;
	      warning (0, "MMX vector return without MMX enabled "
		       "changes the ABI");
	    }
	}
    }

  return NULL;
}
/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */

rtx
ix86_libcall_value (enum machine_mode mode)
{
  if (TARGET_64BIT)
    {
      switch (mode)
	{
	case SFmode:
	case SCmode:
	case DFmode:
	case DCmode:
	case TFmode:
	  return gen_rtx_REG (mode, FIRST_SSE_REG);
	case XFmode:
	case XCmode:
	  return gen_rtx_REG (mode, FIRST_FLOAT_REG);
	case TCmode:
	  return 0;
	default:
	  return gen_rtx_REG (mode, 0);
	}
    }
  else
    return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
}
/* Given a mode, return the register to use for a return value.  */

static int
ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
{
  gcc_assert (!TARGET_64BIT);

  /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
     we normally prevent this case when mmx is not available.  However
     some ABIs may require the result to be returned like DImode.  */
  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
    return TARGET_MMX ? FIRST_MMX_REG : 0;

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.  However some ABIs
     may require the result to be returned like integer TImode.  */
  if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    return TARGET_SSE ? FIRST_SSE_REG : 0;

  /* Decimal floating point values can go in %eax, unlike other float modes.  */
  if (DECIMAL_FLOAT_MODE_P (mode))
    return 0;

  /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values.  */
  if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
    return 0;

  /* Floating point return values in %st(0), except for local functions when
     SSE math is enabled or for functions with sseregparm attribute.  */
  if ((func || fntype)
      && (mode == SFmode || mode == DFmode))
    {
      int sse_level = ix86_function_sseregparm (fntype, func);
      if ((sse_level >= 1 && mode == SFmode)
	  || (sse_level == 2 && mode == DFmode))
	return FIRST_SSE_REG;
    }

  return FIRST_FLOAT_REG;
}
/* Create the va_list data type.  */

static tree
ix86_build_builtin_va_list (void)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* For i386 we use plain pointer to argument area.  */
  if (!TARGET_64BIT)
    return build_pointer_type (char_type_node);

  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
  type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
		      unsigned_type_node);
  f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
		      unsigned_type_node);
  f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
		      ptr_type_node);

  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TREE_CHAIN (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  TREE_CHAIN (f_gpr) = f_fpr;
  TREE_CHAIN (f_fpr) = f_ovf;
  TREE_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
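/* For reference, the record built above corresponds roughly to the C
   declaration below (a sketch, not code taken from this file):

     typedef struct __va_list_tag {
       unsigned int gp_offset;     -- bytes into reg_save_area for GPR args
       unsigned int fp_offset;     -- bytes into reg_save_area for SSE args
       void *overflow_arg_area;    -- next stack-passed argument
       void *reg_save_area;        -- base of the prologue register save block
     } __va_list_tag;
     typedef __va_list_tag va_list[1];
*/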
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */

static void
ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			     tree type, int *pretend_size ATTRIBUTE_UNUSED,
			     int no_rtl ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS next_cum;
  rtx save_area = NULL_RTX, mem;
  rtx label;
  rtx label_ref;
  rtx tmp_reg;
  rtx nsse_reg;
  int set;
  tree fntype;
  int stdarg_p;
  int i;

  if (!TARGET_64BIT)
    return;

  if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
    return;

  /* Indicate to allocate space on the stack for varargs save area.  */
  ix86_save_varrargs_registers = 1;
  cfun->stack_alignment_needed = 128;

  fntype = TREE_TYPE (current_function_decl);
  stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
	      && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
		  != void_type_node));

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p)
    function_arg_advance (&next_cum, mode, type, 1);

  save_area = frame_pointer_rtx;

  set = get_varargs_alias_set ();

  for (i = next_cum.regno;
       i < REGPARM_MAX
       && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
       i++)
    {
      mem = gen_rtx_MEM (Pmode,
			 plus_constant (save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem, gen_rtx_REG (Pmode,
					x86_64_int_parameter_registers[i]));
    }

  if (next_cum.sse_nregs && cfun->va_list_fpr_size)
    {
      /* Now emit code to save SSE registers.  The AX parameter contains number
	 of SSE parameter registers used to call this function.  We use
	 sse_prologue_save insn template that produces computed jump across
	 SSE saves.  We need some preparation work to get this working.  */

      label = gen_label_rtx ();
      label_ref = gen_rtx_LABEL_REF (Pmode, label);

      /* Compute address to jump to :
	 label - 5*eax + nnamed_sse_arguments*5  */
      tmp_reg = gen_reg_rtx (Pmode);
      nsse_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
			      gen_rtx_MULT (Pmode, nsse_reg,
					    GEN_INT (4))));
      if (next_cum.sse_regno)
	emit_move_insn
	  (nsse_reg,
	   gen_rtx_CONST (DImode,
			  gen_rtx_PLUS (DImode,
					label_ref,
					GEN_INT (next_cum.sse_regno * 4))));
      else
	emit_move_insn (nsse_reg, label_ref);
      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));

      /* Compute address of memory block we save into.  We always use pointer
	 pointing 127 bytes after first byte to store - this is needed to keep
	 instruction size limited by 4 bytes.  */
      tmp_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
			      plus_constant (save_area,
					     8 * REGPARM_MAX + 127)));
      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      set_mem_align (mem, BITS_PER_WORD);

      /* And finally do the dirty job!  */
      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
					GEN_INT (next_cum.sse_regno), label));
    }
}
/* Implement va_start.  */

void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  tree type;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT)
    {
      std_expand_builtin_va_start (valist, nextarg);
      return;
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = current_function_args_info.words;
  n_gpr = current_function_args_info.regno;
  n_fpr = current_function_args_info.sse_regno;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
	     (int) words, (int) n_gpr, (int) n_fpr);

  if (cfun->va_list_gpr_size)
    {
      type = TREE_TYPE (gpr);
      t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
		  build_int_cst (type, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  if (cfun->va_list_fpr_size)
    {
      type = TREE_TYPE (fpr);
      t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
		  build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Find the overflow area.  */
  type = TREE_TYPE (ovf);
  t = make_tree (type, virtual_incoming_args_rtx);
  if (words != 0)
    t = build2 (PLUS_EXPR, type, t,
		build_int_cst (type, words * UNITS_PER_WORD));
  t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
    {
      /* Find the register save area.
	 Prologue of the function save it right above stack frame.  */
      type = TREE_TYPE (sav);
      t = make_tree (type, frame_pointer_rtx);
      t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}
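/* Example (illustrative): for "int f (const char *fmt, ...)" the single
   named argument consumes one GPR, so va_start stores gp_offset = 8 and
   fp_offset = 8 * REGPARM_MAX (48, no SSE registers used by named
   arguments), points overflow_arg_area at the incoming stack arguments
   and reg_save_area at the block saved by the prologue.  */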
/* Implement va_arg.  */

tree
ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  enum machine_mode nat_mode;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT)
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build_va_arg_indirect_ref (valist);
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);
  size = int_size_in_bytes (type);
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  nat_mode = type_natural_mode (type);
  container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr");
  DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();

  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      lab_false = create_artificial_label ();
      lab_over = create_artificial_label ();

      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);

      need_temp = (!REG_P (container)
		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
		       || TYPE_ALIGN (type) > 128));

      /* In case we are passing structure, verify that it is consecutive block
	 on the register save area.  If not we need to do moves.  */
      if (!need_temp && !REG_P (container))
	{
	  /* Verify that all registers are strictly consecutive  */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
		    need_temp = 1;
		}
	    }
	  else
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)
		    need_temp = 1;
		}
	    }
	}
      if (!need_temp)
	{
	  int_addr = addr;
	  sse_addr = addr;
	}
      else
	{
	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
	  DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
	  DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
	}

      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
	{
	  t = build_int_cst (TREE_TYPE (gpr),
			     (REGPARM_MAX - needed_intregs + 1) * 8);
	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  t = build_int_cst (TREE_TYPE (fpr),
			     (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
			     + REGPARM_MAX * 8);
	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
	{
	  /* int_addr = gpr + sav; */
	  t = fold_convert (ptr_type_node, gpr);
	  t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
	  t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  /* sse_addr = fpr + sav; */
	  t = fold_convert (ptr_type_node, fpr);
	  t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
	  t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
	  gimplify_and_add (t, pre_p);
	}
      if (need_temp)
	{
	  int i;
	  tree temp = create_tmp_var (type, "va_arg_tmp");

	  /* addr = &temp; */
	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
	  t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
	  gimplify_and_add (t, pre_p);

	  for (i = 0; i < XVECLEN (container, 0); i++)
	    {
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      enum machine_mode mode = GET_MODE (reg);
	      tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
	      tree addr_type = build_pointer_type (piece_type);
	      tree src_addr, src;
	      int src_offset;
	      tree dest_addr, dest;

	      if (SSE_REGNO_P (REGNO (reg)))
		{
		  src_addr = sse_addr;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		}
	      else
		{
		  src_addr = int_addr;
		  src_offset = REGNO (reg) * 8;
		}
	      src_addr = fold_convert (addr_type, src_addr);
	      src_addr = fold_build2 (PLUS_EXPR, addr_type, src_addr,
				      size_int (src_offset));
	      src = build_va_arg_indirect_ref (src_addr);

	      dest_addr = fold_convert (addr_type, addr);
	      dest_addr = fold_build2 (PLUS_EXPR, addr_type, dest_addr,
				       size_int (INTVAL (XEXP (slot, 1))));
	      dest = build_va_arg_indirect_ref (dest_addr);

	      t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
	      gimplify_and_add (t, pre_p);
	    }
	}

      if (needed_intregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
	  t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
	  t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
	  gimplify_and_add (t, pre_p);
	}

      t = build1 (GOTO_EXPR, void_type_node, lab_over);
      gimplify_and_add (t, pre_p);

      t = build1 (LABEL_EXPR, void_type_node, lab_false);
      append_to_statement_list (t, pre_p);
    }

  /* ... otherwise out of the overflow area.  */

  /* Care for on-stack alignment if needed.  */
  if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
      || integer_zerop (TYPE_SIZE (type)))
    t = ovf;
  else
    {
      HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
      t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
		  build_int_cst (TREE_TYPE (ovf), align - 1));
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -align));
    }
  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);

  t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
  gimplify_and_add (t2, pre_p);

  t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
	      build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
  t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
  gimplify_and_add (t, pre_p);

  if (container)
    {
      t = build1 (LABEL_EXPR, void_type_node, lab_over);
      append_to_statement_list (t, pre_p);
    }

  ptrtype = build_pointer_type (type);
  addr = fold_convert (ptrtype, addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}
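/* The GIMPLE emitted above for a plain "va_arg (ap, int)" boils down to
   the following pseudo-C (sketch only):

     if (ap->gp_offset >= 48) goto stack;
     addr = ap->reg_save_area + ap->gp_offset;
     ap->gp_offset += 8;
     goto done;
   stack:
     addr = ap->overflow_arg_area;           (aligned if the type needs it)
     ap->overflow_arg_area += 8;
   done:
     result = *(int *) addr;
*/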
/* Return nonzero if OPNUM's MEM should be matched
   in movabs* patterns.  */

int
ix86_check_movabs (rtx insn, int opnum)
{
  rtx set, mem;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  mem = XEXP (set, opnum);
  while (GET_CODE (mem) == SUBREG)
    mem = SUBREG_REG (mem);
  gcc_assert (MEM_P (mem));
  return (volatile_ok || !MEM_VOLATILE_P (mem));
}
/* Initialize the table of extra 80387 mathematical constants.  */

static void
init_ext_80387_constants (void)
{
  static const char * cst[5] =
  {
    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
  };
  int i;

  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
		    XFmode, &ext_80387_constants_table[i]);
    }

  ext_80387_constants_init = 1;
}
/* Return true if the constant is something that can be loaded with
   a special instruction.  */

int
standard_80387_constant_p (rtx x)
{
  REAL_VALUE_TYPE r;

  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
    return -1;

  if (x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (x == CONST1_RTX (GET_MODE (x)))
    return 2;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (GET_MODE (x) == XFmode
      && (optimize_size || TARGET_EXT_80387_CONSTANTS))
    {
      int i;

      if (! ext_80387_constants_init)
	init_ext_80387_constants ();

      for (i = 0; i < 5; i++)
	if (real_identical (&r, &ext_80387_constants_table[i]))
	  return i + 3;
    }

  /* Load of the constant -0.0 or -1.0 will be split as
     fldz;fchs or fld1;fchs sequence.  */
  if (real_isnegzero (&r))
    return 8;
  if (real_identical (&r, &dconstm1))
    return 9;

  return 0;
}
/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_80387_constant_opcode (rtx x)
{
  switch (standard_80387_constant_p (x))
    {
    case 1: return "fldz";
    case 2: return "fld1";
    case 3: return "fldlg2";
    case 4: return "fldln2";
    case 5: return "fldl2e";
    case 6: return "fldl2t";
    case 7: return "fldpi";
    case 8:
    case 9: return "#";
    default: gcc_unreachable ();
    }
}
/* Return the CONST_DOUBLE representing the 80387 constant that is
   loaded by the specified special instruction.  The argument IDX
   matches the return value from standard_80387_constant_p.  */

rtx
standard_80387_constant_rtx (int idx)
{
  int i;

  if (! ext_80387_constants_init)
    init_ext_80387_constants ();

  switch (idx)
    {
    case 3: case 4: case 5: case 6: case 7:
      i = idx - 3;
      break;
    default:
      gcc_unreachable ();
    }

  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
				       XFmode);
}
/* Return 1 if mode is a valid mode for sse.  */

static int
standard_sse_mode_p (enum machine_mode mode)
{
  switch (mode)
    {
    case V16QImode: case V8HImode: case V4SImode:
    case V2DImode: case V4SFmode: case V2DFmode:
      return 1;
    default:
      return 0;
    }
}
/* Return 1 if X is FP constant we can load to SSE register w/o using memory.  */

int
standard_sse_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);

  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (vector_all_ones_operand (x, mode)
      && standard_sse_mode_p (mode))
    return TARGET_SSE2 ? 2 : -1;

  return 0;
}
/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_sse_constant_opcode (rtx insn, rtx x)
{
  switch (standard_sse_constant_p (x))
    {
    case 1:
      if (get_attr_mode (insn) == MODE_V4SF)
	return "xorps\t%0, %0";
      else if (get_attr_mode (insn) == MODE_V2DF)
	return "xorpd\t%0, %0";
      else
	return "pxor\t%0, %0";
    case 2:
      return "pcmpeqd\t%0, %0";
    default:
      break;
    }
  gcc_unreachable ();
}
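/* Thus an all-zero vector constant is materialized by xoring a register
   with itself (xorps/xorpd/pxor, chosen to match the insn's mode and avoid
   domain-crossing penalties), and the all-ones pattern relies on SSE2's
   pcmpeqd of a register with itself.  */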
/* Returns 1 if OP contains a symbol reference */

int
symbolic_reference_mentioned_p (rtx op)
{
  const char *fmt;
  int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return 1;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return 1;
    }

  return 0;
}
/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.  */

int
ix86_can_use_return_insn_p (void)
{
  struct ix86_frame frame;

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32 pop, since that's all we can do
     with one instruction.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  ix86_compute_frame_layout (&frame);
  return frame.to_allocate == 0 && frame.nregs == 0;
}
/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

int
ix86_frame_pointer_required (void)
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return 1;

  /* Several x86 os'es need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return 1;

  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
     the frame pointer by default.  Turn it back on now if we've not
     got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!current_function_is_leaf
	  || ix86_current_function_calls_tls_descriptor))
    return 1;

  if (current_function_profile)
    return 1;

  return 0;
}
/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}
#if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

static int pic_labels_used;

/* Fills in the label name that should be used for a pc thunk for
   the given register.  */

static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  gcc_assert (!TARGET_64BIT);

  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}
/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

void
ix86_file_end (void)
{
  rtx xops[2];
  int regno;

  for (regno = 0; regno < 8; ++regno)
    {
      char name[32];

      if (! ((pic_labels_used >> regno) & 1))
	continue;

      get_pc_thunk_name (name, regno);

#if TARGET_MACHO
      if (TARGET_MACHO)
	{
	  switch_to_section (darwin_sections[text_coal_section]);
	  fputs ("\t.weak_definition\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputs ("\n\t.private_extern\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputs ("\n", asm_out_file);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}
      else
#endif
      if (USE_HIDDEN_LINKONCE)
	{
	  tree decl;

	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
			     error_mark_node);
	  TREE_PUBLIC (decl) = 1;
	  TREE_STATIC (decl) = 1;
	  DECL_ONE_ONLY (decl) = 1;

	  (*targetm.asm_out.unique_section) (decl, 0);
	  switch_to_section (get_named_section (decl, NULL, 0));

	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
	  fputs ("\t.hidden\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputc ('\n', asm_out_file);
	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
	}
      else
	{
	  switch_to_section (text_section);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

      xops[0] = gen_rtx_REG (SImode, regno);
      xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
      output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
      output_asm_insn ("ret", xops);
    }

  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();
}
/* Emit code for the SET_GOT patterns.  */

const char *
output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
{
  rtx xops[3];

  xops[0] = dest;

  if (TARGET_VXWORKS_RTP && flag_pic)
    {
      /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
      xops[2] = gen_rtx_MEM (Pmode,
			     gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);

      /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
	 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
	 an unadorned address.  */
      xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
      output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
      return "";
    }

  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
    {
      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());

      if (!flag_pic)
	output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
      else
	output_asm_insn ("call\t%a2", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
	 is what will be referenced by the Mach-O PIC subsystem.  */
      if (!label)
	ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
#endif

      (*targetm.asm_out.internal_label) (asm_out_file, "L",
					 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));

      if (flag_pic)
	output_asm_insn ("pop{l}\t%0", xops);
    }
  else
    {
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("call\t%X2", xops);
      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
	 is what will be referenced by the Mach-O PIC subsystem.  */
#if TARGET_MACHO
      if (!label)
	ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
      else
	targetm.asm_out.internal_label (asm_out_file, "L",
					CODE_LABEL_NUMBER (label));
#endif
    }

  if (TARGET_MACHO)
    return "";

  if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
    output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
  else
    output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);

  return "";
}
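/* On a typical ELF -fpic target with deep branch prediction the sequence
   emitted above is, schematically:

       call  __i686.get_pc_thunk.bx
       addl  $_GLOBAL_OFFSET_TABLE_, %ebx

   while the non-thunk variant uses the classic "call 1f; 1: popl %ebx"
   followed by the same add.  (Schematic only; the exact label and register
   depend on DEST and the target flags.)  */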
/* Generate an "push" pattern for input ARG.  */

static rtx
gen_push (rtx arg)
{
  return gen_rtx_SET (VOIDmode,
		      gen_rtx_MEM (Pmode,
				   gen_rtx_PRE_DEC (Pmode,
						    stack_pointer_rtx)),
		      arg);
}
/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  */

static unsigned int
ix86_select_alt_pic_regnum (void)
{
  if (current_function_is_leaf && !current_function_profile
      && !ix86_current_function_calls_tls_descriptor)
    {
      int i;
      for (i = 2; i >= 0; --i)
	if (!regs_ever_live[i])
	  return i;
    }

  return INVALID_REGNUM;
}
/* Return 1 if we need to save REGNO.  */
static int
ix86_save_reg (unsigned int regno, int maybe_eh_return)
{
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
	  || current_function_profile
	  || current_function_calls_eh_return
	  || current_function_uses_const_pool))
    {
      if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
	return 0;
      return 1;
    }

  if (current_function_calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
	{
	  unsigned test = EH_RETURN_DATA_REGNO (i);
	  if (test == INVALID_REGNUM)
	    break;
	  if (test == regno)
	    return 1;
	}
    }

  if (cfun->machine->force_align_arg_pointer
      && regno == REGNO (cfun->machine->force_align_arg_pointer))
    return 1;

  return (regs_ever_live[regno]
	  && !call_used_regs[regno]
	  && !fixed_regs[regno]
	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
/* Return number of registers to be saved on the stack.  */

static int
ix86_nsaved_regs (void)
{
  int nregs = 0;
  int regno;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      nregs++;
  return nregs;
}
/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (int from, int to)
{
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      gcc_assert (to == STACK_POINTER_REGNUM);

      if (from == ARG_POINTER_REGNUM)
	return frame.stack_pointer_offset;

      gcc_assert (from == FRAME_POINTER_REGNUM);
      return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}
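/* Example (illustrative): on ia32 with a frame pointer and one saved
   register, hard_frame_pointer_offset is 8 (return address plus saved
   %ebp), so eliminating the argument pointer to the hard frame pointer
   yields 8, while eliminating it to the stack pointer yields the full
   stack_pointer_offset computed by ix86_compute_frame_layout below.  */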
/* Fill structure ix86_frame about frame of currently computed function.  */

static void
ix86_compute_frame_layout (struct ix86_frame *frame)
{
  HOST_WIDE_INT total_size;
  unsigned int stack_alignment_needed;
  HOST_WIDE_INT offset;
  unsigned int preferred_alignment;
  HOST_WIDE_INT size = get_frame_size ();

  frame->nregs = ix86_nsaved_regs ();
  total_size = size;

  stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
  preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;

  /* During reload iteration the amount of registers saved can change.
     Recompute the value as needed.  Do not recompute when amount of registers
     didn't change as reload does multiple calls to the function and does not
     expect the decision to change within single iteration.  */
  if (!optimize_size
      && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
    {
      int count = frame->nregs;

      cfun->machine->use_fast_prologue_epilogue_nregs = count;
      /* The fast prologue uses move instead of push to save registers.  This
	 is significantly longer, but also executes faster as modern hardware
	 can execute the moves in parallel, but can't do that for push/pop.

	 Be careful about choosing what prologue to emit:  When function takes
	 many instructions to execute we may use slow version as well as in
	 case function is known to be outside hot spot (this is known with
	 feedback only).  Weight the size of function by number of registers
	 to save as it is cheap to use one or two push instructions but very
	 slow to use many of them.  */
      if (count)
	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
	  || (flag_branch_probabilities
	      && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
	cfun->machine->use_fast_prologue_epilogue = false;
      else
	cfun->machine->use_fast_prologue_epilogue
	  = !expensive_function_p (count);
    }
  if (TARGET_PROLOGUE_USING_MOVE
      && cfun->machine->use_fast_prologue_epilogue)
    frame->save_regs_using_mov = true;
  else
    frame->save_regs_using_mov = false;

  /* Skip return address and saved base pointer.  */
  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;

  frame->hard_frame_pointer_offset = offset;

  /* Do some sanity checking of stack_alignment_needed and
     preferred_alignment, since i386 port is the only using those features
     that may break easily.  */

  gcc_assert (!size || stack_alignment_needed);
  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (stack_alignment_needed
	      <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);

  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;

  /* Va-arg area */
  if (ix86_save_varrargs_registers)
    {
      offset += X86_64_VARARGS_SIZE;
      frame->va_arg_size = X86_64_VARARGS_SIZE;
    }
  else
    frame->va_arg_size = 0;

  /* Align start of frame for local function.  */
  frame->padding1 = ((offset + stack_alignment_needed - 1)
		     & -stack_alignment_needed) - offset;

  offset += frame->padding1;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when function calls alloca.  Alloca
     expander assumes that last current_function_outgoing_args_size
     of stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!current_function_is_leaf || current_function_calls_alloca
	  || ix86_current_function_calls_tls_descriptor))
    {
      offset += current_function_outgoing_args_size;
      frame->outgoing_arguments_size = current_function_outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!current_function_is_leaf || current_function_calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    frame->padding2 = ((offset + preferred_alignment - 1)
		       & -preferred_alignment) - offset;
  else
    frame->padding2 = 0;

  offset += frame->padding2;

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  frame->to_allocate =
    (size + frame->padding1 + frame->padding2
     + frame->outgoing_arguments_size + frame->va_arg_size);

  if ((!frame->to_allocate && frame->nregs <= 1)
      || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
    frame->save_regs_using_mov = false;

  if (TARGET_RED_ZONE && current_function_sp_is_unchanging
      && current_function_is_leaf
      && !ix86_current_function_calls_tls_descriptor)
    {
      frame->red_zone_size = frame->to_allocate;
      if (frame->save_regs_using_mov)
	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->to_allocate -= frame->red_zone_size;
  frame->stack_pointer_offset -= frame->red_zone_size;
#if 0
  fprintf (stderr, "\n");
  fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
  fprintf (stderr, "size: %ld\n", (long)size);
  fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
  fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
  fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
  fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
  fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
  fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
  fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
  fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
	   (long)frame->hard_frame_pointer_offset);
  fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
  fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
  fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
  fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
#endif
}
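/* Illustrative summary of the layout computed above (offsets are measured
   downward from the incoming return address; exact values depend on the
   target and function):

	return address (and saved %ebp)	-> hard_frame_pointer_offset
	register save area (nregs words)
	va_arg save area
	padding1				-> frame_pointer_offset
	local variables (size bytes)
	outgoing argument area
	padding2				-> stack_pointer_offset

   to_allocate covers everything below the register save area; on x86-64
   a leaf function may keep part of it in the red zone instead of
   explicitly adjusting the stack pointer.  */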
/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs (void)
{
  unsigned int regno;
  rtx insn;

  for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
    if (ix86_save_reg (regno, true))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}

/* Emit code to save registers using MOV insns.  First register
   is saved at POINTER + OFFSET.  */

static void
ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
{
  unsigned int regno;
  rtx insn;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, true))
      {
	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
					       Pmode, offset),
			       gen_rtx_REG (Pmode, regno));
	RTX_FRAME_RELATED_P (insn) = 1;
	offset += UNITS_PER_WORD;
      }
}
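/* Illustrative note: the two helpers above correspond to the two
   register-save strategies selected in ix86_compute_frame_layout.  For a
   function saving %ebx and %esi the push form looks roughly like

	push	%ebx
	push	%esi
	sub	$20, %esp

   while the fast (mov) form allocates the whole frame first and then
   stores the registers with plain moves,

	sub	$28, %esp
	mov	%ebx, 20(%esp)
	mov	%esi, 24(%esp)

   The offsets are only meant to show the shape of the generated code;
   the real values come from the frame layout.  */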
/* Expand prologue or epilogue stack adjustment.
   The pattern exist to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if %r11 register is live and cannot be freely used and positive
   otherwise.  */

static void
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
{
  rtx insn;

  if (! TARGET_64BIT)
    insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
  else if (x86_64_immediate_operand (offset, DImode))
    insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
  else
    {
      rtx r11;
      /* r11 is used by indirect sibcall return as well, set before the
	 epilogue and used after the epilogue.  ATM indirect sibcall
	 shouldn't be used together with huge frame sizes in one
	 function because of the frame_size check in sibcall.c.  */
      gcc_assert (style);
      r11 = gen_rtx_REG (DImode, R11_REG);
      insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
      if (style < 0)
	RTX_FRAME_RELATED_P (insn) = 1;
      insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
							       offset));
    }
  if (style < 0)
    RTX_FRAME_RELATED_P (insn) = 1;
}
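/* Illustrative note: on x86-64 an adjustment whose value does not fit in a
   sign-extended 32-bit immediate cannot be encoded in a single add/sub, so
   the last branch above first materializes the offset in %r11, roughly

	movabs	$0x100000000, %r11
	sub	%r11, %rsp

   which is why the code asserts STYLE != 0, i.e. that %r11 is free at
   this point.  The sequence is only a sketch of the emitted pattern.  */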
/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */

static rtx
ix86_internal_arg_pointer (void)
{
  bool has_force_align_arg_pointer =
    (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
			    TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
  if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
       && DECL_NAME (current_function_decl)
       && MAIN_NAME_P (DECL_NAME (current_function_decl))
       && DECL_FILE_SCOPE_P (current_function_decl))
      || ix86_force_align_arg_pointer
      || has_force_align_arg_pointer)
    {
      /* Nested functions can't realign the stack due to a register
	 conflict.  */
      if (DECL_CONTEXT (current_function_decl)
	  && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
	{
	  if (ix86_force_align_arg_pointer)
	    warning (0, "-mstackrealign ignored for nested functions");
	  if (has_force_align_arg_pointer)
	    error ("%s not supported for nested functions",
		   ix86_force_align_arg_pointer_string);
	  return virtual_incoming_args_rtx;
	}
      cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
      return copy_to_reg (cfun->machine->force_align_arg_pointer);
    }
  else
    return virtual_incoming_args_rtx;
}
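/* Illustrative note: register 2 above is %ecx, so when stack realignment is
   requested the incoming argument pointer is kept in a pseudo initialized
   from that register rather than being derived from %esp or %ebp; the
   prologue expansion below stores %esp+4 into it before the stack is
   aligned.  */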
/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
   This is called from dwarf2out.c to emit call frame instructions
   for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */

static void
ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
{
  rtx unspec = SET_SRC (pattern);
  gcc_assert (GET_CODE (unspec) == UNSPEC);

  switch (index)
    {
    case UNSPEC_REG_SAVE:
      dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
			      SET_DEST (pattern));
      break;
    case UNSPEC_DEF_CFA:
      dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
			 INTVAL (XVECEXP (unspec, 0, 0)));
      break;
    default:
      gcc_unreachable ();
    }
}
/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue (void)
{
  rtx insn;
  bool pic_reg_used;
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;

  ix86_compute_frame_layout (&frame);

  if (cfun->machine->force_align_arg_pointer)
    {
      rtx x, y;

      /* Grab the argument pointer.  */
      x = plus_constant (stack_pointer_rtx, 4);
      y = cfun->machine->force_align_arg_pointer;
      insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* The unwind info consists of two parts: install the fafp as the cfa,
	 and record the fafp as the "save register" of the stack pointer.
	 The latter is there in order that the unwinder can see where it
	 should restore the stack pointer across the and insn.  */
      x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
      x = gen_rtx_SET (VOIDmode, y, x);
      RTX_FRAME_RELATED_P (x) = 1;
      y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
			  UNSPEC_REG_SAVE);
      y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
      RTX_FRAME_RELATED_P (y) = 1;
      x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
      x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
      REG_NOTES (insn) = x;

      /* Align the stack.  */
      emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
			     GEN_INT (-16)));

      /* And here we cheat like madmen with the unwind info.  We force the
	 cfa register back to sp+4, which is exactly what it was at the
	 start of the function.  Re-pushing the return address results in
	 the return at the same spot relative to the cfa, and thus is
	 correct wrt the unwind info.  */
      x = cfun->machine->force_align_arg_pointer;
      x = gen_frame_mem (Pmode, plus_constant (x, -4));
      insn = emit_insn (gen_push (x));
      RTX_FRAME_RELATED_P (insn) = 1;

      x = GEN_INT (4);
      x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
      REG_NOTES (insn) = x;
    }

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  allocate = frame.to_allocate;

  if (!frame.save_regs_using_mov)
    ix86_emit_save_regs ();
  else
    allocate += frame.nregs * UNITS_PER_WORD;

  /* When using red zone we may start register saving before allocating
     the stack frame saving one cycle of the prologue.  */
  if (TARGET_RED_ZONE && frame.save_regs_using_mov)
    ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
				   : stack_pointer_rtx,
				   -frame.nregs * UNITS_PER_WORD);

  if (allocate == 0)
    ;
  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (-allocate), -1);
  else
    {
      /* Only valid for Win32.  */
      rtx eax = gen_rtx_REG (SImode, 0);
      bool eax_live = ix86_eax_live_at_start_p ();
      rtx t;

      gcc_assert (!TARGET_64BIT);

      if (eax_live)
	{
	  emit_insn (gen_push (eax));
	  allocate -= 4;
	}

      emit_move_insn (eax, GEN_INT (allocate));

      insn = emit_insn (gen_allocate_stack_worker (eax));
      RTX_FRAME_RELATED_P (insn) = 1;
      t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
      t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
					    t, REG_NOTES (insn));

      if (eax_live)
	{
	  if (frame_pointer_needed)
	    t = plus_constant (hard_frame_pointer_rtx,
			       allocate
			       - frame.to_allocate
			       - frame.nregs * UNITS_PER_WORD);
	  else
	    t = plus_constant (stack_pointer_rtx, allocate);
	  emit_move_insn (eax, gen_rtx_MEM (SImode, t));
	}
    }

  if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
    {
      if (!frame_pointer_needed || !frame.to_allocate)
	ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
      else
	ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
				       -frame.nregs * UNITS_PER_WORD);
    }

  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
	  || current_function_profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
	REGNO (pic_offset_table_rtx) = alt_pic_reg_used;

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      if (TARGET_64BIT)
	{
	  if (ix86_cmodel == CM_LARGE_PIC)
	    {
	      rtx tmp_reg = gen_rtx_REG (DImode,
					 FIRST_REX_INT_REG + 3 /* R11 */);
	      rtx label = gen_label_rtx ();
	      emit_label (label);
	      LABEL_PRESERVE_P (label) = 1;
	      gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
	      insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
	      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
	      insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
	      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
	      insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
					    pic_offset_table_rtx, tmp_reg));
	    }
	  else
	    insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
	}
      else
	insn = emit_insn (gen_set_got (pic_offset_table_rtx));

      /* Even with accurate pre-reload life analysis, we can wind up
	 deleting all references to the pic register after reload.
	 Consider if cross-jumping unifies two sides of a branch
	 controlled by a comparison vs the only read from a global.
	 In which case, allow the set_got to be deleted, though we're
	 too late to do anything about the ebx save in the prologue.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
    }

  /* Prevent function calls from being scheduled before the call to mcount.
     In the pic_reg_used case, make sure that the got load isn't deleted.  */
  if (current_function_profile)
    emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
}
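/* Illustrative note: for a typical 32-bit function with a frame pointer,
   push-based saves and no realignment, the insns emitted above amount to

	push	%ebp
	mov	%esp, %ebp
	push	%ebx
	push	%esi
	sub	$N, %esp		# N == frame.to_allocate

   When save_regs_using_mov is set the registers are instead stored with
   movs relative to %ebp or %esp after (or, with the red zone, before) the
   allocation, and a set_got / get_pc_thunk sequence is appended when the
   PIC register is live.  This is only a sketch of the usual output.  */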
/* Emit code to restore saved registers using MOV insns.  First register
   is restored from POINTER + OFFSET.  */

static void
ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
				  int maybe_eh_return)
{
  int regno;
  rtx base_address = gen_rtx_MEM (Pmode, pointer);

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, maybe_eh_return))
      {
	/* Ensure that adjust_address won't be forced to produce pointer
	   out of range allowed by x86-64 instruction set.  */
	if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
	  {
	    rtx r11;

	    r11 = gen_rtx_REG (DImode, R11_REG);
	    emit_move_insn (r11, GEN_INT (offset));
	    emit_insn (gen_adddi3 (r11, r11, pointer));
	    base_address = gen_rtx_MEM (Pmode, r11);
	    offset = 0;
	  }
	emit_move_insn (gen_rtx_REG (Pmode, regno),
			adjust_address (base_address, Pmode, offset));
	offset += UNITS_PER_WORD;
      }
}
/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue (int style)
{
  int regno;
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  struct ix86_frame frame;
  HOST_WIDE_INT offset;

  ix86_compute_frame_layout (&frame);

  /* Calculate start of saved registers relative to ebp.  Special care
     must be taken for the normal return case of a function using
     eh_return: the eax and edx registers are marked as saved, but not
     restored along this path.  */
  offset = frame.nregs;
  if (current_function_calls_eh_return && style != 2)
    offset -= 2;
  offset *= -UNITS_PER_WORD;

  /* If we're only restoring one register and sp is not valid then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.

     The default code results in stack adjustment using add/lea instruction,
     while this code results in LEAVE instruction (or discrete equivalent),
     so it is profitable in some other cases as well.  Especially when there
     are no registers to restore.  We also use this code when TARGET_USE_LEAVE
     and there is exactly one register to pop.  This heuristic may need some
     tuning in future.  */
  if ((!sp_valid && frame.nregs <= 1)
      || (TARGET_EPILOGUE_USING_MOVE
	  && cfun->machine->use_fast_prologue_epilogue
	  && (frame.nregs > 1 || frame.to_allocate))
      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
      || (frame_pointer_needed && TARGET_USE_LEAVE
	  && cfun->machine->use_fast_prologue_epilogue
	  && frame.nregs == 1)
      || current_function_calls_eh_return)
    {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  Only exception is esp pointing directly to the
	 end of block of saved registers, where we may simplify addressing
	 mode.  */

      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
					  frame.to_allocate, style == 2);
      else
	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
					  offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
	{
	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;

	  if (frame_pointer_needed)
	    {
	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      tmp = plus_constant (tmp, UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));

	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
	      emit_move_insn (hard_frame_pointer_rtx, tmp);

	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
					 const0_rtx, style);
	    }
	  else
	    {
	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      tmp = plus_constant (tmp, (frame.to_allocate
					 + frame.nregs * UNITS_PER_WORD));
	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
	    }
	}
      else if (!frame_pointer_needed)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (frame.to_allocate
					    + frame.nregs * UNITS_PER_WORD),
				   style);
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size
	       || !cfun->machine->use_fast_prologue_epilogue)
	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
      else
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
				     const0_rtx, style);
	  if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!sp_valid)
	{
	  gcc_assert (frame_pointer_needed);
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
				     GEN_INT (offset), style);
	}
      else if (frame.to_allocate)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (frame.to_allocate), style);

      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	if (ix86_save_reg (regno, false))
	  {
	    if (TARGET_64BIT)
	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
	    else
	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
	  }
      if (frame_pointer_needed)
	{
	  /* Leave results in shorter dependency chains on CPUs that are
	     able to grok it fast.  */
	  if (TARGET_USE_LEAVE)
	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
	  else if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }

  if (cfun->machine->force_align_arg_pointer)
    {
      emit_insn (gen_addsi3 (stack_pointer_rtx,
			     cfun->machine->force_align_arg_pointer,
			     GEN_INT (-4)));
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop
	 return address, do explicit add, and jump indirectly to the
	 caller.  */

      if (current_function_pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  /* There is no "pascal" calling convention in 64bit ABI.  */
	  gcc_assert (!TARGET_64BIT);

	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_jump_insn (gen_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}
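/* Illustrative note: the two epilogue shapes produced above are, for a
   frame-pointer function,

	leave				# or: mov %ebp, %esp / pop %ebp
	ret

   and, for the pop-based path,

	add	$N, %esp		# N == frame.to_allocate
	pop	%esi
	pop	%ebx
	ret	$M			# only when the callee pops M arg bytes

   Which shape is chosen follows the heuristics at the top of
   ix86_expand_epilogue (use_fast_prologue_epilogue, TARGET_USE_LEAVE,
   eh_return, and so on); the sequences are only sketches.  */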
/* Reset from the function's potential modifications.  */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  if (pic_offset_table_rtx)
    REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
#if TARGET_MACHO
  /* Mach-O doesn't support labels at the end of objects, so if
     it looks like we might want one, insert a NOP.  */
  {
    rtx insn = get_last_insn ();
    while (insn
	   && NOTE_P (insn)
	   && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
      insn = PREV_INSN (insn);
    if (insn
	&& (LABEL_P (insn)
	    || (NOTE_P (insn)
		&& NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
      fputs ("\tnop\n", file);
  }
#endif
}
6266 /* Extract the parts of an RTL expression that is a valid memory address
6267 for an instruction. Return 0 if the structure of the address is
6268 grossly off. Return -1 if the address contains ASHIFT, so it is not
6269 strictly valid, but still used for computing length of lea instruction. */
6272 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
6274 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
6275 rtx base_reg
, index_reg
;
6276 HOST_WIDE_INT scale
= 1;
6277 rtx scale_rtx
= NULL_RTX
;
6279 enum ix86_address_seg seg
= SEG_DEFAULT
;
6281 if (REG_P (addr
) || GET_CODE (addr
) == SUBREG
)
6283 else if (GET_CODE (addr
) == PLUS
)
6293 addends
[n
++] = XEXP (op
, 1);
6296 while (GET_CODE (op
) == PLUS
);
6301 for (i
= n
; i
>= 0; --i
)
6304 switch (GET_CODE (op
))
6309 index
= XEXP (op
, 0);
6310 scale_rtx
= XEXP (op
, 1);
6314 if (XINT (op
, 1) == UNSPEC_TP
6315 && TARGET_TLS_DIRECT_SEG_REFS
6316 && seg
== SEG_DEFAULT
)
6317 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
6346 else if (GET_CODE (addr
) == MULT
)
6348 index
= XEXP (addr
, 0); /* index*scale */
6349 scale_rtx
= XEXP (addr
, 1);
6351 else if (GET_CODE (addr
) == ASHIFT
)
6355 /* We're called for lea too, which implements ashift on occasion. */
6356 index
= XEXP (addr
, 0);
6357 tmp
= XEXP (addr
, 1);
6358 if (!CONST_INT_P (tmp
))
6360 scale
= INTVAL (tmp
);
6361 if ((unsigned HOST_WIDE_INT
) scale
> 3)
6367 disp
= addr
; /* displacement */
6369 /* Extract the integral value of scale. */
6372 if (!CONST_INT_P (scale_rtx
))
6374 scale
= INTVAL (scale_rtx
);
6377 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
6378 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
6380 /* Allow arg pointer and stack pointer as index if there is not scaling. */
6381 if (base_reg
&& index_reg
&& scale
== 1
6382 && (index_reg
== arg_pointer_rtx
6383 || index_reg
== frame_pointer_rtx
6384 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
6387 tmp
= base
, base
= index
, index
= tmp
;
6388 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
6391 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6392 if ((base_reg
== hard_frame_pointer_rtx
6393 || base_reg
== frame_pointer_rtx
6394 || base_reg
== arg_pointer_rtx
) && !disp
)
6397 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6398 Avoid this by transforming to [%esi+0]. */
6399 if (ix86_tune
== PROCESSOR_K6
&& !optimize_size
6400 && base_reg
&& !index_reg
&& !disp
6402 && REGNO_REG_CLASS (REGNO (base_reg
)) == SIREG
)
6405 /* Special case: encode reg+reg instead of reg*2. */
6406 if (!base
&& index
&& scale
&& scale
== 2)
6407 base
= index
, base_reg
= index_reg
, scale
= 1;
6409 /* Special case: scaling cannot be encoded without base or displacement. */
6410 if (!base
&& !disp
&& index
&& scale
!= 1)
6422 /* Return cost of the memory address x.
6423 For i386, it is better to use a complex address than let gcc copy
6424 the address into a reg and make a new pseudo. But not if the address
   requires two regs - that would mean more pseudos with longer
6428 ix86_address_cost (rtx x
)
6430 struct ix86_address parts
;
6432 int ok
= ix86_decompose_address (x
, &parts
);
6436 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
6437 parts
.base
= SUBREG_REG (parts
.base
);
6438 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
6439 parts
.index
= SUBREG_REG (parts
.index
);
6441 /* More complex memory references are better. */
6442 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
6444 if (parts
.seg
!= SEG_DEFAULT
)
6447 /* Attempt to minimize number of registers in the address. */
6449 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
6451 && (!REG_P (parts
.index
)
6452 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
6456 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
6458 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
6459 && parts
.base
!= parts
.index
)
6462 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
   since its predecode logic can't detect the length of instructions
6464 and it degenerates to vector decoded. Increase cost of such
6465 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6466 to split such addresses or even refuse such addresses at all.
6468 Following addressing modes are affected:
6473 The first and last case may be avoidable by explicitly coding the zero in
   memory address, but I don't have an AMD-K6 machine handy to check this
6478 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6479 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6480 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
6486 /* If X is a machine specific address (i.e. a symbol or label being
6487 referenced as a displacement from the GOT implemented using an
6488 UNSPEC), then return the base term. Otherwise return X. */
6491 ix86_find_base_term (rtx x
)
6497 if (GET_CODE (x
) != CONST
)
6500 if (GET_CODE (term
) == PLUS
6501 && (CONST_INT_P (XEXP (term
, 1))
6502 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
6503 term
= XEXP (term
, 0);
6504 if (GET_CODE (term
) != UNSPEC
6505 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
6508 term
= XVECEXP (term
, 0, 0);
6510 if (GET_CODE (term
) != SYMBOL_REF
6511 && GET_CODE (term
) != LABEL_REF
)
6517 term
= ix86_delegitimize_address (x
);
6519 if (GET_CODE (term
) != SYMBOL_REF
6520 && GET_CODE (term
) != LABEL_REF
)
6526 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
6531 darwin_local_data_pic (rtx disp
)
6533 if (GET_CODE (disp
) == MINUS
)
6535 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
6536 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
6537 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
6539 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
6540 if (! strcmp (sym_name
, "<pic base>"))
6548 /* Determine if a given RTX is a valid constant. We already know this
6549 satisfies CONSTANT_P. */
6552 legitimate_constant_p (rtx x
)
6554 switch (GET_CODE (x
))
6559 if (GET_CODE (x
) == PLUS
)
6561 if (!CONST_INT_P (XEXP (x
, 1)))
6566 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
6569 /* Only some unspecs are valid as "constants". */
6570 if (GET_CODE (x
) == UNSPEC
)
6571 switch (XINT (x
, 1))
6576 return TARGET_64BIT
;
6579 x
= XVECEXP (x
, 0, 0);
6580 return (GET_CODE (x
) == SYMBOL_REF
6581 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6583 x
= XVECEXP (x
, 0, 0);
6584 return (GET_CODE (x
) == SYMBOL_REF
6585 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
6590 /* We must have drilled down to a symbol. */
6591 if (GET_CODE (x
) == LABEL_REF
)
6593 if (GET_CODE (x
) != SYMBOL_REF
)
6598 /* TLS symbols are never valid. */
6599 if (SYMBOL_REF_TLS_MODEL (x
))
6604 if (GET_MODE (x
) == TImode
6605 && x
!= CONST0_RTX (TImode
)
6611 if (x
== CONST0_RTX (GET_MODE (x
)))
6619 /* Otherwise we handle everything else in the move patterns. */
6623 /* Determine if it's legal to put X into the constant pool. This
6624 is not possible for the address of thread-local symbols, which
6625 is checked above. */
6628 ix86_cannot_force_const_mem (rtx x
)
6630 /* We can always put integral constants and vectors in memory. */
6631 switch (GET_CODE (x
))
6641 return !legitimate_constant_p (x
);
6644 /* Determine if a given RTX is a valid constant address. */
6647 constant_address_p (rtx x
)
6649 return CONSTANT_P (x
) && legitimate_address_p (Pmode
, x
, 1);
6652 /* Nonzero if the constant value X is a legitimate general operand
6653 when generating PIC code. It is given that flag_pic is on and
6654 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6657 legitimate_pic_operand_p (rtx x
)
6661 switch (GET_CODE (x
))
6664 inner
= XEXP (x
, 0);
6665 if (GET_CODE (inner
) == PLUS
6666 && CONST_INT_P (XEXP (inner
, 1)))
6667 inner
= XEXP (inner
, 0);
6669 /* Only some unspecs are valid as "constants". */
6670 if (GET_CODE (inner
) == UNSPEC
)
6671 switch (XINT (inner
, 1))
6676 return TARGET_64BIT
;
6678 x
= XVECEXP (inner
, 0, 0);
6679 return (GET_CODE (x
) == SYMBOL_REF
6680 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6688 return legitimate_pic_address_disp_p (x
);
6695 /* Determine if a given CONST RTX is a valid memory displacement
6699 legitimate_pic_address_disp_p (rtx disp
)
6703 /* In 64bit mode we can allow direct addresses of symbols and labels
6704 when they are not dynamic symbols. */
6707 rtx op0
= disp
, op1
;
6709 switch (GET_CODE (disp
))
6715 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
6717 op0
= XEXP (XEXP (disp
, 0), 0);
6718 op1
= XEXP (XEXP (disp
, 0), 1);
6719 if (!CONST_INT_P (op1
)
6720 || INTVAL (op1
) >= 16*1024*1024
6721 || INTVAL (op1
) < -16*1024*1024)
6723 if (GET_CODE (op0
) == LABEL_REF
)
6725 if (GET_CODE (op0
) != SYMBOL_REF
)
6730 /* TLS references should always be enclosed in UNSPEC. */
6731 if (SYMBOL_REF_TLS_MODEL (op0
))
6733 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
)
6734 && ix86_cmodel
!= CM_LARGE_PIC
)
6742 if (GET_CODE (disp
) != CONST
)
6744 disp
= XEXP (disp
, 0);
      /* It is unsafe to allow PLUS expressions here.  This limits the
	 allowed distance of GOT table references.  We should not need
	 these anyway.  */
6750 if (GET_CODE (disp
) != UNSPEC
6751 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
6752 && XINT (disp
, 1) != UNSPEC_GOTOFF
6753 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
6756 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
6757 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
6763 if (GET_CODE (disp
) == PLUS
)
6765 if (!CONST_INT_P (XEXP (disp
, 1)))
6767 disp
= XEXP (disp
, 0);
6771 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
6774 if (GET_CODE (disp
) != UNSPEC
)
6777 switch (XINT (disp
, 1))
6782 /* We need to check for both symbols and labels because VxWorks loads
6783 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
6785 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
6786 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
	 While the ABI also specifies 32bit relocations, we don't produce
	 them in the small PIC model at all.  */
6791 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
6792 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
6794 return gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
6796 case UNSPEC_GOTTPOFF
:
6797 case UNSPEC_GOTNTPOFF
:
6798 case UNSPEC_INDNTPOFF
:
6801 disp
= XVECEXP (disp
, 0, 0);
6802 return (GET_CODE (disp
) == SYMBOL_REF
6803 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
6805 disp
= XVECEXP (disp
, 0, 0);
6806 return (GET_CODE (disp
) == SYMBOL_REF
6807 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
6809 disp
= XVECEXP (disp
, 0, 0);
6810 return (GET_CODE (disp
) == SYMBOL_REF
6811 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
6817 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6818 memory address for an instruction. The MODE argument is the machine mode
6819 for the MEM expression that wants to use this address.
6821 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6822 convert common non-canonical forms to canonical form so that they will
6826 legitimate_address_p (enum machine_mode mode
, rtx addr
, int strict
)
6828 struct ix86_address parts
;
6829 rtx base
, index
, disp
;
6830 HOST_WIDE_INT scale
;
6831 const char *reason
= NULL
;
6832 rtx reason_rtx
= NULL_RTX
;
6834 if (TARGET_DEBUG_ADDR
)
6837 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6838 GET_MODE_NAME (mode
), strict
);
6842 if (ix86_decompose_address (addr
, &parts
) <= 0)
6844 reason
= "decomposition failed";
6849 index
= parts
.index
;
6851 scale
= parts
.scale
;
6853 /* Validate base register.
6855 Don't allow SUBREG's that span more than a word here. It can lead to spill
6856 failures when the base is one word out of a two word structure, which is
6857 represented internally as a DImode int. */
6866 else if (GET_CODE (base
) == SUBREG
6867 && REG_P (SUBREG_REG (base
))
6868 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base
)))
6870 reg
= SUBREG_REG (base
);
6873 reason
= "base is not a register";
6877 if (GET_MODE (base
) != Pmode
)
6879 reason
= "base is not in Pmode";
6883 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
6884 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
6886 reason
= "base is not valid";
6891 /* Validate index register.
6893 Don't allow SUBREG's that span more than a word here -- same as above. */
6902 else if (GET_CODE (index
) == SUBREG
6903 && REG_P (SUBREG_REG (index
))
6904 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index
)))
6906 reg
= SUBREG_REG (index
);
6909 reason
= "index is not a register";
6913 if (GET_MODE (index
) != Pmode
)
6915 reason
= "index is not in Pmode";
6919 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
6920 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
6922 reason
= "index is not valid";
6927 /* Validate scale factor. */
6930 reason_rtx
= GEN_INT (scale
);
6933 reason
= "scale without index";
6937 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
6939 reason
= "scale is not a valid multiplier";
6944 /* Validate displacement. */
6949 if (GET_CODE (disp
) == CONST
6950 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
6951 switch (XINT (XEXP (disp
, 0), 1))
	  /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
	     when used.  While the ABI also specifies 32bit relocations, we
	     don't produce them at all and use IP-relative addressing
	     instead.  */
6958 gcc_assert (flag_pic
);
6960 goto is_legitimate_pic
;
6961 reason
= "64bit address unspec";
6964 case UNSPEC_GOTPCREL
:
6965 gcc_assert (flag_pic
);
6966 goto is_legitimate_pic
;
6968 case UNSPEC_GOTTPOFF
:
6969 case UNSPEC_GOTNTPOFF
:
6970 case UNSPEC_INDNTPOFF
:
6976 reason
= "invalid address unspec";
6980 else if (SYMBOLIC_CONST (disp
)
6984 && MACHOPIC_INDIRECT
6985 && !machopic_operand_p (disp
)
6991 if (TARGET_64BIT
&& (index
|| base
))
6993 /* foo@dtpoff(%rX) is ok. */
6994 if (GET_CODE (disp
) != CONST
6995 || GET_CODE (XEXP (disp
, 0)) != PLUS
6996 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
6997 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
6998 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
6999 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
7001 reason
= "non-constant pic memory reference";
7005 else if (! legitimate_pic_address_disp_p (disp
))
7007 reason
= "displacement is an invalid pic construct";
7011 /* This code used to verify that a symbolic pic displacement
7012 includes the pic_offset_table_rtx register.
7014 While this is good idea, unfortunately these constructs may
7015 be created by "adds using lea" optimization for incorrect
7024 This code is nonsensical, but results in addressing
7025 GOT table with pic_offset_table_rtx base. We can't
7026 just refuse it easily, since it gets matched by
7027 "addsi3" pattern, that later gets split to lea in the
7028 case output register differs from input. While this
7029 can be handled by separate addsi pattern for this case
7030 that never results in lea, this seems to be easier and
7031 correct fix for crash to disable this test. */
7033 else if (GET_CODE (disp
) != LABEL_REF
7034 && !CONST_INT_P (disp
)
7035 && (GET_CODE (disp
) != CONST
7036 || !legitimate_constant_p (disp
))
7037 && (GET_CODE (disp
) != SYMBOL_REF
7038 || !legitimate_constant_p (disp
)))
7040 reason
= "displacement is not constant";
7043 else if (TARGET_64BIT
7044 && !x86_64_immediate_operand (disp
, VOIDmode
))
7046 reason
= "displacement is out of range";
7051 /* Everything looks valid. */
7052 if (TARGET_DEBUG_ADDR
)
7053 fprintf (stderr
, "Success.\n");
7057 if (TARGET_DEBUG_ADDR
)
7059 fprintf (stderr
, "Error: %s\n", reason
);
7060 debug_rtx (reason_rtx
);
7065 /* Return a unique alias set for the GOT. */
7067 static HOST_WIDE_INT
7068 ix86_GOT_alias_set (void)
7070 static HOST_WIDE_INT set
= -1;
7072 set
= new_alias_set ();
7076 /* Return a legitimate reference for ORIG (an address) using the
7077 register REG. If REG is 0, a new pseudo is generated.
7079 There are two types of references that must be handled:
7081 1. Global data references must load the address from the GOT, via
7082 the PIC reg. An insn is emitted to do this load, and the reg is
7085 2. Static data references, constant pool addresses, and code labels
7086 compute the address as an offset from the GOT, whose base is in
7087 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7088 differentiate them from global data objects. The returned
7089 address is the PIC reg + an unspec constant.
7091 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7092 reg also appears in the address. */
7095 legitimize_pic_address (rtx orig
, rtx reg
)
7102 if (TARGET_MACHO
&& !TARGET_64BIT
)
7105 reg
= gen_reg_rtx (Pmode
);
7106 /* Use the generic Mach-O PIC machinery. */
7107 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
7111 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
7113 else if (TARGET_64BIT
7114 && ix86_cmodel
!= CM_SMALL_PIC
7115 && gotoff_operand (addr
, Pmode
))
7118 /* This symbol may be referenced via a displacement from the PIC
7119 base address (@GOTOFF). */
7121 if (reload_in_progress
)
7122 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7123 if (GET_CODE (addr
) == CONST
)
7124 addr
= XEXP (addr
, 0);
7125 if (GET_CODE (addr
) == PLUS
)
7127 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
7128 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
7131 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7132 new = gen_rtx_CONST (Pmode
, new);
7134 tmpreg
= gen_reg_rtx (Pmode
);
7137 emit_move_insn (tmpreg
, new);
7141 new = expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
7142 tmpreg
, 1, OPTAB_DIRECT
);
7145 else new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
7147 else if (!TARGET_64BIT
&& gotoff_operand (addr
, Pmode
))
7149 /* This symbol may be referenced via a displacement from the PIC
7150 base address (@GOTOFF). */
7152 if (reload_in_progress
)
7153 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7154 if (GET_CODE (addr
) == CONST
)
7155 addr
= XEXP (addr
, 0);
7156 if (GET_CODE (addr
) == PLUS
)
7158 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
7159 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
7162 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7163 new = gen_rtx_CONST (Pmode
, new);
7164 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7168 emit_move_insn (reg
, new);
7172 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
7173 /* We can't use @GOTOFF for text labels on VxWorks;
7174 see gotoff_operand. */
7175 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
7177 if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
7179 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
7180 new = gen_rtx_CONST (Pmode
, new);
7181 new = gen_const_mem (Pmode
, new);
7182 set_mem_alias_set (new, ix86_GOT_alias_set ());
7185 reg
= gen_reg_rtx (Pmode
);
	  /* Use gen_movsi directly, otherwise the address is loaded into a
	     register for CSE.  We don't want to CSE these addresses;
	     instead we CSE addresses from the GOT table, so skip this.  */
7189 emit_insn (gen_movsi (reg
, new));
7194 /* This symbol must be referenced via a load from the
7195 Global Offset Table (@GOT). */
7197 if (reload_in_progress
)
7198 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7199 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
7200 new = gen_rtx_CONST (Pmode
, new);
7202 new = force_reg (Pmode
, new);
7203 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7204 new = gen_const_mem (Pmode
, new);
7205 set_mem_alias_set (new, ix86_GOT_alias_set ());
7208 reg
= gen_reg_rtx (Pmode
);
7209 emit_move_insn (reg
, new);
7215 if (CONST_INT_P (addr
)
7216 && !x86_64_immediate_operand (addr
, VOIDmode
))
7220 emit_move_insn (reg
, addr
);
7224 new = force_reg (Pmode
, addr
);
7226 else if (GET_CODE (addr
) == CONST
)
7228 addr
= XEXP (addr
, 0);
7230 /* We must match stuff we generate before. Assume the only
7231 unspecs that can get here are ours. Not that we could do
7232 anything with them anyway.... */
7233 if (GET_CODE (addr
) == UNSPEC
7234 || (GET_CODE (addr
) == PLUS
7235 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
7237 gcc_assert (GET_CODE (addr
) == PLUS
);
7239 if (GET_CODE (addr
) == PLUS
)
7241 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
7243 /* Check first to see if this is a constant offset from a @GOTOFF
7244 symbol reference. */
7245 if (gotoff_operand (op0
, Pmode
)
7246 && CONST_INT_P (op1
))
7250 if (reload_in_progress
)
7251 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7252 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
7254 new = gen_rtx_PLUS (Pmode
, new, op1
);
7255 new = gen_rtx_CONST (Pmode
, new);
7256 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7260 emit_move_insn (reg
, new);
7266 if (INTVAL (op1
) < -16*1024*1024
7267 || INTVAL (op1
) >= 16*1024*1024)
7269 if (!x86_64_immediate_operand (op1
, Pmode
))
7270 op1
= force_reg (Pmode
, op1
);
7271 new = gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
7277 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
7278 new = legitimize_pic_address (XEXP (addr
, 1),
7279 base
== reg
? NULL_RTX
: reg
);
7281 if (CONST_INT_P (new))
7282 new = plus_constant (base
, INTVAL (new));
7285 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
7287 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
7288 new = XEXP (new, 1);
7290 new = gen_rtx_PLUS (Pmode
, base
, new);
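/* Illustrative note: the two flavours of PIC reference produced by
   legitimize_pic_address correspond to code along the lines of

	movl	foo@GOTOFF(%ebx), %eax		# local data, via @GOTOFF
	movl	foo@GOT(%ebx), %eax		# global data, via the GOT
	movl	(%eax), %eax

   on ia32, and to RIP-relative @GOTPCREL loads on x86-64.  The sequences
   are only sketches of the usual output.  */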
7298 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7301 get_thread_pointer (int to_reg
)
7305 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
7309 reg
= gen_reg_rtx (Pmode
);
7310 insn
= gen_rtx_SET (VOIDmode
, reg
, tp
);
7311 insn
= emit_insn (insn
);
7316 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7317 false if we expect this to be used for a memory address and true if
7318 we expect to load the address into a register. */
7321 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
7323 rtx dest
, base
, off
, pic
, tp
;
7328 case TLS_MODEL_GLOBAL_DYNAMIC
:
7329 dest
= gen_reg_rtx (Pmode
);
7330 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7332 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7334 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
7337 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
7338 insns
= get_insns ();
7341 emit_libcall_block (insns
, dest
, rax
, x
);
7343 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7344 emit_insn (gen_tls_global_dynamic_64 (dest
, x
));
7346 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
7348 if (TARGET_GNU2_TLS
)
7350 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
7352 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7356 case TLS_MODEL_LOCAL_DYNAMIC
:
7357 base
= gen_reg_rtx (Pmode
);
7358 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7360 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7362 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
7365 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
7366 insns
= get_insns ();
7369 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
7370 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
7371 emit_libcall_block (insns
, base
, rax
, note
);
7373 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7374 emit_insn (gen_tls_local_dynamic_base_64 (base
));
7376 emit_insn (gen_tls_local_dynamic_base_32 (base
));
7378 if (TARGET_GNU2_TLS
)
7380 rtx x
= ix86_tls_module_base ();
7382 set_unique_reg_note (get_last_insn (), REG_EQUIV
,
7383 gen_rtx_MINUS (Pmode
, x
, tp
));
7386 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
7387 off
= gen_rtx_CONST (Pmode
, off
);
7389 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
7391 if (TARGET_GNU2_TLS
)
7393 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
7395 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7400 case TLS_MODEL_INITIAL_EXEC
:
7404 type
= UNSPEC_GOTNTPOFF
;
7408 if (reload_in_progress
)
7409 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7410 pic
= pic_offset_table_rtx
;
7411 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
7413 else if (!TARGET_ANY_GNU_TLS
)
7415 pic
= gen_reg_rtx (Pmode
);
7416 emit_insn (gen_set_got (pic
));
7417 type
= UNSPEC_GOTTPOFF
;
7422 type
= UNSPEC_INDNTPOFF
;
7425 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
7426 off
= gen_rtx_CONST (Pmode
, off
);
7428 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
7429 off
= gen_const_mem (Pmode
, off
);
7430 set_mem_alias_set (off
, ix86_GOT_alias_set ());
7432 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7434 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7435 off
= force_reg (Pmode
, off
);
7436 return gen_rtx_PLUS (Pmode
, base
, off
);
7440 base
= get_thread_pointer (true);
7441 dest
= gen_reg_rtx (Pmode
);
7442 emit_insn (gen_subsi3 (dest
, base
, off
));
7446 case TLS_MODEL_LOCAL_EXEC
:
7447 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
7448 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7449 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
7450 off
= gen_rtx_CONST (Pmode
, off
);
7452 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7454 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7455 return gen_rtx_PLUS (Pmode
, base
, off
);
7459 base
= get_thread_pointer (true);
7460 dest
= gen_reg_rtx (Pmode
);
7461 emit_insn (gen_subsi3 (dest
, base
, off
));
7472 /* Try machine-dependent ways of modifying an illegitimate address
7473 to be legitimate. If we find one, return the new, valid address.
7474 This macro is used in only one place: `memory_address' in explow.c.
7476 OLDX is the address as it was before break_out_memory_refs was called.
7477 In some cases it is useful to look at this to decide what needs to be done.
7479 MODE and WIN are passed so that this macro can use
7480 GO_IF_LEGITIMATE_ADDRESS.
7482 It is always safe for this macro to do nothing. It exists to recognize
7483 opportunities to optimize the output.
7485 For the 80386, we handle X+REG by loading X into a register R and
7486 using R+REG. R will go in a general reg and indexing will be used.
7487 However, if REG is a broken-out memory address or multiplication,
7488 nothing needs to be done because REG can certainly go in a general reg.
7490 When -fpic is used, special handling is needed for symbolic references.
7491 See comments by legitimize_pic_address in i386.c for details. */
7494 legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
, enum machine_mode mode
)
7499 if (TARGET_DEBUG_ADDR
)
7501 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7502 GET_MODE_NAME (mode
));
7506 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
7508 return legitimize_tls_address (x
, log
, false);
7509 if (GET_CODE (x
) == CONST
7510 && GET_CODE (XEXP (x
, 0)) == PLUS
7511 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
7512 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
7514 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0), log
, false);
7515 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
7518 if (flag_pic
&& SYMBOLIC_CONST (x
))
7519 return legitimize_pic_address (x
, 0);
7521 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7522 if (GET_CODE (x
) == ASHIFT
7523 && CONST_INT_P (XEXP (x
, 1))
7524 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
7527 log
= INTVAL (XEXP (x
, 1));
7528 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
7529 GEN_INT (1 << log
));
7532 if (GET_CODE (x
) == PLUS
)
7534 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7536 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
7537 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
7538 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
7541 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
7542 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
7543 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
7544 GEN_INT (1 << log
));
7547 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
7548 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
7549 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
7552 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
7553 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
7554 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
7555 GEN_INT (1 << log
));
7558 /* Put multiply first if it isn't already. */
7559 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7561 rtx tmp
= XEXP (x
, 0);
7562 XEXP (x
, 0) = XEXP (x
, 1);
7567 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7568 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7569 created by virtual register instantiation, register elimination, and
7570 similar optimizations. */
7571 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
7574 x
= gen_rtx_PLUS (Pmode
,
7575 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
7576 XEXP (XEXP (x
, 1), 0)),
7577 XEXP (XEXP (x
, 1), 1));
7581 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7582 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7583 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
7584 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
7585 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
7586 && CONSTANT_P (XEXP (x
, 1)))
7589 rtx other
= NULL_RTX
;
7591 if (CONST_INT_P (XEXP (x
, 1)))
7593 constant
= XEXP (x
, 1);
7594 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7596 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
7598 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7599 other
= XEXP (x
, 1);
7607 x
= gen_rtx_PLUS (Pmode
,
7608 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
7609 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
7610 plus_constant (other
, INTVAL (constant
)));
7614 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7617 if (GET_CODE (XEXP (x
, 0)) == MULT
)
7620 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
7623 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7626 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
7630 && REG_P (XEXP (x
, 1))
7631 && REG_P (XEXP (x
, 0)))
7634 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
7637 x
= legitimize_pic_address (x
, 0);
7640 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7643 if (REG_P (XEXP (x
, 0)))
7645 rtx temp
= gen_reg_rtx (Pmode
);
7646 rtx val
= force_operand (XEXP (x
, 1), temp
);
7648 emit_move_insn (temp
, val
);
7654 else if (REG_P (XEXP (x
, 1)))
7656 rtx temp
= gen_reg_rtx (Pmode
);
7657 rtx val
= force_operand (XEXP (x
, 0), temp
);
7659 emit_move_insn (temp
, val
);
7669 /* Print an integer constant expression in assembler syntax. Addition
7670 and subtraction are the only arithmetic that may appear in these
7671 expressions. FILE is the stdio stream to write to, X is the rtx, and
7672 CODE is the operand print code from the output string. */
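/* Illustrative note: for PIC operands this routine appends the relocation
   suffix to the symbol it prints, e.g. "foo@GOT", "foo@GOTOFF", "foo@PLT"
   or "foo@GOTPCREL(%rip)", matching the UNSPEC wrapped around the symbol
   as handled in the switch below.  */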
7675 output_pic_addr_const (FILE *file
, rtx x
, int code
)
7679 switch (GET_CODE (x
))
7682 gcc_assert (flag_pic
);
7687 if (! TARGET_MACHO
|| TARGET_64BIT
)
7688 output_addr_const (file
, x
);
7691 const char *name
= XSTR (x
, 0);
7693 /* Mark the decl as referenced so that cgraph will output the function. */
7694 if (SYMBOL_REF_DECL (x
))
7695 mark_decl_referenced (SYMBOL_REF_DECL (x
));
7698 if (MACHOPIC_INDIRECT
7699 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
7700 name
= machopic_indirection_name (x
, /*stub_p=*/true);
7702 assemble_name (file
, name
);
7704 if (!TARGET_MACHO
&& code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
7705 fputs ("@PLT", file
);
7712 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
7713 assemble_name (asm_out_file
, buf
);
7717 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
7721 /* This used to output parentheses around the expression,
7722 but that does not work on the 386 (either ATT or BSD assembler). */
7723 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7727 if (GET_MODE (x
) == VOIDmode
)
7729 /* We can use %d if the number is <32 bits and positive. */
7730 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
7731 fprintf (file
, "0x%lx%08lx",
7732 (unsigned long) CONST_DOUBLE_HIGH (x
),
7733 (unsigned long) CONST_DOUBLE_LOW (x
));
7735 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
7738 /* We can't handle floating point constants;
7739 PRINT_OPERAND must handle them. */
7740 output_operand_lossage ("floating constant misused");
7744 /* Some assemblers need integer constants to appear first. */
7745 if (CONST_INT_P (XEXP (x
, 0)))
7747 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7749 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7753 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
7754 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7756 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7762 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
7763 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7765 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7767 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
7771 gcc_assert (XVECLEN (x
, 0) == 1);
7772 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
7773 switch (XINT (x
, 1))
7776 fputs ("@GOT", file
);
7779 fputs ("@GOTOFF", file
);
7782 fputs ("@PLTOFF", file
);
7784 case UNSPEC_GOTPCREL
:
7785 fputs ("@GOTPCREL(%rip)", file
);
7787 case UNSPEC_GOTTPOFF
:
7788 /* FIXME: This might be @TPOFF in Sun ld too. */
7789 fputs ("@GOTTPOFF", file
);
7792 fputs ("@TPOFF", file
);
7796 fputs ("@TPOFF", file
);
7798 fputs ("@NTPOFF", file
);
7801 fputs ("@DTPOFF", file
);
7803 case UNSPEC_GOTNTPOFF
:
7805 fputs ("@GOTTPOFF(%rip)", file
);
7807 fputs ("@GOTNTPOFF", file
);
7809 case UNSPEC_INDNTPOFF
:
7810 fputs ("@INDNTPOFF", file
);
7813 output_operand_lossage ("invalid UNSPEC as operand");
7819 output_operand_lossage ("invalid expression as operand");
7823 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7824 We need to emit DTP-relative relocations. */
7827 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
7829 fputs (ASM_LONG
, file
);
7830 output_addr_const (file
, x
);
7831 fputs ("@DTPOFF", file
);
7837 fputs (", 0", file
);
7844 /* In the name of slightly smaller debug output, and to cater to
7845 general assembler lossage, recognize PIC+GOTOFF and turn it back
7846 into a direct symbol reference.
7848 On Darwin, this is necessary to avoid a crash, because Darwin
7849 has a different PIC label for each routine but the DWARF debugging
7850 information is not associated with any particular routine, so it's
7851 necessary to remove references to the PIC label from RTL stored by
7852 the DWARF output code. */
7855 ix86_delegitimize_address (rtx orig_x
)
7858 /* reg_addend is NULL or a multiple of some register. */
7859 rtx reg_addend
= NULL_RTX
;
7860 /* const_addend is NULL or a const_int. */
7861 rtx const_addend
= NULL_RTX
;
7862 /* This is the result, or NULL. */
7863 rtx result
= NULL_RTX
;
7870 if (GET_CODE (x
) != CONST
7871 || GET_CODE (XEXP (x
, 0)) != UNSPEC
7872 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
7875 return XVECEXP (XEXP (x
, 0), 0, 0);
7878 if (GET_CODE (x
) != PLUS
7879 || GET_CODE (XEXP (x
, 1)) != CONST
)
7882 if (REG_P (XEXP (x
, 0))
7883 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7884 /* %ebx + GOT/GOTOFF */
7886 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
7888 /* %ebx + %reg * scale + GOT/GOTOFF */
7889 reg_addend
= XEXP (x
, 0);
7890 if (REG_P (XEXP (reg_addend
, 0))
7891 && REGNO (XEXP (reg_addend
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7892 reg_addend
= XEXP (reg_addend
, 1);
7893 else if (REG_P (XEXP (reg_addend
, 1))
7894 && REGNO (XEXP (reg_addend
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
7895 reg_addend
= XEXP (reg_addend
, 0);
7898 if (!REG_P (reg_addend
)
7899 && GET_CODE (reg_addend
) != MULT
7900 && GET_CODE (reg_addend
) != ASHIFT
)
7906 x
= XEXP (XEXP (x
, 1), 0);
7907 if (GET_CODE (x
) == PLUS
7908 && CONST_INT_P (XEXP (x
, 1)))
7910 const_addend
= XEXP (x
, 1);
7914 if (GET_CODE (x
) == UNSPEC
7915 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
))
7916 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))))
7917 result
= XVECEXP (x
, 0, 0);
7919 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
7921 result
= XEXP (x
, 0);
7927 result
= gen_rtx_PLUS (Pmode
, result
, const_addend
);
7929 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
7934 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
7939 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
7941 enum rtx_code second_code
, bypass_code
;
7942 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
7943 gcc_assert (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
);
7944 code
= ix86_fp_compare_code_to_integer (code
);
7948 code
= reverse_condition (code
);
7959 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
7963 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7964 Those same assemblers have the same but opposite lossage on cmov. */
7965 gcc_assert (mode
== CCmode
);
7966 suffix
= fp
? "nbe" : "a";
7986 gcc_assert (mode
== CCmode
);
8008 gcc_assert (mode
== CCmode
);
8009 suffix
= fp
? "nb" : "ae";
8012 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
8016 gcc_assert (mode
== CCmode
);
8020 suffix
= fp
? "u" : "p";
8023 suffix
= fp
? "nu" : "np";
8028 fputs (suffix
, file
);
/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'h', pretend the reg is the 'high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op.  */
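/* For example (illustrative, assuming the conventional hard register
   numbering in this backend where the AX family is register 0): given
   the operand (reg:SI 0), code 'b' prints "al", 'w' prints "ax",
   'k' prints "eax", 'q' prints "rax" (64-bit targets only) and 'h'
   prints "ah"; in AT&T syntax each name is additionally preceded by
   the '%' register prefix.  */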
8040 print_reg (rtx x
, int code
, FILE *file
)
8042 gcc_assert (REGNO (x
) != ARG_POINTER_REGNUM
8043 && REGNO (x
) != FRAME_POINTER_REGNUM
8044 && REGNO (x
) != FLAGS_REG
8045 && REGNO (x
) != FPSR_REG
8046 && REGNO (x
) != FPCR_REG
);
8048 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
8051 if (code
== 'w' || MMX_REG_P (x
))
8053 else if (code
== 'b')
8055 else if (code
== 'k')
8057 else if (code
== 'q')
8059 else if (code
== 'y')
8061 else if (code
== 'h')
8064 code
= GET_MODE_SIZE (GET_MODE (x
));
  /* Irritatingly, AMD extended registers use a different naming convention
     from the normal registers.  */
8068 if (REX_INT_REG_P (x
))
8070 gcc_assert (TARGET_64BIT
);
8074 error ("extended registers have no high halves");
8077 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8080 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8083 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8086 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8089 error ("unsupported operand size for extended register");
8097 if (STACK_TOP_P (x
))
8099 fputs ("st(0)", file
);
8106 if (! ANY_FP_REG_P (x
))
8107 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
8112 fputs (hi_reg_name
[REGNO (x
)], file
);
8115 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
8117 fputs (qi_reg_name
[REGNO (x
)], file
);
8120 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
8122 fputs (qi_high_reg_name
[REGNO (x
)], file
);
/* Locate some local-dynamic symbol still in use by this function
   so that we can print its name in some tls_local_dynamic_base
   pattern.  */
8134 get_some_local_dynamic_name (void)
8138 if (cfun
->machine
->some_ld_name
)
8139 return cfun
->machine
->some_ld_name
;
8141 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
8143 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
8144 return cfun
->machine
->some_ld_name
;
8150 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
8154 if (GET_CODE (x
) == SYMBOL_REF
8155 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
8157 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
        otherwise nothing
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assemblers argument
        delimiter.
   b -- print the QImode name of the register for the indicated operand.
        %b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   q -- likewise, print the DImode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   X -- don't print any sort of PIC '@' suffix for a symbol.
   & -- print some in-use local-dynamic symbol name.
   H -- print a memory address offset by 8; used for sse high-parts
8193 print_operand (FILE *file
, rtx x
, int code
)
8200 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8205 assemble_name (file
, get_some_local_dynamic_name ());
8209 switch (ASSEMBLER_DIALECT
)
8216 /* Intel syntax. For absolute addresses, registers should not
8217 be surrounded by braces. */
8221 PRINT_OPERAND (file
, x
, 0);
8231 PRINT_OPERAND (file
, x
, 0);
8236 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8241 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8246 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8251 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8256 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8261 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8266 /* 387 opcodes don't get size suffixes if the operands are
8268 if (STACK_REG_P (x
))
8271 /* Likewise if using Intel opcodes. */
8272 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
8275 /* This is the size of op from size of operand. */
8276 switch (GET_MODE_SIZE (GET_MODE (x
)))
8283 #ifdef HAVE_GAS_FILDS_FISTS
8289 if (GET_MODE (x
) == SFmode
)
8304 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
8306 #ifdef GAS_MNEMONICS
8332 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
8334 PRINT_OPERAND (file
, x
, 0);
      /* Little bit of braindamage here.  The SSE compare instructions
         use completely different names for the comparisons than the
         fp conditional moves do.  */
8343 switch (GET_CODE (x
))
8358 fputs ("unord", file
);
8362 fputs ("neq", file
);
8366 fputs ("nlt", file
);
8370 fputs ("nle", file
);
8373 fputs ("ord", file
);
8380 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8381 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8383 switch (GET_MODE (x
))
8385 case HImode
: putc ('w', file
); break;
8387 case SFmode
: putc ('l', file
); break;
8389 case DFmode
: putc ('q', file
); break;
8390 default: gcc_unreachable ();
8397 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
8400 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8401 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8404 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
8407 /* Like above, but reverse condition */
8409 /* Check to see if argument to %c is really a constant
8410 and not a condition code which needs to be reversed. */
8411 if (!COMPARISON_P (x
))
8413 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8416 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
8419 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8420 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8423 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
8427 /* It doesn't actually matter what mode we use here, as we're
8428 only going to use this for printing. */
8429 x
= adjust_address_nv (x
, DImode
, 8);
8436 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
8439 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
8442 int pred_val
= INTVAL (XEXP (x
, 0));
8444 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
8445 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
8447 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
8448 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
8450 /* Emit hints only in the case default branch prediction
8451 heuristics would fail. */
8452 if (taken
!= cputaken
)
8454 /* We use 3e (DS) prefix for taken branches and
8455 2e (CS) prefix for not taken branches. */
8457 fputs ("ds ; ", file
);
8459 fputs ("cs ; ", file
);
8466 output_operand_lossage ("invalid operand code '%c'", code
);
8471 print_reg (x
, code
, file
);
8475 /* No `byte ptr' prefix for call instructions. */
8476 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
8479 switch (GET_MODE_SIZE (GET_MODE (x
)))
8481 case 1: size
= "BYTE"; break;
8482 case 2: size
= "WORD"; break;
8483 case 4: size
= "DWORD"; break;
8484 case 8: size
= "QWORD"; break;
8485 case 12: size
= "XWORD"; break;
8486 case 16: size
= "XMMWORD"; break;
8491 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8494 else if (code
== 'w')
8496 else if (code
== 'k')
8500 fputs (" PTR ", file
);
8504 /* Avoid (%rip) for call operands. */
8505 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
8506 && !CONST_INT_P (x
))
8507 output_addr_const (file
, x
);
8508 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
8509 output_operand_lossage ("invalid constraints for operand");
8514 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
8519 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8520 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
8522 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8524 fprintf (file
, "0x%08lx", l
);
8527 /* These float cases don't actually occur as immediate operands. */
8528 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
8532 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8533 fprintf (file
, "%s", dstr
);
8536 else if (GET_CODE (x
) == CONST_DOUBLE
8537 && GET_MODE (x
) == XFmode
)
8541 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8542 fprintf (file
, "%s", dstr
);
8547 /* We have patterns that allow zero sets of memory, for instance.
8548 In 64-bit mode, we should probably support all 8-byte vectors,
8549 since we can in fact encode that into an immediate. */
8550 if (GET_CODE (x
) == CONST_VECTOR
)
8552 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
8558 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
8560 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8563 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
8564 || GET_CODE (x
) == LABEL_REF
)
8566 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8569 fputs ("OFFSET FLAT:", file
);
8572 if (CONST_INT_P (x
))
8573 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
8575 output_pic_addr_const (file
, x
, code
);
8577 output_addr_const (file
, x
);
8581 /* Print a memory operand whose address is ADDR. */
8584 print_operand_address (FILE *file
, rtx addr
)
8586 struct ix86_address parts
;
8587 rtx base
, index
, disp
;
8589 int ok
= ix86_decompose_address (addr
, &parts
);
8594 index
= parts
.index
;
8596 scale
= parts
.scale
;
8604 if (USER_LABEL_PREFIX
[0] == 0)
8606 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
8612 if (!base
&& !index
)
8614 /* Displacement only requires special attention. */
8616 if (CONST_INT_P (disp
))
8618 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
8620 if (USER_LABEL_PREFIX
[0] == 0)
8622 fputs ("ds:", file
);
8624 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
8627 output_pic_addr_const (file
, disp
, 0);
8629 output_addr_const (file
, disp
);
8631 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8634 if (GET_CODE (disp
) == CONST
8635 && GET_CODE (XEXP (disp
, 0)) == PLUS
8636 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
8637 disp
= XEXP (XEXP (disp
, 0), 0);
8638 if (GET_CODE (disp
) == LABEL_REF
8639 || (GET_CODE (disp
) == SYMBOL_REF
8640 && SYMBOL_REF_TLS_MODEL (disp
) == 0))
8641 fputs ("(%rip)", file
);
8646 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8651 output_pic_addr_const (file
, disp
, 0);
8652 else if (GET_CODE (disp
) == LABEL_REF
)
8653 output_asm_label (disp
);
8655 output_addr_const (file
, disp
);
8660 print_reg (base
, 0, file
);
8664 print_reg (index
, 0, file
);
8666 fprintf (file
, ",%d", scale
);
8672 rtx offset
= NULL_RTX
;
8676 /* Pull out the offset of a symbol; print any symbol itself. */
8677 if (GET_CODE (disp
) == CONST
8678 && GET_CODE (XEXP (disp
, 0)) == PLUS
8679 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
8681 offset
= XEXP (XEXP (disp
, 0), 1);
8682 disp
= gen_rtx_CONST (VOIDmode
,
8683 XEXP (XEXP (disp
, 0), 0));
8687 output_pic_addr_const (file
, disp
, 0);
8688 else if (GET_CODE (disp
) == LABEL_REF
)
8689 output_asm_label (disp
);
8690 else if (CONST_INT_P (disp
))
8693 output_addr_const (file
, disp
);
8699 print_reg (base
, 0, file
);
8702 if (INTVAL (offset
) >= 0)
8704 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8708 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8715 print_reg (index
, 0, file
);
8717 fprintf (file
, "*%d", scale
);
8725 output_addr_const_extra (FILE *file
, rtx x
)
8729 if (GET_CODE (x
) != UNSPEC
)
8732 op
= XVECEXP (x
, 0, 0);
8733 switch (XINT (x
, 1))
8735 case UNSPEC_GOTTPOFF
:
8736 output_addr_const (file
, op
);
8737 /* FIXME: This might be @TPOFF in Sun ld. */
8738 fputs ("@GOTTPOFF", file
);
8741 output_addr_const (file
, op
);
8742 fputs ("@TPOFF", file
);
8745 output_addr_const (file
, op
);
8747 fputs ("@TPOFF", file
);
8749 fputs ("@NTPOFF", file
);
8752 output_addr_const (file
, op
);
8753 fputs ("@DTPOFF", file
);
8755 case UNSPEC_GOTNTPOFF
:
8756 output_addr_const (file
, op
);
8758 fputs ("@GOTTPOFF(%rip)", file
);
8760 fputs ("@GOTNTPOFF", file
);
8762 case UNSPEC_INDNTPOFF
:
8763 output_addr_const (file
, op
);
8764 fputs ("@INDNTPOFF", file
);
/* Split one or more DImode RTL references into pairs of SImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */
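/* A minimal usage sketch (illustrative only, not taken from the machine
   description): a splitter that wants to operate on the two SImode
   halves of a pair of DImode operands could do

       rtx lo[2], hi[2];
       split_di (operands, 2, lo, hi);
       emit_move_insn (lo[0], lo[1]);
       emit_move_insn (hi[0], hi[1]);

   after which lo[i] and hi[i] are the low and high 32-bit words of
   operands[i] (at byte offsets 0 and 4 of the value, respectively).  */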
split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses,
         but we still have to handle it.  */
          lo_half[num] = adjust_address (op, SImode, 0);
          hi_half[num] = adjust_address (op, SImode, 4);
          lo_half[num] = simplify_gen_subreg (SImode, op,
                                              GET_MODE (op) == VOIDmode
                                              ? DImode : GET_MODE (op), 0);
          hi_half[num] = simplify_gen_subreg (SImode, op,
                                              GET_MODE (op) == VOIDmode
                                              ? DImode : GET_MODE (op), 4);
/* Split one or more TImode RTL references into pairs of DImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */
split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses, but we
         still have to handle it.  */
          lo_half[num] = adjust_address (op, DImode, 0);
          hi_half[num] = adjust_address (op, DImode, 8);
          lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
          hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
8851 output_387_binary_op (rtx insn
, rtx
*operands
)
8853 static char buf
[30];
8856 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
8858 #ifdef ENABLE_CHECKING
8859 /* Even if we do not want to check the inputs, this documents input
8860 constraints. Which helps in understanding the following code. */
8861 if (STACK_REG_P (operands
[0])
8862 && ((REG_P (operands
[1])
8863 && REGNO (operands
[0]) == REGNO (operands
[1])
8864 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
8865 || (REG_P (operands
[2])
8866 && REGNO (operands
[0]) == REGNO (operands
[2])
8867 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
8868 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
8871 gcc_assert (is_sse
);
8874 switch (GET_CODE (operands
[3]))
8877 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8878 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8886 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8887 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8895 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8896 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8904 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8905 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8919 if (GET_MODE (operands
[0]) == SFmode
)
8920 strcat (buf
, "ss\t{%2, %0|%0, %2}");
8922 strcat (buf
, "sd\t{%2, %0|%0, %2}");
8927 switch (GET_CODE (operands
[3]))
8931 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
8933 rtx temp
= operands
[2];
8934 operands
[2] = operands
[1];
8938 /* know operands[0] == operands[1]. */
8940 if (MEM_P (operands
[2]))
8946 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8948 if (STACK_TOP_P (operands
[0]))
8949 /* How is it that we are storing to a dead operand[2]?
8950 Well, presumably operands[1] is dead too. We can't
8951 store the result to st(0) as st(0) gets popped on this
8952 instruction. Instead store to operands[2] (which I
8953 think has to be st(1)). st(1) will be popped later.
8954 gcc <= 2.8.1 didn't have this check and generated
8955 assembly code that the Unixware assembler rejected. */
8956 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8958 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8962 if (STACK_TOP_P (operands
[0]))
8963 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8965 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8970 if (MEM_P (operands
[1]))
8976 if (MEM_P (operands
[2]))
8982 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8985 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8986 derived assemblers, confusingly reverse the direction of
8987 the operation for fsub{r} and fdiv{r} when the
8988 destination register is not st(0). The Intel assembler
8989 doesn't have this brain damage. Read !SYSV386_COMPAT to
8990 figure out what the hardware really does. */
8991 if (STACK_TOP_P (operands
[0]))
8992 p
= "{p\t%0, %2|rp\t%2, %0}";
8994 p
= "{rp\t%2, %0|p\t%0, %2}";
8996 if (STACK_TOP_P (operands
[0]))
8997 /* As above for fmul/fadd, we can't store to st(0). */
8998 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9000 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9005 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
9008 if (STACK_TOP_P (operands
[0]))
9009 p
= "{rp\t%0, %1|p\t%1, %0}";
9011 p
= "{p\t%1, %0|rp\t%0, %1}";
9013 if (STACK_TOP_P (operands
[0]))
9014 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9016 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9021 if (STACK_TOP_P (operands
[0]))
9023 if (STACK_TOP_P (operands
[1]))
9024 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9026 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9029 else if (STACK_TOP_P (operands
[1]))
9032 p
= "{\t%1, %0|r\t%0, %1}";
9034 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9040 p
= "{r\t%2, %0|\t%0, %2}";
9042 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9055 /* Return needed mode for entity in optimize_mode_switching pass. */
9058 ix86_mode_needed (int entity
, rtx insn
)
9060 enum attr_i387_cw mode
;
  /* The mode UNINITIALIZED is used to store the control word after a
     function call or ASM pattern.  The mode ANY specifies that the
     function has no requirements on the control word and makes no
     changes to the bits we are interested in.  */
9068 || (NONJUMP_INSN_P (insn
)
9069 && (asm_noperands (PATTERN (insn
)) >= 0
9070 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
9071 return I387_CW_UNINITIALIZED
;
9073 if (recog_memoized (insn
) < 0)
9076 mode
= get_attr_i387_cw (insn
);
9081 if (mode
== I387_CW_TRUNC
)
9086 if (mode
== I387_CW_FLOOR
)
9091 if (mode
== I387_CW_CEIL
)
9096 if (mode
== I387_CW_MASK_PM
)
/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  CURRENT_MODE is set to the current control word,
   while NEW_MODE is set to the new control word.  */
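/* For reference, a sketch of the x87 control word bits manipulated here
   (bits 10-11 are the rounding control field, bit 5 masks the precision
   exception; the constants are the standard ones from the IA-32 manuals,
   shown as plain C for illustration only):

       unsigned short cw;                     value stored by fnstcw
       cw |= 0x0c00;                          RC = 11b: round toward zero
       cw = (cw & ~0x0c00) | 0x0400;          RC = 01b: round down (-inf)
       cw = (cw & ~0x0c00) | 0x0800;          RC = 10b: round up (+inf)
       cw |= 0x0020;                          mask the precision exception

   The first (HImode and/or/ior) variant below performs exactly this
   masking on the saved control word before storing it into the stack
   slot that the rounding patterns later load with fldcw.  */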
9112 emit_i387_cw_initialization (int mode
)
9114 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
9119 rtx reg
= gen_reg_rtx (HImode
);
9121 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
9122 emit_move_insn (reg
, copy_rtx (stored_mode
));
9124 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
|| optimize_size
)
9129 /* round toward zero (truncate) */
9130 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
9131 slot
= SLOT_CW_TRUNC
;
9135 /* round down toward -oo */
9136 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9137 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
9138 slot
= SLOT_CW_FLOOR
;
9142 /* round up toward +oo */
9143 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9144 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
9145 slot
= SLOT_CW_CEIL
;
9148 case I387_CW_MASK_PM
:
9149 /* mask precision exception for nearbyint() */
9150 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9151 slot
= SLOT_CW_MASK_PM
;
9163 /* round toward zero (truncate) */
9164 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
9165 slot
= SLOT_CW_TRUNC
;
9169 /* round down toward -oo */
9170 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
9171 slot
= SLOT_CW_FLOOR
;
9175 /* round up toward +oo */
9176 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
9177 slot
= SLOT_CW_CEIL
;
9180 case I387_CW_MASK_PM
:
9181 /* mask precision exception for nearbyint() */
9182 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9183 slot
= SLOT_CW_MASK_PM
;
9191 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
9193 new_mode
= assign_386_stack_local (HImode
, slot
);
9194 emit_move_insn (new_mode
, reg
);
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */
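/* Roughly, for the common DImode case without fisttp, the sequence
   emitted below looks like (illustrative AT&T assembly; TRUNC_CW and
   SAVED_CW stand for the control-word stack slots):

       fld     %st(0)          only when the value must survive the pop
       fldcw   TRUNC_CW        switch to round-toward-zero
       fistp   QWORD_MEM       popping 64-bit integer store
       fldcw   SAVED_CW        restore the caller's rounding mode

   i.e. the control word is switched around the store and then restored;
   the SSE3 fisttp form truncates directly and avoids the fldcw pair.  */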
9202 output_fix_trunc (rtx insn
, rtx
*operands
, int fisttp
)
9204 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9205 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
9206 int round_mode
= get_attr_i387_cw (insn
);
9208 /* Jump through a hoop or two for DImode, since the hardware has no
9209 non-popping instruction. We used to do this a different way, but
9210 that was somewhat fragile and broke with post-reload splitters. */
9211 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
9212 output_asm_insn ("fld\t%y1", operands
);
9214 gcc_assert (STACK_TOP_P (operands
[1]));
9215 gcc_assert (MEM_P (operands
[0]));
9218 output_asm_insn ("fisttp%z0\t%0", operands
);
9221 if (round_mode
!= I387_CW_ANY
)
9222 output_asm_insn ("fldcw\t%3", operands
);
9223 if (stack_top_dies
|| dimode_p
)
9224 output_asm_insn ("fistp%z0\t%0", operands
);
9226 output_asm_insn ("fist%z0\t%0", operands
);
9227 if (round_mode
!= I387_CW_ANY
)
9228 output_asm_insn ("fldcw\t%2", operands
);
9234 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9235 have the values zero or one, indicates the ffreep insn's operand
9236 from the OPERANDS array. */
9239 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
9241 if (TARGET_USE_FFREEP
)
9242 #if HAVE_AS_IX86_FFREEP
9243 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
9246 static char retval
[] = ".word\t0xc_df";
9247 int regno
= REGNO (operands
[opno
]);
9249 gcc_assert (FP_REGNO_P (regno
));
9251 retval
[9] = '0' + (regno
- FIRST_STACK_REG
);
9256 return opno
? "fstp\t%y1" : "fstp\t%y0";
9260 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9261 should be used. UNORDERED_P is true when fucom should be used. */
9264 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
9267 rtx cmp_op0
, cmp_op1
;
9268 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
9272 cmp_op0
= operands
[0];
9273 cmp_op1
= operands
[1];
9277 cmp_op0
= operands
[1];
9278 cmp_op1
= operands
[2];
9283 if (GET_MODE (operands
[0]) == SFmode
)
9285 return "ucomiss\t{%1, %0|%0, %1}";
9287 return "comiss\t{%1, %0|%0, %1}";
9290 return "ucomisd\t{%1, %0|%0, %1}";
9292 return "comisd\t{%1, %0|%0, %1}";
9295 gcc_assert (STACK_TOP_P (cmp_op0
));
9297 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9299 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
9303 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
9304 return output_387_ffreep (operands
, 1);
9307 return "ftst\n\tfnstsw\t%0";
9310 if (STACK_REG_P (cmp_op1
)
9312 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
9313 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
9315 /* If both the top of the 387 stack dies, and the other operand
9316 is also a stack register that dies, then this must be a
9317 `fcompp' float compare */
9321 /* There is no double popping fcomi variant. Fortunately,
9322 eflags is immune from the fstp's cc clobbering. */
9324 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
9326 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
9327 return output_387_ffreep (operands
, 0);
9332 return "fucompp\n\tfnstsw\t%0";
9334 return "fcompp\n\tfnstsw\t%0";
9339 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9341 static const char * const alt
[16] =
9343 "fcom%z2\t%y2\n\tfnstsw\t%0",
9344 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9345 "fucom%z2\t%y2\n\tfnstsw\t%0",
9346 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9348 "ficom%z2\t%y2\n\tfnstsw\t%0",
9349 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9353 "fcomi\t{%y1, %0|%0, %y1}",
9354 "fcomip\t{%y1, %0|%0, %y1}",
9355 "fucomi\t{%y1, %0|%0, %y1}",
9356 "fucomip\t{%y1, %0|%0, %y1}",
9367 mask
= eflags_p
<< 3;
9368 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
9369 mask
|= unordered_p
<< 1;
9370 mask
|= stack_top_dies
;
9372 gcc_assert (mask
< 16);
9381 ix86_output_addr_vec_elt (FILE *file
, int value
)
9383 const char *directive
= ASM_LONG
;
9387 directive
= ASM_QUAD
;
9389 gcc_assert (!TARGET_64BIT
);
9392 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
9396 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
9398 const char *directive
= ASM_LONG
;
9401 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
9402 directive
= ASM_QUAD
;
9404 gcc_assert (!TARGET_64BIT
);
9406 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9407 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
9408 fprintf (file
, "%s%s%d-%s%d\n",
9409 directive
, LPREFIX
, value
, LPREFIX
, rel
);
9410 else if (HAVE_AS_GOTOFF_IN_DATA
)
9411 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
9413 else if (TARGET_MACHO
)
9415 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
9416 machopic_output_function_base_name (file
);
9417 fprintf(file
, "\n");
9421 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
9422 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate.  */

ix86_expand_clear (rtx dest)
  /* We play register width games, which are only valid after reload.  */
  gcc_assert (reload_completed);

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));

  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9452 /* X is an unchanging MEM. If it is a constant pool reference, return
9453 the constant pool rtx, else NULL. */
9456 maybe_get_pool_constant (rtx x
)
9458 x
= ix86_delegitimize_address (XEXP (x
, 0));
9460 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
9461 return get_pool_constant (x
);
9467 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
9469 int strict
= (reload_in_progress
|| reload_completed
);
9471 enum tls_model model
;
9476 if (GET_CODE (op1
) == SYMBOL_REF
)
9478 model
= SYMBOL_REF_TLS_MODEL (op1
);
9481 op1
= legitimize_tls_address (op1
, model
, true);
9482 op1
= force_operand (op1
, op0
);
9487 else if (GET_CODE (op1
) == CONST
9488 && GET_CODE (XEXP (op1
, 0)) == PLUS
9489 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
9491 model
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1
, 0), 0));
9494 rtx addend
= XEXP (XEXP (op1
, 0), 1);
9495 op1
= legitimize_tls_address (XEXP (XEXP (op1
, 0), 0), model
, true);
9496 op1
= force_operand (op1
, NULL
);
9497 op1
= expand_simple_binop (Pmode
, PLUS
, op1
, addend
,
9498 op0
, 1, OPTAB_DIRECT
);
9504 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
9506 if (TARGET_MACHO
&& !TARGET_64BIT
)
9511 rtx temp
= ((reload_in_progress
9512 || ((op0
&& REG_P (op0
))
9514 ? op0
: gen_reg_rtx (Pmode
));
9515 op1
= machopic_indirect_data_reference (op1
, temp
);
9516 op1
= machopic_legitimize_pic_address (op1
, mode
,
9517 temp
== op1
? 0 : temp
);
9519 else if (MACHOPIC_INDIRECT
)
9520 op1
= machopic_indirect_data_reference (op1
, 0);
9528 op1
= force_reg (Pmode
, op1
);
9529 else if (!TARGET_64BIT
|| !x86_64_movabs_operand (op1
, Pmode
))
9531 rtx reg
= no_new_pseudos
? op0
: NULL_RTX
;
9532 op1
= legitimize_pic_address (op1
, reg
);
9541 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
9542 || !push_operand (op0
, mode
))
9544 op1
= force_reg (mode
, op1
);
9546 if (push_operand (op0
, mode
)
9547 && ! general_no_elim_operand (op1
, mode
))
9548 op1
= copy_to_mode_reg (mode
, op1
);
9550 /* Force large constants in 64bit compilation into register
9551 to get them CSEed. */
9552 if (TARGET_64BIT
&& mode
== DImode
9553 && immediate_operand (op1
, mode
)
9554 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
9555 && !register_operand (op0
, mode
)
9556 && optimize
&& !reload_completed
&& !reload_in_progress
)
9557 op1
= copy_to_mode_reg (mode
, op1
);
9559 if (FLOAT_MODE_P (mode
))
9561 /* If we are loading a floating point constant to a register,
9562 force the value to memory now, since we'll get better code
9563 out the back end. */
9567 else if (GET_CODE (op1
) == CONST_DOUBLE
)
9569 op1
= validize_mem (force_const_mem (mode
, op1
));
9570 if (!register_operand (op0
, mode
))
9572 rtx temp
= gen_reg_rtx (mode
);
9573 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
9574 emit_move_insn (op0
, temp
);
9581 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9585 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
9587 rtx op0
= operands
[0], op1
= operands
[1];
9589 /* Force constants other than zero into memory. We do not know how
9590 the instructions used to build constants modify the upper 64 bits
9591 of the register, once we have that information we may be able
9592 to handle some of them more efficiently. */
9593 if ((reload_in_progress
| reload_completed
) == 0
9594 && register_operand (op0
, mode
)
9596 && standard_sse_constant_p (op1
) <= 0)
9597 op1
= validize_mem (force_const_mem (mode
, op1
));
9599 /* Make operand1 a register if it isn't already. */
9601 && !register_operand (op0
, mode
)
9602 && !register_operand (op1
, mode
))
9604 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
9608 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9611 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9612 straight to ix86_expand_vector_move. */
9613 /* Code generation for scalar reg-reg moves of single and double precision data:
9614 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
9618 if (x86_sse_partial_reg_dependency == true)
9623 Code generation for scalar loads of double precision data:
9624 if (x86_sse_split_regs == true)
9625 movlpd mem, reg (gas syntax)
9629 Code generation for unaligned packed loads of single precision data
9630 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
9631 if (x86_sse_unaligned_move_optimal)
9634 if (x86_sse_partial_reg_dependency == true)
9646 Code generation for unaligned packed loads of double precision data
9647 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
9648 if (x86_sse_unaligned_move_optimal)
9651 if (x86_sse_split_regs == true)
9664 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
9673 /* If we're optimizing for size, movups is the smallest. */
9676 op0
= gen_lowpart (V4SFmode
, op0
);
9677 op1
= gen_lowpart (V4SFmode
, op1
);
9678 emit_insn (gen_sse_movups (op0
, op1
));
9682 /* ??? If we have typed data, then it would appear that using
9683 movdqu is the only way to get unaligned data loaded with
9685 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9687 op0
= gen_lowpart (V16QImode
, op0
);
9688 op1
= gen_lowpart (V16QImode
, op1
);
9689 emit_insn (gen_sse2_movdqu (op0
, op1
));
9693 if (TARGET_SSE2
&& mode
== V2DFmode
)
9697 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
9699 op0
= gen_lowpart (V2DFmode
, op0
);
9700 op1
= gen_lowpart (V2DFmode
, op1
);
9701 emit_insn (gen_sse2_movupd (op0
, op1
));
9705 /* When SSE registers are split into halves, we can avoid
9706 writing to the top half twice. */
9707 if (TARGET_SSE_SPLIT_REGS
)
9709 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9714 /* ??? Not sure about the best option for the Intel chips.
9715 The following would seem to satisfy; the register is
9716 entirely cleared, breaking the dependency chain. We
9717 then store to the upper half, with a dependency depth
9718 of one. A rumor has it that Intel recommends two movsd
9719 followed by an unpacklpd, but this is unconfirmed. And
9720 given that the dependency depth of the unpacklpd would
9721 still be one, I'm not sure why this would be better. */
9722 zero
= CONST0_RTX (V2DFmode
);
9725 m
= adjust_address (op1
, DFmode
, 0);
9726 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
9727 m
= adjust_address (op1
, DFmode
, 8);
9728 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
9732 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
9734 op0
= gen_lowpart (V4SFmode
, op0
);
9735 op1
= gen_lowpart (V4SFmode
, op1
);
9736 emit_insn (gen_sse_movups (op0
, op1
));
9740 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
9741 emit_move_insn (op0
, CONST0_RTX (mode
));
9743 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9745 if (mode
!= V4SFmode
)
9746 op0
= gen_lowpart (V4SFmode
, op0
);
9747 m
= adjust_address (op1
, V2SFmode
, 0);
9748 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
9749 m
= adjust_address (op1
, V2SFmode
, 8);
9750 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
9753 else if (MEM_P (op0
))
9755 /* If we're optimizing for size, movups is the smallest. */
9758 op0
= gen_lowpart (V4SFmode
, op0
);
9759 op1
= gen_lowpart (V4SFmode
, op1
);
9760 emit_insn (gen_sse_movups (op0
, op1
));
9764 /* ??? Similar to above, only less clear because of quote
9765 typeless stores unquote. */
9766 if (TARGET_SSE2
&& !TARGET_SSE_TYPELESS_STORES
9767 && GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9769 op0
= gen_lowpart (V16QImode
, op0
);
9770 op1
= gen_lowpart (V16QImode
, op1
);
9771 emit_insn (gen_sse2_movdqu (op0
, op1
));
9775 if (TARGET_SSE2
&& mode
== V2DFmode
)
9777 m
= adjust_address (op0
, DFmode
, 0);
9778 emit_insn (gen_sse2_storelpd (m
, op1
));
9779 m
= adjust_address (op0
, DFmode
, 8);
9780 emit_insn (gen_sse2_storehpd (m
, op1
));
9784 if (mode
!= V4SFmode
)
9785 op1
= gen_lowpart (V4SFmode
, op1
);
9786 m
= adjust_address (op0
, V2SFmode
, 0);
9787 emit_insn (gen_sse_storelps (m
, op1
));
9788 m
= adjust_address (op0
, V2SFmode
, 8);
9789 emit_insn (gen_sse_storehps (m
, op1
));
/* Expand a push in MODE.  This is some mode for which we do not support
   proper push instructions, at least from the registers that we expect
   the value to live in.  */
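/* In effect this expands to the two-instruction sequence (sketch,
   AT&T syntax, 32-bit stack pointer assumed):

       subl    $SIZE, %esp
       mov<x>  VALUE, (%esp)

   i.e. the stack pointer is adjusted by the full mode size first and
   the value is then stored through it, instead of using a real push
   instruction.  */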
ix86_expand_push (enum machine_mode mode, rtx x)
  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
                             GEN_INT (-GET_MODE_SIZE (mode)),
                             stack_pointer_rtx, 1, OPTAB_DIRECT);
  if (tmp != stack_pointer_rtx)
    emit_move_insn (stack_pointer_rtx, tmp);

  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
  emit_move_insn (tmp, x);
9815 /* Helper function of ix86_fixup_binary_operands to canonicalize
9816 operand order. Returns true if the operands should be swapped. */
9819 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
9822 rtx dst
= operands
[0];
9823 rtx src1
= operands
[1];
9824 rtx src2
= operands
[2];
9826 /* If the operation is not commutative, we can't do anything. */
9827 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
9830 /* Highest priority is that src1 should match dst. */
9831 if (rtx_equal_p (dst
, src1
))
9833 if (rtx_equal_p (dst
, src2
))
9836 /* Next highest priority is that immediate constants come second. */
9837 if (immediate_operand (src2
, mode
))
9839 if (immediate_operand (src1
, mode
))
9842 /* Lowest priority is that memory references should come second. */
9852 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9853 destination to use for the operation. If different from the true
9854 destination in operands[0], a copy operation will be required. */
9857 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
9860 rtx dst
= operands
[0];
9861 rtx src1
= operands
[1];
9862 rtx src2
= operands
[2];
9864 /* Canonicalize operand order. */
9865 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
9872 /* Both source operands cannot be in memory. */
9873 if (MEM_P (src1
) && MEM_P (src2
))
9875 /* Optimization: Only read from memory once. */
9876 if (rtx_equal_p (src1
, src2
))
9878 src2
= force_reg (mode
, src2
);
9882 src2
= force_reg (mode
, src2
);
9885 /* If the destination is memory, and we do not have matching source
9886 operands, do things in registers. */
9887 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
9888 dst
= gen_reg_rtx (mode
);
9890 /* Source 1 cannot be a constant. */
9891 if (CONSTANT_P (src1
))
9892 src1
= force_reg (mode
, src1
);
9894 /* Source 1 cannot be a non-matching memory. */
9895 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
9896 src1
= force_reg (mode
, src1
);
9903 /* Similarly, but assume that the destination has already been
9907 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
9908 enum machine_mode mode
, rtx operands
[])
9910 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
9911 gcc_assert (dst
== operands
[0]);
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine than just general_operand, which would allow 3 separate
   memory references (one output, two input) in a single insn.  */
9919 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
9922 rtx src1
, src2
, dst
, op
, clob
;
9924 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
9928 /* Emit the instruction. */
9930 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
9931 if (reload_in_progress
)
9933 /* Reload doesn't know about the flags register, and doesn't know that
9934 it doesn't want to clobber it. We can only do this with PLUS. */
9935 gcc_assert (code
== PLUS
);
9940 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9941 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
9944 /* Fix up the destination if needed. */
9945 if (dst
!= operands
[0])
9946 emit_move_insn (operands
[0], dst
);
9949 /* Return TRUE or FALSE depending on whether the binary operator meets the
9950 appropriate constraints. */
9953 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
9956 rtx dst
= operands
[0];
9957 rtx src1
= operands
[1];
9958 rtx src2
= operands
[2];
9960 /* Both source operands cannot be in memory. */
9961 if (MEM_P (src1
) && MEM_P (src2
))
9964 /* Canonicalize operand order for commutative operators. */
9965 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
9972 /* If the destination is memory, we must have a matching source operand. */
9973 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
9976 /* Source 1 cannot be a constant. */
9977 if (CONSTANT_P (src1
))
9980 /* Source 1 cannot be a non-matching memory. */
9981 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine than just general_operand, which would allow 2 separate
   memory references (one output, one input) in a single insn.  */
9992 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
9995 int matching_memory
;
9996 rtx src
, dst
, op
, clob
;
10001 /* If the destination is memory, and we do not have matching source
10002 operands, do things in registers. */
10003 matching_memory
= 0;
10006 if (rtx_equal_p (dst
, src
))
10007 matching_memory
= 1;
10009 dst
= gen_reg_rtx (mode
);
10012 /* When source operand is memory, destination must match. */
10013 if (MEM_P (src
) && !matching_memory
)
10014 src
= force_reg (mode
, src
);
10016 /* Emit the instruction. */
10018 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
10019 if (reload_in_progress
|| code
== NOT
)
10021 /* Reload doesn't know about the flags register, and doesn't know that
10022 it doesn't want to clobber it. */
10023 gcc_assert (code
== NOT
);
10028 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10029 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
10032 /* Fix up the destination if needed. */
10033 if (dst
!= operands
[0])
10034 emit_move_insn (operands
[0], dst
);
10037 /* Return TRUE or FALSE depending on whether the unary operator meets the
10038 appropriate constraints. */
10041 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
10042 enum machine_mode mode ATTRIBUTE_UNUSED
,
10043 rtx operands
[2] ATTRIBUTE_UNUSED
)
10045 /* If one of operands is memory, source and destination must match. */
10046 if ((MEM_P (operands
[0])
10047 || MEM_P (operands
[1]))
10048 && ! rtx_equal_p (operands
[0], operands
[1]))
10053 /* Post-reload splitter for converting an SF or DFmode value in an
10054 SSE register into an unsigned SImode. */
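/* The split below implements, element-wise, the usual "subtract 2^31
   when the input is large, then flip the sign bit back in" trick.  A
   scalar C sketch of the same computation (illustrative only, not part
   of the compiler):

       unsigned int dtoui (double x)
       {
         int large = (2147483648.0 <= x);
         double adj = large ? 2147483648.0 : 0.0;
         int t = (int) (x - adj);
         return (unsigned int) t ^ ((unsigned int) large << 31);
       }

   The comparison result plays the role of the LARGE mask and the
   selected 0-or-2^31 value is ZERO_OR_TWO31 in the operands.  */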
10057 ix86_split_convert_uns_si_sse (rtx operands
[])
10059 enum machine_mode vecmode
;
10060 rtx value
, large
, zero_or_two31
, input
, two31
, x
;
10062 large
= operands
[1];
10063 zero_or_two31
= operands
[2];
10064 input
= operands
[3];
10065 two31
= operands
[4];
10066 vecmode
= GET_MODE (large
);
10067 value
= gen_rtx_REG (vecmode
, REGNO (operands
[0]));
10069 /* Load up the value into the low element. We must ensure that the other
10070 elements are valid floats -- zero is the easiest such value. */
10073 if (vecmode
== V4SFmode
)
10074 emit_insn (gen_vec_setv4sf_0 (value
, CONST0_RTX (V4SFmode
), input
));
10076 emit_insn (gen_sse2_loadlpd (value
, CONST0_RTX (V2DFmode
), input
));
10080 input
= gen_rtx_REG (vecmode
, REGNO (input
));
10081 emit_move_insn (value
, CONST0_RTX (vecmode
));
10082 if (vecmode
== V4SFmode
)
10083 emit_insn (gen_sse_movss (value
, value
, input
));
10085 emit_insn (gen_sse2_movsd (value
, value
, input
));
10088 emit_move_insn (large
, two31
);
10089 emit_move_insn (zero_or_two31
, MEM_P (two31
) ? large
: two31
);
10091 x
= gen_rtx_fmt_ee (LE
, vecmode
, large
, value
);
10092 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
10094 x
= gen_rtx_AND (vecmode
, zero_or_two31
, large
);
10095 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
10097 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
10098 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
10100 large
= gen_rtx_REG (V4SImode
, REGNO (large
));
10101 emit_insn (gen_ashlv4si3 (large
, large
, GEN_INT (31)));
10103 x
= gen_rtx_REG (V4SImode
, REGNO (value
));
10104 if (vecmode
== V4SFmode
)
10105 emit_insn (gen_sse2_cvttps2dq (x
, value
));
10107 emit_insn (gen_sse2_cvttpd2dq (x
, value
));
10110 emit_insn (gen_xorv4si3 (value
, value
, large
));
10113 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10114 Expects the 64-bit DImode to be supplied in a pair of integral
10115 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10116 -mfpmath=sse, !optimize_size only. */
10119 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
10121 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
10122 rtx int_xmm
, fp_xmm
;
10123 rtx biases
, exponents
;
10126 int_xmm
= gen_reg_rtx (V4SImode
);
10127 if (TARGET_INTER_UNIT_MOVES
)
10128 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
10129 else if (TARGET_SSE_SPLIT_REGS
)
10131 emit_insn (gen_rtx_CLOBBER (VOIDmode
, int_xmm
));
10132 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
10136 x
= gen_reg_rtx (V2DImode
);
10137 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
10138 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
10141 x
= gen_rtx_CONST_VECTOR (V4SImode
,
10142 gen_rtvec (4, GEN_INT (0x43300000UL
),
10143 GEN_INT (0x45300000UL
),
10144 const0_rtx
, const0_rtx
));
10145 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
10147 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10148 emit_insn (gen_sse2_punpckldq (int_xmm
, int_xmm
, exponents
));
10150 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10151 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10152 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10153 (0x1.0p84 + double(fp_value_hi_xmm)).
10154 Note these exponents differ by 32. */
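  /* A scalar sketch of the same trick (illustrative only; assumes IEEE
     doubles and permits the type punning through a union):

         double udi2df (unsigned long long u)
         {
           union { unsigned long long i; double d; } lo, hi;
           lo.i = 0x4330000000000000ULL | (u & 0xffffffffULL);   2^52 + lo32
           hi.i = 0x4530000000000000ULL | (u >> 32);             2^84 + hi32 * 2^32
           return (hi.d - 0x1.0p84) + (lo.d - 0x1.0p52);
         }

     Both subtractions are exact, so the only rounding happens in the
     final addition, which is what the surrounding vector code arranges
     with punpckldq to merge in the exponents, subpd to remove the
     biases, and haddpd (or unpckhpd plus addpd) to form the sum.  */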
10156 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
10158 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10159 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10160 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
10161 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
10162 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
10163 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
10164 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
10165 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
10166 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
10168 /* Add the upper and lower DFmode values together. */
10170 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
10173 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
10174 emit_insn (gen_sse2_unpckhpd (fp_xmm
, fp_xmm
, fp_xmm
));
10175 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
10178 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
10181 /* Convert an unsigned SImode value into a DFmode. Only currently used
10182 for SSE, but applicable anywhere. */
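/* Scalar equivalent of the expansion below (illustrative only; relies
   on the usual two's-complement wraparound of the cast):

       double usi2df (unsigned int u)
       {
         int biased = (int) (u + 0x80000000u);     wraps into signed range
         return (double) biased + 2147483648.0;
       }

   Adding -2^31 with wraparound maps [0, 2^32) onto the signed range,
   the signed int-to-double conversion is then exact, and adding 2^31
   back cannot round because the result needs at most 32 mantissa bits.  */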
10185 ix86_expand_convert_uns_sidf_sse (rtx target
, rtx input
)
10187 REAL_VALUE_TYPE TWO31r
;
10190 x
= expand_simple_binop (SImode
, PLUS
, input
, GEN_INT (-2147483647 - 1),
10191 NULL
, 1, OPTAB_DIRECT
);
10193 fp
= gen_reg_rtx (DFmode
);
10194 emit_insn (gen_floatsidf2 (fp
, x
));
10196 real_ldexp (&TWO31r
, &dconst1
, 31);
10197 x
= const_double_from_real_value (TWO31r
, DFmode
);
10199 x
= expand_simple_binop (DFmode
, PLUS
, fp
, x
, target
, 0, OPTAB_DIRECT
);
10201 emit_move_insn (target
, x
);
10204 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10205 32-bit mode; otherwise we have a direct convert instruction. */
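/* Scalar equivalent of the expansion below (illustrative only; assumes
   an arithmetic right shift of the signed 64-bit value):

       double di2df (long long v)
       {
         int hi = (int) (v >> 32);
         unsigned int lo = (unsigned int) v;
         return (double) hi * 4294967296.0 + (double) lo;
       }

   The high part scaled by 2^32 is exact, the low part goes through the
   unsigned-SImode path above and is also exact, and the single final
   addition provides the only rounding step.  */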
10208 ix86_expand_convert_sign_didf_sse (rtx target
, rtx input
)
10210 REAL_VALUE_TYPE TWO32r
;
10211 rtx fp_lo
, fp_hi
, x
;
10213 fp_lo
= gen_reg_rtx (DFmode
);
10214 fp_hi
= gen_reg_rtx (DFmode
);
10216 emit_insn (gen_floatsidf2 (fp_hi
, gen_highpart (SImode
, input
)));
10218 real_ldexp (&TWO32r
, &dconst1
, 32);
10219 x
= const_double_from_real_value (TWO32r
, DFmode
);
10220 fp_hi
= expand_simple_binop (DFmode
, MULT
, fp_hi
, x
, fp_hi
, 0, OPTAB_DIRECT
);
10222 ix86_expand_convert_uns_sidf_sse (fp_lo
, gen_lowpart (SImode
, input
));
10224 x
= expand_simple_binop (DFmode
, PLUS
, fp_hi
, fp_lo
, target
,
10227 emit_move_insn (target
, x
);
10230 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10231 For x86_32, -mfpmath=sse, !optimize_size only. */
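/* Scalar equivalent of the expansion below (illustrative only):

       float usi2sf (unsigned int u)
       {
         float hi = (float) (int) (u >> 16);
         float lo = (float) (int) (u & 0xffff);
         return hi * 65536.0f + lo;
       }

   Each 16-bit half converts to float exactly and the multiply by 2^16
   is exact, so the final addition is the single rounding step and the
   result matches a direct round-to-nearest unsigned conversion.  */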
10233 ix86_expand_convert_uns_sisf_sse (rtx target
, rtx input
)
10235 REAL_VALUE_TYPE ONE16r
;
10236 rtx fp_hi
, fp_lo
, int_hi
, int_lo
, x
;
10238 real_ldexp (&ONE16r
, &dconst1
, 16);
10239 x
= const_double_from_real_value (ONE16r
, SFmode
);
10240 int_lo
= expand_simple_binop (SImode
, AND
, input
, GEN_INT(0xffff),
10241 NULL
, 0, OPTAB_DIRECT
);
10242 int_hi
= expand_simple_binop (SImode
, LSHIFTRT
, input
, GEN_INT(16),
10243 NULL
, 0, OPTAB_DIRECT
);
10244 fp_hi
= gen_reg_rtx (SFmode
);
10245 fp_lo
= gen_reg_rtx (SFmode
);
10246 emit_insn (gen_floatsisf2 (fp_hi
, int_hi
));
10247 emit_insn (gen_floatsisf2 (fp_lo
, int_lo
));
10248 fp_hi
= expand_simple_binop (SFmode
, MULT
, fp_hi
, x
, fp_hi
,
10250 fp_hi
= expand_simple_binop (SFmode
, PLUS
, fp_hi
, fp_lo
, target
,
10252 if (!rtx_equal_p (target
, fp_hi
))
10253 emit_move_insn (target
, fp_hi
);
/* A subroutine of ix86_build_signbit_mask_vector.  If VECT is true,
   then replicate the value for all elements of the vector; otherwise
   only the first element is set and the rest are zero.  */
10261 ix86_build_const_vector (enum machine_mode mode
, bool vect
, rtx value
)
10268 v
= gen_rtvec (4, value
, value
, value
, value
);
10270 v
= gen_rtvec (4, value
, CONST0_RTX (SFmode
),
10271 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
10272 return gen_rtx_CONST_VECTOR (V4SFmode
, v
);
10276 v
= gen_rtvec (2, value
, value
);
10278 v
= gen_rtvec (2, value
, CONST0_RTX (DFmode
));
10279 return gen_rtx_CONST_VECTOR (V2DFmode
, v
);
10282 gcc_unreachable ();
10286 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
10287 Create a mask for the sign bit in MODE for an SSE register. If VECT is
10288 true, then replicate the mask for all elements of the vector register.
10289 If INVERT is true, then create a mask excluding the sign bit. */
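/* For the scalar SSE modes the masks built here are the familiar
   constants (the values are certain; the instruction pairing is shown
   for illustration):

       SFmode sign mask      0x80000000                neg via xorps
       SFmode inverted mask  0x7fffffff                abs via andps
       DFmode sign mask      0x8000000000000000        neg via xorpd
       DFmode inverted mask  0x7fffffffffffffff        abs via andpd

   i.e. negation is a XOR with the sign bit and absolute value is an
   AND that clears it; for vector modes the same mask is replicated
   across all elements.  */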
10292 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
10294 enum machine_mode vec_mode
;
10295 HOST_WIDE_INT hi
, lo
;
10300 /* Find the sign bit, sign extended to 2*HWI. */
10301 if (mode
== SFmode
)
10302 lo
= 0x80000000, hi
= lo
< 0;
10303 else if (HOST_BITS_PER_WIDE_INT
>= 64)
10304 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
10306 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
10309 lo
= ~lo
, hi
= ~hi
;
10311 /* Force this value into the low part of a fp vector constant. */
10312 mask
= immed_double_const (lo
, hi
, mode
== SFmode
? SImode
: DImode
);
10313 mask
= gen_lowpart (mode
, mask
);
10315 v
= ix86_build_const_vector (mode
, vect
, mask
);
10316 vec_mode
= (mode
== SFmode
) ? V4SFmode
: V2DFmode
;
10317 return force_reg (vec_mode
, v
);
10320 /* Generate code for floating point ABS or NEG. */
10323 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
10326 rtx mask
, set
, use
, clob
, dst
, src
;
10327 bool matching_memory
;
10328 bool use_sse
= false;
10329 bool vector_mode
= VECTOR_MODE_P (mode
);
10330 enum machine_mode elt_mode
= mode
;
10334 elt_mode
= GET_MODE_INNER (mode
);
10337 else if (TARGET_SSE_MATH
)
10338 use_sse
= SSE_FLOAT_MODE_P (mode
);
10340 /* NEG and ABS performed with SSE use bitwise mask operations.
10341 Create the appropriate mask now. */
10343 mask
= ix86_build_signbit_mask (elt_mode
, vector_mode
, code
== ABS
);
10350 /* If the destination is memory, and we don't have matching source
10351 operands or we're using the x87, do things in registers. */
10352 matching_memory
= false;
10355 if (use_sse
&& rtx_equal_p (dst
, src
))
10356 matching_memory
= true;
10358 dst
= gen_reg_rtx (mode
);
10360 if (MEM_P (src
) && !matching_memory
)
10361 src
= force_reg (mode
, src
);
10365 set
= gen_rtx_fmt_ee (code
== NEG
? XOR
: AND
, mode
, src
, mask
);
10366 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
10371 set
= gen_rtx_fmt_e (code
, mode
, src
);
10372 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
10375 use
= gen_rtx_USE (VOIDmode
, mask
);
10376 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10377 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
10378 gen_rtvec (3, set
, use
, clob
)));
10384 if (dst
!= operands
[0])
10385 emit_move_insn (operands
[0], dst
);
10388 /* Expand a copysign operation. Special case operand 0 being a constant. */
10391 ix86_expand_copysign (rtx operands
[])
10393 enum machine_mode mode
, vmode
;
10394 rtx dest
, op0
, op1
, mask
, nmask
;
10396 dest
= operands
[0];
10400 mode
= GET_MODE (dest
);
10401 vmode
= mode
== SFmode
? V4SFmode
: V2DFmode
;
10403 if (GET_CODE (op0
) == CONST_DOUBLE
)
10407 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
10408 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
10410 if (op0
== CONST0_RTX (mode
))
10411 op0
= CONST0_RTX (vmode
);
10414 if (mode
== SFmode
)
10415 v
= gen_rtvec (4, op0
, CONST0_RTX (SFmode
),
10416 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
10418 v
= gen_rtvec (2, op0
, CONST0_RTX (DFmode
));
10419 op0
= force_reg (vmode
, gen_rtx_CONST_VECTOR (vmode
, v
));
10422 mask
= ix86_build_signbit_mask (mode
, 0, 0);
10424 if (mode
== SFmode
)
10425 emit_insn (gen_copysignsf3_const (dest
, op0
, op1
, mask
));
10427 emit_insn (gen_copysigndf3_const (dest
, op0
, op1
, mask
));
10431 nmask
= ix86_build_signbit_mask (mode
, 0, 1);
10432 mask
= ix86_build_signbit_mask (mode
, 0, 0);
10434 if (mode
== SFmode
)
10435 emit_insn (gen_copysignsf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
10437 emit_insn (gen_copysigndf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
10441 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10442 be a constant, and so has already been expanded into a vector constant. */
10445 ix86_split_copysign_const (rtx operands
[])
10447 enum machine_mode mode
, vmode
;
10448 rtx dest
, op0
, op1
, mask
, x
;
10450 dest
= operands
[0];
10453 mask
= operands
[3];
10455 mode
= GET_MODE (dest
);
10456 vmode
= GET_MODE (mask
);
10458 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
10459 x
= gen_rtx_AND (vmode
, dest
, mask
);
10460 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10462 if (op0
!= CONST0_RTX (vmode
))
10464 x
= gen_rtx_IOR (vmode
, dest
, op0
);
10465 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
   so we have to do two masks.  */

void
ix86_split_copysign_var (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, scratch, op0, op1, mask, nmask, x;

  dest = operands[0];
  scratch = operands[1];
  op0 = operands[2];
  op1 = operands[3];
  nmask = operands[4];
  mask = operands[5];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  if (rtx_equal_p (op0, op1))
    {
      /* Shouldn't happen often (it's useless, obviously), but when it does
	 we'd generate incorrect code if we continue below.  */
      emit_move_insn (dest, op0);
      return;
    }

  if (REG_P (mask) && REGNO (dest) == REGNO (mask))	/* alternative 0 */
    {
      gcc_assert (REGNO (op1) == REGNO (scratch));

      x = gen_rtx_AND (vmode, scratch, mask);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      dest = mask;
      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
      x = gen_rtx_NOT (vmode, dest);
      x = gen_rtx_AND (vmode, x, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      if (REGNO (op1) == REGNO (scratch))		/* alternative 1,3 */
	{
	  x = gen_rtx_AND (vmode, scratch, mask);
	}
      else						/* alternative 2,4 */
	{
	  gcc_assert (REGNO (mask) == REGNO (scratch));
	  op1 = simplify_gen_subreg (vmode, op1, mode, 0);
	  x = gen_rtx_AND (vmode, scratch, op1);
	}
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      if (REGNO (op0) == REGNO (dest))			/* alternative 1,2 */
	{
	  dest = simplify_gen_subreg (vmode, op0, mode, 0);
	  x = gen_rtx_AND (vmode, dest, nmask);
	}
      else						/* alternative 3,4 */
	{
	  gcc_assert (REGNO (nmask) == REGNO (dest));
	  dest = nmask;
	  op0 = simplify_gen_subreg (vmode, op0, mode, 0);
	  x = gen_rtx_AND (vmode, dest, op0);
	}
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }

  x = gen_rtx_IOR (vmode, dest, scratch);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}
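/* Note: the "alternative N" comments above refer to the constraint
   alternatives of the copysign*3_var insn patterns; which registers
   overlap (dest vs. mask, op1 vs. scratch, dest vs. nmask) decides how
   the AND/ANDN halves can be ordered without clobbering an input.  */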
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

int
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      gcc_unreachable ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}
/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}
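/* All integer comparisons funnel through here: the compare itself is a
   single SET of the flags register in the selected CC mode, and the
   caller receives a (code FLAGS const0_rtx) rtx to plug into a
   jcc/setcc/cmov pattern.  */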
/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

static enum machine_mode
ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}
enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
    return ix86_fp_compare_mode (code);

  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case GTU:			/* CF=0 & ZF=0 */
    case LTU:			/* CF=1 */
    case LEU:			/* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with sign flag when
	 comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with sign flag when comparing
	 against zero, but we miss jump instruction for it
	 so we need to use relational tests against overflow
	 that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
      /* strcmp pattern do (use flags) and combine may ask us for proper
	 mode.  */
    case USE:
      return CCmode;
    default:
      gcc_unreachable ();
    }
}
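/* For example, (lt:SI x (const_int 0)) only needs the sign flag and so
   gets CCGOCmode above, while an unsigned (ltu:SI x y) must inspect the
   carry flag and therefore requires full CCmode.  */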
/* Return the fixed registers used for condition codes.  */

static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = FLAGS_REG;
  *p2 = FPSR_REG;
  return true;
}

/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return VOIDmode.  */

static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGOCmode;

  switch (m1)
    {
    default:
      gcc_unreachable ();

    case CCmode:
    case CCGCmode:
    case CCGOCmode:
    case CCNOmode:
    case CCZmode:
      switch (m2)
	{
	default:
	  return VOIDmode;

	case CCmode:
	case CCGCmode:
	case CCGOCmode:
	case CCNOmode:
	case CCZmode:
	  return CCmode;
	}

    case CCFPmode:
    case CCFPUmode:
      /* These are only compatible with themselves, which we already
	 checked above.  */
      return VOIDmode;
    }
}
/* Return true if we should use an FCOMI instruction for this fp comparison.  */

int
ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
{
  enum rtx_code swapped_code = swap_condition (code);
  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
	  || (ix86_fp_comparison_cost (swapped_code)
	      == ix86_fp_comparison_fcomi_cost (swapped_code)));
}
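/* In other words, FCOMI is chosen whenever it is at least as cheap as the
   best method found by ix86_fp_comparison_cost for either operand order;
   the cost helpers below simply count instructions.  */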
10738 /* Swap, force into registers, or otherwise massage the two operands
10739 to a fp comparison. The operands are updated in place; the new
10740 comparison code is returned. */
10742 static enum rtx_code
10743 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
10745 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
10746 rtx op0
= *pop0
, op1
= *pop1
;
10747 enum machine_mode op_mode
= GET_MODE (op0
);
10748 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
10750 /* All of the unordered compare instructions only work on registers.
10751 The same is true of the fcomi compare instructions. The XFmode
10752 compare instructions require registers except when comparing
10753 against zero or when converting operand 1 from fixed point to
10757 && (fpcmp_mode
== CCFPUmode
10758 || (op_mode
== XFmode
10759 && ! (standard_80387_constant_p (op0
) == 1
10760 || standard_80387_constant_p (op1
) == 1)
10761 && GET_CODE (op1
) != FLOAT
)
10762 || ix86_use_fcomi_compare (code
)))
10764 op0
= force_reg (op_mode
, op0
);
10765 op1
= force_reg (op_mode
, op1
);
10769 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10770 things around if they appear profitable, otherwise force op0
10771 into a register. */
10773 if (standard_80387_constant_p (op0
) == 0
10775 && ! (standard_80387_constant_p (op1
) == 0
10779 tmp
= op0
, op0
= op1
, op1
= tmp
;
10780 code
= swap_condition (code
);
10784 op0
= force_reg (op_mode
, op0
);
10786 if (CONSTANT_P (op1
))
10788 int tmp
= standard_80387_constant_p (op1
);
10790 op1
= validize_mem (force_const_mem (op_mode
, op1
));
10794 op1
= force_reg (op_mode
, op1
);
10797 op1
= force_reg (op_mode
, op1
);
10801 /* Try to rearrange the comparison to make it cheaper. */
10802 if (ix86_fp_comparison_cost (code
)
10803 > ix86_fp_comparison_cost (swap_condition (code
))
10804 && (REG_P (op1
) || !no_new_pseudos
))
10807 tmp
= op0
, op0
= op1
, op1
= tmp
;
10808 code
= swap_condition (code
);
10810 op0
= force_reg (op_mode
, op0
);
10818 /* Convert comparison codes we use to represent FP comparison to integer
10819 code that will result in proper branch. Return UNKNOWN if no such code
10823 ix86_fp_compare_code_to_integer (enum rtx_code code
)
10852 /* Split comparison code CODE into comparisons we can do using branch
10853 instructions. BYPASS_CODE is comparison code for branch that will
10854 branch around FIRST_CODE and SECOND_CODE. If some of branches
10855 is not required, set value to UNKNOWN.
10856 We never require more than two branches. */
10859 ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*bypass_code
,
10860 enum rtx_code
*first_code
,
10861 enum rtx_code
*second_code
)
10863 *first_code
= code
;
10864 *bypass_code
= UNKNOWN
;
10865 *second_code
= UNKNOWN
;
10867 /* The fcomi comparison sets flags as follows:
10877 case GT
: /* GTU - CF=0 & ZF=0 */
10878 case GE
: /* GEU - CF=0 */
10879 case ORDERED
: /* PF=0 */
10880 case UNORDERED
: /* PF=1 */
10881 case UNEQ
: /* EQ - ZF=1 */
10882 case UNLT
: /* LTU - CF=1 */
10883 case UNLE
: /* LEU - CF=1 | ZF=1 */
10884 case LTGT
: /* EQ - ZF=0 */
10886 case LT
: /* LTU - CF=1 - fails on unordered */
10887 *first_code
= UNLT
;
10888 *bypass_code
= UNORDERED
;
10890 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
10891 *first_code
= UNLE
;
10892 *bypass_code
= UNORDERED
;
10894 case EQ
: /* EQ - ZF=1 - fails on unordered */
10895 *first_code
= UNEQ
;
10896 *bypass_code
= UNORDERED
;
10898 case NE
: /* NE - ZF=0 - fails on unordered */
10899 *first_code
= LTGT
;
10900 *second_code
= UNORDERED
;
10902 case UNGE
: /* GEU - CF=0 - fails on unordered */
10904 *second_code
= UNORDERED
;
10906 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
10908 *second_code
= UNORDERED
;
10911 gcc_unreachable ();
10913 if (!TARGET_IEEE_FP
)
10915 *second_code
= UNKNOWN
;
10916 *bypass_code
= UNKNOWN
;
10920 /* Return cost of comparison done fcom + arithmetics operations on AX.
10921 All following functions do use number of instructions as a cost metrics.
10922 In future this should be tweaked to compute bytes for optimize_size and
10923 take into account performance of various instructions on various CPUs. */
10925 ix86_fp_comparison_arithmetics_cost (enum rtx_code code
)
10927 if (!TARGET_IEEE_FP
)
10929 /* The cost of code output by ix86_expand_fp_compare. */
10953 gcc_unreachable ();
/* Return cost of comparison done using fcomi operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */

static int
ix86_fp_comparison_fcomi_cost (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return arbitrarily high cost when instruction is not supported - this
     prevents gcc from using it.  */
  if (!TARGET_CMOVE)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
}

/* Return cost of comparison done using sahf operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */

static int
ix86_fp_comparison_sahf_cost (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return arbitrarily high cost when instruction is not preferred - this
     avoids gcc from using it.  */
  if (!TARGET_USE_SAHF && !optimize_size)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
}
/* Compute cost of the comparison done using any method.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */

static int
ix86_fp_comparison_cost (enum rtx_code code)
{
  int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
  int min;

  fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
  sahf_cost = ix86_fp_comparison_sahf_cost (code);

  min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
  if (min > sahf_cost)
    min = sahf_cost;
  if (min > fcomi_cost)
    min = fcomi_cost;
  return min;
}
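/* The returned number is an instruction count: 2 for an fcomi-based test,
   3 for fnstsw+sahf, plus one more when the condition needs a bypass or
   second branch; whichever method is cheapest is the one
   ix86_expand_fp_compare actually emits.  */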
11004 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11007 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
,
11008 rtx
*second_test
, rtx
*bypass_test
)
11010 enum machine_mode fpcmp_mode
, intcmp_mode
;
11012 int cost
= ix86_fp_comparison_cost (code
);
11013 enum rtx_code bypass_code
, first_code
, second_code
;
11015 fpcmp_mode
= ix86_fp_compare_mode (code
);
11016 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
11019 *second_test
= NULL_RTX
;
11021 *bypass_test
= NULL_RTX
;
11023 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11025 /* Do fcomi/sahf based test when profitable. */
11026 if ((bypass_code
== UNKNOWN
|| bypass_test
)
11027 && (second_code
== UNKNOWN
|| second_test
)
11028 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
11032 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
11033 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
11039 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
11040 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
11042 scratch
= gen_reg_rtx (HImode
);
11043 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
11044 emit_insn (gen_x86_sahf_1 (scratch
));
11047 /* The FP codes work out to act like unsigned. */
11048 intcmp_mode
= fpcmp_mode
;
11050 if (bypass_code
!= UNKNOWN
)
11051 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
11052 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11054 if (second_code
!= UNKNOWN
)
11055 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
11056 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11061 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11062 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
11063 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
11065 scratch
= gen_reg_rtx (HImode
);
11066 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
11068 /* In the unordered case, we have to check C2 for NaN's, which
11069 doesn't happen to work out to anything nice combination-wise.
11070 So do some bit twiddling on the value we've got in AH to come
11071 up with an appropriate set of condition codes. */
11073 intcmp_mode
= CCNOmode
;
11078 if (code
== GT
|| !TARGET_IEEE_FP
)
11080 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
11085 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11086 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
11087 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
11088 intcmp_mode
= CCmode
;
11094 if (code
== LT
&& TARGET_IEEE_FP
)
11096 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11097 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
11098 intcmp_mode
= CCmode
;
11103 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
11109 if (code
== GE
|| !TARGET_IEEE_FP
)
11111 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
11116 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11117 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
11124 if (code
== LE
&& TARGET_IEEE_FP
)
11126 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11127 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
11128 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
11129 intcmp_mode
= CCmode
;
11134 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
11140 if (code
== EQ
&& TARGET_IEEE_FP
)
11142 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11143 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
11144 intcmp_mode
= CCmode
;
11149 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
11156 if (code
== NE
&& TARGET_IEEE_FP
)
11158 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11159 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
11165 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
11171 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
11175 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
11180 gcc_unreachable ();
11184 /* Return the test that should be put into the flags user, i.e.
11185 the bcc, scc, or cmov instruction. */
11186 return gen_rtx_fmt_ee (code
, VOIDmode
,
11187 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
static rtx
ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
{
  rtx op0, op1, ret;
  op0 = ix86_compare_op0;
  op1 = ix86_compare_op1;

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  if (ix86_compare_emitted)
    {
      ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
      ix86_compare_emitted = NULL_RTX;
    }
  else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
				  second_test, bypass_test);
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}

/* Return true if the CODE will result in nontrivial jump sequence.  */
bool
ix86_fp_jump_nontrivial_p (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;
  if (!TARGET_CMOVE)
    return true;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return bypass_code != UNKNOWN || second_code != UNKNOWN;
}
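/* A comparison is "nontrivial" here exactly when ix86_fp_comparison_codes
   has to split it into a bypass branch and/or a second branch, i.e. the
   IEEE cases that a single jcc cannot test on its own.  */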
11229 ix86_expand_branch (enum rtx_code code
, rtx label
)
11233 /* If we have emitted a compare insn, go straight to simple.
11234 ix86_expand_compare won't emit anything if ix86_compare_emitted
11236 if (ix86_compare_emitted
)
11239 switch (GET_MODE (ix86_compare_op0
))
11245 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
11246 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11247 gen_rtx_LABEL_REF (VOIDmode
, label
),
11249 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
11258 enum rtx_code bypass_code
, first_code
, second_code
;
11260 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
11261 &ix86_compare_op1
);
11263 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11265 /* Check whether we will use the natural sequence with one jump. If
11266 so, we can expand jump early. Otherwise delay expansion by
11267 creating compound insn to not confuse optimizers. */
11268 if (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
11271 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
11272 gen_rtx_LABEL_REF (VOIDmode
, label
),
11273 pc_rtx
, NULL_RTX
, NULL_RTX
);
11277 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
11278 ix86_compare_op0
, ix86_compare_op1
);
11279 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11280 gen_rtx_LABEL_REF (VOIDmode
, label
),
11282 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
11284 use_fcomi
= ix86_use_fcomi_compare (code
);
11285 vec
= rtvec_alloc (3 + !use_fcomi
);
11286 RTVEC_ELT (vec
, 0) = tmp
;
11288 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
11290 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
11293 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
11295 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
11304 /* Expand DImode branch into multiple compare+branch. */
11306 rtx lo
[2], hi
[2], label2
;
11307 enum rtx_code code1
, code2
, code3
;
11308 enum machine_mode submode
;
11310 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
11312 tmp
= ix86_compare_op0
;
11313 ix86_compare_op0
= ix86_compare_op1
;
11314 ix86_compare_op1
= tmp
;
11315 code
= swap_condition (code
);
11317 if (GET_MODE (ix86_compare_op0
) == DImode
)
11319 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
11320 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
11325 split_ti (&ix86_compare_op0
, 1, lo
+0, hi
+0);
11326 split_ti (&ix86_compare_op1
, 1, lo
+1, hi
+1);
11330 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11331 avoid two branches. This costs one extra insn, so disable when
11332 optimizing for size. */
11334 if ((code
== EQ
|| code
== NE
)
11336 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
11341 if (hi
[1] != const0_rtx
)
11342 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
11343 NULL_RTX
, 0, OPTAB_WIDEN
);
11346 if (lo
[1] != const0_rtx
)
11347 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
11348 NULL_RTX
, 0, OPTAB_WIDEN
);
11350 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
11351 NULL_RTX
, 0, OPTAB_WIDEN
);
11353 ix86_compare_op0
= tmp
;
11354 ix86_compare_op1
= const0_rtx
;
11355 ix86_expand_branch (code
, label
);
11359 /* Otherwise, if we are doing less-than or greater-or-equal-than,
11360 op1 is a constant and the low word is zero, then we can just
11361 examine the high word. */
11363 if (CONST_INT_P (hi
[1]) && lo
[1] == const0_rtx
)
11366 case LT
: case LTU
: case GE
: case GEU
:
11367 ix86_compare_op0
= hi
[0];
11368 ix86_compare_op1
= hi
[1];
11369 ix86_expand_branch (code
, label
);
11375 /* Otherwise, we need two or three jumps. */
11377 label2
= gen_label_rtx ();
11380 code2
= swap_condition (code
);
11381 code3
= unsigned_condition (code
);
11385 case LT
: case GT
: case LTU
: case GTU
:
11388 case LE
: code1
= LT
; code2
= GT
; break;
11389 case GE
: code1
= GT
; code2
= LT
; break;
11390 case LEU
: code1
= LTU
; code2
= GTU
; break;
11391 case GEU
: code1
= GTU
; code2
= LTU
; break;
11393 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
11394 case NE
: code2
= UNKNOWN
; break;
11397 gcc_unreachable ();
11402 * if (hi(a) < hi(b)) goto true;
11403 * if (hi(a) > hi(b)) goto false;
11404 * if (lo(a) < lo(b)) goto true;
11408 ix86_compare_op0
= hi
[0];
11409 ix86_compare_op1
= hi
[1];
11411 if (code1
!= UNKNOWN
)
11412 ix86_expand_branch (code1
, label
);
11413 if (code2
!= UNKNOWN
)
11414 ix86_expand_branch (code2
, label2
);
11416 ix86_compare_op0
= lo
[0];
11417 ix86_compare_op1
= lo
[1];
11418 ix86_expand_branch (code3
, label
);
11420 if (code2
!= UNKNOWN
)
11421 emit_label (label2
);
11426 gcc_unreachable ();
11430 /* Split branch based on floating point condition. */
11432 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
11433 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
11435 rtx second
, bypass
;
11436 rtx label
= NULL_RTX
;
11438 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
11441 if (target2
!= pc_rtx
)
11444 code
= reverse_condition_maybe_unordered (code
);
11449 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
11450 tmp
, &second
, &bypass
);
11452 /* Remove pushed operand from stack. */
11454 ix86_free_from_memory (GET_MODE (pushed
));
11456 if (split_branch_probability
>= 0)
11458 /* Distribute the probabilities across the jumps.
11459 Assume the BYPASS and SECOND to be always test
11461 probability
= split_branch_probability
;
11463 /* Value of 1 is low enough to make no need for probability
11464 to be updated. Later we may run some experiments and see
11465 if unordered values are more frequent in practice. */
11467 bypass_probability
= 1;
11469 second_probability
= 1;
11471 if (bypass
!= NULL_RTX
)
11473 label
= gen_label_rtx ();
11474 i
= emit_jump_insn (gen_rtx_SET
11476 gen_rtx_IF_THEN_ELSE (VOIDmode
,
11478 gen_rtx_LABEL_REF (VOIDmode
,
11481 if (bypass_probability
>= 0)
11483 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11484 GEN_INT (bypass_probability
),
11487 i
= emit_jump_insn (gen_rtx_SET
11489 gen_rtx_IF_THEN_ELSE (VOIDmode
,
11490 condition
, target1
, target2
)));
11491 if (probability
>= 0)
11493 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11494 GEN_INT (probability
),
11496 if (second
!= NULL_RTX
)
11498 i
= emit_jump_insn (gen_rtx_SET
11500 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
11502 if (second_probability
>= 0)
11504 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11505 GEN_INT (second_probability
),
11508 if (label
!= NULL_RTX
)
11509 emit_label (label
);
11513 ix86_expand_setcc (enum rtx_code code
, rtx dest
)
11515 rtx ret
, tmp
, tmpreg
, equiv
;
11516 rtx second_test
, bypass_test
;
11518 if (GET_MODE (ix86_compare_op0
) == (TARGET_64BIT
? TImode
: DImode
))
11519 return 0; /* FAIL */
11521 gcc_assert (GET_MODE (dest
) == QImode
);
11523 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11524 PUT_MODE (ret
, QImode
);
11529 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
11530 if (bypass_test
|| second_test
)
11532 rtx test
= second_test
;
11534 rtx tmp2
= gen_reg_rtx (QImode
);
11537 gcc_assert (!second_test
);
11538 test
= bypass_test
;
11540 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
11542 PUT_MODE (test
, QImode
);
11543 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
11546 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
11548 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
11551 /* Attach a REG_EQUAL note describing the comparison result. */
11552 if (ix86_compare_op0
&& ix86_compare_op1
)
11554 equiv
= simplify_gen_relational (code
, QImode
,
11555 GET_MODE (ix86_compare_op0
),
11556 ix86_compare_op0
, ix86_compare_op1
);
11557 set_unique_reg_note (get_last_insn (), REG_EQUAL
, equiv
);
11560 return 1; /* DONE */
11563 /* Expand comparison setting or clearing carry flag. Return true when
11564 successful and set pop for the operation. */
11566 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
11568 enum machine_mode mode
=
11569 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
11571 /* Do not handle DImode compares that go through special path. Also we can't
11572 deal with FP compares yet. This is possible to add. */
11573 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
11575 if (FLOAT_MODE_P (mode
))
11577 rtx second_test
= NULL
, bypass_test
= NULL
;
11578 rtx compare_op
, compare_seq
;
11580 /* Shortcut: following common codes never translate into carry flag compares. */
11581 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
11582 || code
== ORDERED
|| code
== UNORDERED
)
11585 /* These comparisons require zero flag; swap operands so they won't. */
11586 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
11587 && !TARGET_IEEE_FP
)
11592 code
= swap_condition (code
);
11595 /* Try to expand the comparison and verify that we end up with carry flag
11596 based comparison. This is fails to be true only when we decide to expand
11597 comparison using arithmetic that is not too common scenario. */
11599 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
11600 &second_test
, &bypass_test
);
11601 compare_seq
= get_insns ();
11604 if (second_test
|| bypass_test
)
11606 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
11607 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
11608 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
11610 code
= GET_CODE (compare_op
);
11611 if (code
!= LTU
&& code
!= GEU
)
11613 emit_insn (compare_seq
);
11617 if (!INTEGRAL_MODE_P (mode
))
11625 /* Convert a==0 into (unsigned)a<1. */
11628 if (op1
!= const0_rtx
)
11631 code
= (code
== EQ
? LTU
: GEU
);
11634 /* Convert a>b into b<a or a>=b-1. */
11637 if (CONST_INT_P (op1
))
11639 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
11640 /* Bail out on overflow. We still can swap operands but that
11641 would force loading of the constant into register. */
11642 if (op1
== const0_rtx
11643 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
11645 code
= (code
== GTU
? GEU
: LTU
);
11652 code
= (code
== GTU
? LTU
: GEU
);
11656 /* Convert a>=0 into (unsigned)a<0x80000000. */
11659 if (mode
== DImode
|| op1
!= const0_rtx
)
11661 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
11662 code
= (code
== LT
? GEU
: LTU
);
11666 if (mode
== DImode
|| op1
!= constm1_rtx
)
11668 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
11669 code
= (code
== LE
? GEU
: LTU
);
11675 /* Swapping operands may cause constant to appear as first operand. */
11676 if (!nonimmediate_operand (op0
, VOIDmode
))
11678 if (no_new_pseudos
)
11680 op0
= force_reg (mode
, op0
);
11682 ix86_compare_op0
= op0
;
11683 ix86_compare_op1
= op1
;
11684 *pop
= ix86_expand_compare (code
, NULL
, NULL
);
11685 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
11690 ix86_expand_int_movcc (rtx operands
[])
11692 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
11693 rtx compare_seq
, compare_op
;
11694 rtx second_test
, bypass_test
;
11695 enum machine_mode mode
= GET_MODE (operands
[0]);
11696 bool sign_bit_compare_p
= false;;
11699 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11700 compare_seq
= get_insns ();
11703 compare_code
= GET_CODE (compare_op
);
11705 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
11706 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
11707 sign_bit_compare_p
= true;
11709 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11710 HImode insns, we'd be swallowed in word prefix ops. */
11712 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
11713 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
11714 && CONST_INT_P (operands
[2])
11715 && CONST_INT_P (operands
[3]))
11717 rtx out
= operands
[0];
11718 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
11719 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
11720 HOST_WIDE_INT diff
;
11723 /* Sign bit compares are better done using shifts than we do by using
11725 if (sign_bit_compare_p
11726 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
11727 ix86_compare_op1
, &compare_op
))
11729 /* Detect overlap between destination and compare sources. */
11732 if (!sign_bit_compare_p
)
11734 bool fpcmp
= false;
11736 compare_code
= GET_CODE (compare_op
);
11738 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
11739 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
11742 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
11745 /* To simplify rest of code, restrict to the GEU case. */
11746 if (compare_code
== LTU
)
11748 HOST_WIDE_INT tmp
= ct
;
11751 compare_code
= reverse_condition (compare_code
);
11752 code
= reverse_condition (code
);
11757 PUT_CODE (compare_op
,
11758 reverse_condition_maybe_unordered
11759 (GET_CODE (compare_op
)));
11761 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
11765 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
11766 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
11767 tmp
= gen_reg_rtx (mode
);
11769 if (mode
== DImode
)
11770 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
11772 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
11776 if (code
== GT
|| code
== GE
)
11777 code
= reverse_condition (code
);
11780 HOST_WIDE_INT tmp
= ct
;
11785 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
11786 ix86_compare_op1
, VOIDmode
, 0, -1);
11799 tmp
= expand_simple_binop (mode
, PLUS
,
11801 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11812 tmp
= expand_simple_binop (mode
, IOR
,
11814 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11816 else if (diff
== -1 && ct
)
11826 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
11828 tmp
= expand_simple_binop (mode
, PLUS
,
11829 copy_rtx (tmp
), GEN_INT (cf
),
11830 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11838 * andl cf - ct, dest
11848 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
11851 tmp
= expand_simple_binop (mode
, AND
,
11853 gen_int_mode (cf
- ct
, mode
),
11854 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11856 tmp
= expand_simple_binop (mode
, PLUS
,
11857 copy_rtx (tmp
), GEN_INT (ct
),
11858 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11861 if (!rtx_equal_p (tmp
, out
))
11862 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
11864 return 1; /* DONE */
11870 tmp
= ct
, ct
= cf
, cf
= tmp
;
11872 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
11874 /* We may be reversing unordered compare to normal compare, that
11875 is not valid in general (we may convert non-trapping condition
11876 to trapping one), however on i386 we currently emit all
11877 comparisons unordered. */
11878 compare_code
= reverse_condition_maybe_unordered (compare_code
);
11879 code
= reverse_condition_maybe_unordered (code
);
11883 compare_code
= reverse_condition (compare_code
);
11884 code
= reverse_condition (code
);
11888 compare_code
= UNKNOWN
;
11889 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
11890 && CONST_INT_P (ix86_compare_op1
))
11892 if (ix86_compare_op1
== const0_rtx
11893 && (code
== LT
|| code
== GE
))
11894 compare_code
= code
;
11895 else if (ix86_compare_op1
== constm1_rtx
)
11899 else if (code
== GT
)
11904 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11905 if (compare_code
!= UNKNOWN
11906 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
11907 && (cf
== -1 || ct
== -1))
11909 /* If lea code below could be used, only optimize
11910 if it results in a 2 insn sequence. */
11912 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
11913 || diff
== 3 || diff
== 5 || diff
== 9)
11914 || (compare_code
== LT
&& ct
== -1)
11915 || (compare_code
== GE
&& cf
== -1))
11918 * notl op1 (if necessary)
11926 code
= reverse_condition (code
);
11929 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11930 ix86_compare_op1
, VOIDmode
, 0, -1);
11932 out
= expand_simple_binop (mode
, IOR
,
11934 out
, 1, OPTAB_DIRECT
);
11935 if (out
!= operands
[0])
11936 emit_move_insn (operands
[0], out
);
11938 return 1; /* DONE */
11943 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
11944 || diff
== 3 || diff
== 5 || diff
== 9)
11945 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
11947 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
11953 * lea cf(dest*(ct-cf)),dest
11957 * This also catches the degenerate setcc-only case.
11963 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11964 ix86_compare_op1
, VOIDmode
, 0, 1);
11967 /* On x86_64 the lea instruction operates on Pmode, so we need
11968 to get arithmetics done in proper mode to match. */
11970 tmp
= copy_rtx (out
);
11974 out1
= copy_rtx (out
);
11975 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
11979 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
11985 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
11988 if (!rtx_equal_p (tmp
, out
))
11991 out
= force_operand (tmp
, copy_rtx (out
));
11993 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
11995 if (!rtx_equal_p (out
, operands
[0]))
11996 emit_move_insn (operands
[0], copy_rtx (out
));
11998 return 1; /* DONE */
12002 * General case: Jumpful:
12003 * xorl dest,dest cmpl op1, op2
12004 * cmpl op1, op2 movl ct, dest
12005 * setcc dest jcc 1f
12006 * decl dest movl cf, dest
12007 * andl (cf-ct),dest 1:
12010 * Size 20. Size 14.
12012 * This is reasonably steep, but branch mispredict costs are
12013 * high on modern cpus, so consider failing only if optimizing
12017 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
12018 && BRANCH_COST
>= 2)
12024 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
12025 /* We may be reversing unordered compare to normal compare,
12026 that is not valid in general (we may convert non-trapping
12027 condition to trapping one), however on i386 we currently
12028 emit all comparisons unordered. */
12029 code
= reverse_condition_maybe_unordered (code
);
12032 code
= reverse_condition (code
);
12033 if (compare_code
!= UNKNOWN
)
12034 compare_code
= reverse_condition (compare_code
);
12038 if (compare_code
!= UNKNOWN
)
12040 /* notl op1 (if needed)
12045 For x < 0 (resp. x <= -1) there will be no notl,
12046 so if possible swap the constants to get rid of the
12048 True/false will be -1/0 while code below (store flag
12049 followed by decrement) is 0/-1, so the constants need
12050 to be exchanged once more. */
12052 if (compare_code
== GE
|| !cf
)
12054 code
= reverse_condition (code
);
12059 HOST_WIDE_INT tmp
= cf
;
12064 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12065 ix86_compare_op1
, VOIDmode
, 0, -1);
12069 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12070 ix86_compare_op1
, VOIDmode
, 0, 1);
12072 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
12073 copy_rtx (out
), 1, OPTAB_DIRECT
);
12076 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
12077 gen_int_mode (cf
- ct
, mode
),
12078 copy_rtx (out
), 1, OPTAB_DIRECT
);
12080 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
12081 copy_rtx (out
), 1, OPTAB_DIRECT
);
12082 if (!rtx_equal_p (out
, operands
[0]))
12083 emit_move_insn (operands
[0], copy_rtx (out
));
12085 return 1; /* DONE */
12089 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
12091 /* Try a few things more with specific constants and a variable. */
12094 rtx var
, orig_out
, out
, tmp
;
12096 if (BRANCH_COST
<= 2)
12097 return 0; /* FAIL */
12099 /* If one of the two operands is an interesting constant, load a
12100 constant with the above and mask it in with a logical operation. */
12102 if (CONST_INT_P (operands
[2]))
12105 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
12106 operands
[3] = constm1_rtx
, op
= and_optab
;
12107 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
12108 operands
[3] = const0_rtx
, op
= ior_optab
;
12110 return 0; /* FAIL */
12112 else if (CONST_INT_P (operands
[3]))
12115 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
12116 operands
[2] = constm1_rtx
, op
= and_optab
;
12117 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
12118 operands
[2] = const0_rtx
, op
= ior_optab
;
12120 return 0; /* FAIL */
12123 return 0; /* FAIL */
12125 orig_out
= operands
[0];
12126 tmp
= gen_reg_rtx (mode
);
12129 /* Recurse to get the constant loaded. */
12130 if (ix86_expand_int_movcc (operands
) == 0)
12131 return 0; /* FAIL */
12133 /* Mask in the interesting variable. */
12134 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
12136 if (!rtx_equal_p (out
, orig_out
))
12137 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
12139 return 1; /* DONE */
12143 * For comparison with above,
12153 if (! nonimmediate_operand (operands
[2], mode
))
12154 operands
[2] = force_reg (mode
, operands
[2]);
12155 if (! nonimmediate_operand (operands
[3], mode
))
12156 operands
[3] = force_reg (mode
, operands
[3]);
12158 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
12160 rtx tmp
= gen_reg_rtx (mode
);
12161 emit_move_insn (tmp
, operands
[3]);
12164 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
12166 rtx tmp
= gen_reg_rtx (mode
);
12167 emit_move_insn (tmp
, operands
[2]);
12171 if (! register_operand (operands
[2], VOIDmode
)
12173 || ! register_operand (operands
[3], VOIDmode
)))
12174 operands
[2] = force_reg (mode
, operands
[2]);
12177 && ! register_operand (operands
[3], VOIDmode
))
12178 operands
[3] = force_reg (mode
, operands
[3]);
12180 emit_insn (compare_seq
);
12181 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12182 gen_rtx_IF_THEN_ELSE (mode
,
12183 compare_op
, operands
[2],
12186 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
12187 gen_rtx_IF_THEN_ELSE (mode
,
12189 copy_rtx (operands
[3]),
12190 copy_rtx (operands
[0]))));
12192 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
12193 gen_rtx_IF_THEN_ELSE (mode
,
12195 copy_rtx (operands
[2]),
12196 copy_rtx (operands
[0]))));
12198 return 1; /* DONE */
12201 /* Swap, force into registers, or otherwise massage the two operands
12202 to an sse comparison with a mask result. Thus we differ a bit from
12203 ix86_prepare_fp_compare_args which expects to produce a flags result.
12205 The DEST operand exists to help determine whether to commute commutative
12206 operators. The POP0/POP1 operands are updated in place. The new
12207 comparison code is returned, or UNKNOWN if not implementable. */
12209 static enum rtx_code
12210 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
12211 rtx
*pop0
, rtx
*pop1
)
12219 /* We have no LTGT as an operator. We could implement it with
12220 NE & ORDERED, but this requires an extra temporary. It's
12221 not clear that it's worth it. */
12228 /* These are supported directly. */
12235 /* For commutative operators, try to canonicalize the destination
12236 operand to be first in the comparison - this helps reload to
12237 avoid extra moves. */
12238 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
12246 /* These are not supported directly. Swap the comparison operands
12247 to transform into something that is supported. */
12251 code
= swap_condition (code
);
12255 gcc_unreachable ();
/* Detect conditional moves that exactly match min/max operational
   semantics.  Note that this is IEEE safe, as long as we don't
   interchange the operands.

   Returns FALSE if this conditional move doesn't match a MIN/MAX,
   and TRUE if the operation is successful and instructions are emitted.  */

static bool
ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
			   rtx cmp_op1, rtx if_true, rtx if_false)
{
  enum machine_mode mode;
  bool is_min;
  rtx tmp;

  if (code == LT)
    ;
  else if (code == UNGE)
    {
      tmp = if_true;
      if_true = if_false;
      if_false = tmp;
    }
  else
    return false;

  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
    is_min = true;
  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
    is_min = false;
  else
    return false;

  mode = GET_MODE (dest);

  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
     but MODE may be a vector mode and thus not appropriate.  */
  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
    {
      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
      rtvec v;

      if_true = force_reg (mode, if_true);
      v = gen_rtvec (2, if_true, if_false);
      tmp = gen_rtx_UNSPEC (mode, v, u);
    }
  else
    {
      code = is_min ? SMIN : SMAX;
      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
    }

  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
  return true;
}
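/* When -ffinite-math-only and -funsafe-math-optimizations are not both
   in effect, the UNSPEC_IEEE_MIN/MAX form is used so that the operand
   order (and hence the NaN and signed-zero behaviour of minss/maxss) is
   preserved exactly; otherwise a plain SMIN/SMAX rtx is good enough.  */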
/* Expand an sse vector comparison.  Return the register with the result.  */

static rtx
ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
		     rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  rtx x;

  cmp_op0 = force_reg (mode, cmp_op0);
  if (!nonimmediate_operand (cmp_op1, mode))
    cmp_op1 = force_reg (mode, cmp_op1);

  if (optimize
      || reg_overlap_mentioned_p (dest, op_true)
      || reg_overlap_mentioned_p (dest, op_false))
    dest = gen_reg_rtx (mode);

  x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  return dest;
}
/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
   operations.  This is used for both scalar and vector conditional moves.  */

static void
ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  rtx t2, t3, x;

  if (op_false == CONST0_RTX (mode))
    {
      op_true = force_reg (mode, op_true);
      x = gen_rtx_AND (mode, cmp, op_true);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (op_true == CONST0_RTX (mode))
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      op_true = force_reg (mode, op_true);
      op_false = force_reg (mode, op_false);

      t2 = gen_reg_rtx (mode);
      if (optimize)
	t3 = gen_reg_rtx (mode);
      else
	t3 = dest;

      x = gen_rtx_AND (mode, op_true, cmp);
      emit_insn (gen_rtx_SET (VOIDmode, t2, x));

      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, t3, x));

      x = gen_rtx_IOR (mode, t3, t2);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}
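/* The general case above is the classic SSE select idiom:
   dest = (cmp & op_true) | (~cmp & op_false), where CMP is the all-ones
   or all-zeros per-element mask produced by ix86_expand_sse_cmp.  */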
12386 /* Expand a floating-point conditional move. Return true if successful. */
12389 ix86_expand_fp_movcc (rtx operands
[])
12391 enum machine_mode mode
= GET_MODE (operands
[0]);
12392 enum rtx_code code
= GET_CODE (operands
[1]);
12393 rtx tmp
, compare_op
, second_test
, bypass_test
;
12395 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
12397 enum machine_mode cmode
;
12399 /* Since we've no cmove for sse registers, don't force bad register
12400 allocation just to gain access to it. Deny movcc when the
12401 comparison mode doesn't match the move mode. */
12402 cmode
= GET_MODE (ix86_compare_op0
);
12403 if (cmode
== VOIDmode
)
12404 cmode
= GET_MODE (ix86_compare_op1
);
12408 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
12410 &ix86_compare_op1
);
12411 if (code
== UNKNOWN
)
12414 if (ix86_expand_sse_fp_minmax (operands
[0], code
, ix86_compare_op0
,
12415 ix86_compare_op1
, operands
[2],
12419 tmp
= ix86_expand_sse_cmp (operands
[0], code
, ix86_compare_op0
,
12420 ix86_compare_op1
, operands
[2], operands
[3]);
12421 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
12425 /* The floating point conditional move instructions don't directly
12426 support conditions resulting from a signed integer comparison. */
12428 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12430 /* The floating point conditional move instructions don't directly
12431 support signed integer comparisons. */
12433 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
12435 gcc_assert (!second_test
&& !bypass_test
);
12436 tmp
= gen_reg_rtx (QImode
);
12437 ix86_expand_setcc (code
, tmp
);
12439 ix86_compare_op0
= tmp
;
12440 ix86_compare_op1
= const0_rtx
;
12441 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12443 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
12445 tmp
= gen_reg_rtx (mode
);
12446 emit_move_insn (tmp
, operands
[3]);
12449 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
12451 tmp
= gen_reg_rtx (mode
);
12452 emit_move_insn (tmp
, operands
[2]);
12456 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12457 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
12458 operands
[2], operands
[3])));
12460 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12461 gen_rtx_IF_THEN_ELSE (mode
, bypass_test
,
12462 operands
[3], operands
[0])));
12464 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12465 gen_rtx_IF_THEN_ELSE (mode
, second_test
,
12466 operands
[2], operands
[0])));
/* Expand a floating-point vector conditional move; a vcond operation
   rather than a movcc operation.  */

bool
ix86_expand_fp_vcond (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[3]);
  rtx cmp;

  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
					   &operands[4], &operands[5]);
  if (code == UNKNOWN)
    return false;

  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
				 operands[5], operands[1], operands[2]))
    return true;

  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
			     operands[1], operands[2]);
  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
  return true;
}
12495 /* Expand a signed integral vector conditional move. */
12498 ix86_expand_int_vcond (rtx operands
[])
12500 enum machine_mode mode
= GET_MODE (operands
[0]);
12501 enum rtx_code code
= GET_CODE (operands
[3]);
12502 bool negate
= false;
12505 cop0
= operands
[4];
12506 cop1
= operands
[5];
12508 /* Canonicalize the comparison to EQ, GT, GTU. */
12519 code
= reverse_condition (code
);
12525 code
= reverse_condition (code
);
12531 code
= swap_condition (code
);
12532 x
= cop0
, cop0
= cop1
, cop1
= x
;
12536 gcc_unreachable ();
12539 /* Unsigned parallel compare is not supported by the hardware. Play some
12540 tricks to turn this into a signed comparison against 0. */
12543 cop0
= force_reg (mode
, cop0
);
12551 /* Perform a parallel modulo subtraction. */
12552 t1
= gen_reg_rtx (mode
);
12553 emit_insn (gen_subv4si3 (t1
, cop0
, cop1
));
12555 /* Extract the original sign bit of op0. */
12556 mask
= GEN_INT (-0x80000000);
12557 mask
= gen_rtx_CONST_VECTOR (mode
,
12558 gen_rtvec (4, mask
, mask
, mask
, mask
));
12559 mask
= force_reg (mode
, mask
);
12560 t2
= gen_reg_rtx (mode
);
12561 emit_insn (gen_andv4si3 (t2
, cop0
, mask
));
12563 /* XOR it back into the result of the subtraction. This results
12564 in the sign bit set iff we saw unsigned underflow. */
12565 x
= gen_reg_rtx (mode
);
12566 emit_insn (gen_xorv4si3 (x
, t1
, t2
));
12574 /* Perform a parallel unsigned saturating subtraction. */
12575 x
= gen_reg_rtx (mode
);
12576 emit_insn (gen_rtx_SET (VOIDmode
, x
,
12577 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
12584 gcc_unreachable ();
12588 cop1
= CONST0_RTX (mode
);
12591 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
12592 operands
[1+negate
], operands
[2-negate
]);
12594 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
12595 operands
[2-negate
]);
/* Unpack OP[1] into the next wider integer vector type.  UNSIGNED_P is
   true if we should do zero extension, else sign extension.  HIGH_P is
   true if we want the N/2 high elements, else the low elements.  */

void
ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
{
  enum machine_mode imode = GET_MODE (operands[1]);
  rtx (*unpack)(rtx, rtx, rtx);
  rtx se, dest;

  switch (imode)
    {
    case V16QImode:
      if (high_p)
	unpack = gen_vec_interleave_highv16qi;
      else
	unpack = gen_vec_interleave_lowv16qi;
      break;
    case V8HImode:
      if (high_p)
	unpack = gen_vec_interleave_highv8hi;
      else
	unpack = gen_vec_interleave_lowv8hi;
      break;
    case V4SImode:
      if (high_p)
	unpack = gen_vec_interleave_highv4si;
      else
	unpack = gen_vec_interleave_lowv4si;
      break;
    default:
      gcc_unreachable ();
    }

  dest = gen_lowpart (imode, operands[0]);

  if (unsigned_p)
    se = force_reg (imode, CONST0_RTX (imode));
  else
    se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
			      operands[1], pc_rtx, pc_rtx);

  emit_insn (unpack (dest, operands[1], se));
}
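/* For the signed case the "extension" operand SE is a mask built by
   comparing 0 > operands[1] element-wise, i.e. all-ones lanes exactly
   where the source element is negative, so interleaving with it supplies
   the sign-extended high half of each widened element.  */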
12645 /* Expand conditional increment or decrement using adb/sbb instructions.
12646 The default case using setcc followed by the conditional move can be
12647 done by generic code. */
12649 ix86_expand_int_addcc (rtx operands
[])
12651 enum rtx_code code
= GET_CODE (operands
[1]);
12653 rtx val
= const0_rtx
;
12654 bool fpcmp
= false;
12655 enum machine_mode mode
= GET_MODE (operands
[0]);
12657 if (operands
[3] != const1_rtx
12658 && operands
[3] != constm1_rtx
)
12660 if (!ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
12661 ix86_compare_op1
, &compare_op
))
12663 code
= GET_CODE (compare_op
);
12665 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
12666 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
12669 code
= ix86_fp_compare_code_to_integer (code
);
12676 PUT_CODE (compare_op
,
12677 reverse_condition_maybe_unordered
12678 (GET_CODE (compare_op
)));
12680 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
12682 PUT_MODE (compare_op
, mode
);
12684 /* Construct either adc or sbb insn. */
12685 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
12687 switch (GET_MODE (operands
[0]))
12690 emit_insn (gen_subqi3_carry (operands
[0], operands
[2], val
, compare_op
));
12693 emit_insn (gen_subhi3_carry (operands
[0], operands
[2], val
, compare_op
));
12696 emit_insn (gen_subsi3_carry (operands
[0], operands
[2], val
, compare_op
));
12699 emit_insn (gen_subdi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
12702 gcc_unreachable ();
12707 switch (GET_MODE (operands
[0]))
12710 emit_insn (gen_addqi3_carry (operands
[0], operands
[2], val
, compare_op
));
12713 emit_insn (gen_addhi3_carry (operands
[0], operands
[2], val
, compare_op
));
12716 emit_insn (gen_addsi3_carry (operands
[0], operands
[2], val
, compare_op
));
12719 emit_insn (gen_adddi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
12722 gcc_unreachable ();
12725 return 1; /* DONE */
12729 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12730 works for floating pointer parameters and nonoffsetable memories.
12731 For pushes, it returns just stack offsets; the values will be saved
12732 in the right order. Maximally three parts are generated. */
12735 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
12740 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
12742 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
12744 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
12745 gcc_assert (size
>= 2 && size
<= 3);
12747 /* Optimize constant pool reference to immediates. This is used by fp
12748 moves, that force all constants to memory to allow combining. */
12749 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
12751 rtx tmp
= maybe_get_pool_constant (operand
);
12756 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
12758 /* The only non-offsetable memories we handle are pushes. */
12759 int ok
= push_operand (operand
, VOIDmode
);
12763 operand
= copy_rtx (operand
);
12764 PUT_MODE (operand
, Pmode
);
12765 parts
[0] = parts
[1] = parts
[2] = operand
;
12769 if (GET_CODE (operand
) == CONST_VECTOR
)
12771 enum machine_mode imode
= int_mode_for_mode (mode
);
12772 /* Caution: if we looked through a constant pool memory above,
12773 the operand may actually have a different mode now. That's
12774 ok, since we want to pun this all the way back to an integer. */
12775 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
12776 gcc_assert (operand
!= NULL
);
12782 if (mode
== DImode
)
12783 split_di (&operand
, 1, &parts
[0], &parts
[1]);
12786 if (REG_P (operand
))
12788 gcc_assert (reload_completed
);
12789 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
12790 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
12792 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
12794 else if (offsettable_memref_p (operand
))
12796 operand
= adjust_address (operand
, SImode
, 0);
12797 parts
[0] = operand
;
12798 parts
[1] = adjust_address (operand
, SImode
, 4);
12800 parts
[2] = adjust_address (operand
, SImode
, 8);
12802 else if (GET_CODE (operand
) == CONST_DOUBLE
)
12807 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
12811 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
12812 parts
[2] = gen_int_mode (l
[2], SImode
);
12815 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
12818 gcc_unreachable ();
12820 parts
[1] = gen_int_mode (l
[1], SImode
);
12821 parts
[0] = gen_int_mode (l
[0], SImode
);
12824 gcc_unreachable ();
12829 if (mode
== TImode
)
12830 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
12831 if (mode
== XFmode
|| mode
== TFmode
)
12833 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
12834 if (REG_P (operand
))
12836 gcc_assert (reload_completed
);
12837 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
12838 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
12840 else if (offsettable_memref_p (operand
))
12842 operand
= adjust_address (operand
, DImode
, 0);
12843 parts
[0] = operand
;
12844 parts
[1] = adjust_address (operand
, upper_mode
, 8);
12846 else if (GET_CODE (operand
) == CONST_DOUBLE
)
12851 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
12852 real_to_target (l
, &r
, mode
);
12854 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12855 if (HOST_BITS_PER_WIDE_INT
>= 64)
12858 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
12859 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
12862 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
12864 if (upper_mode
== SImode
)
12865 parts
[1] = gen_int_mode (l
[2], SImode
);
12866 else if (HOST_BITS_PER_WIDE_INT
>= 64)
12869 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
12870 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
12873 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
12876 gcc_unreachable ();
12883 /* Emit insns to perform a move or push of DI, DF, and XF values.
12884 Return false when normal moves are needed; true when all required
12885 insns have been emitted. Operands 2-4 contain the input values
12886 int the correct order; operands 5-7 contain the output values. */
12889 ix86_split_long_move (rtx operands
[])
12894 int collisions
= 0;
12895 enum machine_mode mode
= GET_MODE (operands
[0]);
12897 /* The DFmode expanders may ask us to move double.
12898 For 64bit target this is single move. By hiding the fact
12899 here we simplify i386.md splitters. */
12900 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
12902 /* Optimize constant pool reference to immediates. This is used by
12903 fp moves, that force all constants to memory to allow combining. */
12905 if (MEM_P (operands
[1])
12906 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
12907 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
12908 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
12909 if (push_operand (operands
[0], VOIDmode
))
12911 operands
[0] = copy_rtx (operands
[0]);
12912 PUT_MODE (operands
[0], Pmode
);
12915 operands
[0] = gen_lowpart (DImode
, operands
[0]);
12916 operands
[1] = gen_lowpart (DImode
, operands
[1]);
12917 emit_move_insn (operands
[0], operands
[1]);
12921 /* The only non-offsettable memory we handle is push. */
12922 if (push_operand (operands
[0], VOIDmode
))
12925 gcc_assert (!MEM_P (operands
[0])
12926 || offsettable_memref_p (operands
[0]));
12928 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
12929 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
12931 /* When emitting push, take care for source operands on the stack. */
12932 if (push
&& MEM_P (operands
[1])
12933 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
12936 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
12937 XEXP (part
[1][2], 0));
12938 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
12939 XEXP (part
[1][1], 0));
12942 /* We need to do copy in the right order in case an address register
12943 of the source overlaps the destination. */
12944 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
12946 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
12948 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
12951 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
12954 /* Collision in the middle part can be handled by reordering. */
12955 if (collisions
== 1 && nparts
== 3
12956 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
12959 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
12960 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
12963 /* If there are more collisions, we can't handle it by reordering.
12964 Do an lea to the last part and use only one colliding move. */
12965 else if (collisions
> 1)
12971 base
= part
[0][nparts
- 1];
12973 /* Handle the case when the last part isn't valid for lea.
12974 Happens in 64-bit mode storing the 12-byte XFmode. */
12975 if (GET_MODE (base
) != Pmode
)
12976 base
= gen_rtx_REG (Pmode
, REGNO (base
));
12978 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
12979 part
[1][0] = replace_equiv_address (part
[1][0], base
);
12980 part
[1][1] = replace_equiv_address (part
[1][1],
12981 plus_constant (base
, UNITS_PER_WORD
));
12983 part
[1][2] = replace_equiv_address (part
[1][2],
12984 plus_constant (base
, 8));
12994 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
12995 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, GEN_INT (-4)));
12996 emit_move_insn (part
[0][2], part
[1][2]);
      /* In 64-bit mode we don't have a 32-bit push available.  In case this
         is a register, it is OK - we will just use the larger counterpart.
         We also retype memory - these come from an attempt to avoid the REX
         prefix on moving the second half of a TFmode value.  */
13005 if (GET_MODE (part
[1][1]) == SImode
)
13007 switch (GET_CODE (part
[1][1]))
13010 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
13014 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
13018 gcc_unreachable ();
13021 if (GET_MODE (part
[1][0]) == SImode
)
13022 part
[1][0] = part
[1][1];
13025 emit_move_insn (part
[0][1], part
[1][1]);
13026 emit_move_insn (part
[0][0], part
[1][0]);
13030 /* Choose correct order to not overwrite the source before it is copied. */
13031 if ((REG_P (part
[0][0])
13032 && REG_P (part
[1][1])
13033 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
13035 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
13037 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
13041 operands
[2] = part
[0][2];
13042 operands
[3] = part
[0][1];
13043 operands
[4] = part
[0][0];
13044 operands
[5] = part
[1][2];
13045 operands
[6] = part
[1][1];
13046 operands
[7] = part
[1][0];
13050 operands
[2] = part
[0][1];
13051 operands
[3] = part
[0][0];
13052 operands
[5] = part
[1][1];
13053 operands
[6] = part
[1][0];
13060 operands
[2] = part
[0][0];
13061 operands
[3] = part
[0][1];
13062 operands
[4] = part
[0][2];
13063 operands
[5] = part
[1][0];
13064 operands
[6] = part
[1][1];
13065 operands
[7] = part
[1][2];
13069 operands
[2] = part
[0][0];
13070 operands
[3] = part
[0][1];
13071 operands
[5] = part
[1][0];
13072 operands
[6] = part
[1][1];
13076 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
13079 if (CONST_INT_P (operands
[5])
13080 && operands
[5] != const0_rtx
13081 && REG_P (operands
[2]))
13083 if (CONST_INT_P (operands
[6])
13084 && INTVAL (operands
[6]) == INTVAL (operands
[5]))
13085 operands
[6] = operands
[2];
13088 && CONST_INT_P (operands
[7])
13089 && INTVAL (operands
[7]) == INTVAL (operands
[5]))
13090 operands
[7] = operands
[2];
13094 && CONST_INT_P (operands
[6])
13095 && operands
[6] != const0_rtx
13096 && REG_P (operands
[3])
13097 && CONST_INT_P (operands
[7])
13098 && INTVAL (operands
[7]) == INTVAL (operands
[6]))
13099 operands
[7] = operands
[3];
13102 emit_move_insn (operands
[2], operands
[5]);
13103 emit_move_insn (operands
[3], operands
[6]);
13105 emit_move_insn (operands
[4], operands
[7]);
13110 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13111 left shift by a constant, either using a single shift or
13112 a sequence of add instructions. */
13115 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
13119 emit_insn ((mode
== DImode
13121 : gen_adddi3
) (operand
, operand
, operand
));
13123 else if (!optimize_size
13124 && count
* ix86_cost
->add
<= ix86_cost
->shift_const
)
13127 for (i
=0; i
<count
; i
++)
13129 emit_insn ((mode
== DImode
13131 : gen_adddi3
) (operand
, operand
, operand
));
13135 emit_insn ((mode
== DImode
13137 : gen_ashldi3
) (operand
, operand
, GEN_INT (count
)));
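/* Illustrative example only (not compiler code): with a cheap add, a
   constant left shift by 2 can come out as the equivalent add sequence

     operand += operand;    now operand == original << 1
     operand += operand;    now operand == original << 2

   which the function above prefers whenever we are not optimizing for
   size and count * ix86_cost->add <= ix86_cost->shift_const.  */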
13141 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13143 rtx low
[2], high
[2];
13145 const int single_width
= mode
== DImode
? 32 : 64;
13147 if (CONST_INT_P (operands
[2]))
13149 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13150 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13152 if (count
>= single_width
)
13154 emit_move_insn (high
[0], low
[1]);
13155 emit_move_insn (low
[0], const0_rtx
);
13157 if (count
> single_width
)
13158 ix86_expand_ashl_const (high
[0], count
- single_width
, mode
);
13162 if (!rtx_equal_p (operands
[0], operands
[1]))
13163 emit_move_insn (operands
[0], operands
[1]);
13164 emit_insn ((mode
== DImode
13166 : gen_x86_64_shld
) (high
[0], low
[0], GEN_INT (count
)));
13167 ix86_expand_ashl_const (low
[0], count
, mode
);
13172 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13174 if (operands
[1] == const1_rtx
)
          /* Assuming we've chosen QImode-capable registers, then 1 << N
             can be done with two 32/64-bit shifts, no branches, no cmoves.  */
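          /* Illustrative sketch (plain C, names are assumptions only, not
             part of the expander): for a double-word width of 2 * W bits
             and a shift count n, the emitted sequence computes

               low  = ((n & W) == 0);    set from the Z flag with setcc
               high = ((n & W) != 0);
               low  <<= n;               hardware masks the count to n % W
               high <<= n;

             where W is 32 for DImode operands and 64 for TImode ones.  */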
13178 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
13180 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
13182 ix86_expand_clear (low
[0]);
13183 ix86_expand_clear (high
[0]);
13184 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (single_width
)));
13186 d
= gen_lowpart (QImode
, low
[0]);
13187 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
13188 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
13189 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
13191 d
= gen_lowpart (QImode
, high
[0]);
13192 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
13193 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
13194 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
13197 /* Otherwise, we can get the same results by manually performing
13198 a bit extract operation on bit 5/6, and then performing the two
13199 shifts. The two methods of getting 0/1 into low/high are exactly
13200 the same size. Avoiding the shift in the bit extract case helps
13201 pentium4 a bit; no one else seems to care much either way. */
13206 if (TARGET_PARTIAL_REG_STALL
&& !optimize_size
)
13207 x
= gen_rtx_ZERO_EXTEND (mode
== DImode
? SImode
: DImode
, operands
[2]);
13209 x
= gen_lowpart (mode
== DImode
? SImode
: DImode
, operands
[2]);
13210 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
13212 emit_insn ((mode
== DImode
13214 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (mode
== DImode
? 5 : 6)));
13215 emit_insn ((mode
== DImode
13217 : gen_anddi3
) (high
[0], high
[0], GEN_INT (1)));
13218 emit_move_insn (low
[0], high
[0]);
13219 emit_insn ((mode
== DImode
13221 : gen_xordi3
) (low
[0], low
[0], GEN_INT (1)));
13224 emit_insn ((mode
== DImode
13226 : gen_ashldi3
) (low
[0], low
[0], operands
[2]));
13227 emit_insn ((mode
== DImode
13229 : gen_ashldi3
) (high
[0], high
[0], operands
[2]));
13233 if (operands
[1] == constm1_rtx
)
13235 /* For -1 << N, we can avoid the shld instruction, because we
13236 know that we're shifting 0...31/63 ones into a -1. */
13237 emit_move_insn (low
[0], constm1_rtx
);
13239 emit_move_insn (high
[0], low
[0]);
13241 emit_move_insn (high
[0], constm1_rtx
);
13245 if (!rtx_equal_p (operands
[0], operands
[1]))
13246 emit_move_insn (operands
[0], operands
[1]);
13248 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13249 emit_insn ((mode
== DImode
13251 : gen_x86_64_shld
) (high
[0], low
[0], operands
[2]));
13254 emit_insn ((mode
== DImode
? gen_ashlsi3
: gen_ashldi3
) (low
[0], low
[0], operands
[2]));
13256 if (TARGET_CMOVE
&& scratch
)
13258 ix86_expand_clear (scratch
);
13259 emit_insn ((mode
== DImode
13260 ? gen_x86_shift_adj_1
13261 : gen_x86_64_shift_adj
) (high
[0], low
[0], operands
[2], scratch
));
13264 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
13268 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13270 rtx low
[2], high
[2];
13272 const int single_width
= mode
== DImode
? 32 : 64;
13274 if (CONST_INT_P (operands
[2]))
13276 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13277 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13279 if (count
== single_width
* 2 - 1)
13281 emit_move_insn (high
[0], high
[1]);
13282 emit_insn ((mode
== DImode
13284 : gen_ashrdi3
) (high
[0], high
[0],
13285 GEN_INT (single_width
- 1)));
13286 emit_move_insn (low
[0], high
[0]);
13289 else if (count
>= single_width
)
13291 emit_move_insn (low
[0], high
[1]);
13292 emit_move_insn (high
[0], low
[0]);
13293 emit_insn ((mode
== DImode
13295 : gen_ashrdi3
) (high
[0], high
[0],
13296 GEN_INT (single_width
- 1)));
13297 if (count
> single_width
)
13298 emit_insn ((mode
== DImode
13300 : gen_ashrdi3
) (low
[0], low
[0],
13301 GEN_INT (count
- single_width
)));
13305 if (!rtx_equal_p (operands
[0], operands
[1]))
13306 emit_move_insn (operands
[0], operands
[1]);
13307 emit_insn ((mode
== DImode
13309 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
13310 emit_insn ((mode
== DImode
13312 : gen_ashrdi3
) (high
[0], high
[0], GEN_INT (count
)));
13317 if (!rtx_equal_p (operands
[0], operands
[1]))
13318 emit_move_insn (operands
[0], operands
[1]);
13320 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13322 emit_insn ((mode
== DImode
13324 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
13325 emit_insn ((mode
== DImode
13327 : gen_ashrdi3
) (high
[0], high
[0], operands
[2]));
13329 if (TARGET_CMOVE
&& scratch
)
13331 emit_move_insn (scratch
, high
[0]);
13332 emit_insn ((mode
== DImode
13334 : gen_ashrdi3
) (scratch
, scratch
,
13335 GEN_INT (single_width
- 1)));
13336 emit_insn ((mode
== DImode
13337 ? gen_x86_shift_adj_1
13338 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
13342 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
13347 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13349 rtx low
[2], high
[2];
13351 const int single_width
= mode
== DImode
? 32 : 64;
13353 if (CONST_INT_P (operands
[2]))
13355 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13356 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13358 if (count
>= single_width
)
13360 emit_move_insn (low
[0], high
[1]);
13361 ix86_expand_clear (high
[0]);
13363 if (count
> single_width
)
13364 emit_insn ((mode
== DImode
13366 : gen_lshrdi3
) (low
[0], low
[0],
13367 GEN_INT (count
- single_width
)));
13371 if (!rtx_equal_p (operands
[0], operands
[1]))
13372 emit_move_insn (operands
[0], operands
[1]);
13373 emit_insn ((mode
== DImode
13375 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
13376 emit_insn ((mode
== DImode
13378 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (count
)));
13383 if (!rtx_equal_p (operands
[0], operands
[1]))
13384 emit_move_insn (operands
[0], operands
[1]);
13386 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13388 emit_insn ((mode
== DImode
13390 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
13391 emit_insn ((mode
== DImode
13393 : gen_lshrdi3
) (high
[0], high
[0], operands
[2]));
13395 /* Heh. By reversing the arguments, we can reuse this pattern. */
13396 if (TARGET_CMOVE
&& scratch
)
13398 ix86_expand_clear (scratch
);
13399 emit_insn ((mode
== DImode
13400 ? gen_x86_shift_adj_1
13401 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
13405 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
/* Predict just emitted jump instruction to be taken with probability PROB.  */
static void
predict_jump (int prob)
{
  rtx insn = get_last_insn ();
  gcc_assert (JUMP_P (insn));
  REG_NOTES (insn)
    = gen_rtx_EXPR_LIST (REG_BR_PROB,
                         GEN_INT (prob),
                         REG_NOTES (insn));
}
/* Helper function for the string operations below.  Test whether VARIABLE
   is aligned to VALUE bytes.  If it is, jump to the label that is returned.  */
static rtx
ix86_expand_aligntest (rtx variable, int value, bool epilogue)
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
                           1, label);
  if (epilogue)
    predict_jump (REG_BR_PROB_BASE * 50 / 100);
  else
    predict_jump (REG_BR_PROB_BASE * 90 / 100);
  return label;
}
/* Adjust COUNTER by the VALUE.  */
static void
ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
{
  if (GET_MODE (countreg) == DImode)
    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
  else
    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
}
/* Zero extend possibly SImode EXP to Pmode register.  */
rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  rtx r;
  if (GET_MODE (exp) == VOIDmode)
    return force_reg (Pmode, exp);
  if (GET_MODE (exp) == Pmode)
    return copy_to_mode_reg (Pmode, exp);
  r = gen_reg_rtx (Pmode);
  emit_insn (gen_zero_extendsidi2 (r, exp));
  return r;
}
/* Divide COUNTREG by SCALE.  */
static rtx
scale_counter (rtx countreg, int scale)
{
  rtx sc;
  rtx piece_size_mask;

  if (scale == 1)
    return countreg;
  if (CONST_INT_P (countreg))
    return GEN_INT (INTVAL (countreg) / scale);
  gcc_assert (REG_P (countreg));

  piece_size_mask = GEN_INT (scale - 1);
  sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
                            GEN_INT (exact_log2 (scale)),
                            NULL, 1, OPTAB_DIRECT);
  return sc;
}
/* Return mode for the memcpy/memset loop counter.  Prefer SImode over
   DImode for constant loop counts.  */
static enum machine_mode
counter_mode (rtx count_exp)
{
  if (GET_MODE (count_exp) != VOIDmode)
    return GET_MODE (count_exp);
  if (GET_CODE (count_exp) != CONST_INT)
    return Pmode;
  if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
    return DImode;
  return SImode;
}
/* When SRCPTR is non-NULL, output a simple loop to move memory from
   pointer SRCPTR to DESTPTR via chunks of MODE, unrolled UNROLL times;
   the overall size is COUNT, specified in bytes.  When SRCPTR is NULL,
   output the equivalent loop to set memory by VALUE (supposed to be in MODE).

   The size is rounded down to a whole number of chunks moved at once.
   SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info.  */
13510 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
13511 rtx destptr
, rtx srcptr
, rtx value
,
13512 rtx count
, enum machine_mode mode
, int unroll
,
13515 rtx out_label
, top_label
, iter
, tmp
;
13516 enum machine_mode iter_mode
= counter_mode (count
);
13517 rtx piece_size
= GEN_INT (GET_MODE_SIZE (mode
) * unroll
);
13518 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
13524 top_label
= gen_label_rtx ();
13525 out_label
= gen_label_rtx ();
13526 iter
= gen_reg_rtx (iter_mode
);
13528 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
13529 NULL
, 1, OPTAB_DIRECT
);
13530 /* Those two should combine. */
13531 if (piece_size
== const1_rtx
)
13533 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
13535 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
13537 emit_move_insn (iter
, const0_rtx
);
13539 emit_label (top_label
);
13541 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
13542 x_addr
= gen_rtx_PLUS (Pmode
, destptr
, tmp
);
13543 destmem
= change_address (destmem
, mode
, x_addr
);
13547 y_addr
= gen_rtx_PLUS (Pmode
, srcptr
, copy_rtx (tmp
));
13548 srcmem
= change_address (srcmem
, mode
, y_addr
);
      /* When unrolling for chips that reorder memory reads and writes,
         we can save registers by using a single temporary.
         Also, using 4 temporaries is overkill in 32-bit mode.  */
13553 if (!TARGET_64BIT
&& 0)
13555 for (i
= 0; i
< unroll
; i
++)
13560 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13562 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
13564 emit_move_insn (destmem
, srcmem
);
13570 gcc_assert (unroll
<= 4);
13571 for (i
= 0; i
< unroll
; i
++)
13573 tmpreg
[i
] = gen_reg_rtx (mode
);
13577 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
13579 emit_move_insn (tmpreg
[i
], srcmem
);
13581 for (i
= 0; i
< unroll
; i
++)
13586 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13588 emit_move_insn (destmem
, tmpreg
[i
]);
13593 for (i
= 0; i
< unroll
; i
++)
13597 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13598 emit_move_insn (destmem
, value
);
13601 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
13602 true, OPTAB_LIB_WIDEN
);
13604 emit_move_insn (iter
, tmp
);
13606 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
13608 if (expected_size
!= -1)
13610 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
13611 if (expected_size
== 0)
13613 else if (expected_size
> REG_BR_PROB_BASE
)
13614 predict_jump (REG_BR_PROB_BASE
- 1);
13616 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
13619 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
13620 iter
= ix86_zero_extend_to_Pmode (iter
);
13621 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
13622 true, OPTAB_LIB_WIDEN
);
13623 if (tmp
!= destptr
)
13624 emit_move_insn (destptr
, tmp
);
13627 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
13628 true, OPTAB_LIB_WIDEN
);
13630 emit_move_insn (srcptr
, tmp
);
13632 emit_label (out_label
);
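/* A rough C-level sketch of the loop emitted above (illustrative only;
   "chunk" stands for GET_MODE_SIZE (MODE) * UNROLL, the names are made up):

     size = count & ~(chunk - 1);
     for (iter = 0; iter < size; iter += chunk)
       copy (or set) chunk bytes at destptr + iter (from srcptr + iter);
     destptr += iter;  srcptr += iter;

   The tail of fewer than chunk bytes is left for the epilogue code.  */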
13635 /* Output "rep; mov" instruction.
13636 Arguments have same meaning as for previous function */
13638 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
13639 rtx destptr
, rtx srcptr
,
13641 enum machine_mode mode
)
13647 /* If the size is known, it is shorter to use rep movs. */
13648 if (mode
== QImode
&& CONST_INT_P (count
)
13649 && !(INTVAL (count
) & 3))
13652 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
13653 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
13654 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
13655 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
13656 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
13657 if (mode
!= QImode
)
13659 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13660 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13661 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
13662 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13663 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13664 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
13668 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
13669 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
13671 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
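/* Sketch of the effect of the "rep; mov" expansion above (illustrative,
   assuming SImode chunks on a 32-bit target; register names are only for
   exposition):

     ecx = count / 4;        scale_counter divides the byte count
     rep movsl;              copies 4 * ecx bytes, advancing esi and edi

   DESTEXP and SRCEXP describe the final pointer values so the generated
   rtl knows how far the pointers have moved.  */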
13675 /* Output "rep; stos" instruction.
13676 Arguments have same meaning as for previous function */
13678 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
13680 enum machine_mode mode
)
13685 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
13686 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
13687 value
= force_reg (mode
, gen_lowpart (mode
, value
));
13688 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
13689 if (mode
!= QImode
)
13691 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13692 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13693 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
13696 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
13697 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
static void
emit_strmov (rtx destmem, rtx srcmem,
             rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
{
  rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
  rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
  emit_insn (gen_strmov (destptr, dest, srcptr, src));
}
13709 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
13711 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
13712 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
13715 if (CONST_INT_P (count
))
13717 HOST_WIDE_INT countval
= INTVAL (count
);
13720 if ((countval
& 0x10) && max_size
> 16)
13724 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
13725 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
+ 8);
13728 gcc_unreachable ();
13731 if ((countval
& 0x08) && max_size
> 8)
13734 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
13737 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
13738 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
+ 4);
13742 if ((countval
& 0x04) && max_size
> 4)
13744 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
13747 if ((countval
& 0x02) && max_size
> 2)
13749 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, HImode
, offset
);
13752 if ((countval
& 0x01) && max_size
> 1)
13754 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, QImode
, offset
);
13761 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
13762 count
, 1, OPTAB_DIRECT
);
13763 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
13764 count
, QImode
, 1, 4);
13768 /* When there are stringops, we can cheaply increase dest and src pointers.
13769 Otherwise we save code size by maintaining offset (zero is readily
13770 available from preceding rep operation) and using x86 addressing modes.
13772 if (TARGET_SINGLE_STRINGOP
)
13776 rtx label
= ix86_expand_aligntest (count
, 4, true);
13777 src
= change_address (srcmem
, SImode
, srcptr
);
13778 dest
= change_address (destmem
, SImode
, destptr
);
13779 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13780 emit_label (label
);
13781 LABEL_NUSES (label
) = 1;
13785 rtx label
= ix86_expand_aligntest (count
, 2, true);
13786 src
= change_address (srcmem
, HImode
, srcptr
);
13787 dest
= change_address (destmem
, HImode
, destptr
);
13788 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13789 emit_label (label
);
13790 LABEL_NUSES (label
) = 1;
13794 rtx label
= ix86_expand_aligntest (count
, 1, true);
13795 src
= change_address (srcmem
, QImode
, srcptr
);
13796 dest
= change_address (destmem
, QImode
, destptr
);
13797 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13798 emit_label (label
);
13799 LABEL_NUSES (label
) = 1;
13804 rtx offset
= force_reg (Pmode
, const0_rtx
);
13809 rtx label
= ix86_expand_aligntest (count
, 4, true);
13810 src
= change_address (srcmem
, SImode
, srcptr
);
13811 dest
= change_address (destmem
, SImode
, destptr
);
13812 emit_move_insn (dest
, src
);
13813 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
13814 true, OPTAB_LIB_WIDEN
);
13816 emit_move_insn (offset
, tmp
);
13817 emit_label (label
);
13818 LABEL_NUSES (label
) = 1;
13822 rtx label
= ix86_expand_aligntest (count
, 2, true);
13823 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
13824 src
= change_address (srcmem
, HImode
, tmp
);
13825 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
13826 dest
= change_address (destmem
, HImode
, tmp
);
13827 emit_move_insn (dest
, src
);
13828 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
13829 true, OPTAB_LIB_WIDEN
);
13831 emit_move_insn (offset
, tmp
);
13832 emit_label (label
);
13833 LABEL_NUSES (label
) = 1;
13837 rtx label
= ix86_expand_aligntest (count
, 1, true);
13838 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
13839 src
= change_address (srcmem
, QImode
, tmp
);
13840 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
13841 dest
= change_address (destmem
, QImode
, tmp
);
13842 emit_move_insn (dest
, src
);
13843 emit_label (label
);
13844 LABEL_NUSES (label
) = 1;
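/* The epilogue above handles the remaining count & (max_size - 1) bytes.
   For a constant COUNT this boils down to an unconditional sequence picked
   by the set bits of the remainder; a C-level sketch (illustrative only):

     if (count & 8) copy 8 bytes;
     if (count & 4) copy 4 bytes;
     if (count & 2) copy 2 bytes;
     if (count & 1) copy 1 byte;

   For a variable COUNT a similar jump tree is emitted, testing the low
   bits with ix86_expand_aligntest.  */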
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
13851 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
13852 rtx count
, int max_size
)
13855 expand_simple_binop (counter_mode (count
), AND
, count
,
13856 GEN_INT (max_size
- 1), count
, 1, OPTAB_DIRECT
);
13857 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
13858 gen_lowpart (QImode
, value
), count
, QImode
,
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
13864 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx count
, int max_size
)
13868 if (CONST_INT_P (count
))
13870 HOST_WIDE_INT countval
= INTVAL (count
);
13873 if ((countval
& 0x10) && max_size
> 16)
13877 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
13878 emit_insn (gen_strset (destptr
, dest
, value
));
13879 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
+ 8);
13880 emit_insn (gen_strset (destptr
, dest
, value
));
13883 gcc_unreachable ();
13886 if ((countval
& 0x08) && max_size
> 8)
13890 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
13891 emit_insn (gen_strset (destptr
, dest
, value
));
13895 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
13896 emit_insn (gen_strset (destptr
, dest
, value
));
13897 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
+ 4);
13898 emit_insn (gen_strset (destptr
, dest
, value
));
13902 if ((countval
& 0x04) && max_size
> 4)
13904 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
13905 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
13908 if ((countval
& 0x02) && max_size
> 2)
13910 dest
= adjust_automodify_address_nv (destmem
, HImode
, destptr
, offset
);
13911 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
13914 if ((countval
& 0x01) && max_size
> 1)
13916 dest
= adjust_automodify_address_nv (destmem
, QImode
, destptr
, offset
);
13917 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
13924 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
13929 rtx label
= ix86_expand_aligntest (count
, 16, true);
13932 dest
= change_address (destmem
, DImode
, destptr
);
13933 emit_insn (gen_strset (destptr
, dest
, value
));
13934 emit_insn (gen_strset (destptr
, dest
, value
));
13938 dest
= change_address (destmem
, SImode
, destptr
);
13939 emit_insn (gen_strset (destptr
, dest
, value
));
13940 emit_insn (gen_strset (destptr
, dest
, value
));
13941 emit_insn (gen_strset (destptr
, dest
, value
));
13942 emit_insn (gen_strset (destptr
, dest
, value
));
13944 emit_label (label
);
13945 LABEL_NUSES (label
) = 1;
13949 rtx label
= ix86_expand_aligntest (count
, 8, true);
13952 dest
= change_address (destmem
, DImode
, destptr
);
13953 emit_insn (gen_strset (destptr
, dest
, value
));
13957 dest
= change_address (destmem
, SImode
, destptr
);
13958 emit_insn (gen_strset (destptr
, dest
, value
));
13959 emit_insn (gen_strset (destptr
, dest
, value
));
13961 emit_label (label
);
13962 LABEL_NUSES (label
) = 1;
13966 rtx label
= ix86_expand_aligntest (count
, 4, true);
13967 dest
= change_address (destmem
, SImode
, destptr
);
13968 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
13969 emit_label (label
);
13970 LABEL_NUSES (label
) = 1;
13974 rtx label
= ix86_expand_aligntest (count
, 2, true);
13975 dest
= change_address (destmem
, HImode
, destptr
);
13976 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
13977 emit_label (label
);
13978 LABEL_NUSES (label
) = 1;
13982 rtx label
= ix86_expand_aligntest (count
, 1, true);
13983 dest
= change_address (destmem
, QImode
, destptr
);
13984 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
13985 emit_label (label
);
13986 LABEL_NUSES (label
) = 1;
/* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
13993 expand_movmem_prologue (rtx destmem
, rtx srcmem
,
13994 rtx destptr
, rtx srcptr
, rtx count
,
13995 int align
, int desired_alignment
)
13997 if (align
<= 1 && desired_alignment
> 1)
13999 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
14000 srcmem
= change_address (srcmem
, QImode
, srcptr
);
14001 destmem
= change_address (destmem
, QImode
, destptr
);
14002 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
14003 ix86_adjust_counter (count
, 1);
14004 emit_label (label
);
14005 LABEL_NUSES (label
) = 1;
14007 if (align
<= 2 && desired_alignment
> 2)
14009 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
14010 srcmem
= change_address (srcmem
, HImode
, srcptr
);
14011 destmem
= change_address (destmem
, HImode
, destptr
);
14012 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
14013 ix86_adjust_counter (count
, 2);
14014 emit_label (label
);
14015 LABEL_NUSES (label
) = 1;
14017 if (align
<= 4 && desired_alignment
> 4)
14019 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
14020 srcmem
= change_address (srcmem
, SImode
, srcptr
);
14021 destmem
= change_address (destmem
, SImode
, destptr
);
14022 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
14023 ix86_adjust_counter (count
, 4);
14024 emit_label (label
);
14025 LABEL_NUSES (label
) = 1;
14027 gcc_assert (desired_alignment
<= 8);
/* Set enough of DEST to align DEST, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
14033 expand_setmem_prologue (rtx destmem
, rtx destptr
, rtx value
, rtx count
,
14034 int align
, int desired_alignment
)
14036 if (align
<= 1 && desired_alignment
> 1)
14038 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
14039 destmem
= change_address (destmem
, QImode
, destptr
);
14040 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (QImode
, value
)));
14041 ix86_adjust_counter (count
, 1);
14042 emit_label (label
);
14043 LABEL_NUSES (label
) = 1;
14045 if (align
<= 2 && desired_alignment
> 2)
14047 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
14048 destmem
= change_address (destmem
, HImode
, destptr
);
14049 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (HImode
, value
)));
14050 ix86_adjust_counter (count
, 2);
14051 emit_label (label
);
14052 LABEL_NUSES (label
) = 1;
14054 if (align
<= 4 && desired_alignment
> 4)
14056 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
14057 destmem
= change_address (destmem
, SImode
, destptr
);
14058 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
14059 ix86_adjust_counter (count
, 4);
14060 emit_label (label
);
14061 LABEL_NUSES (label
) = 1;
14063 gcc_assert (desired_alignment
<= 8);
14066 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
14067 static enum stringop_alg
14068 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
, bool memset
,
14069 int *dynamic_check
)
14071 const struct stringop_algs
* algs
;
14073 *dynamic_check
= -1;
14075 algs
= &ix86_cost
->memset
[TARGET_64BIT
!= 0];
14077 algs
= &ix86_cost
->memcpy
[TARGET_64BIT
!= 0];
14078 if (stringop_alg
!= no_stringop
)
14079 return stringop_alg
;
14080 /* rep; movq or rep; movl is the smallest variant. */
14081 else if (optimize_size
)
14083 if (!count
|| (count
& 3))
14084 return rep_prefix_1_byte
;
14086 return rep_prefix_4_byte
;
14088 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
14090 else if (expected_size
!= -1 && expected_size
< 4)
14091 return loop_1_byte
;
14092 else if (expected_size
!= -1)
14095 enum stringop_alg alg
= libcall
;
14096 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
14098 gcc_assert (algs
->size
[i
].max
);
14099 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
14101 if (algs
->size
[i
].alg
!= libcall
)
14102 alg
= algs
->size
[i
].alg
;
14103 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14104 last non-libcall inline algorithm. */
14105 if (TARGET_INLINE_ALL_STRINGOPS
)
            /* When the current size is best copied by a libcall, but we
               are still forced to inline, run the heuristic below that
               will pick code for medium-sized blocks.  */
14110 if (alg
!= libcall
)
14115 return algs
->size
[i
].alg
;
14118 gcc_assert (TARGET_INLINE_ALL_STRINGOPS
);
  /* When asked to inline the call anyway, try to pick a meaningful choice.
     We look for the maximal size of block that is faster to copy by hand
     and take blocks of at most that size, guessing that the average size
     will be roughly half of the block.

     If this turns out to be bad, we might simply specify the preferred
     choice in ix86_costs.  */
14127 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
14128 && algs
->unknown_size
== libcall
)
14131 enum stringop_alg alg
;
14134 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
14135 if (algs
->size
[i
].alg
!= libcall
&& algs
->size
[i
].alg
)
14136 max
= algs
->size
[i
].max
;
14139 alg
= decide_alg (count
, max
/ 2, memset
, dynamic_check
);
14140 gcc_assert (*dynamic_check
== -1);
14141 gcc_assert (alg
!= libcall
);
14142 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
14143 *dynamic_check
= max
;
14146 return algs
->unknown_size
;
14149 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14150 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
14152 decide_alignment (int align
,
14153 enum stringop_alg alg
,
14156 int desired_align
= 0;
14160 gcc_unreachable ();
14162 case unrolled_loop
:
14163 desired_align
= GET_MODE_SIZE (Pmode
);
14165 case rep_prefix_8_byte
:
14168 case rep_prefix_4_byte
:
      /* PentiumPro has special logic triggering for 8-byte aligned blocks,
         copying a whole cacheline at once.  */
14171 if (TARGET_PENTIUMPRO
)
14176 case rep_prefix_1_byte
:
      /* PentiumPro has special logic triggering for 8-byte aligned blocks,
         copying a whole cacheline at once.  */
14179 if (TARGET_PENTIUMPRO
)
14193 if (desired_align
< align
)
14194 desired_align
= align
;
14195 if (expected_size
!= -1 && expected_size
< 4)
14196 desired_align
= align
;
14197 return desired_align
;
/* Return the smallest power of 2 greater than VAL.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
/* Expand string move (memcpy) operation.  Use i386 string operations when
   profitable.  expand_clrmem contains similar code.  The code depends upon
   architecture, block size and alignment, but always has the same
   overall structure:

   1) Prologue guard: Conditional that jumps up to the epilogues for small
      blocks that can be handled by the epilogue alone.  This is faster but
      also needed for correctness, since the prologue assumes the block is
      larger than the desired alignment.

      An optional dynamic check for size and a libcall for large
      blocks is emitted here too, with -minline-stringops-dynamically.

   2) Prologue: copy the first few bytes in order to get the destination
      aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less than
      DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
      We emit either a jump tree on power-of-two sized blocks, or a byte loop.

   3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
      with the specified algorithm.

   4) Epilogue: code copying the tail of the block that is too small to be
      handled by the main body (or up to size guarded by the prologue guard). */
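/* A compressed sketch of the emitted structure (illustrative pseudo-C,
   not the actual rtl):

     if (count < epilogue_size_needed)
       goto epilogue;                            step 1, prologue guard
     while (dst is not aligned to desired_align)
       copy a byte / power-of-two chunk;         step 2, alignment prologue
     copy size_needed-byte chunks;               step 3, main body
   epilogue:
     copy the count & (epilogue_size_needed - 1)
       remaining bytes;                          step 4, epilogue  */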
14235 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
14236 rtx expected_align_exp
, rtx expected_size_exp
)
14242 rtx jump_around_label
= NULL
;
14243 HOST_WIDE_INT align
= 1;
14244 unsigned HOST_WIDE_INT count
= 0;
14245 HOST_WIDE_INT expected_size
= -1;
14246 int size_needed
= 0, epilogue_size_needed
;
14247 int desired_align
= 0;
14248 enum stringop_alg alg
;
14251 if (CONST_INT_P (align_exp
))
14252 align
= INTVAL (align_exp
);
  /* i386 can do misaligned access at a reasonably increased cost.  */
14254 if (CONST_INT_P (expected_align_exp
)
14255 && INTVAL (expected_align_exp
) > align
)
14256 align
= INTVAL (expected_align_exp
);
14257 if (CONST_INT_P (count_exp
))
14258 count
= expected_size
= INTVAL (count_exp
);
14259 if (CONST_INT_P (expected_size_exp
) && count
== 0)
14260 expected_size
= INTVAL (expected_size_exp
);
14262 /* Step 0: Decide on preferred algorithm, desired alignment and
14263 size of chunks to be copied by main loop. */
14265 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
);
14266 desired_align
= decide_alignment (align
, alg
, expected_size
);
14268 if (!TARGET_ALIGN_STRINGOPS
)
14269 align
= desired_align
;
14271 if (alg
== libcall
)
14273 gcc_assert (alg
!= no_stringop
);
14275 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
14276 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
14277 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
14282 gcc_unreachable ();
14284 size_needed
= GET_MODE_SIZE (Pmode
);
14286 case unrolled_loop
:
14287 size_needed
= GET_MODE_SIZE (Pmode
) * (TARGET_64BIT
? 4 : 2);
14289 case rep_prefix_8_byte
:
14292 case rep_prefix_4_byte
:
14295 case rep_prefix_1_byte
:
14301 epilogue_size_needed
= size_needed
;
14303 /* Step 1: Prologue guard. */
14305 /* Alignment code needs count to be in register. */
14306 if (CONST_INT_P (count_exp
) && desired_align
> align
)
14308 enum machine_mode mode
= SImode
;
14309 if (TARGET_64BIT
&& (count
& ~0xffffffff))
14311 count_exp
= force_reg (mode
, count_exp
);
14313 gcc_assert (desired_align
>= 1 && align
>= 1);
14315 /* Ensure that alignment prologue won't copy past end of block. */
14316 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
14318 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
14319 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14320 Make sure it is power of 2. */
14321 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
14323 label
= gen_label_rtx ();
14324 emit_cmp_and_jump_insns (count_exp
,
14325 GEN_INT (epilogue_size_needed
),
14326 LTU
, 0, counter_mode (count_exp
), 1, label
);
14327 if (GET_CODE (count_exp
) == CONST_INT
)
14329 else if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
14330 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
14332 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
14334 /* Emit code to decide on runtime whether library call or inline should be
14336 if (dynamic_check
!= -1)
14338 rtx hot_label
= gen_label_rtx ();
14339 jump_around_label
= gen_label_rtx ();
14340 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
14341 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
14342 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
14343 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
14344 emit_jump (jump_around_label
);
14345 emit_label (hot_label
);
14348 /* Step 2: Alignment prologue. */
14350 if (desired_align
> align
)
      /* Except for the first move in the epilogue, we no longer know
         the constant offset in the aliasing info.  It doesn't seem worth
         the pain to maintain it for the first move, so throw it away
         early.  */
14356 src
= change_address (src
, BLKmode
, srcreg
);
14357 dst
= change_address (dst
, BLKmode
, destreg
);
14358 expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
14361 if (label
&& size_needed
== 1)
14363 emit_label (label
);
14364 LABEL_NUSES (label
) = 1;
14368 /* Step 3: Main loop. */
14374 gcc_unreachable ();
14376 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14377 count_exp
, QImode
, 1, expected_size
);
14380 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14381 count_exp
, Pmode
, 1, expected_size
);
14383 case unrolled_loop
:
14384 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
14385 registers for 4 temporaries anyway. */
14386 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14387 count_exp
, Pmode
, TARGET_64BIT
? 4 : 2,
14390 case rep_prefix_8_byte
:
14391 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14394 case rep_prefix_4_byte
:
14395 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14398 case rep_prefix_1_byte
:
14399 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
  /* Properly adjust the offset of src and dest memory for aliasing.  */
14404 if (CONST_INT_P (count_exp
))
14406 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
14407 (count
/ size_needed
) * size_needed
);
14408 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
14409 (count
/ size_needed
) * size_needed
);
14413 src
= change_address (src
, BLKmode
, srcreg
);
14414 dst
= change_address (dst
, BLKmode
, destreg
);
14417 /* Step 4: Epilogue to copy the remaining bytes. */
14421 /* When the main loop is done, COUNT_EXP might hold original count,
14422 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14423 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14424 bytes. Compensate if needed. */
14426 if (size_needed
< epilogue_size_needed
)
14429 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
14430 GEN_INT (size_needed
- 1), count_exp
, 1,
14432 if (tmp
!= count_exp
)
14433 emit_move_insn (count_exp
, tmp
);
14435 emit_label (label
);
14436 LABEL_NUSES (label
) = 1;
14439 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
14440 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
14441 epilogue_size_needed
);
14442 if (jump_around_label
)
14443 emit_label (jump_around_label
);
/* Helper function for memset.  For a QImode value 0xXY produce
   0xXYXYXYXY of the width specified by MODE.  This is essentially
   a * 0x01010101, but we can do slightly better than
   synth_mult by unwinding the sequence by hand on CPUs with
   slow multiply.  */
14453 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
14455 enum machine_mode valmode
= GET_MODE (val
);
14457 int nops
= mode
== DImode
? 3 : 2;
14459 gcc_assert (mode
== SImode
|| mode
== DImode
);
14460 if (val
== const0_rtx
)
14461 return copy_to_mode_reg (mode
, const0_rtx
);
14462 if (CONST_INT_P (val
))
14464 HOST_WIDE_INT v
= INTVAL (val
) & 255;
14468 if (mode
== DImode
)
14469 v
|= (v
<< 16) << 16;
14470 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
14473 if (valmode
== VOIDmode
)
14475 if (valmode
!= QImode
)
14476 val
= gen_lowpart (QImode
, val
);
14477 if (mode
== QImode
)
14479 if (!TARGET_PARTIAL_REG_STALL
)
14481 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
14482 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
14483 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
14484 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
14486 rtx reg
= convert_modes (mode
, QImode
, val
, true);
14487 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
14488 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
14493 rtx reg
= convert_modes (mode
, QImode
, val
, true);
14495 if (!TARGET_PARTIAL_REG_STALL
)
14496 if (mode
== SImode
)
14497 emit_insn (gen_movsi_insv_1 (reg
, reg
));
14499 emit_insn (gen_movdi_insv_1_rex64 (reg
, reg
));
14502 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
14503 NULL
, 1, OPTAB_DIRECT
);
14505 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14507 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
14508 NULL
, 1, OPTAB_DIRECT
);
14509 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14510 if (mode
== SImode
)
14512 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
14513 NULL
, 1, OPTAB_DIRECT
);
14514 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
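/* Unrolled by hand, the duplication done above amounts to the following
   (illustrative C for the SImode case; DImode adds one more step):

     v |= v << 8;             0x000000XY -> 0x0000XYXY
     v |= v << 16;            0x0000XYXY -> 0xXYXYXYXY
     v |= (v << 16) << 16;    DImode only, giving 0xXYXYXYXYXYXYXYXY

   which equals v * 0x01010101 but avoids the multiply on CPUs where it
   is slow.  */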
/* Duplicate value VAL using promote_duplicated_reg into the maximal size that
   will be needed by the main loop copying SIZE_NEEDED chunks and by the
   prologue getting the alignment from ALIGN up to DESIRED_ALIGN.  */
14523 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
, int align
)
14528 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
14529 promoted_val
= promote_duplicated_reg (DImode
, val
);
14530 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
14531 promoted_val
= promote_duplicated_reg (SImode
, val
);
14532 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
14533 promoted_val
= promote_duplicated_reg (HImode
, val
);
14535 promoted_val
= val
;
14537 return promoted_val
;
14540 /* Expand string clear operation (bzero). Use i386 string operations when
14541 profitable. See expand_movmem comment for explanation of individual
14542 steps performed. */
14544 ix86_expand_setmem (rtx dst
, rtx count_exp
, rtx val_exp
, rtx align_exp
,
14545 rtx expected_align_exp
, rtx expected_size_exp
)
14550 rtx jump_around_label
= NULL
;
14551 HOST_WIDE_INT align
= 1;
14552 unsigned HOST_WIDE_INT count
= 0;
14553 HOST_WIDE_INT expected_size
= -1;
14554 int size_needed
= 0, epilogue_size_needed
;
14555 int desired_align
= 0;
14556 enum stringop_alg alg
;
14557 rtx promoted_val
= NULL
;
14558 bool force_loopy_epilogue
= false;
14561 if (CONST_INT_P (align_exp
))
14562 align
= INTVAL (align_exp
);
  /* i386 can do misaligned access at a reasonably increased cost.  */
14564 if (CONST_INT_P (expected_align_exp
)
14565 && INTVAL (expected_align_exp
) > align
)
14566 align
= INTVAL (expected_align_exp
);
14567 if (CONST_INT_P (count_exp
))
14568 count
= expected_size
= INTVAL (count_exp
);
14569 if (CONST_INT_P (expected_size_exp
) && count
== 0)
14570 expected_size
= INTVAL (expected_size_exp
);
14572 /* Step 0: Decide on preferred algorithm, desired alignment and
14573 size of chunks to be copied by main loop. */
14575 alg
= decide_alg (count
, expected_size
, true, &dynamic_check
);
14576 desired_align
= decide_alignment (align
, alg
, expected_size
);
14578 if (!TARGET_ALIGN_STRINGOPS
)
14579 align
= desired_align
;
14581 if (alg
== libcall
)
14583 gcc_assert (alg
!= no_stringop
);
14585 count_exp
= copy_to_mode_reg (counter_mode (count_exp
), count_exp
);
14586 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
14591 gcc_unreachable ();
14593 size_needed
= GET_MODE_SIZE (Pmode
);
14595 case unrolled_loop
:
14596 size_needed
= GET_MODE_SIZE (Pmode
) * 4;
14598 case rep_prefix_8_byte
:
14601 case rep_prefix_4_byte
:
14604 case rep_prefix_1_byte
:
14609 epilogue_size_needed
= size_needed
;
14611 /* Step 1: Prologue guard. */
14613 /* Alignment code needs count to be in register. */
14614 if (CONST_INT_P (count_exp
) && desired_align
> align
)
14616 enum machine_mode mode
= SImode
;
14617 if (TARGET_64BIT
&& (count
& ~0xffffffff))
14619 count_exp
= force_reg (mode
, count_exp
);
  /* Do the cheap promotion to allow better CSE across the
     main loop and epilogue (i.e. one load of the big constant in
     front of all the code).  */
14624 if (CONST_INT_P (val_exp
))
14625 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
14626 desired_align
, align
);
14627 /* Ensure that alignment prologue won't copy past end of block. */
14628 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
14630 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
14631 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14632 Make sure it is power of 2. */
14633 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
          /* To improve performance of small blocks, we jump around the VAL
             promoting code.  This means that if the promoted VAL is not
             constant, we might not use it in the epilogue and have to use
             the byte loop variant.  */
14639 if (epilogue_size_needed
> 2 && !promoted_val
)
14640 force_loopy_epilogue
= true;
14641 label
= gen_label_rtx ();
14642 emit_cmp_and_jump_insns (count_exp
,
14643 GEN_INT (epilogue_size_needed
),
14644 LTU
, 0, counter_mode (count_exp
), 1, label
);
14645 if (GET_CODE (count_exp
) == CONST_INT
)
14647 else if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
14648 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
14650 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
14652 if (dynamic_check
!= -1)
14654 rtx hot_label
= gen_label_rtx ();
14655 jump_around_label
= gen_label_rtx ();
14656 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
14657 LEU
, 0, counter_mode (count_exp
), 1, hot_label
);
14658 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
14659 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
14660 emit_jump (jump_around_label
);
14661 emit_label (hot_label
);
14664 /* Step 2: Alignment prologue. */
14666 /* Do the expensive promotion once we branched off the small blocks. */
14668 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
14669 desired_align
, align
);
14670 gcc_assert (desired_align
>= 1 && align
>= 1);
14672 if (desired_align
> align
)
      /* Except for the first move in the epilogue, we no longer know
         the constant offset in the aliasing info.  It doesn't seem worth
         the pain to maintain it for the first move, so throw it away
         early.  */
14678 dst
= change_address (dst
, BLKmode
, destreg
);
14679 expand_setmem_prologue (dst
, destreg
, promoted_val
, count_exp
, align
,
14682 if (label
&& size_needed
== 1)
14684 emit_label (label
);
14685 LABEL_NUSES (label
) = 1;
14689 /* Step 3: Main loop. */
14695 gcc_unreachable ();
14697 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14698 count_exp
, QImode
, 1, expected_size
);
14701 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14702 count_exp
, Pmode
, 1, expected_size
);
14704 case unrolled_loop
:
14705 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14706 count_exp
, Pmode
, 4, expected_size
);
14708 case rep_prefix_8_byte
:
14709 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14712 case rep_prefix_4_byte
:
14713 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14716 case rep_prefix_1_byte
:
14717 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
  /* Properly adjust the offset of src and dest memory for aliasing.  */
14722 if (CONST_INT_P (count_exp
))
14723 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
14724 (count
/ size_needed
) * size_needed
);
14726 dst
= change_address (dst
, BLKmode
, destreg
);
14728 /* Step 4: Epilogue to copy the remaining bytes. */
14732 /* When the main loop is done, COUNT_EXP might hold original count,
14733 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14734 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14735 bytes. Compensate if needed. */
14737 if (size_needed
< desired_align
- align
)
14740 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
14741 GEN_INT (size_needed
- 1), count_exp
, 1,
14743 size_needed
= desired_align
- align
+ 1;
14744 if (tmp
!= count_exp
)
14745 emit_move_insn (count_exp
, tmp
);
14747 emit_label (label
);
14748 LABEL_NUSES (label
) = 1;
14750 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
14752 if (force_loopy_epilogue
)
14753 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
14756 expand_setmem_epilogue (dst
, destreg
, promoted_val
, count_exp
,
14759 if (jump_around_label
)
14760 emit_label (jump_around_label
);
14764 /* Expand strlen. */
14766 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
14768 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
  /* The generic case of the strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */
14773 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
14774 && !TARGET_INLINE_ALL_STRINGOPS
14776 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
14779 addr
= force_reg (Pmode
, XEXP (src
, 0));
14780 scratch1
= gen_reg_rtx (Pmode
);
14782 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
      /* Well, it seems that some optimizer does not combine a call like
           foo (strlen (bar), strlen (bar));
         when the move and the subtraction are done here.  It does calculate
         the length just once when these instructions are done inside of
         output_strlen_unroll ().  But I think, since &bar[strlen (bar)] is
         often used and I use one fewer register for the lifetime of
         output_strlen_unroll (), this is better.  */
14793 emit_move_insn (out
, addr
);
14795 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
14797 /* strlensi_unroll_1 returns the address of the zero at the end of
14798 the string, like memchr(), so compute the length by subtracting
14799 the start address. */
14801 emit_insn (gen_subdi3 (out
, out
, addr
));
14803 emit_insn (gen_subsi3 (out
, out
, addr
));
14808 scratch2
= gen_reg_rtx (Pmode
);
14809 scratch3
= gen_reg_rtx (Pmode
);
14810 scratch4
= force_reg (Pmode
, constm1_rtx
);
14812 emit_move_insn (scratch3
, addr
);
14813 eoschar
= force_reg (QImode
, eoschar
);
14815 src
= replace_equiv_address_nv (src
, scratch3
);
14817 /* If .md starts supporting :P, this can be done in .md. */
14818 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
14819 scratch4
), UNSPEC_SCAS
);
14820 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
14823 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
14824 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
14828 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
14829 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
             not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */
14847 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
14851 rtx align_2_label
= NULL_RTX
;
14852 rtx align_3_label
= NULL_RTX
;
14853 rtx align_4_label
= gen_label_rtx ();
14854 rtx end_0_label
= gen_label_rtx ();
14856 rtx tmpreg
= gen_reg_rtx (SImode
);
14857 rtx scratch
= gen_reg_rtx (SImode
);
14861 if (CONST_INT_P (align_rtx
))
14862 align
= INTVAL (align_rtx
);
14864 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14866 /* Is there a known alignment and is it less than 4? */
14869 rtx scratch1
= gen_reg_rtx (Pmode
);
14870 emit_move_insn (scratch1
, out
);
14871 /* Is there a known alignment and is it not 2? */
14874 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
14875 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
14877 /* Leave just the 3 lower bits. */
14878 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
14879 NULL_RTX
, 0, OPTAB_WIDEN
);
14881 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
14882 Pmode
, 1, align_4_label
);
14883 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
14884 Pmode
, 1, align_2_label
);
14885 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
14886 Pmode
, 1, align_3_label
);
          /* Since the alignment is 2, we have to check 2 or 0 bytes;
             check whether it is aligned to 4 bytes.  */
14893 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
14894 NULL_RTX
, 0, OPTAB_WIDEN
);
14896 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
14897 Pmode
, 1, align_4_label
);
14900 mem
= change_address (src
, QImode
, out
);
14902 /* Now compare the bytes. */
  /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
14905 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
14906 QImode
, 1, end_0_label
);
14908 /* Increment the address. */
14910 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14912 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14914 /* Not needed with an alignment of 2 */
14917 emit_label (align_2_label
);
14919 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
14923 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14925 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14927 emit_label (align_3_label
);
14930 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
14934 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14936 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
  /* Generate the loop to check 4 bytes at a time.  It is not a good idea
     to align this loop.  It gives only huge programs, but does not help
     to speed up.  */
14942 emit_label (align_4_label
);
14944 mem
= change_address (src
, SImode
, out
);
14945 emit_move_insn (scratch
, mem
);
14947 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
14949 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside the loop and many cycles.  */
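  /* In scalar C the test emitted below is the classic "some byte is zero"
     trick (illustrative only):

       tmp = (x - 0x01010101) & ~x & 0x80808080;

     tmp is nonzero iff at least one byte of x is zero, so one compare and
     branch per four bytes suffices.  */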
14954 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
14955 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
14956 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
14957 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
14958 gen_int_mode (0x80808080, SImode
)));
14959 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
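  /* Illustrative sketch (added commentary, not from the original source):
     the insns emitted just above compute in TMPREG the classic zero-byte
     test

	 (x - 0x01010101) & ~x & 0x80808080

     which is nonzero iff some byte of the 32-bit word X is zero.  As a
     hypothetical stand-alone C helper the same predicate would read:

	 static int
	 word_has_zero_byte (unsigned int x)
	 {
	   return ((x - 0x01010101U) & ~x & 0x80808080U) != 0;
	 }

     The subtraction sets the sign bit of a byte only when that byte was
     zero or a lower byte borrowed, and the "& ~x" strips bytes whose own
     sign bit was already set, so only genuine zero bytes survive the final
     0x80808080 mask.  */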
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
						    reg, tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
			      gen_rtx_PLUS (Pmode, out, const2_rtx)));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
			      gen_rtx_IF_THEN_ELSE (Pmode, tmp,
						    reg2, out)));
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes?  */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
				  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const2_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const2_rtx));

      emit_label (end_2_label);
  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
  if (TARGET_64BIT)
    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
  else
    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));

  emit_label (end_0_label);
/* For a given symbol (function) construct code to compute the address of
   its PLT entry in the large x86-64 PIC model.  */
construct_plt_address (rtx symbol)
  rtx tmp = gen_reg_rtx (Pmode);
  rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);

  gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
  gcc_assert (ix86_cmodel == CM_LARGE_PIC);

  emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
  emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
		  rtx callarg2 ATTRIBUTE_UNUSED,
		  rtx pop, int sibcall)
  rtx use = NULL, call;

  if (pop == const0_rtx)
  gcc_assert (!TARGET_64BIT || !pop);

  if (TARGET_MACHO && !TARGET_64BIT)
      if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
	fnaddr = machopic_indirect_call_target (fnaddr);

      /* Static functions and indirect calls don't need the pic register.  */
      if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
	  && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
	  && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
	use_reg (&use, pic_offset_table_rtx);

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
      rtx al = gen_rtx_REG (QImode, 0);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);

  if (ix86_cmodel == CM_LARGE_PIC
      && GET_CODE (fnaddr) == MEM
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
    fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
  else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
  if (sibcall && TARGET_64BIT
      && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
      addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_REG (Pmode, R11_REG);
      emit_move_insn (fnaddr, addr);
      fnaddr = gen_rtx_MEM (QImode, fnaddr);

  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  call = gen_rtx_SET (VOIDmode, retval, call);
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));

  call = emit_call_insn (call);
  CALL_INSN_FUNCTION_USAGE (call) = use;
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function body.  */

static struct machine_function *
ix86_init_machine_status (void)
  struct machine_function *f;

  f = ggc_alloc_cleared (sizeof (struct machine_function));
  f->use_fast_prologue_epilogue_nregs = -1;
  f->tls_descriptor_call_expanded_p = 0;
/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
  struct stack_local_entry *s;

  gcc_assert (n < MAX_386_STACK_LOCALS);

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return copy_rtx (s->rtl);

  s = (struct stack_local_entry *)
    ggc_alloc (sizeof (struct stack_local_entry));
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;

ix86_tls_get_addr (void)
  if (!ix86_tls_symbol)
      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
					    (TARGET_ANY_GNU_TLS
					     && !TARGET_64BIT)
					    ? "___tls_get_addr"
					    : "__tls_get_addr");

  return ix86_tls_symbol;

/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */

static GTY(()) rtx ix86_tls_module_base_symbol;

ix86_tls_module_base (void)
  if (!ix86_tls_module_base_symbol)
      ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
							"_TLS_MODULE_BASE_");
      SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
	|= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;

  return ix86_tls_module_base_symbol;
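/* Added commentary (not from the original source): the SYMBOL_REF built by
   ix86_tls_get_addr names the function called for global- and local-dynamic
   TLS accesses.  On ia32 with GNU TLS the generated sequence is roughly

       leal  x@tlsgd(,%ebx,1), %eax
       call  ___tls_get_addr

   with the result returned in %eax; the three-underscore name is the GNU
   variant that takes its argument in a register rather than on the stack,
   which is why it is chosen only for !TARGET_64BIT GNU TLS above.  */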
/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */

memory_address_length (rtx addr)
  struct ix86_address parts;
  rtx base, index, disp;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)

  ok = ix86_decompose_address (addr, &parts);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  index = parts.index;

     - esp as the base always wants an index,
     - ebp as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
      /* esp (for its index) and ebp (for its displacement) need
	 the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
	  || addr == arg_pointer_rtx
	  || addr == frame_pointer_rtx
	  || addr == hard_frame_pointer_rtx)

  /* Direct Addressing.  */
  else if (disp && !base && !index)

      /* Find the length of the displacement constant.  */
	  if (base && satisfies_constraint_K (disp))

      /* ebp always wants a displacement.  */
      else if (base == hard_frame_pointer_rtx)

      /* An index requires the two-byte modrm form....  */
	  /* ...like esp, which always wants an index.  */
	  || base == stack_pointer_rtx
	  || base == arg_pointer_rtx
	  || base == frame_pointer_rtx)
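/* Worked examples (added commentary, not from the original source) of the
   length computed above, counting only the bytes beyond the modrm byte:

       (%eax)         -> 0   plain register indirect
       (%esp)         -> 1   esp as base forces the SIB byte
       8(%ebp)        -> 1   ebp as base forces a disp8
       foo            -> 4   direct addressing, 32-bit displacement
       16(%eax,%ebx)  -> 2   SIB byte plus a disp8 (constraint K holds)

   A displacement that does not fit in a signed 8-bit immediate costs
   4 bytes instead of 1.  */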
/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that the insn has an 8-bit immediate alternative.  */

ix86_attr_length_immediate_default (rtx insn, int shortform)
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
	if (shortform && satisfies_constraint_K (recog_data.operand[i]))
	switch (get_attr_mode (insn))
	    /* Immediates for DImode instructions are encoded as 32-bit
	       sign-extended values.  */
	    fatal_insn ("unknown insn mode", insn);
/* Compute default value for "length_address" attribute.  */

ix86_attr_length_address_default (rtx insn)
  if (get_attr_type (insn) == TYPE_LEA)
      rtx set = PATTERN (insn);

      if (GET_CODE (set) == PARALLEL)
	set = XVECEXP (set, 0, 0);

      gcc_assert (GET_CODE (set) == SET);

      return memory_address_length (SET_SRC (set));

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
	return memory_address_length (XEXP (recog_data.operand[i], 0));
/* Return the maximum number of instructions a cpu can issue.  */

ix86_issue_rate (void)
    case PROCESSOR_PENTIUM:

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_PENTIUM4:
    case PROCESSOR_ATHLON:
    case PROCESSOR_AMDFAM10:
    case PROCESSOR_NOCONA:
    case PROCESSOR_GENERIC32:
    case PROCESSOR_GENERIC64:

    case PROCESSOR_CORE2:
/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
   by DEP_INSN and nothing else set by DEP_INSN.  */

ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)

  if ((set = single_set (dep_insn)) != 0)
      set = SET_DEST (set);
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
	   && XVECLEN (PATTERN (dep_insn), 0) == 2
	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));

  if (!REG_P (set) || REGNO (set) != FLAGS_REG)

  /* This test is true if the dependent insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
   address with operands set by DEP_INSN.  */

ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
  if (insn_type == TYPE_LEA
      addr = PATTERN (insn);

      if (GET_CODE (addr) == PARALLEL)
	addr = XVECEXP (addr, 0, 0);

      gcc_assert (GET_CODE (addr) == SET);

      addr = SET_SRC (addr);

      extract_insn_cached (insn);
      for (i = recog_data.n_operands - 1; i >= 0; --i)
	if (MEM_P (recog_data.operand[i]))
	    addr = XEXP (recog_data.operand[i], 0);

  return modified_in_p (addr, dep_insn);
ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (ix86_agi_dependent (insn, dep_insn, insn_type))

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependent (insn, dep_insn, insn_type))

      /* Floating point stores require value to be ready one cycle earlier.  */
      if (insn_type == TYPE_FMOV
	  && get_attr_memory (insn) == MEMORY_STORE
	  && !ix86_agi_dependent (insn, dep_insn, insn_type))

    case PROCESSOR_PENTIUMPRO:
      memory = get_attr_memory (insn);

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
	  && (set = single_set (dep_insn)) != NULL_RTX
	  && (set2 = single_set (insn)) != NULL_RTX
	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
	  && MEM_P (SET_DEST (set2)))

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
	  /* Claim moves to take one cycle, as core can issue one load
	     at a time and the next load can start a cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)

      memory = get_attr_memory (insn);

      /* The esp dependency is resolved before the instruction is really
	 finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
	  /* Claim moves to take one cycle, as core can issue one load
	     at a time and the next load can start a cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)

    case PROCESSOR_ATHLON:
    case PROCESSOR_AMDFAM10:
    case PROCESSOR_GENERIC32:
    case PROCESSOR_GENERIC64:
      memory = get_attr_memory (insn);

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
	  enum attr_unit unit = get_attr_unit (insn);

	  /* Because of the difference between the length of integer and
	     floating unit pipeline preparation stages, the memory operands
	     for floating point are cheaper.

	     ??? For Athlon the difference is most probably 2.  */
	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
	  loadcost = TARGET_ATHLON ? 2 : 0;

	  if (cost >= loadcost)
/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */

ia32_multipass_dfa_lookahead (void)
  if (ix86_tune == PROCESSOR_PENTIUM)

  if (ix86_tune == PROCESSOR_PENTIUMPRO
      || ix86_tune == PROCESSOR_K6)
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

ix86_constant_alignment (tree exp, int align)
  if (TREE_CODE (exp) == REAL_CST)
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
    return BITS_PER_WORD;
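/* Example (added commentary, not from the original source): a DFmode
   constant such as 1.0 placed in the constant pool is given at least
   64-bit alignment by the code above, and a string literal of 31 or more
   characters is raised to word alignment unless compiling for size, which
   helps the word-sized string operations that later read it.  */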
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

ix86_data_alignment (tree type, int align)
  int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);

  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
      && align < max_align)

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
    if (AGGREGATE_TYPE_P (type)
	&& TYPE_SIZE (type)
	&& TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	&& (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	    || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)

  if (TREE_CODE (type) == ARRAY_TYPE)
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
  else if (TREE_CODE (type) == COMPLEX_TYPE)
      if (TYPE_MODE (type) == DCmode && align < 64)
      if (TYPE_MODE (type) == XCmode && align < 128)
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
      if (TYPE_MODE (type) == DFmode && align < 64)
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
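/* Concrete example (added commentary, not from the original source): under
   the x86-64 rule above, a file-scope object such as

       static char buf[32];

   has its alignment raised to 128 bits (16 bytes) because the array is
   larger than 16 bytes, while a smaller object keeps whatever natural
   alignment its type already provides.  */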
/* Compute the alignment for a local variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this macro is used
   instead of that alignment to align the object.  */

ix86_local_alignment (tree type, int align)
  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
    if (AGGREGATE_TYPE_P (type)
	&& TYPE_SIZE (type)
	&& TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	&& (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	    || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)

  if (TREE_CODE (type) == ARRAY_TYPE)
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
  else if (TREE_CODE (type) == COMPLEX_TYPE)
      if (TYPE_MODE (type) == DCmode && align < 64)
      if (TYPE_MODE (type) == XCmode && align < 128)
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
      if (TYPE_MODE (type) == DFmode && align < 64)
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */

x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
      /* Compute offset from the end of the jmp to the target function.  */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
			       plus_constant (tramp, 10),
			       NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
		      gen_int_mode (0xb9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
		      gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
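      /* Added commentary (not from the original source): the stores above
	 assemble the 10-byte ia32 trampoline in place.  Byte 0xb9 is the
	 opcode of "mov $imm32, %ecx" and 0xe9 is "jmp rel32", so the
	 resulting machine code is

	     b9 <cxt>       movl  $<static chain>, %ecx
	     e9 <disp32>    jmp   <function>

	 where <disp32> is the displacement computed into DISP, relative to
	 the end of the jmp (tramp + 10); %ecx is the ia32 static-chain
	 register.  */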
      /* Try to load the address using the shorter movl instead of movabs.
	 We may want to support movq for kernel mode, but the kernel does
	 not use trampolines at the moment.  */
      if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb41, HImode));
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
			  gen_lowpart (SImode, fnaddr));
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb49, HImode));
	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
			  fnaddr);

      /* Load static chain using movabs to r10.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xba49, HImode));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
		      cxt);

      /* Jump to r11.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xff49, HImode));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset + 2)),
		      gen_int_mode (0xe3, QImode));

  gcc_assert (offset <= TRAMPOLINE_SIZE);

#ifdef ENABLE_EXECUTE_STACK
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
15813 /* Codes for all the SSE/MMX builtins. */
15816 IX86_BUILTIN_ADDPS
,
15817 IX86_BUILTIN_ADDSS
,
15818 IX86_BUILTIN_DIVPS
,
15819 IX86_BUILTIN_DIVSS
,
15820 IX86_BUILTIN_MULPS
,
15821 IX86_BUILTIN_MULSS
,
15822 IX86_BUILTIN_SUBPS
,
15823 IX86_BUILTIN_SUBSS
,
15825 IX86_BUILTIN_CMPEQPS
,
15826 IX86_BUILTIN_CMPLTPS
,
15827 IX86_BUILTIN_CMPLEPS
,
15828 IX86_BUILTIN_CMPGTPS
,
15829 IX86_BUILTIN_CMPGEPS
,
15830 IX86_BUILTIN_CMPNEQPS
,
15831 IX86_BUILTIN_CMPNLTPS
,
15832 IX86_BUILTIN_CMPNLEPS
,
15833 IX86_BUILTIN_CMPNGTPS
,
15834 IX86_BUILTIN_CMPNGEPS
,
15835 IX86_BUILTIN_CMPORDPS
,
15836 IX86_BUILTIN_CMPUNORDPS
,
15837 IX86_BUILTIN_CMPEQSS
,
15838 IX86_BUILTIN_CMPLTSS
,
15839 IX86_BUILTIN_CMPLESS
,
15840 IX86_BUILTIN_CMPNEQSS
,
15841 IX86_BUILTIN_CMPNLTSS
,
15842 IX86_BUILTIN_CMPNLESS
,
15843 IX86_BUILTIN_CMPNGTSS
,
15844 IX86_BUILTIN_CMPNGESS
,
15845 IX86_BUILTIN_CMPORDSS
,
15846 IX86_BUILTIN_CMPUNORDSS
,
15848 IX86_BUILTIN_COMIEQSS
,
15849 IX86_BUILTIN_COMILTSS
,
15850 IX86_BUILTIN_COMILESS
,
15851 IX86_BUILTIN_COMIGTSS
,
15852 IX86_BUILTIN_COMIGESS
,
15853 IX86_BUILTIN_COMINEQSS
,
15854 IX86_BUILTIN_UCOMIEQSS
,
15855 IX86_BUILTIN_UCOMILTSS
,
15856 IX86_BUILTIN_UCOMILESS
,
15857 IX86_BUILTIN_UCOMIGTSS
,
15858 IX86_BUILTIN_UCOMIGESS
,
15859 IX86_BUILTIN_UCOMINEQSS
,
15861 IX86_BUILTIN_CVTPI2PS
,
15862 IX86_BUILTIN_CVTPS2PI
,
15863 IX86_BUILTIN_CVTSI2SS
,
15864 IX86_BUILTIN_CVTSI642SS
,
15865 IX86_BUILTIN_CVTSS2SI
,
15866 IX86_BUILTIN_CVTSS2SI64
,
15867 IX86_BUILTIN_CVTTPS2PI
,
15868 IX86_BUILTIN_CVTTSS2SI
,
15869 IX86_BUILTIN_CVTTSS2SI64
,
15871 IX86_BUILTIN_MAXPS
,
15872 IX86_BUILTIN_MAXSS
,
15873 IX86_BUILTIN_MINPS
,
15874 IX86_BUILTIN_MINSS
,
15876 IX86_BUILTIN_LOADUPS
,
15877 IX86_BUILTIN_STOREUPS
,
15878 IX86_BUILTIN_MOVSS
,
15880 IX86_BUILTIN_MOVHLPS
,
15881 IX86_BUILTIN_MOVLHPS
,
15882 IX86_BUILTIN_LOADHPS
,
15883 IX86_BUILTIN_LOADLPS
,
15884 IX86_BUILTIN_STOREHPS
,
15885 IX86_BUILTIN_STORELPS
,
15887 IX86_BUILTIN_MASKMOVQ
,
15888 IX86_BUILTIN_MOVMSKPS
,
15889 IX86_BUILTIN_PMOVMSKB
,
15891 IX86_BUILTIN_MOVNTPS
,
15892 IX86_BUILTIN_MOVNTQ
,
15894 IX86_BUILTIN_LOADDQU
,
15895 IX86_BUILTIN_STOREDQU
,
15897 IX86_BUILTIN_PACKSSWB
,
15898 IX86_BUILTIN_PACKSSDW
,
15899 IX86_BUILTIN_PACKUSWB
,
15901 IX86_BUILTIN_PADDB
,
15902 IX86_BUILTIN_PADDW
,
15903 IX86_BUILTIN_PADDD
,
15904 IX86_BUILTIN_PADDQ
,
15905 IX86_BUILTIN_PADDSB
,
15906 IX86_BUILTIN_PADDSW
,
15907 IX86_BUILTIN_PADDUSB
,
15908 IX86_BUILTIN_PADDUSW
,
15909 IX86_BUILTIN_PSUBB
,
15910 IX86_BUILTIN_PSUBW
,
15911 IX86_BUILTIN_PSUBD
,
15912 IX86_BUILTIN_PSUBQ
,
15913 IX86_BUILTIN_PSUBSB
,
15914 IX86_BUILTIN_PSUBSW
,
15915 IX86_BUILTIN_PSUBUSB
,
15916 IX86_BUILTIN_PSUBUSW
,
15919 IX86_BUILTIN_PANDN
,
15923 IX86_BUILTIN_PAVGB
,
15924 IX86_BUILTIN_PAVGW
,
15926 IX86_BUILTIN_PCMPEQB
,
15927 IX86_BUILTIN_PCMPEQW
,
15928 IX86_BUILTIN_PCMPEQD
,
15929 IX86_BUILTIN_PCMPGTB
,
15930 IX86_BUILTIN_PCMPGTW
,
15931 IX86_BUILTIN_PCMPGTD
,
15933 IX86_BUILTIN_PMADDWD
,
15935 IX86_BUILTIN_PMAXSW
,
15936 IX86_BUILTIN_PMAXUB
,
15937 IX86_BUILTIN_PMINSW
,
15938 IX86_BUILTIN_PMINUB
,
15940 IX86_BUILTIN_PMULHUW
,
15941 IX86_BUILTIN_PMULHW
,
15942 IX86_BUILTIN_PMULLW
,
15944 IX86_BUILTIN_PSADBW
,
15945 IX86_BUILTIN_PSHUFW
,
15947 IX86_BUILTIN_PSLLW
,
15948 IX86_BUILTIN_PSLLD
,
15949 IX86_BUILTIN_PSLLQ
,
15950 IX86_BUILTIN_PSRAW
,
15951 IX86_BUILTIN_PSRAD
,
15952 IX86_BUILTIN_PSRLW
,
15953 IX86_BUILTIN_PSRLD
,
15954 IX86_BUILTIN_PSRLQ
,
15955 IX86_BUILTIN_PSLLWI
,
15956 IX86_BUILTIN_PSLLDI
,
15957 IX86_BUILTIN_PSLLQI
,
15958 IX86_BUILTIN_PSRAWI
,
15959 IX86_BUILTIN_PSRADI
,
15960 IX86_BUILTIN_PSRLWI
,
15961 IX86_BUILTIN_PSRLDI
,
15962 IX86_BUILTIN_PSRLQI
,
15964 IX86_BUILTIN_PUNPCKHBW
,
15965 IX86_BUILTIN_PUNPCKHWD
,
15966 IX86_BUILTIN_PUNPCKHDQ
,
15967 IX86_BUILTIN_PUNPCKLBW
,
15968 IX86_BUILTIN_PUNPCKLWD
,
15969 IX86_BUILTIN_PUNPCKLDQ
,
15971 IX86_BUILTIN_SHUFPS
,
15973 IX86_BUILTIN_RCPPS
,
15974 IX86_BUILTIN_RCPSS
,
15975 IX86_BUILTIN_RSQRTPS
,
15976 IX86_BUILTIN_RSQRTSS
,
15977 IX86_BUILTIN_SQRTPS
,
15978 IX86_BUILTIN_SQRTSS
,
15980 IX86_BUILTIN_UNPCKHPS
,
15981 IX86_BUILTIN_UNPCKLPS
,
15983 IX86_BUILTIN_ANDPS
,
15984 IX86_BUILTIN_ANDNPS
,
15986 IX86_BUILTIN_XORPS
,
15989 IX86_BUILTIN_LDMXCSR
,
15990 IX86_BUILTIN_STMXCSR
,
15991 IX86_BUILTIN_SFENCE
,
15993 /* 3DNow! Original */
15994 IX86_BUILTIN_FEMMS
,
15995 IX86_BUILTIN_PAVGUSB
,
15996 IX86_BUILTIN_PF2ID
,
15997 IX86_BUILTIN_PFACC
,
15998 IX86_BUILTIN_PFADD
,
15999 IX86_BUILTIN_PFCMPEQ
,
16000 IX86_BUILTIN_PFCMPGE
,
16001 IX86_BUILTIN_PFCMPGT
,
16002 IX86_BUILTIN_PFMAX
,
16003 IX86_BUILTIN_PFMIN
,
16004 IX86_BUILTIN_PFMUL
,
16005 IX86_BUILTIN_PFRCP
,
16006 IX86_BUILTIN_PFRCPIT1
,
16007 IX86_BUILTIN_PFRCPIT2
,
16008 IX86_BUILTIN_PFRSQIT1
,
16009 IX86_BUILTIN_PFRSQRT
,
16010 IX86_BUILTIN_PFSUB
,
16011 IX86_BUILTIN_PFSUBR
,
16012 IX86_BUILTIN_PI2FD
,
16013 IX86_BUILTIN_PMULHRW
,
16015 /* 3DNow! Athlon Extensions */
16016 IX86_BUILTIN_PF2IW
,
16017 IX86_BUILTIN_PFNACC
,
16018 IX86_BUILTIN_PFPNACC
,
16019 IX86_BUILTIN_PI2FW
,
16020 IX86_BUILTIN_PSWAPDSI
,
16021 IX86_BUILTIN_PSWAPDSF
,
16024 IX86_BUILTIN_ADDPD
,
16025 IX86_BUILTIN_ADDSD
,
16026 IX86_BUILTIN_DIVPD
,
16027 IX86_BUILTIN_DIVSD
,
16028 IX86_BUILTIN_MULPD
,
16029 IX86_BUILTIN_MULSD
,
16030 IX86_BUILTIN_SUBPD
,
16031 IX86_BUILTIN_SUBSD
,
16033 IX86_BUILTIN_CMPEQPD
,
16034 IX86_BUILTIN_CMPLTPD
,
16035 IX86_BUILTIN_CMPLEPD
,
16036 IX86_BUILTIN_CMPGTPD
,
16037 IX86_BUILTIN_CMPGEPD
,
16038 IX86_BUILTIN_CMPNEQPD
,
16039 IX86_BUILTIN_CMPNLTPD
,
16040 IX86_BUILTIN_CMPNLEPD
,
16041 IX86_BUILTIN_CMPNGTPD
,
16042 IX86_BUILTIN_CMPNGEPD
,
16043 IX86_BUILTIN_CMPORDPD
,
16044 IX86_BUILTIN_CMPUNORDPD
,
16045 IX86_BUILTIN_CMPNEPD
,
16046 IX86_BUILTIN_CMPEQSD
,
16047 IX86_BUILTIN_CMPLTSD
,
16048 IX86_BUILTIN_CMPLESD
,
16049 IX86_BUILTIN_CMPNEQSD
,
16050 IX86_BUILTIN_CMPNLTSD
,
16051 IX86_BUILTIN_CMPNLESD
,
16052 IX86_BUILTIN_CMPORDSD
,
16053 IX86_BUILTIN_CMPUNORDSD
,
16054 IX86_BUILTIN_CMPNESD
,
16056 IX86_BUILTIN_COMIEQSD
,
16057 IX86_BUILTIN_COMILTSD
,
16058 IX86_BUILTIN_COMILESD
,
16059 IX86_BUILTIN_COMIGTSD
,
16060 IX86_BUILTIN_COMIGESD
,
16061 IX86_BUILTIN_COMINEQSD
,
16062 IX86_BUILTIN_UCOMIEQSD
,
16063 IX86_BUILTIN_UCOMILTSD
,
16064 IX86_BUILTIN_UCOMILESD
,
16065 IX86_BUILTIN_UCOMIGTSD
,
16066 IX86_BUILTIN_UCOMIGESD
,
16067 IX86_BUILTIN_UCOMINEQSD
,
16069 IX86_BUILTIN_MAXPD
,
16070 IX86_BUILTIN_MAXSD
,
16071 IX86_BUILTIN_MINPD
,
16072 IX86_BUILTIN_MINSD
,
16074 IX86_BUILTIN_ANDPD
,
16075 IX86_BUILTIN_ANDNPD
,
16077 IX86_BUILTIN_XORPD
,
16079 IX86_BUILTIN_SQRTPD
,
16080 IX86_BUILTIN_SQRTSD
,
16082 IX86_BUILTIN_UNPCKHPD
,
16083 IX86_BUILTIN_UNPCKLPD
,
16085 IX86_BUILTIN_SHUFPD
,
16087 IX86_BUILTIN_LOADUPD
,
16088 IX86_BUILTIN_STOREUPD
,
16089 IX86_BUILTIN_MOVSD
,
16091 IX86_BUILTIN_LOADHPD
,
16092 IX86_BUILTIN_LOADLPD
,
16094 IX86_BUILTIN_CVTDQ2PD
,
16095 IX86_BUILTIN_CVTDQ2PS
,
16097 IX86_BUILTIN_CVTPD2DQ
,
16098 IX86_BUILTIN_CVTPD2PI
,
16099 IX86_BUILTIN_CVTPD2PS
,
16100 IX86_BUILTIN_CVTTPD2DQ
,
16101 IX86_BUILTIN_CVTTPD2PI
,
16103 IX86_BUILTIN_CVTPI2PD
,
16104 IX86_BUILTIN_CVTSI2SD
,
16105 IX86_BUILTIN_CVTSI642SD
,
16107 IX86_BUILTIN_CVTSD2SI
,
16108 IX86_BUILTIN_CVTSD2SI64
,
16109 IX86_BUILTIN_CVTSD2SS
,
16110 IX86_BUILTIN_CVTSS2SD
,
16111 IX86_BUILTIN_CVTTSD2SI
,
16112 IX86_BUILTIN_CVTTSD2SI64
,
16114 IX86_BUILTIN_CVTPS2DQ
,
16115 IX86_BUILTIN_CVTPS2PD
,
16116 IX86_BUILTIN_CVTTPS2DQ
,
16118 IX86_BUILTIN_MOVNTI
,
16119 IX86_BUILTIN_MOVNTPD
,
16120 IX86_BUILTIN_MOVNTDQ
,
16123 IX86_BUILTIN_MASKMOVDQU
,
16124 IX86_BUILTIN_MOVMSKPD
,
16125 IX86_BUILTIN_PMOVMSKB128
,
16127 IX86_BUILTIN_PACKSSWB128
,
16128 IX86_BUILTIN_PACKSSDW128
,
16129 IX86_BUILTIN_PACKUSWB128
,
16131 IX86_BUILTIN_PADDB128
,
16132 IX86_BUILTIN_PADDW128
,
16133 IX86_BUILTIN_PADDD128
,
16134 IX86_BUILTIN_PADDQ128
,
16135 IX86_BUILTIN_PADDSB128
,
16136 IX86_BUILTIN_PADDSW128
,
16137 IX86_BUILTIN_PADDUSB128
,
16138 IX86_BUILTIN_PADDUSW128
,
16139 IX86_BUILTIN_PSUBB128
,
16140 IX86_BUILTIN_PSUBW128
,
16141 IX86_BUILTIN_PSUBD128
,
16142 IX86_BUILTIN_PSUBQ128
,
16143 IX86_BUILTIN_PSUBSB128
,
16144 IX86_BUILTIN_PSUBSW128
,
16145 IX86_BUILTIN_PSUBUSB128
,
16146 IX86_BUILTIN_PSUBUSW128
,
16148 IX86_BUILTIN_PAND128
,
16149 IX86_BUILTIN_PANDN128
,
16150 IX86_BUILTIN_POR128
,
16151 IX86_BUILTIN_PXOR128
,
16153 IX86_BUILTIN_PAVGB128
,
16154 IX86_BUILTIN_PAVGW128
,
16156 IX86_BUILTIN_PCMPEQB128
,
16157 IX86_BUILTIN_PCMPEQW128
,
16158 IX86_BUILTIN_PCMPEQD128
,
16159 IX86_BUILTIN_PCMPGTB128
,
16160 IX86_BUILTIN_PCMPGTW128
,
16161 IX86_BUILTIN_PCMPGTD128
,
16163 IX86_BUILTIN_PMADDWD128
,
16165 IX86_BUILTIN_PMAXSW128
,
16166 IX86_BUILTIN_PMAXUB128
,
16167 IX86_BUILTIN_PMINSW128
,
16168 IX86_BUILTIN_PMINUB128
,
16170 IX86_BUILTIN_PMULUDQ
,
16171 IX86_BUILTIN_PMULUDQ128
,
16172 IX86_BUILTIN_PMULHUW128
,
16173 IX86_BUILTIN_PMULHW128
,
16174 IX86_BUILTIN_PMULLW128
,
16176 IX86_BUILTIN_PSADBW128
,
16177 IX86_BUILTIN_PSHUFHW
,
16178 IX86_BUILTIN_PSHUFLW
,
16179 IX86_BUILTIN_PSHUFD
,
16181 IX86_BUILTIN_PSLLW128
,
16182 IX86_BUILTIN_PSLLD128
,
16183 IX86_BUILTIN_PSLLQ128
,
16184 IX86_BUILTIN_PSRAW128
,
16185 IX86_BUILTIN_PSRAD128
,
16186 IX86_BUILTIN_PSRLW128
,
16187 IX86_BUILTIN_PSRLD128
,
16188 IX86_BUILTIN_PSRLQ128
,
16189 IX86_BUILTIN_PSLLDQI128
,
16190 IX86_BUILTIN_PSLLWI128
,
16191 IX86_BUILTIN_PSLLDI128
,
16192 IX86_BUILTIN_PSLLQI128
,
16193 IX86_BUILTIN_PSRAWI128
,
16194 IX86_BUILTIN_PSRADI128
,
16195 IX86_BUILTIN_PSRLDQI128
,
16196 IX86_BUILTIN_PSRLWI128
,
16197 IX86_BUILTIN_PSRLDI128
,
16198 IX86_BUILTIN_PSRLQI128
,
16200 IX86_BUILTIN_PUNPCKHBW128
,
16201 IX86_BUILTIN_PUNPCKHWD128
,
16202 IX86_BUILTIN_PUNPCKHDQ128
,
16203 IX86_BUILTIN_PUNPCKHQDQ128
,
16204 IX86_BUILTIN_PUNPCKLBW128
,
16205 IX86_BUILTIN_PUNPCKLWD128
,
16206 IX86_BUILTIN_PUNPCKLDQ128
,
16207 IX86_BUILTIN_PUNPCKLQDQ128
,
16209 IX86_BUILTIN_CLFLUSH
,
16210 IX86_BUILTIN_MFENCE
,
16211 IX86_BUILTIN_LFENCE
,
16213 /* Prescott New Instructions. */
16214 IX86_BUILTIN_ADDSUBPS
,
16215 IX86_BUILTIN_HADDPS
,
16216 IX86_BUILTIN_HSUBPS
,
16217 IX86_BUILTIN_MOVSHDUP
,
16218 IX86_BUILTIN_MOVSLDUP
,
16219 IX86_BUILTIN_ADDSUBPD
,
16220 IX86_BUILTIN_HADDPD
,
16221 IX86_BUILTIN_HSUBPD
,
16222 IX86_BUILTIN_LDDQU
,
16224 IX86_BUILTIN_MONITOR
,
16225 IX86_BUILTIN_MWAIT
,
16228 IX86_BUILTIN_PHADDW
,
16229 IX86_BUILTIN_PHADDD
,
16230 IX86_BUILTIN_PHADDSW
,
16231 IX86_BUILTIN_PHSUBW
,
16232 IX86_BUILTIN_PHSUBD
,
16233 IX86_BUILTIN_PHSUBSW
,
16234 IX86_BUILTIN_PMADDUBSW
,
16235 IX86_BUILTIN_PMULHRSW
,
16236 IX86_BUILTIN_PSHUFB
,
16237 IX86_BUILTIN_PSIGNB
,
16238 IX86_BUILTIN_PSIGNW
,
16239 IX86_BUILTIN_PSIGND
,
16240 IX86_BUILTIN_PALIGNR
,
16241 IX86_BUILTIN_PABSB
,
16242 IX86_BUILTIN_PABSW
,
16243 IX86_BUILTIN_PABSD
,
16245 IX86_BUILTIN_PHADDW128
,
16246 IX86_BUILTIN_PHADDD128
,
16247 IX86_BUILTIN_PHADDSW128
,
16248 IX86_BUILTIN_PHSUBW128
,
16249 IX86_BUILTIN_PHSUBD128
,
16250 IX86_BUILTIN_PHSUBSW128
,
16251 IX86_BUILTIN_PMADDUBSW128
,
16252 IX86_BUILTIN_PMULHRSW128
,
16253 IX86_BUILTIN_PSHUFB128
,
16254 IX86_BUILTIN_PSIGNB128
,
16255 IX86_BUILTIN_PSIGNW128
,
16256 IX86_BUILTIN_PSIGND128
,
16257 IX86_BUILTIN_PALIGNR128
,
16258 IX86_BUILTIN_PABSB128
,
16259 IX86_BUILTIN_PABSW128
,
16260 IX86_BUILTIN_PABSD128
,
16262 /* AMDFAM10 - SSE4A New Instructions. */
16263 IX86_BUILTIN_MOVNTSD
,
16264 IX86_BUILTIN_MOVNTSS
,
16265 IX86_BUILTIN_EXTRQI
,
16266 IX86_BUILTIN_EXTRQ
,
16267 IX86_BUILTIN_INSERTQI
,
16268 IX86_BUILTIN_INSERTQ
,
16270 IX86_BUILTIN_VEC_INIT_V2SI
,
16271 IX86_BUILTIN_VEC_INIT_V4HI
,
16272 IX86_BUILTIN_VEC_INIT_V8QI
,
16273 IX86_BUILTIN_VEC_EXT_V2DF
,
16274 IX86_BUILTIN_VEC_EXT_V2DI
,
16275 IX86_BUILTIN_VEC_EXT_V4SF
,
16276 IX86_BUILTIN_VEC_EXT_V4SI
,
16277 IX86_BUILTIN_VEC_EXT_V8HI
,
16278 IX86_BUILTIN_VEC_EXT_V2SI
,
16279 IX86_BUILTIN_VEC_EXT_V4HI
,
16280 IX86_BUILTIN_VEC_SET_V8HI
,
16281 IX86_BUILTIN_VEC_SET_V4HI
,
/* Table for the ix86 builtin decls.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];

/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Do so
   only if target_flags include one of MASK.  Stores the function decl
   in the ix86_builtins array.
   Returns the function decl or NULL_TREE, if the builtin was not added.  */

def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
  tree decl = NULL_TREE;

  if (mask & target_flags
      && (!(mask & MASK_64BIT) || TARGET_64BIT))
      decl = add_builtin_function (name, type, code, BUILT_IN_MD,
      ix86_builtins[(int) code] = decl;

/* Like def_builtin, but also marks the function decl "const".  */

def_builtin_const (int mask, const char *name, tree type,
		   enum ix86_builtins code)
  tree decl = def_builtin (mask, name, type, code);
  TREE_READONLY (decl) = 1;
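/* Illustrative use (added commentary, not from the original source): the
   SSE builtin tables below are registered through these helpers, e.g. a
   call of roughly this shape

       def_builtin_const (MASK_SSE, "__builtin_ia32_addps",
			  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   creates the builtin only when -msse is reflected in target_flags and
   records the decl in ix86_builtins[] so ix86_expand_builtin can find it
   later.  The type-node name v4sf_ftype_v4sf_v4sf follows the naming used
   elsewhere in this file and is shown here only as an example.  */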
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1

struct builtin_description
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const unsigned int flag;
static const struct builtin_description bdesc_comi[] =
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
16366 static const struct builtin_description bdesc_2arg
[] =
16369 { MASK_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
16370 { MASK_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
16371 { MASK_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
16372 { MASK_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
16373 { MASK_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
16374 { MASK_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
16375 { MASK_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
16376 { MASK_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
16378 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
16379 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
16380 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
16381 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
,
16382 BUILTIN_DESC_SWAP_OPERANDS
},
16383 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
,
16384 BUILTIN_DESC_SWAP_OPERANDS
},
16385 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
16386 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, 0 },
16387 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, 0 },
16388 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, 0 },
16389 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
,
16390 BUILTIN_DESC_SWAP_OPERANDS
},
16391 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
,
16392 BUILTIN_DESC_SWAP_OPERANDS
},
16393 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, 0 },
16394 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
16395 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
16396 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
16397 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
16398 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, 0 },
16399 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, 0 },
16400 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, 0 },
16401 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
,
16402 BUILTIN_DESC_SWAP_OPERANDS
},
16403 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
,
16404 BUILTIN_DESC_SWAP_OPERANDS
},
16405 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
16407 { MASK_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
16408 { MASK_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
16409 { MASK_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
16410 { MASK_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
16412 { MASK_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
16413 { MASK_SSE
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
16414 { MASK_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
16415 { MASK_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
16417 { MASK_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
16418 { MASK_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
16419 { MASK_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
16420 { MASK_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
16421 { MASK_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
16424 { MASK_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
16425 { MASK_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
16426 { MASK_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
16427 { MASK_SSE2
, CODE_FOR_mmx_adddi3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, 0, 0 },
16428 { MASK_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
16429 { MASK_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
16430 { MASK_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
16431 { MASK_SSE2
, CODE_FOR_mmx_subdi3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, 0, 0 },
16433 { MASK_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
16434 { MASK_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
16435 { MASK_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
16436 { MASK_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
16437 { MASK_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
16438 { MASK_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
16439 { MASK_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
16440 { MASK_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
16442 { MASK_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
16443 { MASK_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
16444 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
16446 { MASK_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
16447 { MASK_MMX
, CODE_FOR_mmx_nandv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
16448 { MASK_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
16449 { MASK_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
16451 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
16452 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
16454 { MASK_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
16455 { MASK_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
16456 { MASK_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
16457 { MASK_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
16458 { MASK_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
16459 { MASK_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
16461 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
16462 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
16463 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
16464 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
16466 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
16467 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
16468 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
16469 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
16470 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
16471 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
16474 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
16475 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
16476 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
16478 { MASK_SSE
, CODE_FOR_sse_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
16479 { MASK_SSE
, CODE_FOR_sse_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
16480 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtsi2ssq
, 0, IX86_BUILTIN_CVTSI642SS
, 0, 0 },
16482 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
16483 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
16484 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
16485 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
16486 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
16487 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
16489 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
16490 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
16491 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
16492 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
16493 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
16494 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
16496 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
16497 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
16498 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
16499 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
16501 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
16502 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
16505 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
16506 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
16507 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
16508 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
16509 { MASK_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
16510 { MASK_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
16511 { MASK_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
16512 { MASK_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
16514 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
16515 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
16516 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
16517 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
,
16518 BUILTIN_DESC_SWAP_OPERANDS
},
16519 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
,
16520 BUILTIN_DESC_SWAP_OPERANDS
},
16521 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
16522 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, 0 },
16523 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, 0 },
16524 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, 0 },
16525 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
,
16526 BUILTIN_DESC_SWAP_OPERANDS
},
16527 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
,
16528 BUILTIN_DESC_SWAP_OPERANDS
},
16529 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, 0 },
16530 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
16531 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
16532 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
16533 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
16534 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, 0 },
16535 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, 0 },
16536 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, 0 },
16537 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, 0 },
16539 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
16540 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
16541 { MASK_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
16542 { MASK_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
16544 { MASK_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
16545 { MASK_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
16546 { MASK_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
16547 { MASK_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
16549 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
16550 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
16551 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
16554 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
16555 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
16556 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
16557 { MASK_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
16558 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
16559 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
16560 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
16561 { MASK_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
16563 { MASK_MMX
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
16564 { MASK_MMX
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
16565 { MASK_MMX
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
16566 { MASK_MMX
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
16567 { MASK_MMX
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
16568 { MASK_MMX
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
16569 { MASK_MMX
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
16570 { MASK_MMX
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
16572 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
16573 { MASK_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
16575 { MASK_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
16576 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
16577 { MASK_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
16578 { MASK_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
16580 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
16581 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
16583 { MASK_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
16584 { MASK_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
16585 { MASK_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
16586 { MASK_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
16587 { MASK_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
16588 { MASK_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
16590 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
16591 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
16592 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
16593 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
16595 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
16596 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
16597 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
16598 { MASK_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, 0, 0 },
16599 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
16600 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
16601 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
16602 { MASK_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, 0, 0 },
16604 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
16605 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
16606 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
16608 { MASK_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
16609 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
16611 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, 0, IX86_BUILTIN_PMULUDQ
, 0, 0 },
16612 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, 0, IX86_BUILTIN_PMULUDQ128
, 0, 0 },
16614 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
16615 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
16616 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
16618 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
16619 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
16620 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
16622 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
16623 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
16625 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
16627 { MASK_SSE2
, CODE_FOR_sse2_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
16628 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, 0, IX86_BUILTIN_CVTSI642SD
, 0, 0 },
16629 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
16630 { MASK_SSE2
, CODE_FOR_sse2_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 },
16633 { MASK_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, 0, 0 },
16634 { MASK_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, 0, 0 },
16635 { MASK_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, 0, 0 },
16636 { MASK_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, 0, 0 },
16637 { MASK_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, 0, 0 },
16638 { MASK_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, 0, 0 },
16641 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, 0, 0 },
16642 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, 0, 0 },
16643 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, 0, 0 },
16644 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, 0, 0 },
16645 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, 0, 0 },
16646 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, 0, 0 },
16647 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, 0, 0 },
16648 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, 0, 0 },
16649 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, 0, 0 },
16650 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, 0, 0 },
16651 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, 0, 0 },
16652 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, 0, 0 },
16653 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv8hi3
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, 0, 0 },
16654 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv4hi3
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, 0, 0 },
16655 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, 0, 0 },
16656 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, 0, 0 },
16657 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, 0, 0 },
16658 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, 0, 0 },
16659 { MASK_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, 0, 0 },
16660 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, 0, 0 },
16661 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, 0, 0 },
16662 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, 0, 0 },
16663 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, 0, 0 },
16664 { MASK_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, 0, 0 }
};

static const struct builtin_description bdesc_1arg[] =
{
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },

  { MASK_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, 0, 0 },

  { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
  { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
  { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
  { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
  { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
  { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
};
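
/* Illustrative note (added; not in the original source): each bdesc_* row
   ties a target mask and an insn pattern to one IX86_BUILTIN_* code.  For
   example the row

     { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 }

   is picked up by the registration loops in ix86_init_mmx_sse_builtins
   below, which derive a function type from the insn's operand mode and then
   call def_builtin (d->mask, d->name, type, d->code).  Rows whose name
   field is 0 are instead registered further down with an explicit
   def_builtin call and a hand-picked type.  */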
static void
ix86_init_builtins (void)
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}

/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   portion of the builtins.  */

static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description *d;
  size_t i;
  tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
  tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
  tree V2DI_type_node
    = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
  tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
  tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
  tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
  tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
  tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);

  tree pchar_type_node = build_pointer_type (char_type_node);
  tree pcchar_type_node
    = build_pointer_type (build_type_variant (char_type_node, 1, 0));
  tree pfloat_type_node = build_pointer_type (float_type_node);
  tree pcfloat_type_node
    = build_pointer_type (build_type_variant (float_type_node, 1, 0));
  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
  tree pv2di_type_node = build_pointer_type (V2DI_type_node);
  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);

  tree int_ftype_v4sf_v4sf
    = build_function_type_list (integer_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v4si_ftype_v4sf_v4sf
    = build_function_type_list (V4SI_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  /* MMX/SSE/integer conversions.  */
  tree int_ftype_v4sf
    = build_function_type_list (integer_type_node, V4SF_type_node, NULL_TREE);
  tree int64_ftype_v4sf
    = build_function_type_list (long_long_integer_type_node,
                                V4SF_type_node, NULL_TREE);
  tree int_ftype_v8qi
    = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, integer_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int64
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, long_long_integer_type_node,
                                NULL_TREE);
  tree v4sf_ftype_v4sf_v2si
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V2SI_type_node, NULL_TREE);

  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type_list (V8QI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v2si_v2si
    = build_function_type_list (V4HI_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V4SF_type_node,
                                integer_type_node, NULL_TREE);
  tree v2si_ftype_v4hi_v4hi
    = build_function_type_list (V2SI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_di
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree v2si_ftype_v2si_di
    = build_function_type_list (V2SI_type_node,
                                V2SI_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree void_ftype_void
    = build_function_type (void_type_node, void_list_node);
  tree void_ftype_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
  tree void_ftype_unsigned_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node,
                                unsigned_type_node, NULL_TREE);
  tree void_ftype_pcvoid_unsigned_unsigned
    = build_function_type_list (void_type_node, const_ptr_type_node,
                                unsigned_type_node, unsigned_type_node,
                                NULL_TREE);
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, void_list_node);
  tree v2si_ftype_v4sf
    = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
  /* Loads/stores.  */
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type_list (void_type_node,
                                V8QI_type_node, V8QI_type_node,
                                pchar_type_node, NULL_TREE);
  tree v4sf_ftype_pcfloat
    = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
  /* @@@ the type is bogus */
  tree v4sf_ftype_v4sf_pv2si
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, pv2si_type_node, NULL_TREE);
  tree void_ftype_pv2si_v4sf
    = build_function_type_list (void_type_node,
                                pv2si_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pfloat_v4sf
    = build_function_type_list (void_type_node,
                                pfloat_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pdi_di
    = build_function_type_list (void_type_node,
                                pdi_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree void_ftype_pv2di_v2di
    = build_function_type_list (void_type_node,
                                pv2di_type_node, V2DI_type_node, NULL_TREE);
  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi
    = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi
    = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v4si_ftype_v4si
    = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi
    = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi
    = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);

  /* Normal vector binops.  */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type_list (V8QI_type_node,
                                V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si_v2si
    = build_function_type_list (V2SI_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree di_ftype_di_di
    = build_function_type_list (long_long_unsigned_type_node,
                                long_long_unsigned_type_node,
                                long_long_unsigned_type_node, NULL_TREE);

  tree di_ftype_di_di_int
    = build_function_type_list (long_long_unsigned_type_node,
                                long_long_unsigned_type_node,
                                long_long_unsigned_type_node,
                                integer_type_node, NULL_TREE);

  tree v2si_ftype_v2sf
    = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2si
    = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
  tree v2si_ftype_v2si
    = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf
    = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf_v2sf
    = build_function_type_list (V2SF_type_node,
                                V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2si_ftype_v2sf_v2sf
    = build_function_type_list (V2SI_type_node,
                                V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree pint_type_node = build_pointer_type (integer_type_node);
  tree pdouble_type_node = build_pointer_type (double_type_node);
  tree pcdouble_type_node
    = build_pointer_type (build_type_variant (double_type_node, 1, 0));
  tree int_ftype_v2df_v2df
    = build_function_type_list (integer_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);

  tree void_ftype_pcvoid
    = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
  tree v4sf_ftype_v4si
    = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v4sf
    = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v4si
    = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v2df
    = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
  tree v2si_ftype_v2df
    = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
  tree v4sf_ftype_v2df
    = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2si
    = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
  tree v2df_ftype_v4sf
    = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree int_ftype_v2df
    = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
  tree int64_ftype_v2df
    = build_function_type_list (long_long_integer_type_node,
                                V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int64
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, long_long_integer_type_node,
                                NULL_TREE);
  tree v4sf_ftype_v4sf_v2df
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v4sf
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df_int
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V2DF_type_node,
                                integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_pcdouble
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree void_ftype_pdouble_v2df
    = build_function_type_list (void_type_node,
                                pdouble_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pint_int
    = build_function_type_list (void_type_node,
                                pint_type_node, integer_type_node, NULL_TREE);
  tree void_ftype_v16qi_v16qi_pchar
    = build_function_type_list (void_type_node,
                                V16QI_type_node, V16QI_type_node,
                                pchar_type_node, NULL_TREE);
  tree v2df_ftype_pcdouble
    = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi
    = build_function_type_list (V16QI_type_node,
                                V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v8hi
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v4si
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v2di_ftype_v2di_v2di
    = build_function_type_list (V2DI_type_node,
                                V2DI_type_node, V2DI_type_node, NULL_TREE);
  tree v2di_ftype_v2df_v2df
    = build_function_type_list (V2DI_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df
    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2di_ftype_v2di_int
    = build_function_type_list (V2DI_type_node,
                                V2DI_type_node, integer_type_node, NULL_TREE);
  tree v2di_ftype_v2di_v2di_int
    = build_function_type_list (V2DI_type_node, V2DI_type_node,
                                V2DI_type_node, integer_type_node, NULL_TREE);
  tree v4si_ftype_v4si_int
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_int
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v2di
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v2di
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v8hi_v8hi
    = build_function_type_list (V4SI_type_node,
                                V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree di_ftype_v8qi_v8qi
    = build_function_type_list (long_long_unsigned_type_node,
                                V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree di_ftype_v2si_v2si
    = build_function_type_list (long_long_unsigned_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2di_ftype_v16qi_v16qi
    = build_function_type_list (V2DI_type_node,
                                V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v2di_ftype_v4si_v4si
    = build_function_type_list (V2DI_type_node,
                                V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree int_ftype_v16qi
    = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
  tree v16qi_ftype_pcchar
    = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
  tree void_ftype_pchar_v16qi
    = build_function_type_list (void_type_node,
                                pchar_type_node, V16QI_type_node, NULL_TREE);

  tree v2di_ftype_v2di_unsigned_unsigned
    = build_function_type_list (V2DI_type_node, V2DI_type_node,
                                unsigned_type_node, unsigned_type_node,
                                NULL_TREE);
  tree v2di_ftype_v2di_v2di_unsigned_unsigned
    = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
                                unsigned_type_node, unsigned_type_node,
                                NULL_TREE);
  tree v2di_ftype_v2di_v16qi
    = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
                                NULL_TREE);

  tree float80_type;
  tree float128_type;
  tree ftype;

  /* The __float80 type.  */
  if (TYPE_MODE (long_double_type_node) == XFmode)
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
                                               "__float80");
  else
    {
      /* The __float80 type.  */
      float80_type = make_node (REAL_TYPE);
      TYPE_PRECISION (float80_type) = 80;
      layout_type (float80_type);
      (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
    }

  float128_type = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type) = 128;
  layout_type (float128_type);
  (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
         mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
        continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
        {
        case V16QImode:
          type = v16qi_ftype_v16qi_v16qi;
          break;
        case V8HImode:
          type = v8hi_ftype_v8hi_v8hi;
          break;
        case V4SImode:
          type = v4si_ftype_v4si_v4si;
          break;
        case V2DImode:
          type = v2di_ftype_v2di_v2di;
          break;
        case V2DFmode:
          type = v2df_ftype_v2df_v2df;
          break;
        case V4SFmode:
          type = v4sf_ftype_v4sf_v4sf;
          break;
        case V8QImode:
          type = v8qi_ftype_v8qi_v8qi;
          break;
        case V4HImode:
          type = v4hi_ftype_v4hi_v4hi;
          break;
        case V2SImode:
          type = v2si_ftype_v2si_v2si;
          break;
        case DImode:
          type = di_ftype_di_di;
          break;

        default:
          gcc_unreachable ();
        }

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_sse_maskcmpv4sf3
          || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
        type = v4si_ftype_v4sf_v4sf;

      if (d->icode == CODE_FOR_sse2_maskcmpv2df3
          || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
        type = v2di_ftype_v2df_v2df;

      def_builtin (d->mask, d->name, type, d->code);
    }

  /* Add all builtins that are more or less simple operations on 1 operand.  */
  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    {
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
        continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
        {
        case V16QImode:
          type = v16qi_ftype_v16qi;
          break;
        case V8HImode:
          type = v8hi_ftype_v8hi;
          break;
        case V4SImode:
          type = v4si_ftype_v4si;
          break;
        case V2DFmode:
          type = v2df_ftype_v2df;
          break;
        case V4SFmode:
          type = v4sf_ftype_v4sf;
          break;
        case V8QImode:
          type = v8qi_ftype_v8qi;
          break;
        case V4HImode:
          type = v4hi_ftype_v4hi;
          break;
        case V2SImode:
          type = v2si_ftype_v2si;
          break;

        default:
          gcc_unreachable ();
        }

      def_builtin (d->mask, d->name, type, d->code);
    }
  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->mask == MASK_SSE2)
      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
    else
      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);

  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);

  def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
  def_builtin_const (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
  def_builtin_const (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
  def_builtin_const (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
  def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
  def_builtin_const (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
  def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
  def_builtin_const (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
  def_builtin_const (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
  def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

  def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);

  def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

  def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
  def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

  def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
  def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
  def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);

  def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);

  /* Original 3DNow!  */
  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);

  def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
  def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
  def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);

  def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
  def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);

  def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);

  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);

  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);

  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);

  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
  def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
  def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);

  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);

  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
  def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);

  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
  def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
  def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);

  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);

  /* Prescott New Instructions.  */
  def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
               void_ftype_pcvoid_unsigned_unsigned,
               IX86_BUILTIN_MONITOR);
  def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
               void_ftype_unsigned_unsigned,
               IX86_BUILTIN_MWAIT);
  def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
               v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);

  def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
               v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
  def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
               IX86_BUILTIN_PALIGNR);

  /* AMDFAM10 SSE4A New built-ins  */
  def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd",
               void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
  def_builtin (MASK_SSE4A, "__builtin_ia32_movntss",
               void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
  def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi",
               v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
  def_builtin (MASK_SSE4A, "__builtin_ia32_extrq",
               v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
  def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi",
               v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
  def_builtin (MASK_SSE4A, "__builtin_ia32_insertq",
               v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);

  /* Access to the vec_init patterns.  */
  ftype = build_function_type_list (V2SI_type_node, integer_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
               ftype, IX86_BUILTIN_VEC_INIT_V2SI);

  ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
                                    short_integer_type_node,
                                    short_integer_type_node,
                                    short_integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
               ftype, IX86_BUILTIN_VEC_INIT_V4HI);

  ftype = build_function_type_list (V8QI_type_node, char_type_node,
                                    char_type_node, char_type_node,
                                    char_type_node, char_type_node,
                                    char_type_node, char_type_node,
                                    char_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
               ftype, IX86_BUILTIN_VEC_INIT_V8QI);

  /* Access to the vec_extract patterns.  */
  ftype = build_function_type_list (double_type_node, V2DF_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
               ftype, IX86_BUILTIN_VEC_EXT_V2DF);

  ftype = build_function_type_list (long_long_integer_type_node,
                                    V2DI_type_node, integer_type_node,
                                    NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
               ftype, IX86_BUILTIN_VEC_EXT_V2DI);

  ftype = build_function_type_list (float_type_node, V4SF_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
               ftype, IX86_BUILTIN_VEC_EXT_V4SF);

  ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
               ftype, IX86_BUILTIN_VEC_EXT_V4SI);

  ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
               ftype, IX86_BUILTIN_VEC_EXT_V8HI);

  ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
               ftype, IX86_BUILTIN_VEC_EXT_V4HI);

  ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
               ftype, IX86_BUILTIN_VEC_EXT_V2SI);

  /* Access to the vec_set patterns.  */
  ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
                                    intHI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
               ftype, IX86_BUILTIN_VEC_SET_V8HI);

  ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
                                    intHI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
               ftype, IX86_BUILTIN_VEC_SET_V4HI);
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}
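
/* Example (added for illustration): if an erroneous source expression makes
   expand_normal return const0_rtx where a V4SF operand was expected,
   safe_vector_operand (op0, V4SFmode) replaces it with CONST0_RTX (V4SFmode),
   an all-zero vector constant, so the expanders below can continue instead
   of crashing.  */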
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat, xops[3];
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  /* The insn must want input operands in the same modes as the
     result.  */
  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
              && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));

  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* ??? Using ix86_fixup_binary_operands is problematic when
     we've got mismatched modes.  Fake it.  */

  xops[0] = target;
  xops[1] = op0;
  xops[2] = op1;

  if (tmode == mode0 && tmode == mode1)
    {
      target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
      op0 = xops[1];
      op1 = xops[2];
    }
  else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
    {
      op0 = force_reg (mode0, op0);
      op1 = force_reg (mode1, op1);
      target = gen_reg_rtx (tmode);
    }

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
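
/* Worked example (added for illustration): __builtin_ia32_pmullw128 is
   listed in bdesc_2arg above with CODE_FOR_mulv8hi3, so a call to it is
   expected to reach ix86_expand_binop_builtin with that icode.  There
   tmode, mode0 and mode1 are all V8HImode, so the
   ix86_fixup_binary_operands path is taken before the insn is emitted.  */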
/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (enum insn_code icode, tree exp)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}
/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (enum insn_code icode, tree exp,
                          rtx target, int do_load)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
        op0 = safe_vector_operand (op0, mode0);

      if ((optimize && !register_operand (op0, mode0))
          || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
                         rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
                      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          SET_DEST (pat),
                                          const0_rtx)));

  return SUBREG_REG (target);
}
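
/* Note (added for clarity): the comi/ucomi builtins registered from
   bdesc_comi in ix86_init_mmx_sse_builtins are handled by the function
   above.  The comparison result is materialized as an int by zeroing an
   SImode register and then setting only its low QImode part from the
   compare, which is why SUBREG_REG (target) is returned.  */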
/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static unsigned HOST_WIDE_INT
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that gives us license to emit
   these sorts of instructions.  */

static rtx
ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));
  gcc_assert (call_expr_nargs (exp) == n_elt);

  for (i = 0; i < n_elt; ++i)
    {
      rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}
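
/* Illustration (added; not in the original source): these wrappers are what
   the mmintrin.h-style builtins defined earlier expand through.  An
   intrinsic that builds a V2SI value, for instance, is expected to go via
   __builtin_ia32_vec_init_v2si and land in ix86_expand_vec_init_builtin,
   which gathers the scalar arguments into a PARALLEL and hands it to
   ix86_expand_vector_init.  */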
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_ext_builtin (tree exp, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  int elt;
  rtx op0;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);

  op0 = expand_normal (arg0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree exp)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);
  arg2 = CALL_EXPR_ARG (exp, 2);

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
  op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  ix86_expand_vector_set (true, op0, op1, elt);

  return op0;
}
17854 /* Expand an expression EXP that calls a built-in function,
17855 with result going to TARGET if that's convenient
17856 (and in mode MODE if that's convenient).
17857 SUBTARGET may be used as the target for computing one of EXP's operands.
17858 IGNORE is nonzero if the value is to be ignored. */
17861 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
17862 enum machine_mode mode ATTRIBUTE_UNUSED
,
17863 int ignore ATTRIBUTE_UNUSED
)
17865 const struct builtin_description
*d
;
17867 enum insn_code icode
;
17868 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
17869 tree arg0
, arg1
, arg2
, arg3
;
17870 rtx op0
, op1
, op2
, op3
, pat
;
17871 enum machine_mode tmode
, mode0
, mode1
, mode2
, mode3
, mode4
;
17872 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_mmx_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sse_sfence ());
      return 0;

    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
               ? CODE_FOR_mmx_maskmovq
               : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = CALL_EXPR_ARG (exp, 0);
      arg2 = CALL_EXPR_ARG (exp, 1);
      arg0 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      op0 = force_reg (Pmode, op0);
      op0 = gen_rtx_MEM (mode1, op0);

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
        op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return 0;
    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);

    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
               : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
               : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
               : CODE_FOR_sse2_loadlpd);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = force_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (optimize || target == 0
          || GET_MODE (target) != tmode
          || !register_operand (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
               : CODE_FOR_sse_storelps);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      op1 = force_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return const0_rtx;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
      target = assign_386_stack_local (SImode, SLOT_TEMP);
      emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, SLOT_TEMP);
      emit_insn (gen_sse_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);
    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
               ? CODE_FOR_sse_shufps
               : CODE_FOR_sse2_shufpd);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if ((optimize && !register_operand (op1, mode1))
          || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return gen_reg_rtx (tmode);
        }
      if (optimize || target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
               : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
               : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
               : CODE_FOR_mmx_pshufw);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_PSLLDQI128:
    case IX86_BUILTIN_PSRLDQI128:
      icode = (  fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
               : CODE_FOR_sse2_lshrti3);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        {
          op0 = copy_to_reg (op0);
          op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
        }
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        {
          error ("shift must be an immediate");
          return const0_rtx;
        }
      target = gen_reg_rtx (V2DImode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
                             op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_mmx_femms ());
      return 0;

    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);

    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
    case IX86_BUILTIN_MFENCE:
      emit_insn (gen_sse2_mfence ());
      return 0;
    case IX86_BUILTIN_LFENCE:
      emit_insn (gen_sse2_lfence ());
      return 0;

    case IX86_BUILTIN_CLFLUSH:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_sse2_clflush;
      if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
        op0 = copy_to_mode_reg (Pmode, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);

    case IX86_BUILTIN_LOADDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
    case IX86_BUILTIN_STOREDQU:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);

    case IX86_BUILTIN_MONITOR:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      if (!REG_P (op0))
        op0 = copy_to_mode_reg (Pmode, op0);
      if (!REG_P (op1))
        op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
        op2 = copy_to_mode_reg (SImode, op2);
      if (!TARGET_64BIT)
        emit_insn (gen_sse3_monitor (op0, op1, op2));
      else
        emit_insn (gen_sse3_monitor64 (op0, op1, op2));
      return 0;

    case IX86_BUILTIN_MWAIT:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      if (!REG_P (op0))
        op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
        op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_sse3_mwait (op0, op1));
      return 0;
    case IX86_BUILTIN_LDDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
                                       target, 1);

    case IX86_BUILTIN_PALIGNR:
    case IX86_BUILTIN_PALIGNR128:
      if (fcode == IX86_BUILTIN_PALIGNR)
        {
          icode = CODE_FOR_ssse3_palignrdi;
          mode = DImode;
        }
      else
        {
          icode = CODE_FOR_ssse3_palignrti;
          mode = V2DImode;
        }
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;
      mode3 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        {
          op0 = copy_to_reg (op0);
          op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
        }
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        {
          op1 = copy_to_reg (op1);
          op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
        }
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
        {
          error ("shift must be an immediate");
          return const0_rtx;
        }
      target = gen_reg_rtx (mode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
                             op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_MOVNTSD:
      return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);

    case IX86_BUILTIN_MOVNTSS:
      return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);

    case IX86_BUILTIN_INSERTQ:
    case IX86_BUILTIN_EXTRQ:
      icode = (fcode == IX86_BUILTIN_EXTRQ
               ? CODE_FOR_sse4a_extrq
               : CODE_FOR_sse4a_insertq);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;
      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        op1 = copy_to_mode_reg (mode2, op1);
      if (optimize || target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_EXTRQI:
      icode = CODE_FOR_sse4a_extrqi;
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;
      mode3 = insn_data[icode].operand[3].mode;
      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        {
          error ("index mask must be an immediate");
          return gen_reg_rtx (tmode);
        }
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
        {
          error ("length mask must be an immediate");
          return gen_reg_rtx (tmode);
        }
      if (optimize || target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_INSERTQI:
      icode = CODE_FOR_sse4a_insertqi;
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      arg3 = CALL_EXPR_ARG (exp, 3);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      op3 = expand_normal (arg3);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;
      mode3 = insn_data[icode].operand[3].mode;
      mode4 = insn_data[icode].operand[4].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        op0 = copy_to_mode_reg (mode1, op0);

      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        op1 = copy_to_mode_reg (mode2, op1);

      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
        {
          error ("index mask must be an immediate");
          return gen_reg_rtx (tmode);
        }

      if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
        {
          error ("length mask must be an immediate");
          return gen_reg_rtx (tmode);
        }

      if (optimize || target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);

      pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_VEC_INIT_V2SI:
    case IX86_BUILTIN_VEC_INIT_V4HI:
    case IX86_BUILTIN_VEC_INIT_V8QI:
      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);

    case IX86_BUILTIN_VEC_EXT_V2DF:
    case IX86_BUILTIN_VEC_EXT_V2DI:
    case IX86_BUILTIN_VEC_EXT_V4SF:
    case IX86_BUILTIN_VEC_EXT_V4SI:
    case IX86_BUILTIN_VEC_EXT_V8HI:
    case IX86_BUILTIN_VEC_EXT_V2SI:
    case IX86_BUILTIN_VEC_EXT_V4HI:
      return ix86_expand_vec_ext_builtin (exp, target);

    case IX86_BUILTIN_VEC_SET_V8HI:
    case IX86_BUILTIN_VEC_SET_V4HI:
      return ix86_expand_vec_set_builtin (exp);

    default:
      break;
    }

  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
        /* Compares are treated specially.  */
        if (d->icode == CODE_FOR_sse_maskcmpv4sf3
            || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
            || d->icode == CODE_FOR_sse2_maskcmpv2df3
            || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
          return ix86_expand_sse_compare (d, exp, target);

        return ix86_expand_binop_builtin (d->icode, exp, target);
      }

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, exp, target, 0);

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, exp, target);

  gcc_unreachable ();
}
/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN and the result vector type TYPE, or NULL_TREE
   if it is not available.  */

static tree
ix86_builtin_vectorized_function (enum built_in_function fn, tree type_out,
                                  tree type_in)
{
  enum machine_mode in_mode, out_mode;
  int in_n, out_n;

  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE)
    return NULL_TREE;

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  out_n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);

  switch (fn)
    {
    case BUILT_IN_SQRT:
      if (out_mode == DFmode && out_n == 2
          && in_mode == DFmode && in_n == 2)
        return ix86_builtins[IX86_BUILTIN_SQRTPD];
      break;

    case BUILT_IN_SQRTF:
      if (out_mode == SFmode && out_n == 4
          && in_mode == SFmode && in_n == 4)
        return ix86_builtins[IX86_BUILTIN_SQRTPS];
      break;

    case BUILT_IN_LRINTF:
      if (out_mode == SImode && out_n == 4
          && in_mode == SFmode && in_n == 4)
        return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
      break;

    default:
      ;
    }

  return NULL_TREE;
}
/* Returns a decl of a function that implements conversion of the
   input vector of type TYPE, or NULL_TREE if it is not available.  */

static tree
ix86_builtin_conversion (enum tree_code code, tree type)
{
  if (TREE_CODE (type) != VECTOR_TYPE)
    return NULL_TREE;

  switch (code)
    {
    case FLOAT_EXPR:
      switch (TYPE_MODE (type))
        {
        case V4SImode:
          return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
        default:
          return NULL_TREE;
        }

    case FIX_TRUNC_EXPR:
      switch (TYPE_MODE (type))
        {
        case V4SFmode:
          return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
        default:
          return NULL_TREE;
        }

    default:
      return NULL_TREE;
    }
}
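
/* Illustrative note (an editorial sketch, not part of the original file):
   the vectorizer queries this hook when it widens a loop of scalar casts.
   Assuming -msse2 and a vectorizable loop, source such as

       int   in[4];
       float out[4];
       int   i;
       for (i = 0; i < 4; i++)
         out[i] = (float) in[i];

   asks for a FLOAT_EXPR conversion on a V4SImode type and receives the
   CVTDQ2PS builtin returned above, i.e. a single cvtdq2ps instruction.  */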
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;

  gcc_assert (reload_completed);
  if (TARGET_RED_ZONE)
    {
      result = gen_rtx_MEM (mode,
                            gen_rtx_PLUS (Pmode,
                                          stack_pointer_rtx,
                                          GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (!TARGET_RED_ZONE && TARGET_64BIT)
    {
      switch (mode)
        {
        case HImode:
        case SImode:
          operand = gen_lowpart (DImode, operand);
          /* FALLTHRU */
        case DImode:
          emit_insn (
                      gen_rtx_SET (VOIDmode,
                                   gen_rtx_MEM (DImode,
                                                gen_rtx_PRE_DEC (DImode,
                                                        stack_pointer_rtx)),
                                   operand));
          break;
        default:
          gcc_unreachable ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
        {
        case DImode:
          {
            rtx operands[2];
            split_di (&operand, 1, operands, operands + 1);
            emit_insn (
                        gen_rtx_SET (VOIDmode,
                                     gen_rtx_MEM (SImode,
                                                  gen_rtx_PRE_DEC (Pmode,
                                                          stack_pointer_rtx)),
                                     operands[1]));
            emit_insn (
                        gen_rtx_SET (VOIDmode,
                                     gen_rtx_MEM (SImode,
                                                  gen_rtx_PRE_DEC (Pmode,
                                                          stack_pointer_rtx)),
                                     operands[0]));
          }
          break;
        case HImode:
          /* Store HImodes as SImodes.  */
          operand = gen_lowpart (SImode, operand);
          /* FALLTHRU */
        case SImode:
          emit_insn (
                      gen_rtx_SET (VOIDmode,
                                   gen_rtx_MEM (GET_MODE (operand),
                                                gen_rtx_PRE_DEC (SImode,
                                                        stack_pointer_rtx)),
                                   operand));
          break;
        default:
          gcc_unreachable ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}

/* Free operand from the memory.  */
void
ix86_free_from_memory (enum machine_mode mode)
{
  if (!TARGET_RED_ZONE)
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
        size = 8;
      else
        size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
         to pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                            GEN_INT (size))));
    }
}
/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */
enum reg_class
ix86_preferred_reload_class (rtx x, enum reg_class class)
{
  enum machine_mode mode = GET_MODE (x);

  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (class == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (mode))
    return class;

  /* Force constants into memory if we are loading a (nonzero) constant into
     an MMX or SSE register.  This is because there are no MMX/SSE instructions
     to load from a constant.  */
  if (CONSTANT_P (x)
      && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
    return NO_REGS;

  /* Prefer SSE regs only, if we can use them for math.  */
  if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
    return SSE_CLASS_P (class) ? class : NO_REGS;

  /* Floating-point constants need more complex checks.  */
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
        return class;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
         zero above.  We only want to wind up preferring 80387 registers if
         we plan on doing computation with them.  */
      if (TARGET_80387
          && standard_80387_constant_p (x))
        {
          /* Limit class to non-sse.  */
          if (class == FLOAT_SSE_REGS)
            return FLOAT_REGS;
          if (class == FP_TOP_SSE_REGS)
            return FP_TOP_REG;
          if (class == FP_SECOND_SSE_REGS)
            return FP_SECOND_REG;
          if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
            return class;
        }

      return NO_REGS;
    }

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (reg_class_subset_p (class, Q_REGS))
        return class;
      if (reg_class_subset_p (Q_REGS, class))
        return Q_REGS;
      return NO_REGS;
    }

  return class;
}
/* Discourage putting floating-point values in SSE registers unless
   SSE math is being used, and likewise for the 387 registers.  */
enum reg_class
ix86_preferred_output_reload_class (rtx x, enum reg_class class)
{
  enum machine_mode mode = GET_MODE (x);

  /* Restrict the output reload class to the register bank that we are doing
     math on.  If we would like not to return a subset of CLASS, reject this
     alternative: if reload cannot do this, it will still use its choice.  */
  mode = GET_MODE (x);
  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;

  if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
    {
      if (class == FP_TOP_SSE_REGS)
        return FP_TOP_REG;
      else if (class == FP_SECOND_SSE_REGS)
        return FP_SECOND_REG;
      else
        return FLOAT_CLASS_P (class) ? class : NO_REGS;
    }

  return class;
}
/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is class containing
   registers from multiple units (SSE, MMX, integer).  We avoid this by never
   combining those units in single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.  */

int
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
                              enum machine_mode mode, int strict)
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      gcc_assert (!strict);
      return true;
    }

  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
    return true;

  /* ??? This is a lie.  We do have moves between mmx/general, and for
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    return true;

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    {
      /* SSE1 doesn't have any direct moves from other classes.  */
      if (!TARGET_SSE2)
        return true;

      /* If the target says that inter-unit moves are more expensive
         than moving through memory, then don't generate them.  */
      if (!TARGET_INTER_UNIT_MOVES)
        return true;

      /* Between SSE and general, we have moves no larger than word size.  */
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
        return true;
    }

  return false;
}
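
/* Illustrative note (an editorial sketch, not part of the original file):

     ix86_secondary_memory_needed (MMX_REGS, GENERAL_REGS, DImode, 0)  -> true
     ix86_secondary_memory_needed (SSE_REGS, SSE_REGS, V4SFmode, 0)    -> false

   The first answer is the deliberate "lie" described above: movd/movq do
   exist, but claiming a memory intermediary keeps the register allocator
   away from the MMX registers unless they are really needed.  */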
/* Return true if the registers in CLASS cannot represent the change from
   modes FROM to TO.  */

bool
ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
                               enum reg_class class)
{
  if (from == to)
    return false;

  /* x87 registers can't do subreg at all, as all values are reformatted
     to extended precision.  */
  if (MAYBE_FLOAT_CLASS_P (class))
    return true;

  if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
    {
      /* Vector registers do not support QI or HImode loads.  If we don't
         disallow a change to these modes, reload will assume it's ok to
         drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
         the vec_dupv4hi pattern.  */
      if (GET_MODE_SIZE (from) < 4)
        return true;

      /* Vector registers do not support subreg with nonzero offsets, which
         are otherwise valid for integer registers.  Since we can't see
         whether we have a nonzero offset from here, prohibit all
         nonparadoxical subregs changing size.  */
      if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
        return true;
    }

  return false;
}
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */

int
ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
                         enum reg_class class2)
{
  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
                   MEMORY_MOVE_COST (mode, class1, 1));
      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
                   MEMORY_MOVE_COST (mode, class2, 1));

      /* In case of copying from general_purpose_register we may emit multiple
         stores followed by single load causing memory size mismatch stall.
         Count this as arbitrarily high cost of 20.  */
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
        cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
         have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
        cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
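
/* Illustrative note (an editorial sketch, not part of the original file):
   an SImode move between SSE_REGS and GENERAL_REGS on an SSE2 target with
   inter-unit moves enabled needs no secondary memory, so it is charged
   ix86_cost->mmxsse_to_integer, while a DFmode move between FLOAT_REGS and
   SSE_REGS does need memory and is charged the sum of the two
   MEMORY_MOVE_COST maxima plus the FP/MMX overlap penalty above.  */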
/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */

int
ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
         out of SSE registers, even when no operation instructions
         are available.  */
      return (VALID_SSE_REG_MODE (mode)
              || VALID_SSE2_REG_MODE (mode)
              || VALID_MMX_REG_MODE (mode)
              || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
         so if the register is available at all, then we can move data of
         the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
              || VALID_MMX_REG_MODE_3DNOW (mode));
    }

  if (mode == QImode)
    {
      /* Take care for QImode values - they can be in non-QI regs,
         but then they do cause partial register stalls.  */
      if (regno < 4 || TARGET_64BIT)
        return 1;
      if (!TARGET_PARTIAL_REG_STALL)
        return 1;
      return reload_in_progress || reload_completed;
    }
  /* We handle both integer and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode))
    return 1;
  else if (VALID_FP_MODE_P (mode))
    return 1;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return 1;

  return 0;
}

/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
   tieable integer mode.  */

static bool
ix86_tieable_integer_mode_p (enum machine_mode mode)
{
  switch (mode)
    {
    case HImode:
    case SImode:
      return true;

    case QImode:
      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;

    case DImode:
      return TARGET_64BIT;

    default:
      return false;
    }
}
/* Return true if MODE1 is accessible in a register that can hold MODE2
   without copying.  That is, all register classes that can hold MODE2
   can also hold MODE1.  */

bool
ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  if (mode1 == mode2)
    return true;

  if (ix86_tieable_integer_mode_p (mode1)
      && ix86_tieable_integer_mode_p (mode2))
    return true;

  /* MODE2 being XFmode implies fp stack or general regs, which means we
     can tie any smaller floating point modes to it.  Note that we do not
     tie this with TFmode.  */
  if (mode2 == XFmode)
    return mode1 == SFmode || mode1 == DFmode;

  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
     that we can tie it with SFmode.  */
  if (mode2 == DFmode)
    return mode1 == SFmode;

  /* If MODE2 is only appropriate for an SSE register, then tie with
     any other mode acceptable to SSE registers.  */
  if (GET_MODE_SIZE (mode2) == 16
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 16
            && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));

  /* If MODE2 is appropriate for an MMX register, then tie
     with any other mode acceptable to MMX registers.  */
  if (GET_MODE_SIZE (mode2) == 8
      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 8
            && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));

  return false;
}
/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.  */
int
ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
{
  if (FLOAT_CLASS_P (class))
    {
      int index;
      switch (mode)
        {
        case SFmode: index = 0; break;
        case DFmode: index = 1; break;
        case XFmode: index = 2; break;
        default: return 100;
        }
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4: index = 0; break;
        case 8: index = 1; break;
        case 16: index = 2; break;
        default: return 100;
        }
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4: index = 0; break;
        case 8: index = 1; break;
        default: return 100;
        }
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (in)
        return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
                : ix86_cost->movzbl_load);
      else
        return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
                : ix86_cost->int_store[0] + 4);
      break;
    case 2:
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
      if (mode == TFmode)
        mode = XFmode;
      return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
              * (((int) GET_MODE_SIZE (mode)
                  + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
    }
}
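
/* Illustrative note (an editorial sketch, not part of the original file):
   for a QImode value, a load into a Q_REGS class is charged int_load[0]
   while a load into any other integer class is charged movzbl_load, and a
   QImode store outside Q_REGS costs int_store[0] + 4; that surcharge is
   what steers the register allocator towards the a/b/c/d registers for
   byte data.  */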
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
{
  enum machine_mode mode = GET_MODE (x);

  switch (code)
    {
    case CONST_INT:
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
        *total = 3;
      else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
        *total = 2;
      else if (flag_pic && SYMBOLIC_CONST (x)
               && (!TARGET_64BIT
                   || (!GET_CODE (x) != LABEL_REF
                       && (GET_CODE (x) != SYMBOL_REF
                           || !SYMBOL_REF_LOCAL_P (x)))))
        *total = 1;
      else
        *total = 0;
      return true;

    case CONST_DOUBLE:
      if (mode == VOIDmode)
        *total = 0;
      else
        switch (standard_80387_constant_p (x))
          {
          case 1: /* 0.0 */
            *total = 1;
            break;
          default: /* Other constants */
            *total = 2;
            break;
          case 0:
          case -1:
            /* Start with (MEM (SYMBOL_REF)), since that's where
               it'll probably end up.  Add a penalty for size.  */
            *total = (COSTS_N_INSNS (1)
                      + (flag_pic != 0 && !TARGET_64BIT)
                      + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
            break;
          }
      return true;

    case ZERO_EXTEND:
      /* The zero extensions is often completely free on x86_64, so make
         it as cheap as possible.  */
      if (TARGET_64BIT && mode == DImode
          && GET_MODE (XEXP (x, 0)) == SImode)
        *total = 1;
      else if (TARGET_ZERO_EXTEND_WITH_AND)
        *total = ix86_cost->add;
      else
        *total = ix86_cost->movzx;
      return false;

    case SIGN_EXTEND:
      *total = ix86_cost->movsx;
      return false;

    case ASHIFT:
      if (CONST_INT_P (XEXP (x, 1))
          && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
        {
          HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
          if (value == 1)
            {
              *total = ix86_cost->add;
              return false;
            }
          if ((value == 2 || value == 3)
              && ix86_cost->lea <= ix86_cost->shift_const)
            {
              *total = ix86_cost->lea;
              return false;
            }
        }
      /* FALLTHRU */

    case ROTATE:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
        {
          if (CONST_INT_P (XEXP (x, 1)))
            {
              if (INTVAL (XEXP (x, 1)) > 32)
                *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
              else
                *total = ix86_cost->shift_const * 2;
            }
          else
            {
              if (GET_CODE (XEXP (x, 1)) == AND)
                *total = ix86_cost->shift_var * 2;
              else
                *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
            }
        }
      else
        {
          if (CONST_INT_P (XEXP (x, 1)))
            *total = ix86_cost->shift_const;
          else
            *total = ix86_cost->shift_var;
        }
      return false;

    case MULT:
      if (FLOAT_MODE_P (mode))
        {
          *total = ix86_cost->fmul;
          return false;
        }
      else
        {
          rtx op0 = XEXP (x, 0);
          rtx op1 = XEXP (x, 1);
          int nbits;
          if (CONST_INT_P (XEXP (x, 1)))
            {
              unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
              for (nbits = 0; value != 0; value &= value - 1)
                nbits++;
            }
          else
            /* This is arbitrary.  */
            nbits = 7;

          /* Compute costs correctly for widening multiplication.  */
          if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
              && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
                 == GET_MODE_SIZE (mode))
            {
              int is_mulwiden = 0;
              enum machine_mode inner_mode = GET_MODE (op0);

              if (GET_CODE (op0) == GET_CODE (op1))
                is_mulwiden = 1, op1 = XEXP (op1, 0);
              else if (CONST_INT_P (op1))
                {
                  if (GET_CODE (op0) == SIGN_EXTEND)
                    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
                                  == INTVAL (op1);
                  else
                    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
                }

              if (is_mulwiden)
                op0 = XEXP (op0, 0), mode = GET_MODE (op0);
            }

          *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
                    + nbits * ix86_cost->mult_bit
                    + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));

          return true;
        }

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (FLOAT_MODE_P (mode))
        *total = ix86_cost->fdiv;
      else
        *total = ix86_cost->divide[MODE_INDEX (mode)];
      return false;

    case PLUS:
      if (FLOAT_MODE_P (mode))
        *total = ix86_cost->fadd;
      else if (GET_MODE_CLASS (mode) == MODE_INT
               && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
        {
          if (GET_CODE (XEXP (x, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
              && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
              && CONSTANT_P (XEXP (x, 1)))
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
              if (val == 2 || val == 4 || val == 8)
                {
                  *total = ix86_cost->lea;
                  *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
                  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
                                      outer_code);
                  *total += rtx_cost (XEXP (x, 1), outer_code);
                  return true;
                }
            }
          else if (GET_CODE (XEXP (x, 0)) == MULT
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
              if (val == 2 || val == 4 || val == 8)
                {
                  *total = ix86_cost->lea;
                  *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
                  *total += rtx_cost (XEXP (x, 1), outer_code);
                  return true;
                }
            }
          else if (GET_CODE (XEXP (x, 0)) == PLUS)
            {
              *total = ix86_cost->lea;
              *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
              *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
              *total += rtx_cost (XEXP (x, 1), outer_code);
              return true;
            }
        }
      /* FALLTHRU */

    case MINUS:
      if (FLOAT_MODE_P (mode))
        {
          *total = ix86_cost->fadd;
          return false;
        }
      /* FALLTHRU */

    case AND:
    case IOR:
    case XOR:
      if (!TARGET_64BIT && mode == DImode)
        {
          *total = (ix86_cost->add * 2
                    + (rtx_cost (XEXP (x, 0), outer_code)
                       << (GET_MODE (XEXP (x, 0)) != DImode))
                    + (rtx_cost (XEXP (x, 1), outer_code)
                       << (GET_MODE (XEXP (x, 1)) != DImode)));
          return true;
        }
      /* FALLTHRU */

    case NEG:
      if (FLOAT_MODE_P (mode))
        {
          *total = ix86_cost->fchs;
          return false;
        }
      /* FALLTHRU */

    case NOT:
      if (!TARGET_64BIT && mode == DImode)
        *total = ix86_cost->add * 2;
      else
        *total = ix86_cost->add;
      return false;

    case COMPARE:
      if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
          && XEXP (XEXP (x, 0), 1) == const1_rtx
          && CONST_INT_P (XEXP (XEXP (x, 0), 2))
          && XEXP (x, 1) == const0_rtx)
        {
          /* This kind of construct is implemented using test[bwl].
             Treat it as if we had an AND.  */
          *total = (ix86_cost->add
                    + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
                    + rtx_cost (const1_rtx, outer_code));
          return true;
        }
      return false;

    case FLOAT_EXTEND:
      if (!TARGET_SSE_MATH
          || mode == XFmode
          || (mode == DFmode && !TARGET_SSE2))
        *total = 0;
      return false;

    case ABS:
      if (FLOAT_MODE_P (mode))
        *total = ix86_cost->fabs;
      return false;

    case SQRT:
      if (FLOAT_MODE_P (mode))
        *total = ix86_cost->fsqrt;
      return false;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TP)
        *total = 0;
      return false;

    default:
      return false;
    }
}
#if TARGET_MACHO

static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* For 64-bit we shouldn't get here.  */
  gcc_assert (!TARGET_64BIT);

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (stub);
  binder_name = alloca (length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = alloca (length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_PURE)
    switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
  else
    switch_to_section (darwin_sections[machopic_symbol_stub_section]);

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
      fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp\t*%%edx\n");
    }
  else
    fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl\t%%eax\n");
    }
  else
    fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");

  switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}

void
darwin_x86_file_end (void)
{
  darwin_file_end ();
}
#endif /* TARGET_MACHO */
/* Order the registers for register allocator.  */

void
x86_order_regs_for_local_alloc (void)
{
  int pos = 0;
  int i;

  /* First allocate the local general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && call_used_regs[i])
      reg_alloc_order [pos++] = i;

  /* Global general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && !call_used_regs[i])
      reg_alloc_order [pos++] = i;

  /* x87 registers come first in case we are doing FP math
     using them.  */
  if (!TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order [pos++] = i;

  /* SSE registers.  */
  for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
    reg_alloc_order [pos++] = i;
  for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
    reg_alloc_order [pos++] = i;

  /* x87 registers.  */
  if (TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order [pos++] = i;

  for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
    reg_alloc_order [pos++] = i;

  /* Initialize the rest of array as we do not allocate some registers
     at all.  */
  while (pos < FIRST_PSEUDO_REGISTER)
    reg_alloc_order [pos++] = 0;
}
/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  */
static tree
ix86_handle_struct_attribute (tree *node, tree name,
                              tree args ATTRIBUTE_UNUSED,
                              int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
        type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
                 || TREE_CODE (*type) == UNION_TYPE)))
    {
      warning (OPT_Wattributes, "%qs attribute ignored",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  else if ((is_attribute_p ("ms_struct", name)
            && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
           || ((is_attribute_p ("gcc_struct", name)
                && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning (OPT_Wattributes, "%qs incompatible attribute ignored",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

static bool
ix86_ms_bitfield_layout_p (tree record_type)
{
  return (TARGET_MS_BITFIELD_LAYOUT &&
          !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
          || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
}
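
/* Illustrative note (an editorial sketch, not part of the original file):
   the attribute handler above is what validates source such as

       struct __attribute__ ((ms_struct)) S { char c; int bf : 5; };

   and the predicate it feeds then switches that record over to the MSVC
   bit-field layout rules, either because of the attribute or because
   -mms-bitfields (TARGET_MS_BITFIELD_LAYOUT) is in effect.  */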
/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);

  if (TARGET_64BIT)
    {
      int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
      return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
    }

  if (ix86_function_regparm (type, function) > 0)
    {
      tree parm;

      parm = TYPE_ARG_TYPES (type);
      /* Figure out whether or not the function has a variable number of
         arguments.  */
      for (; parm; parm = TREE_CHAIN (parm))
        if (TREE_VALUE (parm) == void_type_node)
          break;
      /* If not, the this parameter is in the first argument.  */
      if (parm)
        {
          int regno = 0;
          if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
            regno = 2;
          return gen_rtx_REG (SImode, regno);
        }
    }

  if (aggregate_value_p (TREE_TYPE (type), type))
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));

  return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
}

/* Determine whether x86_output_mi_thunk can succeed.  */

static bool
x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
                         HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
                         HOST_WIDE_INT vcall_offset, tree function)
{
  /* 64-bit can handle anything.  */
  if (TARGET_64BIT)
    return true;

  /* For 32-bit, everything's fine if we have one free register.  */
  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
    return true;

  /* Need a free register for vcall_offset.  */
  if (vcall_offset)
    return false;

  /* Need a free register for GOT references.  */
  if (flag_pic && !(*targetm.binds_local_p) (function))
    return false;

  /* Otherwise ok.  */
  return true;
}
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
                     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
                     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx xops[3];
  rtx this = x86_this_parameter (function);
  rtx this_reg, tmp;

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this))
    this_reg = this;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      xops[0] = this;
      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      xops[0] = GEN_INT (delta);
      xops[1] = this_reg ? this_reg : this;
      if (TARGET_64BIT)
        {
          if (!x86_64_general_operand (xops[0], DImode))
            {
              tmp = gen_rtx_REG (DImode, R10_REG);
              xops[1] = tmp;
              output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
              xops[0] = tmp;
              xops[1] = this;
            }
          output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
        }
      else
        output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      if (TARGET_64BIT)
        tmp = gen_rtx_REG (DImode, R10_REG);
      else
        {
          int tmp_regno = 2 /* ECX */;
          if (lookup_attribute ("fastcall",
              TYPE_ATTRIBUTES (TREE_TYPE (function))))
            tmp_regno = 0 /* EAX */;
          tmp = gen_rtx_REG (SImode, tmp_regno);
        }

      xops[0] = gen_rtx_MEM (Pmode, this_reg);
      xops[1] = tmp;
      if (TARGET_64BIT)
        output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
      else
        output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);

      /* Adjust the this parameter.  */
      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
        {
          rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
          xops[0] = GEN_INT (vcall_offset);
          xops[1] = tmp2;
          output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
          xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
        }
      xops[1] = this_reg;
      if (TARGET_64BIT)
        output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
      else
        output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this)
    {
      xops[0] = this_reg;
      xops[1] = this;
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }

  xops[0] = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
        output_asm_insn ("jmp\t%P0", xops);
      else
        {
          tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
          tmp = gen_rtx_CONST (Pmode, tmp);
          tmp = gen_rtx_MEM (QImode, tmp);
          xops[0] = tmp;
          output_asm_insn ("jmp\t%A0", xops);
        }
    }
  else
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
        output_asm_insn ("jmp\t%P0", xops);
      else
#if TARGET_MACHO
        if (TARGET_MACHO)
          {
            rtx sym_ref = XEXP (DECL_RTL (function), 0);
            tmp = (gen_rtx_SYMBOL_REF
                   (Pmode,
                    machopic_indirection_name (sym_ref, /*stub_p=*/true)));
            tmp = gen_rtx_MEM (QImode, tmp);
            xops[0] = tmp;
            output_asm_insn ("jmp\t%0", xops);
          }
        else
#endif /* TARGET_MACHO */
        {
          tmp = gen_rtx_REG (SImode, 2 /* ECX */);
          output_set_got (tmp, NULL_RTX);

          xops[1] = tmp;
          output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
          output_asm_insn ("jmp\t{*}%1", xops);
        }
    }
}
static void
x86_file_start (void)
{
  default_file_start ();
#if TARGET_MACHO
  darwin_file_start ();
#endif
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax\n", asm_out_file);
}

int
x86_field_alignment (tree field, int computed)
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
                    ? get_inner_array_type (type) : type);
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}
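
/* Illustrative note (an editorial sketch, not part of the original file):
   on 32-bit targets without -malign-double this caps the field alignment of
   doubles and (complex) integers at 32 bits, so in

       struct S { char c; double d; };

   the double lands at offset 4 rather than 8, matching the traditional ia32
   struct layout.  */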
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  if (TARGET_64BIT)
    {
      if (flag_pic)
        {
#ifndef NO_PROFILE_COUNTERS
          fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
#endif
          fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
        }
      else
        {
#ifndef NO_PROFILE_COUNTERS
          fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
#endif
          fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
        }
    }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
               LPREFIX, labelno, PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
               PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
    }
}
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  */

static int
min_insn_size (rtx insn)
{
  int l = 0;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emit and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;
  if (JUMP_P (insn)
      && (GET_CODE (PATTERN (insn)) == ADDR_VEC
          || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in the row.  */
  if (CALL_P (insn)
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  if (get_attr_length (insn) <= 1)
    return 1;

  /* For normal instructions we may rely on the sizes of addresses
     and the presence of symbol to require 4 bytes of encoding.
     This is not the case for jumps where references are PC relative.  */
  if (!JUMP_P (insn))
    {
      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
        l = 4;
      else
        l = 0;
    }

  if (l)
    return 1+l;
  else
    return 2;
}
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  */

static void
ix86_avoid_jump_misspredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  int isjump = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
     */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {

      nbytes += min_insn_size (insn);
      if (dump_file)
        fprintf(dump_file, "Insn %i estimated to %i bytes\n",
                INSN_UID (insn), min_insn_size (insn));
      if ((JUMP_P (insn)
           && GET_CODE (PATTERN (insn)) != ADDR_VEC
           && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
          || CALL_P (insn))
        njumps++;
      else
        continue;

      while (njumps > 3)
        {
          start = NEXT_INSN (start);
          if ((JUMP_P (start)
               && GET_CODE (PATTERN (start)) != ADDR_VEC
               && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
              || CALL_P (start))
            njumps--, isjump = 1;
          else
            isjump = 0;
          nbytes -= min_insn_size (start);
        }
      gcc_assert (njumps >= 0);
      if (dump_file)
        fprintf (dump_file, "Interval %i to %i has %i bytes\n",
                 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
        {
          int padsize = 15 - nbytes + min_insn_size (insn);

          if (dump_file)
            fprintf (dump_file, "Padding insn %i by %i bytes!\n",
                     INSN_UID (insn), padsize);
          emit_insn_before (gen_align (GEN_INT (padsize)), insn);
        }
    }
}
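
/* Illustrative note (an editorial sketch, not part of the original file):
   with the formula above, if the minimal interval holding the four jumps is
   estimated at nbytes = 12 and min_insn_size (insn) = 4, the emitted
   alignment is padsize = 15 - 12 + 4 = 7 bytes, enough that the four jumps
   can no longer share a single 16-byte window.  */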
/* AMD Athlon works faster
   when RET is not destination of conditional jump or directly preceded
   by other jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.  */
static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      basic_block bb = e->src;
      rtx ret = BB_END (bb);
      rtx prev;
      bool replace = false;

      if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
          || !maybe_hot_bb_p (bb))
        continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
        if (active_insn_p (prev) || LABEL_P (prev))
          break;
      if (prev && LABEL_P (prev))
        {
          edge e;
          edge_iterator ei;

          FOR_EACH_EDGE (e, ei, bb->preds)
            if (EDGE_FREQUENCY (e) && e->src->index >= 0
                && !(e->flags & EDGE_FALLTHRU))
              replace = true;
        }
      if (!replace)
        {
          prev = prev_active_insn (ret);
          if (prev
              && ((JUMP_P (prev) && any_condjump_p (prev))
                  || CALL_P (prev)))
            replace = true;
          /* Empty functions get branch mispredict even when the jump destination
             is not visible to us.  */
          if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
            replace = true;
        }
      if (replace)
        {
          emit_insn_before (gen_return_internal_long (), ret);
          delete_insn (ret);
        }
    }
}

/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
static void
ix86_reorg (void)
{
  if (TARGET_PAD_RETURNS && optimize && !optimize_size)
    ix86_pad_returns ();
  if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
    ix86_avoid_jump_misspredicts ();
}
19994 /* Return nonzero when QImode register that must be represented via REX prefix
19997 x86_extended_QIreg_mentioned_p (rtx insn
)
20000 extract_insn_cached (insn
);
20001 for (i
= 0; i
< recog_data
.n_operands
; i
++)
20002 if (REG_P (recog_data
.operand
[i
])
20003 && REGNO (recog_data
.operand
[i
]) >= 4)
/* Return nonzero when P points to register encoded via REX prefix.
   Called via for_each_rtx.  */
static int
extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
{
  unsigned int regno;

  if (!REG_P (*p))
    return 0;
  regno = REGNO (*p);
  return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
}
/* Return true when INSN mentions register that must be encoded using REX
   prefix.  */
bool
x86_extended_reg_mentioned_p (rtx insn)
{
  return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
}
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);

  expand_float (out, in, 0);

  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
                            1, OPTAB_DIRECT);
  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
                            1, OPTAB_DIRECT);
  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);

  expand_float (f0, i0, 0);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
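
/* Illustrative C sketch of the sequence emitted above (exposition only,
   not code GCC itself runs):

     double floatuns (unsigned long in)
     {
       if ((long) in >= 0)
         return (double) (long) in;
       long i = (long) ((in >> 1) | (in & 1));
       double f = (double) i;
       return f + f;
     }

   For negative (i.e. large unsigned) inputs the value is halved with the
   lost low bit folded back in, converted as a signed number, and doubled;
   keeping that low bit makes the final addition round the same way a
   direct unsigned conversion would.  */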
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   with all elements equal to VAR.  Return true if successful.  */

static bool
ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
                                   rtx target, rtx val)
{
  enum machine_mode smode, wsmode, wvmode;
  rtx x;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
        return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      val = force_reg (GET_MODE_INNER (mode), val);
      x = gen_rtx_VEC_DUPLICATE (mode, val);
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4HImode:
      if (!mmx_ok)
        return false;
      if (TARGET_SSE || TARGET_3DNOW_A)
        {
          val = gen_lowpart (SImode, val);
          x = gen_rtx_TRUNCATE (HImode, val);
          x = gen_rtx_VEC_DUPLICATE (mode, x);
          emit_insn (gen_rtx_SET (VOIDmode, target, x));
          return true;
        }
      else
        {
          smode = HImode;
          wsmode = SImode;
          wvmode = V2SImode;
          goto widen;
        }

    case V8QImode:
      if (!mmx_ok)
        return false;
      smode = QImode;
      wsmode = HImode;
      wvmode = V4HImode;
      goto widen;

    case V8HImode:
      if (TARGET_SSE2)
        {
          rtx tmp1, tmp2;
          /* Extend HImode to SImode using a paradoxical SUBREG.  */
          tmp1 = gen_reg_rtx (SImode);
          emit_move_insn (tmp1, gen_lowpart (SImode, val));
          /* Insert the SImode value as low element of V4SImode vector.  */
          tmp2 = gen_reg_rtx (V4SImode);
          tmp1 = gen_rtx_VEC_MERGE (V4SImode,
                                    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
                                    CONST0_RTX (V4SImode),
                                    const1_rtx);
          emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
          /* Cast the V4SImode vector back to a V8HImode vector.  */
          tmp1 = gen_reg_rtx (V8HImode);
          emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
          /* Duplicate the low short through the whole low SImode word.  */
          emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
          /* Cast the V8HImode vector back to a V4SImode vector.  */
          tmp2 = gen_reg_rtx (V4SImode);
          emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
          /* Replicate the low element of the V4SImode vector.  */
          emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
          /* Cast the V4SImode back to V8HImode, and store in target.  */
          emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
          return true;
        }
      smode = HImode;
      wsmode = SImode;
      wvmode = V4SImode;
      goto widen;

    case V16QImode:
      if (TARGET_SSE2)
        {
          rtx tmp1, tmp2;
          /* Extend QImode to SImode using a paradoxical SUBREG.  */
          tmp1 = gen_reg_rtx (SImode);
          emit_move_insn (tmp1, gen_lowpart (SImode, val));
          /* Insert the SImode value as low element of V4SImode vector.  */
          tmp2 = gen_reg_rtx (V4SImode);
          tmp1 = gen_rtx_VEC_MERGE (V4SImode,
                                    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
                                    CONST0_RTX (V4SImode),
                                    const1_rtx);
          emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
          /* Cast the V4SImode vector back to a V16QImode vector.  */
          tmp1 = gen_reg_rtx (V16QImode);
          emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
          /* Duplicate the low byte through the whole low SImode word.  */
          emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
          emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
          /* Cast the V16QImode vector back to a V4SImode vector.  */
          tmp2 = gen_reg_rtx (V4SImode);
          emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
          /* Replicate the low element of the V4SImode vector.  */
          emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
          /* Cast the V4SImode back to V16QImode, and store in target.  */
          emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
          return true;
        }
      smode = QImode;
      wsmode = HImode;
      wvmode = V8HImode;
      goto widen;

    widen:
      /* Replicate the value once into the next wider mode and recurse.  */
      val = convert_modes (wsmode, smode, val, true);
      x = expand_simple_binop (wsmode, ASHIFT, val,
                               GEN_INT (GET_MODE_BITSIZE (smode)),
                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
      val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wvmode);
      if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
        gcc_unreachable ();

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}
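
/* The "widen" case above is the scalar trick of gluing two copies of a
   narrow value together before broadcasting in a wider vector mode.
   Illustrative sketch (exposition only), for a QImode value v:

     unsigned short wide = ((unsigned short) v << 8) | v;

   WIDE already contains two copies of V, so duplicating it across a
   V4HImode or V8HImode vector replicates V across all byte positions.  */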
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   whose ONE_VAR element is VAR, and other elements are zero.  Return true
   if successful.  */

static bool
ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
                                     rtx target, rtx var, int one_var)
{
  enum machine_mode vsimode;
  rtx new_target;
  rtx x, tmp;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok)
        return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      if (one_var != 0)
        return false;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4SFmode:
    case V4SImode:
      if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
        new_target = gen_reg_rtx (mode);
      else
        new_target = target;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_DUPLICATE (mode, var);
      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
      if (one_var != 0)
        {
          /* We need to shuffle the value to the correct position, so
             create a new pseudo to store the intermediate result.  */

          /* With SSE2, we can use the integer shuffle insns.  */
          if (mode != V4SFmode && TARGET_SSE2)
            {
              emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
                                            GEN_INT (1),
                                            GEN_INT (one_var == 1 ? 0 : 1),
                                            GEN_INT (one_var == 2 ? 0 : 1),
                                            GEN_INT (one_var == 3 ? 0 : 1)));
              if (target != new_target)
                emit_move_insn (target, new_target);
              return true;
            }

          /* Otherwise convert the intermediate result to V4SFmode and
             use the SSE1 shuffle instructions.  */
          if (mode != V4SFmode)
            {
              tmp = gen_reg_rtx (V4SFmode);
              emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
            }
          else
            tmp = new_target;

          emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
                                       GEN_INT (1),
                                       GEN_INT (one_var == 1 ? 0 : 1),
                                       GEN_INT (one_var == 2 ? 0+4 : 1+4),
                                       GEN_INT (one_var == 3 ? 0+4 : 1+4)));

          if (mode != V4SFmode)
            emit_move_insn (target, gen_lowpart (V4SImode, tmp));
          else if (tmp != target)
            emit_move_insn (target, tmp);
        }
      else if (target != new_target)
        emit_move_insn (target, new_target);
      return true;

    case V8HImode:
    case V16QImode:
      vsimode = V4SImode;
      goto widen;

    case V4HImode:
    case V8QImode:
      if (!mmx_ok)
        return false;
      vsimode = V2SImode;
      goto widen;

    widen:
      if (one_var != 0)
        return false;

      /* Zero extend the variable element to SImode and recurse.  */
      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);

      x = gen_reg_rtx (vsimode);
      if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
                                                var, one_var))
        gcc_unreachable ();

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   consisting of the values in VALS.  It is known that all elements
   except ONE_VAR are constants.  Return true if successful.  */

static bool
ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
                                 rtx target, rtx vals, int one_var)
{
  rtx var = XVECEXP (vals, 0, one_var);
  enum machine_mode wmode;
  rtx const_vec, x;

  const_vec = copy_rtx (vals);
  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));

  switch (mode)
    {
    case V2DFmode:
    case V2DImode:
    case V2SFmode:
    case V2SImode:
      /* For the two element vectors, it's just as easy to use
         the general case.  */
      return false;

    case V4SFmode:
    case V4SImode:
    case V8HImode:
    case V4HImode:
      break;

    case V16QImode:
      wmode = V8HImode;
      goto widen;
    case V8QImode:
      wmode = V4HImode;
      goto widen;
    widen:
      /* There's no way to set one QImode entry easily.  Combine
         the variable value with its adjacent constant value, and
         promote to an HImode set.  */
      x = XVECEXP (vals, 0, one_var ^ 1);
      if (one_var & 1)
        {
          var = convert_modes (HImode, QImode, var, true);
          var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
                                     NULL_RTX, 1, OPTAB_LIB_WIDEN);
          x = GEN_INT (INTVAL (x) & 0xff);
        }
      else
        {
          var = convert_modes (HImode, QImode, var, true);
          x = gen_int_mode (INTVAL (x) << 8, HImode);
        }
      if (x != const0_rtx)
        var = expand_simple_binop (HImode, IOR, var, x, var,
                                   1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wmode);
      emit_move_insn (x, gen_lowpart (wmode, const_vec));
      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }

  emit_move_insn (target, const_vec);
  ix86_expand_vector_set (mmx_ok, target, var, one_var);
  return true;
}
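
/* Illustrative sketch of the QImode combination above (exposition only):
   with one_var = 5 in a V16QImode vector, the variable byte and its
   constant neighbour at index 4 are merged into one HImode value,

     unsigned short pair = ((unsigned short) var << 8) | (cst & 0xff);

   after which the problem reduces to storing PAIR into element
   5 >> 1 = 2 of the corresponding V8HImode vector.  */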
/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
   all values variable, and none identical.  */

static void
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
                                 rtx target, rtx vals)
{
  enum machine_mode half_mode = GET_MODE_INNER (mode);
  rtx op0 = NULL, op1 = NULL;
  bool use_vec_concat = false;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok && !TARGET_SSE)
        break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      /* For the two element vectors, we always implement VEC_CONCAT.  */
      op0 = XVECEXP (vals, 0, 0);
      op1 = XVECEXP (vals, 0, 1);
      use_vec_concat = true;
      break;

    case V4SFmode:
      half_mode = V2SFmode;
      goto half;
    case V4SImode:
      half_mode = V2SImode;
      goto half;
    half:
      {
        rtvec v;

        /* For V4SF and V4SI, we implement a concat of two V2 vectors.
           Recurse to load the two halves.  */

        op0 = gen_reg_rtx (half_mode);
        v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
        ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));

        op1 = gen_reg_rtx (half_mode);
        v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
        ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));

        use_vec_concat = true;
      }
      break;

    case V8HImode:
    case V16QImode:
    case V4HImode:
    case V8QImode:
      break;

    default:
      gcc_unreachable ();
    }

  if (use_vec_concat)
    {
      if (!register_operand (op0, half_mode))
        op0 = force_reg (half_mode, op0);
      if (!register_operand (op1, half_mode))
        op1 = force_reg (half_mode, op1);

      emit_insn (gen_rtx_SET (VOIDmode, target,
                              gen_rtx_VEC_CONCAT (mode, op0, op1)));
    }
  else
    {
      int i, j, n_elts, n_words, n_elt_per_word;
      enum machine_mode inner_mode;
      rtx words[4], shift;

      inner_mode = GET_MODE_INNER (mode);
      n_elts = GET_MODE_NUNITS (mode);
      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      n_elt_per_word = n_elts / n_words;
      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));

      for (i = 0; i < n_words; ++i)
        {
          rtx word = NULL_RTX;

          for (j = 0; j < n_elt_per_word; ++j)
            {
              rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
              elt = convert_modes (word_mode, inner_mode, elt, true);

              if (j == 0)
                word = elt;
              else
                {
                  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
                                              word, 1, OPTAB_LIB_WIDEN);
                  word = expand_simple_binop (word_mode, IOR, word, elt,
                                              word, 1, OPTAB_LIB_WIDEN);
                }
            }

          words[i] = word;
        }

      if (n_words == 1)
        emit_move_insn (target, gen_lowpart (mode, words[0]));
      else if (n_words == 2)
        {
          rtx tmp = gen_reg_rtx (mode);
          emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
          emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
          emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
          emit_move_insn (target, tmp);
        }
      else if (n_words == 4)
        {
          rtx tmp = gen_reg_rtx (V4SImode);
          vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
          ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
          emit_move_insn (target, gen_lowpart (mode, tmp));
        }
      else
        gcc_unreachable ();
    }
}
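
/* Illustrative sketch of the word-building loop above (exposition only):
   for V8HImode on a 32-bit target each SImode word packs two elements,
   highest-numbered element first, i.e.

     word[i] = ((unsigned int) elt[2 * i + 1] << 16)
               | (unsigned short) elt[2 * i];

   and the resulting words are then assembled into the vector through an
   integer temporary or a recursive V4SImode build.  */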
/* Initialize vector TARGET via VALS.  Suppress the use of MMX
   instructions unless MMX_OK is true.  */

void
ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  int i;
  rtx x;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
        n_var++, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
        all_const_zero = false;
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
        all_same = false;
    }

  /* Constants are best loaded from the constant pool.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* If all values are identical, broadcast the value.  */
  if (all_same
      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
                                            XVECEXP (vals, 0, 0)))
    return;

  /* Values where only one field is non-constant are best loaded from
     the pool and overwritten via move later.  */
  if (n_var == 1)
    {
      if (all_const_zero
          && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
                                                  XVECEXP (vals, 0, one_var),
                                                  one_var))
        return;

      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
        return;
    }

  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
}
void
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_merge = false;
  rtx tmp;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (mmx_ok)
        {
          tmp = gen_reg_rtx (GET_MODE_INNER (mode));
          ix86_expand_vector_extract (true, tmp, target, 1 - elt);
          if (elt == 0)
            tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
          else
            tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
          emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
          return;
        }
      break;

    case V2DFmode:
    case V2DImode:
      {
        rtx op0, op1;

        /* For the two element vectors, we implement a VEC_CONCAT with
           the extraction of the other element.  */

        tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
        tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);

        if (elt == 0)
          op0 = val, op1 = tmp;
        else
          op0 = tmp, op1 = val;

        tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
        emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      }
      return;

    case V4SFmode:
      switch (elt)
        {
        case 0:
          use_vec_merge = true;
          break;

        case 1:
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* target = A A B B */
          emit_insn (gen_sse_unpcklps (target, target, target));
          /* target = X A B B */
          ix86_expand_vector_set (false, target, val, 0);
          /* target = A X C D */
          emit_insn (gen_sse_shufps_1 (target, target, tmp,
                                       GEN_INT (1), GEN_INT (0),
                                       GEN_INT (2+4), GEN_INT (3+4)));
          return;

        case 2:
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* tmp = X B C D */
          ix86_expand_vector_set (false, tmp, val, 0);
          /* target = A B X D */
          emit_insn (gen_sse_shufps_1 (target, target, tmp,
                                       GEN_INT (0), GEN_INT (1),
                                       GEN_INT (0+4), GEN_INT (3+4)));
          return;

        case 3:
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* tmp = X B C D */
          ix86_expand_vector_set (false, tmp, val, 0);
          /* target = A B C X */
          emit_insn (gen_sse_shufps_1 (target, target, tmp,
                                       GEN_INT (0), GEN_INT (1),
                                       GEN_INT (2+4), GEN_INT (0+4)));
          return;

        default:
          gcc_unreachable ();
        }
      break;

    case V4SImode:
      /* Element 0 handled by vec_merge below.  */
      if (elt == 0)
        {
          use_vec_merge = true;
          break;
        }

      if (TARGET_SSE2)
        {
          /* With SSE2, use integer shuffles to swap element 0 and ELT,
             store into element 0, then shuffle them back.  */
          rtx order[4];

          order[0] = GEN_INT (elt);
          order[1] = const1_rtx;
          order[2] = const2_rtx;
          order[3] = GEN_INT (3);
          order[elt] = const0_rtx;

          emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
                                        order[1], order[2], order[3]));

          ix86_expand_vector_set (false, target, val, 0);

          emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
                                        order[1], order[2], order[3]));
        }
      else
        {
          /* For SSE1, we have to reuse the V4SF code.  */
          ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
                                  gen_lowpart (SFmode, val), elt);
        }
      return;

    case V8HImode:
      use_vec_merge = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
    case V8QImode:
    default:
      break;
    }

  if (use_vec_merge)
    {
      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, target);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);

      emit_move_insn (target, mem);
    }
}
void
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
{
  enum machine_mode mode = GET_MODE (vec);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;
  rtx tmp;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
        break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      use_vec_extr = true;
      break;

    case V4SFmode:
      switch (elt)
        {
        case 0:
          tmp = vec;
          break;

        case 1:
        case 3:
          tmp = gen_reg_rtx (mode);
          emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
                                       GEN_INT (elt), GEN_INT (elt),
                                       GEN_INT (elt+4), GEN_INT (elt+4)));
          break;

        case 2:
          tmp = gen_reg_rtx (mode);
          emit_insn (gen_sse_unpckhps (tmp, vec, vec));
          break;

        default:
          gcc_unreachable ();
        }
      vec = tmp;
      use_vec_extr = true;
      elt = 0;
      break;

    case V4SImode:
      if (TARGET_SSE2)
        {
          switch (elt)
            {
            case 0:
              tmp = vec;
              break;

            case 1:
            case 3:
              tmp = gen_reg_rtx (mode);
              emit_insn (gen_sse2_pshufd_1 (tmp, vec,
                                            GEN_INT (elt), GEN_INT (elt),
                                            GEN_INT (elt), GEN_INT (elt)));
              break;

            case 2:
              tmp = gen_reg_rtx (mode);
              emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
              break;

            default:
              gcc_unreachable ();
            }
          vec = tmp;
          use_vec_extr = true;
          elt = 0;
        }
      else
        {
          /* For SSE1, we have to reuse the V4SF code.  */
          ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
                                      gen_lowpart (V4SFmode, vec), elt);
          return;
        }
      break;

    case V8HImode:
      use_vec_extr = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
    case V8QImode:
      /* ??? Could extract the appropriate HImode element and shift.  */
    default:
      break;
    }

  if (use_vec_extr)
    {
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == HImode)
        {
          tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
          target = gen_lowpart (SImode, target);
        }

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
    }
}
/* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
   pattern to reduce; DEST is the destination; IN is the input vector.  */

void
ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
{
  rtx tmp1, tmp2, tmp3;

  tmp1 = gen_reg_rtx (V4SFmode);
  tmp2 = gen_reg_rtx (V4SFmode);
  tmp3 = gen_reg_rtx (V4SFmode);

  emit_insn (gen_sse_movhlps (tmp1, in, in));
  emit_insn (fn (tmp2, tmp1, in));

  emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
                               GEN_INT (1), GEN_INT (1),
                               GEN_INT (1+4), GEN_INT (1+4)));
  emit_insn (fn (dest, tmp2, tmp3));
}
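
/* Scalar picture of the reduction above (illustrative only): with IN
   holding {a, b, c, d} and FN a binary operation f,

     tmp1 = {c, d, c, d}              movhlps
     tmp2 = f (tmp1, in)              so tmp2[0] = f (c, a), tmp2[1] = f (d, b)
     tmp3 = broadcast of tmp2[1]      shufps
     dest = f (tmp2, tmp3)            element 0 becomes f (f (c, a), f (d, b))

   i.e. element 0 of DEST ends up holding the reduction of all four
   input elements.  */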
/* Target hook for scalar_mode_supported_p.  */
static bool
ix86_scalar_mode_supported_p (enum machine_mode mode)
{
  if (DECIMAL_FLOAT_MODE_P (mode))
    return true;
  return default_scalar_mode_supported_p (mode);
}
/* Implements target hook vector_mode_supported_p.  */
static bool
ix86_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
    return true;
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;
  return false;
}
/* Worker function for TARGET_MD_ASM_CLOBBERS.

   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */

static tree
ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
                      tree inputs ATTRIBUTE_UNUSED,
                      tree clobbers)
{
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
                        clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
                        clobbers);
  return clobbers;
}
/* Return true if this goes in large data/bss.  */

static bool
ix86_in_large_data_p (tree exp)
{
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
    return false;

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".ldata") == 0
          || strcmp (section, ".lbss") == 0)
        return true;
      return false;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
         in data because it might be too big when completed.  */
      if (!size || size > ix86_section_threshold)
        return true;
    }

  return false;
}
static void
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
      && ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}
/* Worker function for REVERSE_CONDITION.  */

enum rtx_code
ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
{
  return (mode != CCFPmode && mode != CCFPUmode
          ? reverse_condition (code)
          : reverse_condition_maybe_unordered (code));
}
/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

const char *
output_387_reg_move (rtx insn, rtx *operands)
{
  if (REG_P (operands[1])
      && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
    {
      if (REGNO (operands[0]) == FIRST_STACK_REG)
        return output_387_ffreep (operands, 0);
      return "fstp\t%y0";
    }
  if (STACK_TOP_P (operands[0]))
    return "fld%z1\t%y1";
  return "fst\t%y0";
}
/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_USE_SAHF)
    {
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
                               gen_rtx_LABEL_REF (VOIDmode, label),
                               pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
  emit_jump_insn (temp);
}
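
/* Note on the constants above (an explanatory remark, not text from the
   original sources): fnstsw stores the x87 status word, in which C2 is
   bit 10, i.e. bit 2 of the high byte.  The sahf path copies that byte
   into EFLAGS, where C2 lands in PF and is tested through the UNORDERED
   condition on CCmode; the non-sahf path instead tests the 0x04 bit of
   the high QImode part of REG directly.  */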
/* Output code to perform a log1p XFmode calculation.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);

  emit_insn (gen_absxf2 (tmp, op1));
  emit_insn (gen_cmpxf (tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode)));
  emit_jump_insn (gen_bge (label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));

  emit_label (label2);
}
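
/* How the two branches above compute log1p (x) (explanatory note, not from
   the original sources): both use y * log2 (...) with y = ln (2), so the
   result is a natural logarithm.  For |x| < 1 - sqrt (2)/2 ~= 0.2928932188
   the fyl2xp1 instruction evaluates ln (2) * log2 (1 + x) directly, which
   keeps precision when x is tiny; for larger |x| the code forms 1 + x
   explicitly and uses fyl2x, since fyl2xp1 is only specified for the
   small-argument range.  */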
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
                                tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
               flags & SECTION_WRITE ? "aw" : "a");
      return;
    }
  default_elf_asm_named_section (name, flags, decl);
}
/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ix86_mangle_fundamental_type (tree type)
{
  switch (TYPE_MODE (type))
    {
    case TFmode:
      /* __float128 is "g".  */
      return "g";
    case XFmode:
      /* "long double" or __float80 is "e".  */
      return "e";
    default:
      return NULL;
    }
}
/* For 32-bit code we can save PIC register setup by using
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
   register, so it is better to call __stack_chk_fail directly.  */

static tree
ix86_stack_protect_fail (void)
{
  return TARGET_64BIT
         ? default_external_stack_protect_fail ()
         : default_hidden_stack_protect_fail ();
}
/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */
int
asm_preferred_eh_data_format (int code, int global)
{
  if (flag_pic)
    {
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
          || ix86_cmodel == CM_SMALL_PIC
          || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
        type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }
  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
}
/* Expand copysign from SIGN to the positive value ABS_VALUE
   storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
   the sign bit.  */
static void
ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
{
  enum machine_mode mode = GET_MODE (sign);
  rtx sgn = gen_reg_rtx (mode);
  if (mask == NULL_RTX)
    {
      mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
      if (!VECTOR_MODE_P (mode))
        {
          /* We need to generate a scalar mode mask in this case.  */
          rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
          tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
          mask = gen_reg_rtx (mode);
          emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
        }
    }
  else
    mask = gen_rtx_NOT (mode, mask);

  emit_insn (gen_rtx_SET (VOIDmode, sgn,
                          gen_rtx_AND (mode, mask, sign)));
  emit_insn (gen_rtx_SET (VOIDmode, result,
                          gen_rtx_IOR (mode, abs_value, sgn)));
}
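
/* Bit-level picture of the above (illustrative, scalar DFmode case): with
   MASK covering only the sign bit,

     sgn    = sign & 0x8000000000000000ULL;
     result = abs_value | sgn;

   When the caller passes in the inverted mask produced by
   ix86_expand_sse_fabs, the NOT above first recovers the plain sign-bit
   mask.  */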
/* Expand fabs (OP0) and return a new rtx that holds the result.  The
   mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null.  */
static rtx
ix86_expand_sse_fabs (rtx op0, rtx *smask)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx xa, mask;

  xa = gen_reg_rtx (mode);
  mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
  if (!VECTOR_MODE_P (mode))
    {
      /* We need to generate a scalar mode mask in this case.  */
      rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
      tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
      mask = gen_reg_rtx (mode);
      emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
    }
  emit_insn (gen_rtx_SET (VOIDmode, xa,
                          gen_rtx_AND (mode, op0, mask)));

  if (smask)
    *smask = mask;

  return xa;
}
/* Expands a comparison of OP0 with OP1 using comparison code CODE,
   swapping the operands if SWAP_OPERANDS is true.  The expanded
   code is a forward jump to a newly created label in case the
   comparison is true.  The generated label rtx is returned.  */
static rtx
ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
                                  bool swap_operands)
{
  rtx label, tmp;

  if (swap_operands)
    {
      tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  label = gen_label_rtx ();
  tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_COMPARE (CCFPUmode, op0, op1)));
  tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                              gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  JUMP_LABEL (tmp) = label;

  return label;
}
/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
   using comparison code CODE.  Operands are swapped for the comparison if
   SWAP_OPERANDS is true.  Returns a rtx for the generated mask.  */
static rtx
ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
                              bool swap_operands)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx mask = gen_reg_rtx (mode);

  if (swap_operands)
    {
      rtx tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  if (mode == DFmode)
    emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
                                    gen_rtx_fmt_ee (code, mode, op0, op1)));
  else
    emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
                                   gen_rtx_fmt_ee (code, mode, op0, op1)));

  return mask;
}
/* Generate and return a rtx of mode MODE for 2**n where n is the number
   of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */
static rtx
ix86_gen_TWO52 (enum machine_mode mode)
{
  REAL_VALUE_TYPE TWO52r;
  rtx TWO52;

  real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
  TWO52 = const_double_from_real_value (TWO52r, mode);
  TWO52 = force_reg (mode, TWO52);

  return TWO52;
}
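
/* Why 2**52 (and 2**23 for SFmode): a double carries 52 fraction bits, so
   for any |x| < 2**52 the sum x + 2**52 has no room left for fraction
   bits and the FPU rounds it to an integer; subtracting 2**52 again
   leaves that integer.  Worked example (illustrative): 3.7 + 2**52 rounds
   to 4503599627370500.0, and subtracting 2**52 gives 4.0 under
   round-to-nearest.  */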
/* Expand SSE sequence for computing lround from OP1 storing
   into OP0.  */
void
ix86_expand_lround (rtx op0, rtx op1)
{
  /* C code for the stuff we're doing below:
       tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
       return (long)tmp;
   */
  enum machine_mode mode = GET_MODE (op1);
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx adj;

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* adj = copysign (0.5, op1) */
  adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);

  /* adj = op1 + adj */
  adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);

  /* op0 = (imode)adj */
  expand_fix (op0, adj, 0);
}
/* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
{
  /* C code for the stuff we're doing below (for do_floor):
        xi = (long)op1;
        xi -= (double)xi > op1 ? 1 : 0;
        return xi;
   */
  enum machine_mode fmode = GET_MODE (op1);
  enum machine_mode imode = GET_MODE (op0);
  rtx ireg, freg, label, tmp;

  /* reg = (long)op1 */
  ireg = gen_reg_rtx (imode);
  expand_fix (ireg, op1, 0);

  /* freg = (double)reg */
  freg = gen_reg_rtx (fmode);
  expand_float (freg, ireg, 0);

  /* ireg = (freg > op1) ? ireg - 1 : ireg */
  label = ix86_expand_sse_compare_and_jump (UNLE,
                                            freg, op1, !do_floor);
  tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
                             ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (ireg, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (op0, ireg);
}
/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
   result in OPERAND0.  */
void
ix86_expand_rint (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
        xa = fabs (operand1);
        if (!isless (xa, 2**52))
          return operand1;
        xa = xa + 2**52 - 2**52;
        return copysign (xa, operand1);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, xa, label, TWO52, mask;

  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  TWO52 = ix86_gen_TWO52 (mode);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  ix86_sse_copysign_to_positive (res, xa, res, mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
void
ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
        double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        xa = xa + TWO52 - TWO52;
        x2 = copysign (xa, x);
     Compensate: if x2 is on the wrong side of x, adjust by 1.0 (floor)
     or -1.0 (ceil).
        return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = xa + TWO52 - TWO52; */
  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  /* xa = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (xa, xa, res, mask);

  /* generate 1.0 or -1.0 */
  one = force_reg (mode,
                   const_double_from_real_value (do_floor
                                                 ? dconst1 : dconstm1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  /* We always need to subtract here to preserve signed zero.  */
  tmp = expand_simple_binop (mode, MINUS,
                             xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
        double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        x2 = (double)(long)x;
     Compensate: if x2 is on the wrong side of x, adjust by 1.0 (floor
     subtracts, ceil adds).
        if (HONOR_SIGNED_ZEROS (mode))
          return copysign (x2, x);
        return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (xa, xi, 0);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
                             xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
void
ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
{
  /* C code for the stuff we expand below.
        double xa = fabs (x), xa2, x2;
        if (!isless (xa, TWO52))
          return x;
     Using the absolute value and copying back sign makes
     -0.0 -> -0.0 correct.
        xa2 = xa + TWO52 - TWO52;
     Compensate.
        dxa = xa2 - xa;
        if (dxa <= -0.5)
          xa2 += 1;
        else if (dxa > 0.5)
          xa2 -= 1;
        x2 = copysign (xa2, x);
        return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa2 = xa + TWO52 - TWO52; */
  xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);

  /* dxa = xa2 - xa; */
  dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);

  /* generate 0.5, 1.0 and -0.5 */
  half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
  one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
  mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
                               0, OPTAB_DIRECT);

  /* Compensate.  */
  tmp = gen_reg_rtx (mode);
  /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = copysign (xa2, operand1) */
  ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_trunc (rtx operand0, rtx operand1)
{
  /* C code for SSE variant we expand below.
        double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        x2 = (double)(long)x;
        if (HONOR_SIGNED_ZEROS (mode))
          return copysign (x2, x);
        return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, label, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* x = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (res, xi, 0);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
void
ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
{
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, mask, TWO52, label, one, res, smask, tmp;

  /* C code for SSE variant we expand below.
        double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        xa2 = xa + TWO52 - TWO52;
     Compensate:
        if (xa2 > xa)
          xa2 -= 1.0;
        x2 = copysign (xa2, x);
        return x2;
   */

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &smask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* res = xa + TWO52 - TWO52; */
  tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: res = xa2 - (res > xa ? 1 : 0)  */
  mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
  emit_insn (gen_rtx_SET (VOIDmode, mask,
                          gen_rtx_AND (mode, mask, one)));
  tmp = expand_simple_binop (mode, MINUS,
                             res, mask, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* res = copysign (res, operand1) */
  ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_round (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
        double xa = fabs (x);
        if (!isless (xa, TWO52))
          return x;
        xa = (double)(long)(xa + nextafter (0.5, 0.0));
        return copysign (xa, x);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, TWO52, xa, label, xi, half, mask;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  TWO52 = ix86_gen_TWO52 (mode);
  xa = ix86_expand_sse_fabs (res, &mask);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* xa = xa + 0.5 */
  half = force_reg (mode, const_double_from_real_value (pred_half, mode));
  xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);

  /* xa = (double)(int64_t)xa */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, xa, 0);
  expand_float (xa, xi, 0);

  /* res = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
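
/* Why nextafter (0.5, 0.0) rather than 0.5 (explanatory note, not from the
   original sources): for the largest double below 0.5,
   x = 0.49999999999999994, adding a full 0.5 would round up to 1.0 and the
   truncation would return 1 instead of 0.  Adding the predecessor of 0.5
   keeps every x strictly below 0.5 from reaching 1.0, while 0.5 itself
   still sums to a value that rounds to 1.0, so halfway cases keep rounding
   away from zero as round () requires.  */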
#include "gt-i386.h"