1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #include "tm-constrs.h"
/* NOTE(review): this listing's left-margin numbering skips lines; the
   matching #endif for the #ifndef below is not visible here -- confirm
   against the pristine i386.c.  */
55 #ifndef CHECK_STACK_LIMIT
56 #define CHECK_STACK_LIMIT (-1)
59 /* Return index of given mode in mult and division cost tables. */
/* NOTE(review): the final arm of MODE_INDEX (presumably ": 4)") appears
   truncated in this listing -- verify before use.  */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
69 #define COSTS_N_BYTES(N) ((N) * 2)
/* Placeholder stringop-algorithm table: always fall back to a libcall.  */
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
/* Cost table used when optimizing for size (-Os): costs are byte counts,
   not cycle counts.  NOTE(review): the margin numbering skips (92-93, 117,
   128), so some fields (e.g. "large insn", branch costs) and the closing
   "};" appear dropped from this listing -- verify against pristine i386.c.  */
74 struct processor_costs size_cost
= { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
/* memcpy / memset stringop tables: one-byte rep prefix for any size.
   NOTE(review): these entries were split mid-line by the extraction.  */
124 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
125 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
126 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
127 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}}
130 /* Processor costs (relative to an add) */
/* 80386 cost table (cycle counts relative to an add).
   NOTE(review): margin numbering skips (151, 175) and the closing "};" is
   not visible -- some fields appear dropped from this listing.  */
132 struct processor_costs i386_cost
= { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
/* memcpy / memset stringop tables: always rep movsb / rep stosb.  */
182 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
183 DUMMY_STRINGOP_ALGS
},
184 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
185 DUMMY_STRINGOP_ALGS
},
/* 80486 cost table.  NOTE(review): this listing truncates the table -- the
   memset stringop entry's DUMMY_STRINGOP_ALGS and the closing "};" (and the
   fields skipped by the margin numbering, e.g. 208, 232) are missing;
   verify against pristine i386.c.  */
189 struct processor_costs i486_cost
= { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
/* memcpy / memset stringop tables: four-byte rep prefix for any size.  */
239 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
240 DUMMY_STRINGOP_ALGS
},
241 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
/* Pentium (P5) cost table.  NOTE(review): the listing drops lines after the
   memset stringop entry (margin jumps 298 -> 303), so the trailing
   DUMMY_STRINGOP_ALGS and closing "};" are missing from this view.  */
246 struct processor_costs pentium_cost
= {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
/* memcpy: rep movsl up to 256 bytes, then libcall.  */
296 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
297 DUMMY_STRINGOP_ALGS
},
/* memset: rep stosl for any size.  */
298 {{libcall
, {{-1, rep_prefix_4_byte
}}},
/* PentiumPro / P6 cost table.  NOTE(review): the comment beginning at
   original line 353 below has lost its closing "* /" in this listing
   (original line 357 dropped), and the table's trailing entry and closing
   "};" are also missing -- verify against pristine i386.c.  */
303 struct processor_costs pentiumpro_cost
= {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
353 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
354 the alignment). For small blocks inline loop is still a noticeable win, for bigger
355 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
356 more expensive startup time in CPU, but after 4K the difference is down in the noise.
358 {{rep_prefix_4_byte
, {{128, loop
}, {1024, unrolled_loop
},
359 {8192, rep_prefix_4_byte
}, {-1, rep_prefix_1_byte
}}},
360 DUMMY_STRINGOP_ALGS
},
361 {{rep_prefix_4_byte
, {{1024, unrolled_loop
},
362 {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
/* AMD Geode cost table.  NOTE(review): margin numbering skips (386, 397,
   411) and the trailing entry plus closing "};" are missing from this
   listing -- verify against pristine i386.c.  */
367 struct processor_costs geode_cost
= {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
/* memcpy / memset: rep movsl/stosl up to 256 bytes, then libcall.  */
418 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
419 DUMMY_STRINGOP_ALGS
},
420 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
/* AMD K6 cost table.  NOTE(review): margin numbering skips (444, 468) and
   the trailing entry plus closing "};" are missing from this listing --
   verify against pristine i386.c.  */
425 struct processor_costs k6_cost
= {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
/* memcpy / memset: rep movsl/stosl up to 256 bytes, then libcall.  */
475 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
476 DUMMY_STRINGOP_ALGS
},
477 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
/* AMD Athlon cost table.  NOTE(review): margin numbering skips (501, 525)
   and the trailing entry plus closing "};" are missing from this listing --
   verify against pristine i386.c.  */
482 struct processor_costs athlon_cost
= {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
532 /* For some reason, Athlon deals better with REP prefix (relative to loops)
533 compared to K8. Alignment becomes important after 8 bytes for memcpy and
534 128 bytes for memset. */
535 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
536 DUMMY_STRINGOP_ALGS
},
537 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
/* AMD K8 (Opteron/Athlon 64) cost table.  NOTE(review): margin numbering
   skips (561, 588, 590) and the closing "};" is not visible; the prefetch
   comment below also appears to have lost its final line/closing "* /" --
   verify against pristine i386.c.  */
542 struct processor_costs k8_cost
= {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
584 /* New AMD processors never drop prefetches; if they cannot be performed
585 immediately, they are queued. We set number of simultaneous prefetches
586 to a large constant to reflect this (it probably is not a good idea not
587 to limit number of prefetches at all, as their execution also takes some
589 100, /* number of parallel prefetches */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
597 /* K8 has optimized REP instruction for medium sized blocks, but for very small
598 blocks it is better to use loop. For large blocks, libcall can do
599 nontemporary accesses and beat inline considerably. */
600 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
601 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
602 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
603 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
604 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
/* AMD Family 10h (Barcelona) cost table.  NOTE(review): the block of
   "MOVD reg.., xmmreg ..." lines below is latency-table text whose comment
   delimiters were dropped by this listing (margin skips 648, 651, 653, 655),
   so it is not valid C as shown; the closing "};" is also missing --
   verify against pristine i386.c.  */
607 struct processor_costs amdfam10_cost
= {
608 COSTS_N_INSNS (1), /* cost of an add instruction */
609 COSTS_N_INSNS (2), /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (5)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (35), /* HI */
620 COSTS_N_INSNS (51), /* SI */
621 COSTS_N_INSNS (83), /* DI */
622 COSTS_N_INSNS (83)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
627 4, /* cost for loading QImode using movzbl */
628 {3, 4, 3}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {3, 4, 3}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {4, 4, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {3, 3}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {4, 4}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {4, 4, 3}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {4, 4, 5}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 3, /* MMX or SSE register to integer */
649 MOVD reg64, xmmreg Double FSTORE 4
650 MOVD reg32, xmmreg Double FSTORE 4
652 MOVD reg64, xmmreg Double FADD 3
654 MOVD reg32, xmmreg Double FADD 3
656 64, /* size of prefetch block */
657 /* New AMD processors never drop prefetches; if they cannot be performed
658 immediately, they are queued. We set number of simultaneous prefetches
659 to a large constant to reflect this (it probably is not a good idea not
660 to limit number of prefetches at all, as their execution also takes some
662 100, /* number of parallel prefetches */
664 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
665 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
666 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
667 COSTS_N_INSNS (2), /* cost of FABS instruction. */
668 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
669 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
671 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
672 very small blocks it is better to use loop. For large blocks, libcall can
673 do nontemporary accesses and beat inline considerably. */
674 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
675 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
676 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
677 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
678 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
/* Pentium 4 (NetBurst) cost table.  NOTE(review): margin numbering skips
   (701, 725, 735) and the closing "};" is missing; the memset table's
   second entry also appears truncated (original line 735 dropped) --
   verify against pristine i386.c.  */
682 struct processor_costs pentium4_cost
= {
683 COSTS_N_INSNS (1), /* cost of an add instruction */
684 COSTS_N_INSNS (3), /* cost of a lea instruction */
685 COSTS_N_INSNS (4), /* variable shift costs */
686 COSTS_N_INSNS (4), /* constant shift costs */
687 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
688 COSTS_N_INSNS (15), /* HI */
689 COSTS_N_INSNS (15), /* SI */
690 COSTS_N_INSNS (15), /* DI */
691 COSTS_N_INSNS (15)}, /* other */
692 0, /* cost of multiply per each bit set */
693 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
694 COSTS_N_INSNS (56), /* HI */
695 COSTS_N_INSNS (56), /* SI */
696 COSTS_N_INSNS (56), /* DI */
697 COSTS_N_INSNS (56)}, /* other */
698 COSTS_N_INSNS (1), /* cost of movsx */
699 COSTS_N_INSNS (1), /* cost of movzx */
700 16, /* "large" insn */
702 2, /* cost for loading QImode using movzbl */
703 {4, 5, 4}, /* cost of loading integer registers
704 in QImode, HImode and SImode.
705 Relative to reg-reg move (2). */
706 {2, 3, 2}, /* cost of storing integer registers */
707 2, /* cost of reg,reg fld/fst */
708 {2, 2, 6}, /* cost of loading fp registers
709 in SFmode, DFmode and XFmode */
710 {4, 4, 6}, /* cost of storing fp registers
711 in SFmode, DFmode and XFmode */
712 2, /* cost of moving MMX register */
713 {2, 2}, /* cost of loading MMX registers
714 in SImode and DImode */
715 {2, 2}, /* cost of storing MMX registers
716 in SImode and DImode */
717 12, /* cost of moving SSE register */
718 {12, 12, 12}, /* cost of loading SSE registers
719 in SImode, DImode and TImode */
720 {2, 2, 8}, /* cost of storing SSE registers
721 in SImode, DImode and TImode */
722 10, /* MMX or SSE register to integer */
723 64, /* size of prefetch block */
724 6, /* number of parallel prefetches */
726 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
727 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
728 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
729 COSTS_N_INSNS (2), /* cost of FABS instruction. */
730 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
731 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
732 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
733 DUMMY_STRINGOP_ALGS
},
734 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
736 DUMMY_STRINGOP_ALGS
},
/* Nocona (64-bit Pentium 4 / Xeon) cost table.  NOTE(review): margin
   numbering skips (759, 783, 794) and the closing "};" is missing; the
   32-bit memset entry appears truncated (original line 794 dropped) --
   verify against pristine i386.c.  */
740 struct processor_costs nocona_cost
= {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (1), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (10), /* HI */
747 COSTS_N_INSNS (10), /* SI */
748 COSTS_N_INSNS (10), /* DI */
749 COSTS_N_INSNS (10)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (66), /* HI */
753 COSTS_N_INSNS (66), /* SI */
754 COSTS_N_INSNS (66), /* DI */
755 COSTS_N_INSNS (66)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 16, /* "large" insn */
760 4, /* cost for loading QImode using movzbl */
761 {4, 4, 4}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {4, 4, 4}, /* cost of storing integer registers */
765 3, /* cost of reg,reg fld/fst */
766 {12, 12, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {4, 4, 4}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 6, /* cost of moving MMX register */
771 {12, 12}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {12, 12}, /* cost of storing MMX registers
774 in SImode and DImode */
775 6, /* cost of moving SSE register */
776 {12, 12, 12}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {12, 12, 12}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 8, /* MMX or SSE register to integer */
781 128, /* size of prefetch block */
782 8, /* number of parallel prefetches */
784 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
785 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
786 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
787 COSTS_N_INSNS (3), /* cost of FABS instruction. */
788 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
789 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
790 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
791 {libcall
, {{32, loop
}, {20000, rep_prefix_8_byte
},
792 {100000, unrolled_loop
}, {-1, libcall
}}}},
793 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
795 {libcall
, {{24, loop
}, {64, unrolled_loop
},
796 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
800 struct processor_costs core2_cost
= {
801 COSTS_N_INSNS (1), /* cost of an add instruction */
802 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
803 COSTS_N_INSNS (1), /* variable shift costs */
804 COSTS_N_INSNS (1), /* constant shift costs */
805 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
806 COSTS_N_INSNS (3), /* HI */
807 COSTS_N_INSNS (3), /* SI */
808 COSTS_N_INSNS (3), /* DI */
809 COSTS_N_INSNS (3)}, /* other */
810 0, /* cost of multiply per each bit set */
811 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
812 COSTS_N_INSNS (22), /* HI */
813 COSTS_N_INSNS (22), /* SI */
814 COSTS_N_INSNS (22), /* DI */
815 COSTS_N_INSNS (22)}, /* other */
816 COSTS_N_INSNS (1), /* cost of movsx */
817 COSTS_N_INSNS (1), /* cost of movzx */
818 8, /* "large" insn */
820 2, /* cost for loading QImode using movzbl */
821 {6, 6, 6}, /* cost of loading integer registers
822 in QImode, HImode and SImode.
823 Relative to reg-reg move (2). */
824 {4, 4, 4}, /* cost of storing integer registers */
825 2, /* cost of reg,reg fld/fst */
826 {6, 6, 6}, /* cost of loading fp registers
827 in SFmode, DFmode and XFmode */
828 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
829 2, /* cost of moving MMX register */
830 {6, 6}, /* cost of loading MMX registers
831 in SImode and DImode */
832 {4, 4}, /* cost of storing MMX registers
833 in SImode and DImode */
834 2, /* cost of moving SSE register */
835 {6, 6, 6}, /* cost of loading SSE registers
836 in SImode, DImode and TImode */
837 {4, 4, 4}, /* cost of storing SSE registers
838 in SImode, DImode and TImode */
839 2, /* MMX or SSE register to integer */
840 128, /* size of prefetch block */
841 8, /* number of parallel prefetches */
843 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
844 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
845 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
846 COSTS_N_INSNS (1), /* cost of FABS instruction. */
847 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
848 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
849 {{libcall
, {{11, loop
}, {-1, rep_prefix_4_byte
}}},
850 {libcall
, {{32, loop
}, {64, rep_prefix_4_byte
},
851 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
852 {{libcall
, {{8, loop
}, {15, unrolled_loop
},
853 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
854 {libcall
, {{24, loop
}, {32, unrolled_loop
},
855 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
858 /* Generic64 should produce code tuned for Nocona and K8. */
860 struct processor_costs generic64_cost
= {
861 COSTS_N_INSNS (1), /* cost of an add instruction */
862 /* On all chips taken into consideration lea is 2 cycles and more. With
863 this cost however our current implementation of synth_mult results in
864 use of unnecessary temporary registers causing regression on several
865 SPECfp benchmarks. */
866 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
867 COSTS_N_INSNS (1), /* variable shift costs */
868 COSTS_N_INSNS (1), /* constant shift costs */
869 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
870 COSTS_N_INSNS (4), /* HI */
871 COSTS_N_INSNS (3), /* SI */
872 COSTS_N_INSNS (4), /* DI */
873 COSTS_N_INSNS (2)}, /* other */
874 0, /* cost of multiply per each bit set */
875 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
876 COSTS_N_INSNS (26), /* HI */
877 COSTS_N_INSNS (42), /* SI */
878 COSTS_N_INSNS (74), /* DI */
879 COSTS_N_INSNS (74)}, /* other */
880 COSTS_N_INSNS (1), /* cost of movsx */
881 COSTS_N_INSNS (1), /* cost of movzx */
882 8, /* "large" insn */
884 4, /* cost for loading QImode using movzbl */
885 {4, 4, 4}, /* cost of loading integer registers
886 in QImode, HImode and SImode.
887 Relative to reg-reg move (2). */
888 {4, 4, 4}, /* cost of storing integer registers */
889 4, /* cost of reg,reg fld/fst */
890 {12, 12, 12}, /* cost of loading fp registers
891 in SFmode, DFmode and XFmode */
892 {6, 6, 8}, /* cost of storing fp registers
893 in SFmode, DFmode and XFmode */
894 2, /* cost of moving MMX register */
895 {8, 8}, /* cost of loading MMX registers
896 in SImode and DImode */
897 {8, 8}, /* cost of storing MMX registers
898 in SImode and DImode */
899 2, /* cost of moving SSE register */
900 {8, 8, 8}, /* cost of loading SSE registers
901 in SImode, DImode and TImode */
902 {8, 8, 8}, /* cost of storing SSE registers
903 in SImode, DImode and TImode */
904 5, /* MMX or SSE register to integer */
905 64, /* size of prefetch block */
906 6, /* number of parallel prefetches */
907 /* Benchmarks show large regressions on K8 sixtrack benchmark when this value
908 is increased to perhaps more appropriate value of 5. */
910 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
911 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
912 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
913 COSTS_N_INSNS (8), /* cost of FABS instruction. */
914 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
915 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
916 {DUMMY_STRINGOP_ALGS
,
917 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
918 {DUMMY_STRINGOP_ALGS
,
919 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
922 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
924 struct processor_costs generic32_cost
= {
925 COSTS_N_INSNS (1), /* cost of an add instruction */
926 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
927 COSTS_N_INSNS (1), /* variable shift costs */
928 COSTS_N_INSNS (1), /* constant shift costs */
929 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
930 COSTS_N_INSNS (4), /* HI */
931 COSTS_N_INSNS (3), /* SI */
932 COSTS_N_INSNS (4), /* DI */
933 COSTS_N_INSNS (2)}, /* other */
934 0, /* cost of multiply per each bit set */
935 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
936 COSTS_N_INSNS (26), /* HI */
937 COSTS_N_INSNS (42), /* SI */
938 COSTS_N_INSNS (74), /* DI */
939 COSTS_N_INSNS (74)}, /* other */
940 COSTS_N_INSNS (1), /* cost of movsx */
941 COSTS_N_INSNS (1), /* cost of movzx */
942 8, /* "large" insn */
944 4, /* cost for loading QImode using movzbl */
945 {4, 4, 4}, /* cost of loading integer registers
946 in QImode, HImode and SImode.
947 Relative to reg-reg move (2). */
948 {4, 4, 4}, /* cost of storing integer registers */
949 4, /* cost of reg,reg fld/fst */
950 {12, 12, 12}, /* cost of loading fp registers
951 in SFmode, DFmode and XFmode */
952 {6, 6, 8}, /* cost of storing fp registers
953 in SFmode, DFmode and XFmode */
954 2, /* cost of moving MMX register */
955 {8, 8}, /* cost of loading MMX registers
956 in SImode and DImode */
957 {8, 8}, /* cost of storing MMX registers
958 in SImode and DImode */
959 2, /* cost of moving SSE register */
960 {8, 8, 8}, /* cost of loading SSE registers
961 in SImode, DImode and TImode */
962 {8, 8, 8}, /* cost of storing SSE registers
963 in SImode, DImode and TImode */
964 5, /* MMX or SSE register to integer */
965 64, /* size of prefetch block */
966 6, /* number of parallel prefetches */
968 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
969 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
970 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
971 COSTS_N_INSNS (8), /* cost of FABS instruction. */
972 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
973 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
974 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
975 DUMMY_STRINGOP_ALGS
},
976 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
977 DUMMY_STRINGOP_ALGS
},
980 const struct processor_costs
*ix86_cost
= &pentium_cost
;
982 /* Processor feature/optimization bitmasks. */
983 #define m_386 (1<<PROCESSOR_I386)
984 #define m_486 (1<<PROCESSOR_I486)
985 #define m_PENT (1<<PROCESSOR_PENTIUM)
986 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
987 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
988 #define m_NOCONA (1<<PROCESSOR_NOCONA)
989 #define m_CORE2 (1<<PROCESSOR_CORE2)
991 #define m_GEODE (1<<PROCESSOR_GEODE)
992 #define m_K6 (1<<PROCESSOR_K6)
993 #define m_K6_GEODE (m_K6 | m_GEODE)
994 #define m_K8 (1<<PROCESSOR_K8)
995 #define m_ATHLON (1<<PROCESSOR_ATHLON)
996 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
997 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
998 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
1000 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1001 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1003 /* Generic instruction choice should be common subset of supported CPUs
1004 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1005 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1007 /* Feature tests against the various tunings. */
1008 unsigned int ix86_tune_features
[X86_TUNE_LAST
] = {
1009 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1010 negatively, so enabling for Generic64 seems like good code size
1011 tradeoff. We can't enable it for 32bit generic because it does not
1012 work well with PPro base chips. */
1013 m_386
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
| m_CORE2
| m_GENERIC64
,
1015 /* X86_TUNE_PUSH_MEMORY */
1016 m_386
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
| m_PENT4
1017 | m_NOCONA
| m_CORE2
| m_GENERIC
,
1019 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1022 /* X86_TUNE_USE_BIT_TEST */
1025 /* X86_TUNE_UNROLL_STRLEN */
1026 m_486
| m_PENT
| m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_K6
| m_CORE2
| m_GENERIC
,
1028 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1029 m_PPRO
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
| m_PENT4
1030 | m_NOCONA
| m_CORE2
| m_GENERIC
,
1032 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1033 on simulation result. But after P4 was made, no performance benefit
1034 was observed with branch hints. It also increases the code size.
1035 As a result, icc never generates branch hints. */
1038 /* X86_TUNE_DOUBLE_WITH_ADD */
1041 /* X86_TUNE_USE_SAHF */
1042 m_PPRO
| m_K6_GEODE
| m_K8
| m_AMDFAM10
| m_PENT4
1043 | m_NOCONA
| m_CORE2
| m_GENERIC
,
1045 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1046 partial dependencies. */
1047 m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_PENT4
| m_NOCONA
1048 | m_CORE2
| m_GENERIC
| m_GEODE
/* m_386 | m_K6 */,
1050 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1051 register stalls on Generic32 compilation setting as well. However
1052 in current implementation the partial register stalls are not eliminated
1053 very well - they can be introduced via subregs synthesized by combine
1054 and can happen in caller/callee saving sequences. Because this option
1055 pays back little on PPro based chips and is in conflict with partial reg
1056 dependencies used by Athlon/P4 based chips, it is better to leave it off
1057 for generic32 for now. */
1060 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1061 m_CORE2
| m_GENERIC
,
1063 /* X86_TUNE_USE_HIMODE_FIOP */
1064 m_386
| m_486
| m_K6_GEODE
,
1066 /* X86_TUNE_USE_SIMODE_FIOP */
1067 ~(m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_PENT
| m_CORE2
| m_GENERIC
),
1069 /* X86_TUNE_USE_MOV0 */
1072 /* X86_TUNE_USE_CLTD */
1073 ~(m_PENT
| m_K6
| m_CORE2
| m_GENERIC
),
1075 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1078 /* X86_TUNE_SPLIT_LONG_MOVES */
1081 /* X86_TUNE_READ_MODIFY_WRITE */
1084 /* X86_TUNE_READ_MODIFY */
1087 /* X86_TUNE_PROMOTE_QIMODE */
1088 m_K6_GEODE
| m_PENT
| m_386
| m_486
| m_ATHLON_K8_AMDFAM10
| m_CORE2
1089 | m_GENERIC
/* | m_PENT4 ? */,
1091 /* X86_TUNE_FAST_PREFIX */
1092 ~(m_PENT
| m_486
| m_386
),
1094 /* X86_TUNE_SINGLE_STRINGOP */
1095 m_386
| m_PENT4
| m_NOCONA
,
1097 /* X86_TUNE_QIMODE_MATH */
1100 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1101 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1102 might be considered for Generic32 if our scheme for avoiding partial
1103 stalls was more effective. */
1106 /* X86_TUNE_PROMOTE_QI_REGS */
1109 /* X86_TUNE_PROMOTE_HI_REGS */
1112 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1113 m_ATHLON_K8_AMDFAM10
| m_K6_GEODE
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1115 /* X86_TUNE_ADD_ESP_8 */
1116 m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_K6_GEODE
| m_386
1117 | m_486
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1119 /* X86_TUNE_SUB_ESP_4 */
1120 m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1122 /* X86_TUNE_SUB_ESP_8 */
1123 m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_386
| m_486
1124 | m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1126 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1127 for DFmode copies */
1128 ~(m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
1129 | m_GENERIC
| m_GEODE
),
1131 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1132 m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1134 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1135 conflict here in between PPro/Pentium4 based chips that treat 128bit
1136 SSE registers as single units versus K8 based chips that divide SSE
1137 registers to two 64bit halves. This knob promotes all store destinations
1138 to be 128bit to allow register renaming on 128bit SSE units, but usually
1139 results in one extra microop on 64bit SSE units. Experimental results
1140 show that disabling this option on P4 brings over 20% SPECfp regression,
1141 while enabling it on K8 brings roughly 2.4% regression that can be partly
1142 masked by careful scheduling of moves. */
1143 m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
| m_AMDFAM10
,
1145 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1148 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1149 are resolved on SSE register parts instead of whole registers, so we may
1150 maintain just lower part of scalar values in proper format leaving the
1151 upper part undefined. */
1154 /* X86_TUNE_SSE_TYPELESS_STORES */
1155 m_ATHLON_K8_AMDFAM10
,
1157 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1158 m_PPRO
| m_PENT4
| m_NOCONA
,
1160 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1161 m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1163 /* X86_TUNE_PROLOGUE_USING_MOVE */
1164 m_ATHLON_K8
| m_PPRO
| m_CORE2
| m_GENERIC
,
1166 /* X86_TUNE_EPILOGUE_USING_MOVE */
1167 m_ATHLON_K8
| m_PPRO
| m_CORE2
| m_GENERIC
,
1169 /* X86_TUNE_SHIFT1 */
1172 /* X86_TUNE_USE_FFREEP */
1173 m_ATHLON_K8_AMDFAM10
,
1175 /* X86_TUNE_INTER_UNIT_MOVES */
1176 ~(m_ATHLON_K8_AMDFAM10
| m_GENERIC
),
1178 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1179 than 4 branch instructions in the 16 byte window. */
1180 m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1182 /* X86_TUNE_SCHEDULE */
1183 m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_K6_GEODE
| m_PENT
| m_CORE2
| m_GENERIC
,
1185 /* X86_TUNE_USE_BT */
1186 m_ATHLON_K8_AMDFAM10
,
1188 /* X86_TUNE_USE_INCDEC */
1189 ~(m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
),
1191 /* X86_TUNE_PAD_RETURNS */
1192 m_ATHLON_K8_AMDFAM10
| m_CORE2
| m_GENERIC
,
1194 /* X86_TUNE_EXT_80387_CONSTANTS */
1195 m_K6_GEODE
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
,
1197 /* X86_TUNE_SHORTEN_X87_SSE */
1200 /* X86_TUNE_AVOID_VECTOR_DECODE */
1203 /* X86_TUNE_SLOW_IMUL_IMM32_MEM (imul of 32-bit constant and memory is vector
1204 path on AMD machines) */
1205 m_K8
| m_GENERIC64
| m_AMDFAM10
,
1207 /* X86_TUNE_SLOW_IMUL_IMM8 (imul of 8-bit constant is vector path on AMD
1209 m_K8
| m_GENERIC64
| m_AMDFAM10
,
1211 /* X86_TUNE_MOVE_M1_VIA_OR (on pentiums, it is faster to load -1 via OR than
1215 /* X86_TUNE_NOT_UNPAIRABLE (NOT is not pairable on Pentium, while XOR is, but
1216 one byte longer). */
1219 /* X86_TUNE_NOT_VECTORMODE (On AMD K6, NOT is vector decoded with memory
1220 operand that cannot be represented using a modRM byte. The XOR
1221 replacement is long decoded, so this split helps here as well). */
1225 /* Feature tests against the various architecture variations. */
1226 unsigned int ix86_arch_features
[X86_ARCH_LAST
] = {
1227 /* X86_ARCH_CMOVE */
1228 m_PPRO
| m_GEODE
| m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
,
1230 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1233 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1236 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1239 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1243 static const unsigned int x86_accumulate_outgoing_args
1244 = m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
;
1246 static const unsigned int x86_arch_always_fancy_math_387
1247 = m_PENT
| m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_PENT4
1248 | m_NOCONA
| m_CORE2
| m_GENERIC
;
1250 static enum stringop_alg stringop_alg
= no_stringop
;
1252 /* In case the average insn count for single function invocation is
1253 lower than this constant, emit fast (but longer) prologue and
1255 #define FAST_PROLOGUE_INSN_COUNT 20
1257 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1258 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
1259 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
1260 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
1262 /* Array of the smallest class containing reg number REGNO, indexed by
1263 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1265 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
1267 /* ax, dx, cx, bx */
1268 AREG
, DREG
, CREG
, BREG
,
1269 /* si, di, bp, sp */
1270 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
1272 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
1273 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
1276 /* flags, fpsr, fpcr, frame */
1277 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
1278 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1280 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
1282 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1283 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1284 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1288 /* The "default" register map used in 32bit mode. */
1290 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1292 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1293 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1294 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1295 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1296 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1297 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1298 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1301 static int const x86_64_int_parameter_registers
[6] =
1303 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1304 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
1307 static int const x86_64_int_return_registers
[4] =
1309 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1312 /* The "default" register map used in 64bit mode. */
1313 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
1315 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1316 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1317 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1318 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1319 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1320 8,9,10,11,12,13,14,15, /* extended integer registers */
1321 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1324 /* Define the register numbers to be used in Dwarf debugging information.
1325 The SVR4 reference port C compiler uses the following register numbers
1326 in its Dwarf output code:
1327 0 for %eax (gcc regno = 0)
1328 1 for %ecx (gcc regno = 2)
1329 2 for %edx (gcc regno = 1)
1330 3 for %ebx (gcc regno = 3)
1331 4 for %esp (gcc regno = 7)
1332 5 for %ebp (gcc regno = 6)
1333 6 for %esi (gcc regno = 4)
1334 7 for %edi (gcc regno = 5)
1335 The following three DWARF register numbers are never generated by
1336 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1337 believes these numbers have these meanings.
1338 8 for %eip (no gcc equivalent)
1339 9 for %eflags (gcc regno = 17)
1340 10 for %trapno (no gcc equivalent)
1341 It is not at all clear how we should number the FP stack registers
1342 for the x86 architecture. If the version of SDB on x86/svr4 were
1343 a bit less brain dead with respect to floating-point then we would
1344 have a precedent to follow with respect to DWARF register numbers
1345 for x86 FP registers, but the SDB on x86/svr4 is so completely
1346 broken with respect to FP registers that it is hardly worth thinking
1347 of it as something to strive for compatibility with.
1348 The version of x86/svr4 SDB I have at the moment does (partially)
1349 seem to believe that DWARF register number 11 is associated with
1350 the x86 register %st(0), but that's about all. Higher DWARF
1351 register numbers don't seem to be associated with anything in
1352 particular, and even for DWARF regno 11, SDB only seems to under-
1353 stand that it should say that a variable lives in %st(0) (when
1354 asked via an `=' command) if we said it was in DWARF regno 11,
1355 but SDB still prints garbage when asked for the value of the
1356 variable in question (via a `/' command).
1357 (Also note that the labels SDB prints for various FP stack regs
1358 when doing an `x' command are all wrong.)
1359 Note that these problems generally don't affect the native SVR4
1360 C compiler because it doesn't allow the use of -O with -g and
1361 because when it is *not* optimizing, it allocates a memory
1362 location for each floating-point variable, and the memory
1363 location is what gets described in the DWARF AT_location
1364 attribute for the variable in question.
1365 Regardless of the severe mental illness of the x86/svr4 SDB, we
1366 do something sensible here and we use the following DWARF
1367 register numbers. Note that these are all stack-top-relative
1369 11 for %st(0) (gcc regno = 8)
1370 12 for %st(1) (gcc regno = 9)
1371 13 for %st(2) (gcc regno = 10)
1372 14 for %st(3) (gcc regno = 11)
1373 15 for %st(4) (gcc regno = 12)
1374 16 for %st(5) (gcc regno = 13)
1375 17 for %st(6) (gcc regno = 14)
1376 18 for %st(7) (gcc regno = 15)
1378 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1380 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1381 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1382 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1383 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1384 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1385 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1386 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1389 /* Test and compare insns in i386.md store the information needed to
1390 generate branch and scc insns here. */
1392 rtx ix86_compare_op0
= NULL_RTX
;
1393 rtx ix86_compare_op1
= NULL_RTX
;
1394 rtx ix86_compare_emitted
= NULL_RTX
;
1396 /* Size of the register save area. */
1397 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1399 /* Define the structure for the machine field in struct function. */
1401 struct stack_local_entry
GTY(())
1403 unsigned short mode
;
1406 struct stack_local_entry
*next
;
1409 /* Structure describing stack frame layout.
1410 Stack grows downward:
1416 saved frame pointer if frame_pointer_needed
1417 <- HARD_FRAME_POINTER
1422 [va_arg registers] (
1423 > to_allocate <- FRAME_POINTER
1433 HOST_WIDE_INT frame
;
1435 int outgoing_arguments_size
;
1438 HOST_WIDE_INT to_allocate
;
1439 /* The offsets relative to ARG_POINTER. */
1440 HOST_WIDE_INT frame_pointer_offset
;
1441 HOST_WIDE_INT hard_frame_pointer_offset
;
1442 HOST_WIDE_INT stack_pointer_offset
;
1444 /* When save_regs_using_mov is set, emit prologue using
1445 move instead of push instructions. */
1446 bool save_regs_using_mov
;
1449 /* Code model option. */
1450 enum cmodel ix86_cmodel
;
1452 enum asm_dialect ix86_asm_dialect
= ASM_ATT
;
1454 enum tls_dialect ix86_tls_dialect
= TLS_DIALECT_GNU
;
1456 /* Which unit we are generating floating point math for. */
1457 enum fpmath_unit ix86_fpmath
;
1459 /* Which cpu are we scheduling for. */
1460 enum processor_type ix86_tune
;
1462 /* Which instruction set architecture to use. */
1463 enum processor_type ix86_arch
;
1465 /* true if sse prefetch instruction is not NOOP. */
1466 int x86_prefetch_sse
;
1468 /* ix86_regparm_string as a number */
1469 static int ix86_regparm
;
1471 /* -mstackrealign option */
1472 extern int ix86_force_align_arg_pointer
;
1473 static const char ix86_force_align_arg_pointer_string
[] = "force_align_arg_pointer";
1475 /* Preferred alignment for stack boundary in bits. */
1476 unsigned int ix86_preferred_stack_boundary
;
1478 /* Values 1-5: see jump.c */
1479 int ix86_branch_cost
;
1481 /* Variables which are this size or smaller are put in the data/bss
1482 or ldata/lbss sections. */
1484 int ix86_section_threshold
= 65536;
1486 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1487 char internal_label_prefix
[16];
1488 int internal_label_prefix_len
;
1490 static bool ix86_handle_option (size_t, const char *, int);
1491 static void output_pic_addr_const (FILE *, rtx
, int);
1492 static void put_condition_code (enum rtx_code
, enum machine_mode
,
1494 static const char *get_some_local_dynamic_name (void);
1495 static int get_some_local_dynamic_name_1 (rtx
*, void *);
1496 static rtx
ix86_expand_int_compare (enum rtx_code
, rtx
, rtx
);
1497 static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code
, rtx
*,
1499 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1500 static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode
,
1502 static rtx
get_thread_pointer (int);
1503 static rtx
legitimize_tls_address (rtx
, enum tls_model
, int);
1504 static void get_pc_thunk_name (char [32], unsigned int);
1505 static rtx
gen_push (rtx
);
1506 static int ix86_flags_dependent (rtx
, rtx
, enum attr_type
);
1507 static int ix86_agi_dependent (rtx
, rtx
, enum attr_type
);
1508 static struct machine_function
* ix86_init_machine_status (void);
1509 static int ix86_split_to_parts (rtx
, rtx
*, enum machine_mode
);
1510 static int ix86_nsaved_regs (void);
1511 static void ix86_emit_save_regs (void);
1512 static void ix86_emit_save_regs_using_mov (rtx
, HOST_WIDE_INT
);
1513 static void ix86_emit_restore_regs_using_mov (rtx
, HOST_WIDE_INT
, int);
1514 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT
);
1515 static HOST_WIDE_INT
ix86_GOT_alias_set (void);
1516 static void ix86_adjust_counter (rtx
, HOST_WIDE_INT
);
1517 static void ix86_expand_strlensi_unroll_1 (rtx
, rtx
, rtx
);
1518 static int ix86_issue_rate (void);
1519 static int ix86_adjust_cost (rtx
, rtx
, rtx
, int);
1520 static int ia32_multipass_dfa_lookahead (void);
1521 static void ix86_init_mmx_sse_builtins (void);
1522 static rtx
x86_this_parameter (tree
);
1523 static void x86_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
,
1524 HOST_WIDE_INT
, tree
);
1525 static bool x86_can_output_mi_thunk (tree
, HOST_WIDE_INT
, HOST_WIDE_INT
, tree
);
1526 static void x86_file_start (void);
1527 static void ix86_reorg (void);
1528 static bool ix86_expand_carry_flag_compare (enum rtx_code
, rtx
, rtx
, rtx
*);
1529 static tree
ix86_build_builtin_va_list (void);
1530 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*, enum machine_mode
,
1532 static tree
ix86_gimplify_va_arg (tree
, tree
, tree
*, tree
*);
1533 static bool ix86_scalar_mode_supported_p (enum machine_mode
);
1534 static bool ix86_vector_mode_supported_p (enum machine_mode
);
1536 static int ix86_address_cost (rtx
);
1537 static bool ix86_cannot_force_const_mem (rtx
);
1538 static rtx
ix86_delegitimize_address (rtx
);
1540 static void i386_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
1542 struct builtin_description
;
1543 static rtx
ix86_expand_sse_comi (const struct builtin_description
*,
1545 static rtx
ix86_expand_sse_compare (const struct builtin_description
*,
1547 static rtx
ix86_expand_unop1_builtin (enum insn_code
, tree
, rtx
);
1548 static rtx
ix86_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
1549 static rtx
ix86_expand_binop_builtin (enum insn_code
, tree
, rtx
);
1550 static rtx
ix86_expand_store_builtin (enum insn_code
, tree
);
1551 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
1552 static rtx
ix86_expand_fp_compare (enum rtx_code
, rtx
, rtx
, rtx
, rtx
*, rtx
*);
1553 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code
);
1554 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code
);
1555 static int ix86_fp_comparison_sahf_cost (enum rtx_code code
);
1556 static int ix86_fp_comparison_cost (enum rtx_code code
);
1557 static unsigned int ix86_select_alt_pic_regnum (void);
1558 static int ix86_save_reg (unsigned int, int);
1559 static void ix86_compute_frame_layout (struct ix86_frame
*);
1560 static int ix86_comp_type_attributes (tree
, tree
);
1561 static int ix86_function_regparm (tree
, tree
);
1562 const struct attribute_spec ix86_attribute_table
[];
1563 static bool ix86_function_ok_for_sibcall (tree
, tree
);
1564 static tree
ix86_handle_cconv_attribute (tree
*, tree
, tree
, int, bool *);
1565 static int ix86_value_regno (enum machine_mode
, tree
, tree
);
1566 static bool contains_128bit_aligned_vector_p (tree
);
1567 static rtx
ix86_struct_value_rtx (tree
, int);
1568 static bool ix86_ms_bitfield_layout_p (tree
);
1569 static tree
ix86_handle_struct_attribute (tree
*, tree
, tree
, int, bool *);
1570 static int extended_reg_mentioned_1 (rtx
*, void *);
1571 static bool ix86_rtx_costs (rtx
, int, int, int *);
1572 static int min_insn_size (rtx
);
1573 static tree
ix86_md_asm_clobbers (tree outputs
, tree inputs
, tree clobbers
);
1574 static bool ix86_must_pass_in_stack (enum machine_mode mode
, tree type
);
1575 static bool ix86_pass_by_reference (CUMULATIVE_ARGS
*, enum machine_mode
,
1577 static void ix86_init_builtins (void);
1578 static rtx
ix86_expand_builtin (tree
, rtx
, rtx
, enum machine_mode
, int);
1579 static tree
ix86_builtin_vectorized_function (enum built_in_function
, tree
, tree
);
1580 static tree
ix86_builtin_conversion (enum tree_code
, tree
);
1581 static const char *ix86_mangle_fundamental_type (tree
);
1582 static tree
ix86_stack_protect_fail (void);
1583 static rtx
ix86_internal_arg_pointer (void);
1584 static void ix86_dwarf_handle_frame_unspec (const char *, rtx
, int);
1585 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode
,
1588 /* This function is only used on Solaris. */
1589 static void i386_solaris_elf_named_section (const char *, unsigned int, tree
)
1592 /* Register class used for passing given 64bit part of the argument.
1593 These represent classes as documented by the PS ABI, with the exception
1594 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1595 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1597 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1598 whenever possible (upper half does contain padding).
1600 enum x86_64_reg_class
1603 X86_64_INTEGER_CLASS
,
1604 X86_64_INTEGERSI_CLASS
,
1611 X86_64_COMPLEX_X87_CLASS
,
1614 static const char * const x86_64_reg_class_name
[] = {
1615 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1616 "sseup", "x87", "x87up", "cplx87", "no"
1619 #define MAX_CLASSES 4
1621 /* Table of constants used by fldpi, fldln2, etc.... */
1622 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
1623 static bool ext_80387_constants_init
= 0;
1624 static void init_ext_80387_constants (void);
1625 static bool ix86_in_large_data_p (tree
) ATTRIBUTE_UNUSED
;
1626 static void ix86_encode_section_info (tree
, rtx
, int) ATTRIBUTE_UNUSED
;
1627 static void x86_64_elf_unique_section (tree decl
, int reloc
) ATTRIBUTE_UNUSED
;
1628 static section
*x86_64_elf_select_section (tree decl
, int reloc
,
1629 unsigned HOST_WIDE_INT align
)
1632 /* Initialize the GCC target structure. */
1633 #undef TARGET_ATTRIBUTE_TABLE
1634 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1635 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1636 # undef TARGET_MERGE_DECL_ATTRIBUTES
1637 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1640 #undef TARGET_COMP_TYPE_ATTRIBUTES
1641 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1643 #undef TARGET_INIT_BUILTINS
1644 #define TARGET_INIT_BUILTINS ix86_init_builtins
1645 #undef TARGET_EXPAND_BUILTIN
1646 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1648 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1649 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
1650 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
1651 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion
1653 #undef TARGET_ASM_FUNCTION_EPILOGUE
1654 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1656 #undef TARGET_ENCODE_SECTION_INFO
1657 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1658 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1660 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1663 #undef TARGET_ASM_OPEN_PAREN
1664 #define TARGET_ASM_OPEN_PAREN ""
1665 #undef TARGET_ASM_CLOSE_PAREN
1666 #define TARGET_ASM_CLOSE_PAREN ""
1668 #undef TARGET_ASM_ALIGNED_HI_OP
1669 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1670 #undef TARGET_ASM_ALIGNED_SI_OP
1671 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1673 #undef TARGET_ASM_ALIGNED_DI_OP
1674 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1677 #undef TARGET_ASM_UNALIGNED_HI_OP
1678 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1679 #undef TARGET_ASM_UNALIGNED_SI_OP
1680 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1681 #undef TARGET_ASM_UNALIGNED_DI_OP
1682 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1684 #undef TARGET_SCHED_ADJUST_COST
1685 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1686 #undef TARGET_SCHED_ISSUE_RATE
1687 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1688 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1689 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1690 ia32_multipass_dfa_lookahead
1692 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1693 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1696 #undef TARGET_HAVE_TLS
1697 #define TARGET_HAVE_TLS true
1699 #undef TARGET_CANNOT_FORCE_CONST_MEM
1700 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1701 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1702 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1704 #undef TARGET_DELEGITIMIZE_ADDRESS
1705 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1707 #undef TARGET_MS_BITFIELD_LAYOUT_P
1708 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1711 #undef TARGET_BINDS_LOCAL_P
1712 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1715 #undef TARGET_ASM_OUTPUT_MI_THUNK
1716 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1717 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1718 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1720 #undef TARGET_ASM_FILE_START
1721 #define TARGET_ASM_FILE_START x86_file_start
1723 #undef TARGET_DEFAULT_TARGET_FLAGS
1724 #define TARGET_DEFAULT_TARGET_FLAGS \
1726 | TARGET_64BIT_DEFAULT \
1727 | TARGET_SUBTARGET_DEFAULT \
1728 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1730 #undef TARGET_HANDLE_OPTION
1731 #define TARGET_HANDLE_OPTION ix86_handle_option
1733 #undef TARGET_RTX_COSTS
1734 #define TARGET_RTX_COSTS ix86_rtx_costs
1735 #undef TARGET_ADDRESS_COST
1736 #define TARGET_ADDRESS_COST ix86_address_cost
1738 #undef TARGET_FIXED_CONDITION_CODE_REGS
1739 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1740 #undef TARGET_CC_MODES_COMPATIBLE
1741 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1743 #undef TARGET_MACHINE_DEPENDENT_REORG
1744 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1746 #undef TARGET_BUILD_BUILTIN_VA_LIST
1747 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1749 #undef TARGET_MD_ASM_CLOBBERS
1750 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1752 #undef TARGET_PROMOTE_PROTOTYPES
1753 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1754 #undef TARGET_STRUCT_VALUE_RTX
1755 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1756 #undef TARGET_SETUP_INCOMING_VARARGS
1757 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1758 #undef TARGET_MUST_PASS_IN_STACK
1759 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1760 #undef TARGET_PASS_BY_REFERENCE
1761 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1762 #undef TARGET_INTERNAL_ARG_POINTER
1763 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1764 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1765 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1767 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1768 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1770 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1771 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1773 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1774 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1777 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1778 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1781 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1782 #undef TARGET_INSERT_ATTRIBUTES
1783 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1786 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1787 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1789 #undef TARGET_STACK_PROTECT_FAIL
1790 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1792 #undef TARGET_FUNCTION_VALUE
1793 #define TARGET_FUNCTION_VALUE ix86_function_value
/* The one global vector of target hooks for this backend; every
   TARGET_* macro #defined above feeds into TARGET_INITIALIZER.  */
1795 struct gcc_target targetm
= TARGET_INITIALIZER
;
1798 /* The svr4 ABI for the i386 says that records and unions are returned
1800 #ifndef DEFAULT_PCC_STRUCT_RETURN
1801 #define DEFAULT_PCC_STRUCT_RETURN 1
1804 /* Implement TARGET_HANDLE_OPTION. */
1807 ix86_handle_option (size_t code
, const char *arg ATTRIBUTE_UNUSED
, int value
)
1814 target_flags
&= ~MASK_3DNOW_A
;
1815 target_flags_explicit
|= MASK_3DNOW_A
;
1822 target_flags
&= ~(MASK_3DNOW
| MASK_3DNOW_A
);
1823 target_flags_explicit
|= MASK_3DNOW
| MASK_3DNOW_A
;
1830 target_flags
&= ~(MASK_SSE2
| MASK_SSE3
| MASK_SSE4A
);
1831 target_flags_explicit
|= MASK_SSE2
| MASK_SSE3
| MASK_SSE4A
;
1838 target_flags
&= ~(MASK_SSE3
| MASK_SSE4A
);
1839 target_flags_explicit
|= MASK_SSE3
| MASK_SSE4A
;
1846 target_flags
&= ~MASK_SSE4A
;
1847 target_flags_explicit
|= MASK_SSE4A
;
1856 /* Sometimes certain combinations of command options do not make
1857 sense on a particular target machine. You can define a macro
1858 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1859 defined, is executed once just after all the command options have
1862 Don't use this macro to turn on various extra optimizations for
1863 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1866 override_options (void)
1869 int ix86_tune_defaulted
= 0;
1870 unsigned int ix86_arch_mask
, ix86_tune_mask
;
1872 /* Comes from final.c -- no real reason to change it. */
1873 #define MAX_CODE_ALIGN 16
1877 const struct processor_costs
*cost
; /* Processor costs */
1878 const int target_enable
; /* Target flags to enable. */
1879 const int target_disable
; /* Target flags to disable. */
1880 const int align_loop
; /* Default alignments. */
1881 const int align_loop_max_skip
;
1882 const int align_jump
;
1883 const int align_jump_max_skip
;
1884 const int align_func
;
1886 const processor_target_table
[PROCESSOR_max
] =
1888 {&i386_cost
, 0, 0, 4, 3, 4, 3, 4},
1889 {&i486_cost
, 0, 0, 16, 15, 16, 15, 16},
1890 {&pentium_cost
, 0, 0, 16, 7, 16, 7, 16},
1891 {&pentiumpro_cost
, 0, 0, 16, 15, 16, 7, 16},
1892 {&geode_cost
, 0, 0, 0, 0, 0, 0, 0},
1893 {&k6_cost
, 0, 0, 32, 7, 32, 7, 32},
1894 {&athlon_cost
, 0, 0, 16, 7, 16, 7, 16},
1895 {&pentium4_cost
, 0, 0, 0, 0, 0, 0, 0},
1896 {&k8_cost
, 0, 0, 16, 7, 16, 7, 16},
1897 {&nocona_cost
, 0, 0, 0, 0, 0, 0, 0},
1898 {&core2_cost
, 0, 0, 16, 7, 16, 7, 16},
1899 {&generic32_cost
, 0, 0, 16, 7, 16, 7, 16},
1900 {&generic64_cost
, 0, 0, 16, 7, 16, 7, 16},
1901 {&amdfam10_cost
, 0, 0, 32, 7, 32, 7, 32}
1904 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
1907 const char *const name
; /* processor name or nickname. */
1908 const enum processor_type processor
;
1909 const enum pta_flags
1915 PTA_PREFETCH_SSE
= 1 << 4,
1917 PTA_3DNOW_A
= 1 << 6,
1921 PTA_POPCNT
= 1 << 10,
1923 PTA_SSE4A
= 1 << 12,
1924 PTA_NO_SAHF
= 1 << 13
1927 const processor_alias_table
[] =
1929 {"i386", PROCESSOR_I386
, 0},
1930 {"i486", PROCESSOR_I486
, 0},
1931 {"i586", PROCESSOR_PENTIUM
, 0},
1932 {"pentium", PROCESSOR_PENTIUM
, 0},
1933 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
1934 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
1935 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1936 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1937 {"c3-2", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_SSE
},
1938 {"i686", PROCESSOR_PENTIUMPRO
, 0},
1939 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
1940 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
1941 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1942 {"pentium3m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1943 {"pentium-m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
| PTA_SSE2
},
1944 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1945 | PTA_MMX
| PTA_PREFETCH_SSE
},
1946 {"pentium4m", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1947 | PTA_MMX
| PTA_PREFETCH_SSE
},
1948 {"prescott", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
1949 | PTA_MMX
| PTA_PREFETCH_SSE
},
1950 {"nocona", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_64BIT
1951 | PTA_MMX
| PTA_PREFETCH_SSE
1952 | PTA_CX16
| PTA_NO_SAHF
},
1953 {"core2", PROCESSOR_CORE2
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_SSSE3
1954 | PTA_64BIT
| PTA_MMX
1955 | PTA_PREFETCH_SSE
| PTA_CX16
},
1956 {"geode", PROCESSOR_GEODE
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1958 {"k6", PROCESSOR_K6
, PTA_MMX
},
1959 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1960 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1961 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1963 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
1964 | PTA_3DNOW
| PTA_3DNOW_A
},
1965 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1966 | PTA_3DNOW_A
| PTA_SSE
},
1967 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1968 | PTA_3DNOW_A
| PTA_SSE
},
1969 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1970 | PTA_3DNOW_A
| PTA_SSE
},
1971 {"x86-64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_64BIT
1972 | PTA_SSE
| PTA_SSE2
| PTA_NO_SAHF
},
1973 {"k8", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1974 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1975 {"opteron", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1976 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1977 {"athlon64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1978 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1979 {"athlon-fx", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1980 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1981 {"amdfam10", PROCESSOR_AMDFAM10
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1982 | PTA_64BIT
| PTA_3DNOW_A
| PTA_SSE
1983 | PTA_SSE2
| PTA_SSE3
| PTA_POPCNT
1984 | PTA_ABM
| PTA_SSE4A
| PTA_CX16
},
1985 {"generic32", PROCESSOR_GENERIC32
, 0 /* flags are only used for -march switch. */ },
1986 {"generic64", PROCESSOR_GENERIC64
, PTA_64BIT
/* flags are only used for -march switch. */ },
1989 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
1991 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1992 SUBTARGET_OVERRIDE_OPTIONS
;
1995 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1996 SUBSUBTARGET_OVERRIDE_OPTIONS
;
1999 /* -fPIC is the default for x86_64. */
2000 if (TARGET_MACHO
&& TARGET_64BIT
)
2003 /* Set the default values for switches whose default depends on TARGET_64BIT
2004 in case they weren't overwritten by command line options. */
2007 /* Mach-O doesn't support omitting the frame pointer for now. */
2008 if (flag_omit_frame_pointer
== 2)
2009 flag_omit_frame_pointer
= (TARGET_MACHO
? 0 : 1);
2010 if (flag_asynchronous_unwind_tables
== 2)
2011 flag_asynchronous_unwind_tables
= 1;
2012 if (flag_pcc_struct_return
== 2)
2013 flag_pcc_struct_return
= 0;
2017 if (flag_omit_frame_pointer
== 2)
2018 flag_omit_frame_pointer
= 0;
2019 if (flag_asynchronous_unwind_tables
== 2)
2020 flag_asynchronous_unwind_tables
= 0;
2021 if (flag_pcc_struct_return
== 2)
2022 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
2025 /* Need to check -mtune=generic first. */
2026 if (ix86_tune_string
)
2028 if (!strcmp (ix86_tune_string
, "generic")
2029 || !strcmp (ix86_tune_string
, "i686")
2030 /* As special support for cross compilers we read -mtune=native
2031 as -mtune=generic. With native compilers we won't see the
2032 -mtune=native, as it was changed by the driver. */
2033 || !strcmp (ix86_tune_string
, "native"))
2036 ix86_tune_string
= "generic64";
2038 ix86_tune_string
= "generic32";
2040 else if (!strncmp (ix86_tune_string
, "generic", 7))
2041 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
2045 if (ix86_arch_string
)
2046 ix86_tune_string
= ix86_arch_string
;
2047 if (!ix86_tune_string
)
2049 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
2050 ix86_tune_defaulted
= 1;
2053 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2054 need to use a sensible tune option. */
2055 if (!strcmp (ix86_tune_string
, "generic")
2056 || !strcmp (ix86_tune_string
, "x86-64")
2057 || !strcmp (ix86_tune_string
, "i686"))
2060 ix86_tune_string
= "generic64";
2062 ix86_tune_string
= "generic32";
2065 if (ix86_stringop_string
)
2067 if (!strcmp (ix86_stringop_string
, "rep_byte"))
2068 stringop_alg
= rep_prefix_1_byte
;
2069 else if (!strcmp (ix86_stringop_string
, "libcall"))
2070 stringop_alg
= libcall
;
2071 else if (!strcmp (ix86_stringop_string
, "rep_4byte"))
2072 stringop_alg
= rep_prefix_4_byte
;
2073 else if (!strcmp (ix86_stringop_string
, "rep_8byte"))
2074 stringop_alg
= rep_prefix_8_byte
;
2075 else if (!strcmp (ix86_stringop_string
, "byte_loop"))
2076 stringop_alg
= loop_1_byte
;
2077 else if (!strcmp (ix86_stringop_string
, "loop"))
2078 stringop_alg
= loop
;
2079 else if (!strcmp (ix86_stringop_string
, "unrolled_loop"))
2080 stringop_alg
= unrolled_loop
;
2082 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string
);
2084 if (!strcmp (ix86_tune_string
, "x86-64"))
2085 warning (OPT_Wdeprecated
, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2086 "-mtune=generic instead as appropriate.");
2088 if (!ix86_arch_string
)
2089 ix86_arch_string
= TARGET_64BIT
? "x86-64" : "i386";
2090 if (!strcmp (ix86_arch_string
, "generic"))
2091 error ("generic CPU can be used only for -mtune= switch");
2092 if (!strncmp (ix86_arch_string
, "generic", 7))
2093 error ("bad value (%s) for -march= switch", ix86_arch_string
);
2095 if (ix86_cmodel_string
!= 0)
2097 if (!strcmp (ix86_cmodel_string
, "small"))
2098 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
2099 else if (!strcmp (ix86_cmodel_string
, "medium"))
2100 ix86_cmodel
= flag_pic
? CM_MEDIUM_PIC
: CM_MEDIUM
;
2101 else if (!strcmp (ix86_cmodel_string
, "large"))
2102 ix86_cmodel
= flag_pic
? CM_LARGE_PIC
: CM_LARGE
;
2104 error ("code model %s does not support PIC mode", ix86_cmodel_string
);
2105 else if (!strcmp (ix86_cmodel_string
, "32"))
2106 ix86_cmodel
= CM_32
;
2107 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
2108 ix86_cmodel
= CM_KERNEL
;
2110 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
2114 ix86_cmodel
= CM_32
;
2116 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
2118 if (ix86_asm_string
!= 0)
2121 && !strcmp (ix86_asm_string
, "intel"))
2122 ix86_asm_dialect
= ASM_INTEL
;
2123 else if (!strcmp (ix86_asm_string
, "att"))
2124 ix86_asm_dialect
= ASM_ATT
;
2126 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
2128 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
2129 error ("code model %qs not supported in the %s bit mode",
2130 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
2131 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
2132 sorry ("%i-bit mode not compiled in",
2133 (target_flags
& MASK_64BIT
) ? 64 : 32);
2135 for (i
= 0; i
< pta_size
; i
++)
2136 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
2138 ix86_arch
= processor_alias_table
[i
].processor
;
2139 /* Default cpu tuning to the architecture. */
2140 ix86_tune
= ix86_arch
;
2141 if (processor_alias_table
[i
].flags
& PTA_MMX
2142 && !(target_flags_explicit
& MASK_MMX
))
2143 target_flags
|= MASK_MMX
;
2144 if (processor_alias_table
[i
].flags
& PTA_3DNOW
2145 && !(target_flags_explicit
& MASK_3DNOW
))
2146 target_flags
|= MASK_3DNOW
;
2147 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
2148 && !(target_flags_explicit
& MASK_3DNOW_A
))
2149 target_flags
|= MASK_3DNOW_A
;
2150 if (processor_alias_table
[i
].flags
& PTA_SSE
2151 && !(target_flags_explicit
& MASK_SSE
))
2152 target_flags
|= MASK_SSE
;
2153 if (processor_alias_table
[i
].flags
& PTA_SSE2
2154 && !(target_flags_explicit
& MASK_SSE2
))
2155 target_flags
|= MASK_SSE2
;
2156 if (processor_alias_table
[i
].flags
& PTA_SSE3
2157 && !(target_flags_explicit
& MASK_SSE3
))
2158 target_flags
|= MASK_SSE3
;
2159 if (processor_alias_table
[i
].flags
& PTA_SSSE3
2160 && !(target_flags_explicit
& MASK_SSSE3
))
2161 target_flags
|= MASK_SSSE3
;
2162 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
2163 x86_prefetch_sse
= true;
2164 if (processor_alias_table
[i
].flags
& PTA_CX16
)
2165 x86_cmpxchg16b
= true;
2166 if (processor_alias_table
[i
].flags
& PTA_POPCNT
2167 && !(target_flags_explicit
& MASK_POPCNT
))
2168 target_flags
|= MASK_POPCNT
;
2169 if (processor_alias_table
[i
].flags
& PTA_ABM
2170 && !(target_flags_explicit
& MASK_ABM
))
2171 target_flags
|= MASK_ABM
;
2172 if (processor_alias_table
[i
].flags
& PTA_SSE4A
2173 && !(target_flags_explicit
& MASK_SSE4A
))
2174 target_flags
|= MASK_SSE4A
;
2175 if (!(TARGET_64BIT
&& (processor_alias_table
[i
].flags
& PTA_NO_SAHF
)))
2177 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
2178 error ("CPU you selected does not support x86-64 "
2184 error ("bad value (%s) for -march= switch", ix86_arch_string
);
2186 ix86_arch_mask
= 1u << ix86_arch
;
2187 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
2188 ix86_arch_features
[i
] &= ix86_arch_mask
;
2190 for (i
= 0; i
< pta_size
; i
++)
2191 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
2193 ix86_tune
= processor_alias_table
[i
].processor
;
2194 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
2196 if (ix86_tune_defaulted
)
2198 ix86_tune_string
= "x86-64";
2199 for (i
= 0; i
< pta_size
; i
++)
2200 if (! strcmp (ix86_tune_string
,
2201 processor_alias_table
[i
].name
))
2203 ix86_tune
= processor_alias_table
[i
].processor
;
2206 error ("CPU you selected does not support x86-64 "
2209 /* Intel CPUs have always interpreted SSE prefetch instructions as
2210 NOPs; so, we can enable SSE prefetch instructions even when
2211 -mtune (rather than -march) points us to a processor that has them.
2212 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2213 higher processors. */
2214 if (TARGET_CMOVE
&& (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
))
2215 x86_prefetch_sse
= true;
2219 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
2221 ix86_tune_mask
= 1u << ix86_tune
;
2222 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
2223 ix86_tune_features
[i
] &= ix86_tune_mask
;
2226 ix86_cost
= &size_cost
;
2228 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
2229 target_flags
|= processor_target_table
[ix86_tune
].target_enable
;
2230 target_flags
&= ~processor_target_table
[ix86_tune
].target_disable
;
2232 /* Arrange to set up i386_stack_locals for all functions. */
2233 init_machine_status
= ix86_init_machine_status
;
2235 /* Validate -mregparm= value. */
2236 if (ix86_regparm_string
)
2238 i
= atoi (ix86_regparm_string
);
2239 if (i
< 0 || i
> REGPARM_MAX
)
2240 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
2246 ix86_regparm
= REGPARM_MAX
;
2248 /* If the user has provided any of the -malign-* options,
2249 warn and use that value only if -falign-* is not set.
2250 Remove this code in GCC 3.2 or later. */
2251 if (ix86_align_loops_string
)
2253 warning (0, "-malign-loops is obsolete, use -falign-loops");
2254 if (align_loops
== 0)
2256 i
= atoi (ix86_align_loops_string
);
2257 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2258 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2260 align_loops
= 1 << i
;
2264 if (ix86_align_jumps_string
)
2266 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2267 if (align_jumps
== 0)
2269 i
= atoi (ix86_align_jumps_string
);
2270 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2271 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2273 align_jumps
= 1 << i
;
2277 if (ix86_align_funcs_string
)
2279 warning (0, "-malign-functions is obsolete, use -falign-functions");
2280 if (align_functions
== 0)
2282 i
= atoi (ix86_align_funcs_string
);
2283 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2284 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2286 align_functions
= 1 << i
;
2290 /* Default align_* from the processor table. */
2291 if (align_loops
== 0)
2293 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
2294 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
2296 if (align_jumps
== 0)
2298 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
2299 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
2301 if (align_functions
== 0)
2303 align_functions
= processor_target_table
[ix86_tune
].align_func
;
2306 /* Validate -mbranch-cost= value, or provide default. */
2307 ix86_branch_cost
= ix86_cost
->branch_cost
;
2308 if (ix86_branch_cost_string
)
2310 i
= atoi (ix86_branch_cost_string
);
2312 error ("-mbranch-cost=%d is not between 0 and 5", i
);
2314 ix86_branch_cost
= i
;
2316 if (ix86_section_threshold_string
)
2318 i
= atoi (ix86_section_threshold_string
);
2320 error ("-mlarge-data-threshold=%d is negative", i
);
2322 ix86_section_threshold
= i
;
2325 if (ix86_tls_dialect_string
)
2327 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
2328 ix86_tls_dialect
= TLS_DIALECT_GNU
;
2329 else if (strcmp (ix86_tls_dialect_string
, "gnu2") == 0)
2330 ix86_tls_dialect
= TLS_DIALECT_GNU2
;
2331 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
2332 ix86_tls_dialect
= TLS_DIALECT_SUN
;
2334 error ("bad value (%s) for -mtls-dialect= switch",
2335 ix86_tls_dialect_string
);
2338 /* Keep nonleaf frame pointers. */
2339 if (flag_omit_frame_pointer
)
2340 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
2341 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
2342 flag_omit_frame_pointer
= 1;
2344 /* If we're doing fast math, we don't care about comparison order
2345 wrt NaNs. This lets us use a shorter comparison sequence. */
2346 if (flag_finite_math_only
)
2347 target_flags
&= ~MASK_IEEE_FP
;
2349 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2350 since the insns won't need emulation. */
2351 if (x86_arch_always_fancy_math_387
& ix86_arch_mask
)
2352 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
2354 /* Likewise, if the target doesn't have a 387, or we've specified
2355 software floating point, don't use 387 inline intrinsics. */
2357 target_flags
|= MASK_NO_FANCY_MATH_387
;
2359 /* Turn on SSE3 builtins for -mssse3. */
2361 target_flags
|= MASK_SSE3
;
2363 /* Turn on SSE3 builtins for -msse4a. */
2365 target_flags
|= MASK_SSE3
;
2367 /* Turn on SSE2 builtins for -msse3. */
2369 target_flags
|= MASK_SSE2
;
2371 /* Turn on SSE builtins for -msse2. */
2373 target_flags
|= MASK_SSE
;
2375 /* Turn on MMX builtins for -msse. */
2378 target_flags
|= MASK_MMX
& ~target_flags_explicit
;
2379 x86_prefetch_sse
= true;
2382 /* Turn on MMX builtins for 3Dnow. */
2384 target_flags
|= MASK_MMX
;
2386 /* Turn on POPCNT builtins for -mabm. */
2388 target_flags
|= MASK_POPCNT
;
2392 if (TARGET_ALIGN_DOUBLE
)
2393 error ("-malign-double makes no sense in the 64bit mode");
2395 error ("-mrtd calling convention not supported in the 64bit mode");
2397 /* Enable by default the SSE and MMX builtins. Do allow the user to
2398 explicitly disable any of these. In particular, disabling SSE and
2399 MMX for kernel code is extremely useful. */
2401 |= ((MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
)
2402 & ~target_flags_explicit
);
2406 /* i386 ABI does not specify red zone.  It still makes sense to use it
2407 when the programmer takes care to keep the stack from being destroyed.  */
2408 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
2409 target_flags
|= MASK_NO_RED_ZONE
;
2412 /* Validate -mpreferred-stack-boundary= value, or provide default.
2413 The default of 128 bits is for Pentium III's SSE __m128. We can't
2414 change it because of optimize_size. Otherwise, we can't mix object
2415 files compiled with -Os and -On. */
2416 ix86_preferred_stack_boundary
= 128;
2417 if (ix86_preferred_stack_boundary_string
)
2419 i
= atoi (ix86_preferred_stack_boundary_string
);
2420 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
2421 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
2422 TARGET_64BIT
? 4 : 2);
2424 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
2427 /* Accept -msseregparm only if at least SSE support is enabled. */
2428 if (TARGET_SSEREGPARM
2430 error ("-msseregparm used without SSE enabled");
2432 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
2433 if (ix86_fpmath_string
!= 0)
2435 if (! strcmp (ix86_fpmath_string
, "387"))
2436 ix86_fpmath
= FPMATH_387
;
2437 else if (! strcmp (ix86_fpmath_string
, "sse"))
2441 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2442 ix86_fpmath
= FPMATH_387
;
2445 ix86_fpmath
= FPMATH_SSE
;
2447 else if (! strcmp (ix86_fpmath_string
, "387,sse")
2448 || ! strcmp (ix86_fpmath_string
, "sse,387"))
2452 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2453 ix86_fpmath
= FPMATH_387
;
2455 else if (!TARGET_80387
)
2457 warning (0, "387 instruction set disabled, using SSE arithmetics");
2458 ix86_fpmath
= FPMATH_SSE
;
2461 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
2464 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
2467 /* If the i387 is disabled, then do not return values in it. */
2469 target_flags
&= ~MASK_FLOAT_RETURNS
;
2471 if ((x86_accumulate_outgoing_args
& ix86_tune_mask
)
2472 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2474 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2476 /* ??? Unwind info is not correct around the CFG unless either a frame
2477 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2478 unwind info generation to be aware of the CFG and propagating states
2480 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
2481 || flag_exceptions
|| flag_non_call_exceptions
)
2482 && flag_omit_frame_pointer
2483 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
2485 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2486 warning (0, "unwind tables currently require either a frame pointer "
2487 "or -maccumulate-outgoing-args for correctness");
2488 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2491 /* For sane SSE instruction set generation we need fcomi instruction.
2492 It is safe to enable all CMOVE instructions. */
2496 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2499 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
2500 p
= strchr (internal_label_prefix
, 'X');
2501 internal_label_prefix_len
= p
- internal_label_prefix
;
2505 /* When scheduling description is not available, disable scheduler pass
2506 so it won't slow down the compilation and make x87 code slower. */
2507 if (!TARGET_SCHEDULE
)
2508 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
2510 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES
))
2511 set_param_value ("simultaneous-prefetches",
2512 ix86_cost
->simultaneous_prefetches
);
2513 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE
))
2514 set_param_value ("l1-cache-line-size", ix86_cost
->prefetch_block
);
2517 /* Switch to the appropriate section for output of DECL.
2518 DECL is either a `VAR_DECL' node or a constant of some sort.
2519 RELOC indicates whether forming the initial value of DECL requires
2520 link-time relocations. */
2523 x86_64_elf_select_section (tree decl
, int reloc
,
2524 unsigned HOST_WIDE_INT align
)
2526 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2527 && ix86_in_large_data_p (decl
))
2529 const char *sname
= NULL
;
2530 unsigned int flags
= SECTION_WRITE
;
2531 switch (categorize_decl_for_section (decl
, reloc
))
2536 case SECCAT_DATA_REL
:
2537 sname
= ".ldata.rel";
2539 case SECCAT_DATA_REL_LOCAL
:
2540 sname
= ".ldata.rel.local";
2542 case SECCAT_DATA_REL_RO
:
2543 sname
= ".ldata.rel.ro";
2545 case SECCAT_DATA_REL_RO_LOCAL
:
2546 sname
= ".ldata.rel.ro.local";
2550 flags
|= SECTION_BSS
;
2553 case SECCAT_RODATA_MERGE_STR
:
2554 case SECCAT_RODATA_MERGE_STR_INIT
:
2555 case SECCAT_RODATA_MERGE_CONST
:
2559 case SECCAT_SRODATA
:
2566 /* We don't split these for medium model. Place them into
2567 default sections and hope for best. */
2572 /* We might get called with string constants, but get_named_section
2573 doesn't like them as they are not DECLs. Also, we need to set
2574 flags in that case. */
2576 return get_section (sname
, flags
, NULL
);
2577 return get_named_section (decl
, sname
, reloc
);
2580 return default_elf_select_section (decl
, reloc
, align
);
2583 /* Build up a unique section name, expressed as a
2584 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2585 RELOC indicates whether the initial value of EXP requires
2586 link-time relocations. */
2589 x86_64_elf_unique_section (tree decl
, int reloc
)
2591 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2592 && ix86_in_large_data_p (decl
))
2594 const char *prefix
= NULL
;
2595 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2596 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
2598 switch (categorize_decl_for_section (decl
, reloc
))
2601 case SECCAT_DATA_REL
:
2602 case SECCAT_DATA_REL_LOCAL
:
2603 case SECCAT_DATA_REL_RO
:
2604 case SECCAT_DATA_REL_RO_LOCAL
:
2605 prefix
= one_only
? ".gnu.linkonce.ld." : ".ldata.";
2608 prefix
= one_only
? ".gnu.linkonce.lb." : ".lbss.";
2611 case SECCAT_RODATA_MERGE_STR
:
2612 case SECCAT_RODATA_MERGE_STR_INIT
:
2613 case SECCAT_RODATA_MERGE_CONST
:
2614 prefix
= one_only
? ".gnu.linkonce.lr." : ".lrodata.";
2616 case SECCAT_SRODATA
:
2623 /* We don't split these for medium model. Place them into
2624 default sections and hope for best. */
2632 plen
= strlen (prefix
);
2634 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
2635 name
= targetm
.strip_name_encoding (name
);
2636 nlen
= strlen (name
);
2638 string
= alloca (nlen
+ plen
+ 1);
2639 memcpy (string
, prefix
, plen
);
2640 memcpy (string
+ plen
, name
, nlen
+ 1);
2642 DECL_SECTION_NAME (decl
) = build_string (nlen
+ plen
, string
);
2646 default_unique_section (decl
, reloc
);
2649 #ifdef COMMON_ASM_OP
2650 /* This says how to output assembler code to declare an
2651 uninitialized external linkage data object.
2653 For medium model x86-64 we need to use .largecomm opcode for
2656 x86_elf_aligned_common (FILE *file
,
2657 const char *name
, unsigned HOST_WIDE_INT size
,
2660 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2661 && size
> (unsigned int)ix86_section_threshold
)
2662 fprintf (file
, ".largecomm\t");
2664 fprintf (file
, "%s", COMMON_ASM_OP
);
2665 assemble_name (file
, name
);
2666 fprintf (file
, ","HOST_WIDE_INT_PRINT_UNSIGNED
",%u\n",
2667 size
, align
/ BITS_PER_UNIT
);
2670 /* Utility function for targets to use in implementing
2671 ASM_OUTPUT_ALIGNED_BSS. */
2674 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
2675 const char *name
, unsigned HOST_WIDE_INT size
,
2678 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2679 && size
> (unsigned int)ix86_section_threshold
)
2680 switch_to_section (get_named_section (decl
, ".lbss", 0));
2682 switch_to_section (bss_section
);
2683 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
2684 #ifdef ASM_DECLARE_OBJECT_NAME
2685 last_assemble_variable_decl
= decl
;
2686 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
2688 /* Standard thing is just output label for the object. */
2689 ASM_OUTPUT_LABEL (file
, name
);
2690 #endif /* ASM_DECLARE_OBJECT_NAME */
2691 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
2695 optimization_options (int level
, int size ATTRIBUTE_UNUSED
)
2697 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2698 make the problem with not enough registers even worse. */
2699 #ifdef INSN_SCHEDULING
2701 flag_schedule_insns
= 0;
2705 /* The Darwin libraries never set errno, so we might as well
2706 avoid calling them when that's the only reason we would. */
2707 flag_errno_math
= 0;
2709 /* The default values of these switches depend on the TARGET_64BIT
2710 that is not known at this moment. Mark these values with 2 and
2711 let user the to override these. In case there is no command line option
2712 specifying them, we will set the defaults in override_options. */
2714 flag_omit_frame_pointer
= 2;
2715 flag_pcc_struct_return
= 2;
2716 flag_asynchronous_unwind_tables
= 2;
2717 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2718 SUBTARGET_OPTIMIZATION_OPTIONS
;
2722 /* Table of valid machine attributes. */
2723 const struct attribute_spec ix86_attribute_table
[] =
2725 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2726 /* Stdcall attribute says callee is responsible for popping arguments
2727 if they are not variable. */
2728 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2729 /* Fastcall attribute says callee is responsible for popping arguments
2730 if they are not variable. */
2731 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2732 /* Cdecl attribute says the callee is a normal C declaration */
2733 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2734 /* Regparm attribute specifies how many integer arguments are to be
2735 passed in registers. */
2736 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
},
2737 /* Sseregparm attribute says we are using x86_64 calling conventions
2738 for FP arguments. */
2739 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2740 /* force_align_arg_pointer says this function realigns the stack at entry. */
2741 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
2742 false, true, true, ix86_handle_cconv_attribute
},
2743 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2744 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
},
2745 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
},
2746 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
2748 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
2749 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
2750 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2751 SUBTARGET_ATTRIBUTE_TABLE
,
2753 { NULL
, 0, 0, false, false, false, NULL
}
2756 /* Decide whether we can make a sibling call to a function. DECL is the
2757 declaration of the function being targeted by the call and EXP is the
2758 CALL_EXPR representing the call. */
2761 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
2766 /* If we are generating position-independent code, we cannot sibcall
2767 optimize any indirect call, or a direct call to a global function,
2768 as the PLT requires %ebx be live. */
2769 if (!TARGET_64BIT
&& flag_pic
&& (!decl
|| !targetm
.binds_local_p (decl
)))
2776 func
= TREE_TYPE (CALL_EXPR_FN (exp
));
2777 if (POINTER_TYPE_P (func
))
2778 func
= TREE_TYPE (func
);
2781 /* Check that the return value locations are the same. Like
2782 if we are returning floats on the 80387 register stack, we cannot
2783 make a sibcall from a function that doesn't return a float to a
2784 function that does or, conversely, from a function that does return
2785 a float to a function that doesn't; the necessary stack adjustment
2786 would not be executed. This is also the place we notice
2787 differences in the return value ABI. Note that it is ok for one
2788 of the functions to have void return type as long as the return
2789 value of the other is passed in a register. */
2790 a
= ix86_function_value (TREE_TYPE (exp
), func
, false);
2791 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
2793 if (STACK_REG_P (a
) || STACK_REG_P (b
))
2795 if (!rtx_equal_p (a
, b
))
2798 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
2800 else if (!rtx_equal_p (a
, b
))
2803 /* If this call is indirect, we'll need to be able to use a call-clobbered
2804 register for the address of the target function. Make sure that all
2805 such registers are not used for passing parameters. */
2806 if (!decl
&& !TARGET_64BIT
)
2810 /* We're looking at the CALL_EXPR, we need the type of the function. */
2811 type
= CALL_EXPR_FN (exp
); /* pointer expression */
2812 type
= TREE_TYPE (type
); /* pointer type */
2813 type
= TREE_TYPE (type
); /* function type */
2815 if (ix86_function_regparm (type
, NULL
) >= 3)
2817 /* ??? Need to count the actual number of registers to be used,
2818 not the possible number of registers. Fix later. */
2823 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2824 /* Dllimport'd functions are also called indirectly. */
2825 if (decl
&& DECL_DLLIMPORT_P (decl
)
2826 && ix86_function_regparm (TREE_TYPE (decl
), NULL
) >= 3)
2830 /* If we forced aligned the stack, then sibcalling would unalign the
2831 stack, which may break the called function. */
2832 if (cfun
->machine
->force_align_arg_pointer
)
2835 /* Otherwise okay. That also includes certain types of indirect calls. */
2839 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2840 calling convention attributes;
2841 arguments as in struct attribute_spec.handler. */
2844 ix86_handle_cconv_attribute (tree
*node
, tree name
,
2846 int flags ATTRIBUTE_UNUSED
,
2849 if (TREE_CODE (*node
) != FUNCTION_TYPE
2850 && TREE_CODE (*node
) != METHOD_TYPE
2851 && TREE_CODE (*node
) != FIELD_DECL
2852 && TREE_CODE (*node
) != TYPE_DECL
)
2854 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
2855 IDENTIFIER_POINTER (name
));
2856 *no_add_attrs
= true;
2860 /* Can combine regparm with all attributes but fastcall. */
2861 if (is_attribute_p ("regparm", name
))
2865 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2867 error ("fastcall and regparm attributes are not compatible");
2870 cst
= TREE_VALUE (args
);
2871 if (TREE_CODE (cst
) != INTEGER_CST
)
2873 warning (OPT_Wattributes
,
2874 "%qs attribute requires an integer constant argument",
2875 IDENTIFIER_POINTER (name
));
2876 *no_add_attrs
= true;
2878 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
2880 warning (OPT_Wattributes
, "argument to %qs attribute larger than %d",
2881 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
2882 *no_add_attrs
= true;
2886 && lookup_attribute (ix86_force_align_arg_pointer_string
,
2887 TYPE_ATTRIBUTES (*node
))
2888 && compare_tree_int (cst
, REGPARM_MAX
-1))
2890 error ("%s functions limited to %d register parameters",
2891 ix86_force_align_arg_pointer_string
, REGPARM_MAX
-1);
2899 warning (OPT_Wattributes
, "%qs attribute ignored",
2900 IDENTIFIER_POINTER (name
));
2901 *no_add_attrs
= true;
2905 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2906 if (is_attribute_p ("fastcall", name
))
2908 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2910 error ("fastcall and cdecl attributes are not compatible");
2912 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2914 error ("fastcall and stdcall attributes are not compatible");
2916 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
2918 error ("fastcall and regparm attributes are not compatible");
2922 /* Can combine stdcall with fastcall (redundant), regparm and
2924 else if (is_attribute_p ("stdcall", name
))
2926 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2928 error ("stdcall and cdecl attributes are not compatible");
2930 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2932 error ("stdcall and fastcall attributes are not compatible");
2936 /* Can combine cdecl with regparm and sseregparm. */
2937 else if (is_attribute_p ("cdecl", name
))
2939 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2941 error ("stdcall and cdecl attributes are not compatible");
2943 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2945 error ("fastcall and cdecl attributes are not compatible");
2949 /* Can combine sseregparm with all attributes. */
2954 /* Return 0 if the attributes for two types are incompatible, 1 if they
2955 are compatible, and 2 if they are nearly compatible (which causes a
2956 warning to be generated). */
2959 ix86_comp_type_attributes (tree type1
, tree type2
)
2961 /* Check for mismatch of non-default calling convention. */
2962 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
2964 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
2967 /* Check for mismatched fastcall/regparm types. */
2968 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1
))
2969 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2
)))
2970 || (ix86_function_regparm (type1
, NULL
)
2971 != ix86_function_regparm (type2
, NULL
)))
2974 /* Check for mismatched sseregparm types. */
2975 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1
))
2976 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2
)))
2979 /* Check for mismatched return types (cdecl vs stdcall). */
2980 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
2981 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
2987 /* Return the regparm value for a function with the indicated TYPE and DECL.
2988 DECL may be NULL when calling function indirectly
2989 or considering a libcall. */
2992 ix86_function_regparm (tree type
, tree decl
)
2995 int regparm
= ix86_regparm
;
2996 bool user_convention
= false;
3000 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
3003 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
3004 user_convention
= true;
3007 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
3010 user_convention
= true;
3013 /* Use register calling convention for local functions when possible. */
3014 if (!TARGET_64BIT
&& !user_convention
&& decl
3015 && flag_unit_at_a_time
&& !profile_flag
)
3017 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
3020 int local_regparm
, globals
= 0, regno
;
3022 /* Make sure no regparm register is taken by a global register
3024 for (local_regparm
= 0; local_regparm
< 3; local_regparm
++)
3025 if (global_regs
[local_regparm
])
3027 /* We can't use regparm(3) for nested functions as these use
3028 static chain pointer in third argument. */
3029 if (local_regparm
== 3
3030 && decl_function_context (decl
)
3031 && !DECL_NO_STATIC_CHAIN (decl
))
3033 /* If the function realigns its stackpointer, the
3034 prologue will clobber %ecx. If we've already
3035 generated code for the callee, the callee
3036 DECL_STRUCT_FUNCTION is gone, so we fall back to
3037 scanning the attributes for the self-realigning
3039 if ((DECL_STRUCT_FUNCTION (decl
)
3040 && DECL_STRUCT_FUNCTION (decl
)->machine
->force_align_arg_pointer
)
3041 || (!DECL_STRUCT_FUNCTION (decl
)
3042 && lookup_attribute (ix86_force_align_arg_pointer_string
,
3043 TYPE_ATTRIBUTES (TREE_TYPE (decl
)))))
3045 /* Each global register variable increases register preassure,
3046 so the more global reg vars there are, the smaller regparm
3047 optimization use, unless requested by the user explicitly. */
3048 for (regno
= 0; regno
< 6; regno
++)
3049 if (global_regs
[regno
])
3052 = globals
< local_regparm
? local_regparm
- globals
: 0;
3054 if (local_regparm
> regparm
)
3055 regparm
= local_regparm
;
3062 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3063 DFmode (2) arguments in SSE registers for a function with the
3064 indicated TYPE and DECL. DECL may be NULL when calling function
3065 indirectly or considering a libcall. Otherwise return 0. */
3068 ix86_function_sseregparm (tree type
, tree decl
)
3070 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3071 by the sseregparm attribute. */
3072 if (TARGET_SSEREGPARM
3074 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
3079 error ("Calling %qD with attribute sseregparm without "
3080 "SSE/SSE2 enabled", decl
);
3082 error ("Calling %qT with attribute sseregparm without "
3083 "SSE/SSE2 enabled", type
);
3090 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3091 (and DFmode for SSE2) arguments in SSE registers,
3092 even for 32-bit targets. */
3093 if (!TARGET_64BIT
&& decl
3094 && TARGET_SSE_MATH
&& flag_unit_at_a_time
&& !profile_flag
)
3096 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
3098 return TARGET_SSE2
? 2 : 1;
3104 /* Return true if EAX is live at the start of the function. Used by
3105 ix86_expand_prologue to determine if we need special help before
3106 calling allocate_stack_worker. */
3109 ix86_eax_live_at_start_p (void)
3111 /* Cheat. Don't bother working forward from ix86_function_regparm
3112 to the function type to whether an actual argument is located in
3113 eax. Instead just look at cfg info, which is still close enough
3114 to correct at this point. This gives false positives for broken
3115 functions that might use uninitialized data that happens to be
3116 allocated in eax, but who cares? */
3117 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR
->il
.rtl
->global_live_at_end
, 0);
3120 /* Value is the number of bytes of arguments automatically
3121 popped when returning from a subroutine call.
3122 FUNDECL is the declaration node of the function (as a tree),
3123 FUNTYPE is the data type of the function (as a tree),
3124 or for a library call it is an identifier node for the subroutine name.
3125 SIZE is the number of bytes of arguments passed on the stack.
3127 On the 80386, the RTD insn may be used to pop them if the number
3128 of args is fixed, but if the number is variable then the caller
3129 must pop them all. RTD can't be used for library calls now
3130 because the library is compiled with the Unix compiler.
3131 Use of RTD is a selectable option, since it is incompatible with
3132 standard Unix calling sequences. If the option is not selected,
3133 the caller must always pop the args.
3135 The attribute stdcall is equivalent to RTD on a per module basis. */
3138 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
3140 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
3142 /* Cdecl functions override -mrtd, and never pop the stack. */
3143 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
3145 /* Stdcall and fastcall functions will pop the stack if not
3147 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
))
3148 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype
)))
3152 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
3153 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
3154 == void_type_node
)))
3158 /* Lose any fake structure return argument if it is passed on the stack. */
3159 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
3161 && !KEEP_AGGREGATE_RETURN_POINTER
)
3163 int nregs
= ix86_function_regparm (funtype
, fundecl
);
3166 return GET_MODE_SIZE (Pmode
);
3172 /* Argument support functions. */
3174 /* Return true when register may be used to pass function parameters. */
3176 ix86_function_arg_regno_p (int regno
)
3182 return (regno
< REGPARM_MAX
3183 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
3185 return (regno
< REGPARM_MAX
3186 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
3187 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
3188 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
3189 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
3194 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
3199 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
3200 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
3203 /* RAX is used as hidden argument to va_arg functions. */
3206 for (i
= 0; i
< REGPARM_MAX
; i
++)
3207 if (regno
== x86_64_int_parameter_registers
[i
])
3212 /* Return if we do not know how to pass TYPE solely in registers. */
3215 ix86_must_pass_in_stack (enum machine_mode mode
, tree type
)
3217 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
3220 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3221 The layout_type routine is crafty and tries to trick us into passing
3222 currently unsupported vector types on the stack by using TImode. */
3223 return (!TARGET_64BIT
&& mode
== TImode
3224 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
3227 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3228 for a call to a function whose data type is FNTYPE.
3229 For a library call, FNTYPE is 0. */
3232 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
3233 tree fntype
, /* tree ptr for function decl */
3234 rtx libname
, /* SYMBOL_REF of library name or 0 */
3237 static CUMULATIVE_ARGS zero_cum
;
3238 tree param
, next_param
;
3240 if (TARGET_DEBUG_ARG
)
3242 fprintf (stderr
, "\ninit_cumulative_args (");
3244 fprintf (stderr
, "fntype code = %s, ret code = %s",
3245 tree_code_name
[(int) TREE_CODE (fntype
)],
3246 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
3248 fprintf (stderr
, "no fntype");
3251 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
3256 /* Set up the number of registers to use for passing arguments. */
3257 cum
->nregs
= ix86_regparm
;
3259 cum
->sse_nregs
= SSE_REGPARM_MAX
;
3261 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
3262 cum
->warn_sse
= true;
3263 cum
->warn_mmx
= true;
3264 cum
->maybe_vaarg
= false;
3266 /* Use ecx and edx registers if function has fastcall attribute,
3267 else look for regparm information. */
3268 if (fntype
&& !TARGET_64BIT
)
3270 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
3276 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
3279 /* Set up the number of SSE registers used for passing SFmode
3280 and DFmode arguments. Warn for mismatching ABI. */
3281 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
);
3283 /* Determine if this function has variable arguments. This is
3284 indicated by the last argument being 'void_type_mode' if there
3285 are no variable arguments. If there are variable arguments, then
3286 we won't pass anything in registers in 32-bit mode. */
3288 if (cum
->nregs
|| cum
->mmx_nregs
|| cum
->sse_nregs
)
3290 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
3291 param
!= 0; param
= next_param
)
3293 next_param
= TREE_CHAIN (param
);
3294 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
3304 cum
->float_in_sse
= 0;
3306 cum
->maybe_vaarg
= true;
3310 if ((!fntype
&& !libname
)
3311 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
3312 cum
->maybe_vaarg
= true;
3314 if (TARGET_DEBUG_ARG
)
3315 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
3320 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3321 But in the case of vector types, it is some vector mode.
3323 When we have only some of our vector isa extensions enabled, then there
3324 are some modes for which vector_mode_supported_p is false. For these
3325 modes, the generic vector support in gcc will choose some non-vector mode
3326 in order to implement the type. By computing the natural mode, we'll
3327 select the proper ABI location for the operand and not depend on whatever
3328 the middle-end decides to do with these vector types. */
3330 static enum machine_mode
3331 type_natural_mode (tree type
)
3333 enum machine_mode mode
= TYPE_MODE (type
);
3335 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
3337 HOST_WIDE_INT size
= int_size_in_bytes (type
);
3338 if ((size
== 8 || size
== 16)
3339 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3340 && TYPE_VECTOR_SUBPARTS (type
) > 1)
3342 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
3344 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
3345 mode
= MIN_MODE_VECTOR_FLOAT
;
3347 mode
= MIN_MODE_VECTOR_INT
;
3349 /* Get the mode which has this inner mode and number of units. */
3350 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
3351 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
3352 && GET_MODE_INNER (mode
) == innermode
)
3362 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3363 this may not agree with the mode that the type system has chosen for the
3364 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3365 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3368 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
3373 if (orig_mode
!= BLKmode
)
3374 tmp
= gen_rtx_REG (orig_mode
, regno
);
3377 tmp
= gen_rtx_REG (mode
, regno
);
3378 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
3379 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
3385 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3386 of this code is to classify each 8bytes of incoming argument by the register
3387 class and assign registers accordingly. */
3389 /* Return the union class of CLASS1 and CLASS2.
3390 See the x86-64 PS ABI for details. */
3392 static enum x86_64_reg_class
3393 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
3395 /* Rule #1: If both classes are equal, this is the resulting class. */
3396 if (class1
== class2
)
3399 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3401 if (class1
== X86_64_NO_CLASS
)
3403 if (class2
== X86_64_NO_CLASS
)
3406 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3407 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
3408 return X86_64_MEMORY_CLASS
;
3410 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3411 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
3412 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
3413 return X86_64_INTEGERSI_CLASS
;
3414 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
3415 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
3416 return X86_64_INTEGER_CLASS
;
3418 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3420 if (class1
== X86_64_X87_CLASS
3421 || class1
== X86_64_X87UP_CLASS
3422 || class1
== X86_64_COMPLEX_X87_CLASS
3423 || class2
== X86_64_X87_CLASS
3424 || class2
== X86_64_X87UP_CLASS
3425 || class2
== X86_64_COMPLEX_X87_CLASS
)
3426 return X86_64_MEMORY_CLASS
;
3428 /* Rule #6: Otherwise class SSE is used. */
3429 return X86_64_SSE_CLASS
;
3432 /* Classify the argument of type TYPE and mode MODE.
3433 CLASSES will be filled by the register class used to pass each word
3434 of the operand. The number of words is returned. In case the parameter
3435 should be passed in memory, 0 is returned. As a special case for zero
3436 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3438 BIT_OFFSET is used internally for handling records and specifies offset
3439 of the offset in bits modulo 256 to avoid overflow cases.
3441 See the x86-64 PS ABI for details.
3445 classify_argument (enum machine_mode mode
, tree type
,
3446 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
3448 HOST_WIDE_INT bytes
=
3449 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3450 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3452 /* Variable sized entities are always passed/returned in memory. */
3456 if (mode
!= VOIDmode
3457 && targetm
.calls
.must_pass_in_stack (mode
, type
))
3460 if (type
&& AGGREGATE_TYPE_P (type
))
3464 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
3466 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3470 for (i
= 0; i
< words
; i
++)
3471 classes
[i
] = X86_64_NO_CLASS
;
3473 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3474 signalize memory class, so handle it as special case. */
3477 classes
[0] = X86_64_NO_CLASS
;
3481 /* Classify each field of record and merge classes. */
3482 switch (TREE_CODE (type
))
3485 /* And now merge the fields of structure. */
3486 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3488 if (TREE_CODE (field
) == FIELD_DECL
)
3492 if (TREE_TYPE (field
) == error_mark_node
)
3495 /* Bitfields are always classified as integer. Handle them
3496 early, since later code would consider them to be
3497 misaligned integers. */
3498 if (DECL_BIT_FIELD (field
))
3500 for (i
= (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3501 i
< ((int_bit_position (field
) + (bit_offset
% 64))
3502 + tree_low_cst (DECL_SIZE (field
), 0)
3505 merge_classes (X86_64_INTEGER_CLASS
,
3510 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3511 TREE_TYPE (field
), subclasses
,
3512 (int_bit_position (field
)
3513 + bit_offset
) % 256);
3516 for (i
= 0; i
< num
; i
++)
3519 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3521 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
3529 /* Arrays are handled as small records. */
3532 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
3533 TREE_TYPE (type
), subclasses
, bit_offset
);
3537 /* The partial classes are now full classes. */
3538 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
3539 subclasses
[0] = X86_64_SSE_CLASS
;
3540 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
3541 subclasses
[0] = X86_64_INTEGER_CLASS
;
3543 for (i
= 0; i
< words
; i
++)
3544 classes
[i
] = subclasses
[i
% num
];
3549 case QUAL_UNION_TYPE
:
3550 /* Unions are similar to RECORD_TYPE but offset is always 0.
3552 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3554 if (TREE_CODE (field
) == FIELD_DECL
)
3558 if (TREE_TYPE (field
) == error_mark_node
)
3561 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3562 TREE_TYPE (field
), subclasses
,
3566 for (i
= 0; i
< num
; i
++)
3567 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
3576 /* Final merger cleanup. */
3577 for (i
= 0; i
< words
; i
++)
3579 /* If one class is MEMORY, everything should be passed in
3581 if (classes
[i
] == X86_64_MEMORY_CLASS
)
3584 /* The X86_64_SSEUP_CLASS should be always preceded by
3585 X86_64_SSE_CLASS. */
3586 if (classes
[i
] == X86_64_SSEUP_CLASS
3587 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
3588 classes
[i
] = X86_64_SSE_CLASS
;
3590 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3591 if (classes
[i
] == X86_64_X87UP_CLASS
3592 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
3593 classes
[i
] = X86_64_SSE_CLASS
;
3598 /* Compute alignment needed. We align all types to natural boundaries with
3599 exception of XFmode that is aligned to 64bits. */
3600 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
3602 int mode_alignment
= GET_MODE_BITSIZE (mode
);
3605 mode_alignment
= 128;
3606 else if (mode
== XCmode
)
3607 mode_alignment
= 256;
3608 if (COMPLEX_MODE_P (mode
))
3609 mode_alignment
/= 2;
3610 /* Misaligned fields are always returned in memory. */
3611 if (bit_offset
% mode_alignment
)
3615 /* for V1xx modes, just use the base mode */
3616 if (VECTOR_MODE_P (mode
)
3617 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
3618 mode
= GET_MODE_INNER (mode
);
3620 /* Classification of atomic types. */
3625 classes
[0] = X86_64_SSE_CLASS
;
3628 classes
[0] = X86_64_SSE_CLASS
;
3629 classes
[1] = X86_64_SSEUP_CLASS
;
3638 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3639 classes
[0] = X86_64_INTEGERSI_CLASS
;
3641 classes
[0] = X86_64_INTEGER_CLASS
;
3645 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
3650 if (!(bit_offset
% 64))
3651 classes
[0] = X86_64_SSESF_CLASS
;
3653 classes
[0] = X86_64_SSE_CLASS
;
3656 classes
[0] = X86_64_SSEDF_CLASS
;
3659 classes
[0] = X86_64_X87_CLASS
;
3660 classes
[1] = X86_64_X87UP_CLASS
;
3663 classes
[0] = X86_64_SSE_CLASS
;
3664 classes
[1] = X86_64_SSEUP_CLASS
;
3667 classes
[0] = X86_64_SSE_CLASS
;
3670 classes
[0] = X86_64_SSEDF_CLASS
;
3671 classes
[1] = X86_64_SSEDF_CLASS
;
3674 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
3677 /* This modes is larger than 16 bytes. */
3685 classes
[0] = X86_64_SSE_CLASS
;
3686 classes
[1] = X86_64_SSEUP_CLASS
;
3692 classes
[0] = X86_64_SSE_CLASS
;
3698 gcc_assert (VECTOR_MODE_P (mode
));
3703 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
3705 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3706 classes
[0] = X86_64_INTEGERSI_CLASS
;
3708 classes
[0] = X86_64_INTEGER_CLASS
;
3709 classes
[1] = X86_64_INTEGER_CLASS
;
3710 return 1 + (bytes
> 8);
3714 /* Examine the argument and return set number of register required in each
3715 class. Return 0 iff parameter should be passed in memory. */
3717 examine_argument (enum machine_mode mode
, tree type
, int in_return
,
3718 int *int_nregs
, int *sse_nregs
)
3720 enum x86_64_reg_class
class[MAX_CLASSES
];
3721 int n
= classify_argument (mode
, type
, class, 0);
3727 for (n
--; n
>= 0; n
--)
3730 case X86_64_INTEGER_CLASS
:
3731 case X86_64_INTEGERSI_CLASS
:
3734 case X86_64_SSE_CLASS
:
3735 case X86_64_SSESF_CLASS
:
3736 case X86_64_SSEDF_CLASS
:
3739 case X86_64_NO_CLASS
:
3740 case X86_64_SSEUP_CLASS
:
3742 case X86_64_X87_CLASS
:
3743 case X86_64_X87UP_CLASS
:
3747 case X86_64_COMPLEX_X87_CLASS
:
3748 return in_return
? 2 : 0;
3749 case X86_64_MEMORY_CLASS
:
3755 /* Construct container for the argument used by GCC interface. See
3756 FUNCTION_ARG for the detailed description. */
3759 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
3760 tree type
, int in_return
, int nintregs
, int nsseregs
,
3761 const int *intreg
, int sse_regno
)
3763 /* The following variables hold the static issued_error state. */
3764 static bool issued_sse_arg_error
;
3765 static bool issued_sse_ret_error
;
3766 static bool issued_x87_ret_error
;
3768 enum machine_mode tmpmode
;
3770 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3771 enum x86_64_reg_class
class[MAX_CLASSES
];
3775 int needed_sseregs
, needed_intregs
;
3776 rtx exp
[MAX_CLASSES
];
3779 n
= classify_argument (mode
, type
, class, 0);
3780 if (TARGET_DEBUG_ARG
)
3783 fprintf (stderr
, "Memory class\n");
3786 fprintf (stderr
, "Classes:");
3787 for (i
= 0; i
< n
; i
++)
3789 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
3791 fprintf (stderr
, "\n");
3796 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
3799 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
3802 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3803 some less clueful developer tries to use floating-point anyway. */
3804 if (needed_sseregs
&& !TARGET_SSE
)
3808 if (!issued_sse_ret_error
)
3810 error ("SSE register return with SSE disabled");
3811 issued_sse_ret_error
= true;
3814 else if (!issued_sse_arg_error
)
3816 error ("SSE register argument with SSE disabled");
3817 issued_sse_arg_error
= true;
3822 /* Likewise, error if the ABI requires us to return values in the
3823 x87 registers and the user specified -mno-80387. */
3824 if (!TARGET_80387
&& in_return
)
3825 for (i
= 0; i
< n
; i
++)
3826 if (class[i
] == X86_64_X87_CLASS
3827 || class[i
] == X86_64_X87UP_CLASS
3828 || class[i
] == X86_64_COMPLEX_X87_CLASS
)
3830 if (!issued_x87_ret_error
)
3832 error ("x87 register return with x87 disabled");
3833 issued_x87_ret_error
= true;
3838 /* First construct simple cases. Avoid SCmode, since we want to use
3839 single register to pass this type. */
3840 if (n
== 1 && mode
!= SCmode
)
3843 case X86_64_INTEGER_CLASS
:
3844 case X86_64_INTEGERSI_CLASS
:
3845 return gen_rtx_REG (mode
, intreg
[0]);
3846 case X86_64_SSE_CLASS
:
3847 case X86_64_SSESF_CLASS
:
3848 case X86_64_SSEDF_CLASS
:
3849 return gen_reg_or_parallel (mode
, orig_mode
, SSE_REGNO (sse_regno
));
3850 case X86_64_X87_CLASS
:
3851 case X86_64_COMPLEX_X87_CLASS
:
3852 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
3853 case X86_64_NO_CLASS
:
3854 /* Zero sized array, struct or class. */
3859 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
3861 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
3863 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
3864 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
3865 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
3866 && class[1] == X86_64_INTEGER_CLASS
3867 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
3868 && intreg
[0] + 1 == intreg
[1])
3869 return gen_rtx_REG (mode
, intreg
[0]);
3871 /* Otherwise figure out the entries of the PARALLEL. */
3872 for (i
= 0; i
< n
; i
++)
3876 case X86_64_NO_CLASS
:
3878 case X86_64_INTEGER_CLASS
:
3879 case X86_64_INTEGERSI_CLASS
:
3880 /* Merge TImodes on aligned occasions here too. */
3881 if (i
* 8 + 8 > bytes
)
3882 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
3883 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
3887 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3888 if (tmpmode
== BLKmode
)
3890 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3891 gen_rtx_REG (tmpmode
, *intreg
),
3895 case X86_64_SSESF_CLASS
:
3896 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3897 gen_rtx_REG (SFmode
,
3898 SSE_REGNO (sse_regno
)),
3902 case X86_64_SSEDF_CLASS
:
3903 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3904 gen_rtx_REG (DFmode
,
3905 SSE_REGNO (sse_regno
)),
3909 case X86_64_SSE_CLASS
:
3910 if (i
< n
- 1 && class[i
+ 1] == X86_64_SSEUP_CLASS
)
3914 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3915 gen_rtx_REG (tmpmode
,
3916 SSE_REGNO (sse_regno
)),
3918 if (tmpmode
== TImode
)
3927 /* Empty aligned struct, union or class. */
3931 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
3932 for (i
= 0; i
< nexps
; i
++)
3933 XVECEXP (ret
, 0, i
) = exp
[i
];
3937 /* Update the data in CUM to advance over an argument
3938 of mode MODE and data type TYPE.
3939 (TYPE is null for libcalls where that information may not be available.) */
3942 function_arg_advance (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3943 tree type
, int named
)
3946 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3947 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3950 mode
= type_natural_mode (type
);
3952 if (TARGET_DEBUG_ARG
)
3953 fprintf (stderr
, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3954 "mode=%s, named=%d)\n\n",
3955 words
, cum
->words
, cum
->nregs
, cum
->sse_nregs
,
3956 GET_MODE_NAME (mode
), named
);
3960 int int_nregs
, sse_nregs
;
3961 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
3962 cum
->words
+= words
;
3963 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
3965 cum
->nregs
-= int_nregs
;
3966 cum
->sse_nregs
-= sse_nregs
;
3967 cum
->regno
+= int_nregs
;
3968 cum
->sse_regno
+= sse_nregs
;
3971 cum
->words
+= words
;
3989 cum
->words
+= words
;
3990 cum
->nregs
-= words
;
3991 cum
->regno
+= words
;
3993 if (cum
->nregs
<= 0)
4001 if (cum
->float_in_sse
< 2)
4004 if (cum
->float_in_sse
< 1)
4015 if (!type
|| !AGGREGATE_TYPE_P (type
))
4017 cum
->sse_words
+= words
;
4018 cum
->sse_nregs
-= 1;
4019 cum
->sse_regno
+= 1;
4020 if (cum
->sse_nregs
<= 0)
4032 if (!type
|| !AGGREGATE_TYPE_P (type
))
4034 cum
->mmx_words
+= words
;
4035 cum
->mmx_nregs
-= 1;
4036 cum
->mmx_regno
+= 1;
4037 if (cum
->mmx_nregs
<= 0)
4048 /* Define where to put the arguments to a function.
4049 Value is zero to push the argument on the stack,
4050 or a hard register in which to store the argument.
4052 MODE is the argument's machine mode.
4053 TYPE is the data type of the argument (as a tree).
4054 This is null for libcalls where that information may
4056 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4057 the preceding args and about the function being called.
4058 NAMED is nonzero if this argument is a named parameter
4059 (otherwise it is an extra parameter matching an ellipsis). */
4062 function_arg (CUMULATIVE_ARGS
*cum
, enum machine_mode orig_mode
,
4063 tree type
, int named
)
4065 enum machine_mode mode
= orig_mode
;
4068 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
4069 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4070 static bool warnedsse
, warnedmmx
;
4072 /* To simplify the code below, represent vector types with a vector mode
4073 even if MMX/SSE are not active. */
4074 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
4075 mode
= type_natural_mode (type
);
4077 /* Handle a hidden AL argument containing number of registers for varargs
4078 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
4080 if (mode
== VOIDmode
)
4083 return GEN_INT (cum
->maybe_vaarg
4084 ? (cum
->sse_nregs
< 0
4092 ret
= construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
4094 &x86_64_int_parameter_registers
[cum
->regno
],
4099 /* For now, pass fp/complex values on the stack. */
4111 if (words
<= cum
->nregs
)
4113 int regno
= cum
->regno
;
4115 /* Fastcall allocates the first two DWORD (SImode) or
4116 smaller arguments to ECX and EDX. */
4119 if (mode
== BLKmode
|| mode
== DImode
)
4122 /* ECX not EAX is the first allocated register. */
4126 ret
= gen_rtx_REG (mode
, regno
);
4130 if (cum
->float_in_sse
< 2)
4133 if (cum
->float_in_sse
< 1)
4143 if (!type
|| !AGGREGATE_TYPE_P (type
))
4145 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
4148 warning (0, "SSE vector argument without SSE enabled "
4152 ret
= gen_reg_or_parallel (mode
, orig_mode
,
4153 cum
->sse_regno
+ FIRST_SSE_REG
);
4160 if (!type
|| !AGGREGATE_TYPE_P (type
))
4162 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
4165 warning (0, "MMX vector argument without MMX enabled "
4169 ret
= gen_reg_or_parallel (mode
, orig_mode
,
4170 cum
->mmx_regno
+ FIRST_MMX_REG
);
4175 if (TARGET_DEBUG_ARG
)
4178 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
4179 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
4182 print_simple_rtl (stderr
, ret
);
4184 fprintf (stderr
, ", stack");
4186 fprintf (stderr
, " )\n");
4192 /* A C expression that indicates when an argument must be passed by
4193 reference. If nonzero for an argument, a copy of that argument is
4194 made in memory and a pointer to the argument is passed instead of
4195 the argument itself. The pointer is passed in whatever way is
4196 appropriate for passing a pointer to that type. */
4199 ix86_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
4200 enum machine_mode mode ATTRIBUTE_UNUSED
,
4201 tree type
, bool named ATTRIBUTE_UNUSED
)
4206 if (type
&& int_size_in_bytes (type
) == -1)
4208 if (TARGET_DEBUG_ARG
)
4209 fprintf (stderr
, "function_arg_pass_by_reference\n");
4216 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4217 ABI. Only called if TARGET_SSE. */
4219 contains_128bit_aligned_vector_p (tree type
)
4221 enum machine_mode mode
= TYPE_MODE (type
);
4222 if (SSE_REG_MODE_P (mode
)
4223 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
4225 if (TYPE_ALIGN (type
) < 128)
4228 if (AGGREGATE_TYPE_P (type
))
4230 /* Walk the aggregates recursively. */
4231 switch (TREE_CODE (type
))
4235 case QUAL_UNION_TYPE
:
4239 /* Walk all the structure fields. */
4240 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
4242 if (TREE_CODE (field
) == FIELD_DECL
4243 && contains_128bit_aligned_vector_p (TREE_TYPE (field
)))
4250 /* Just for use if some languages passes arrays by value. */
4251 if (contains_128bit_aligned_vector_p (TREE_TYPE (type
)))
4262 /* Gives the alignment boundary, in bits, of an argument with the
4263 specified mode and type. */
4266 ix86_function_arg_boundary (enum machine_mode mode
, tree type
)
4270 align
= TYPE_ALIGN (type
);
4272 align
= GET_MODE_ALIGNMENT (mode
);
4273 if (align
< PARM_BOUNDARY
)
4274 align
= PARM_BOUNDARY
;
4277 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4278 make an exception for SSE modes since these require 128bit
4281 The handling here differs from field_alignment. ICC aligns MMX
4282 arguments to 4 byte boundaries, while structure fields are aligned
4283 to 8 byte boundaries. */
4285 align
= PARM_BOUNDARY
;
4288 if (!SSE_REG_MODE_P (mode
))
4289 align
= PARM_BOUNDARY
;
4293 if (!contains_128bit_aligned_vector_p (type
))
4294 align
= PARM_BOUNDARY
;
4302 /* Return true if N is a possible register number of function value. */
4304 ix86_function_value_regno_p (int regno
)
4310 return ((regno
) == 0
4311 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
4312 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
));
4314 return ((regno
) == 0 || (regno
) == FIRST_FLOAT_REG
4315 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
)
4316 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
));
4321 || (regno
== FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
4322 || (regno
== FIRST_SSE_REG
&& TARGET_SSE
))
4326 && (regno
== FIRST_MMX_REG
&& TARGET_MMX
))
4333 /* Define how to find the value returned by a function.
4334 VALTYPE is the data type of the value (as a tree).
4335 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4336 otherwise, FUNC is 0. */
4338 ix86_function_value (tree valtype
, tree fntype_or_decl
,
4339 bool outgoing ATTRIBUTE_UNUSED
)
4341 enum machine_mode natmode
= type_natural_mode (valtype
);
4345 rtx ret
= construct_container (natmode
, TYPE_MODE (valtype
), valtype
,
4346 1, REGPARM_MAX
, SSE_REGPARM_MAX
,
4347 x86_64_int_return_registers
, 0);
4348 /* For zero sized structures, construct_container return NULL, but we
4349 need to keep rest of compiler happy by returning meaningful value. */
4351 ret
= gen_rtx_REG (TYPE_MODE (valtype
), 0);
4356 tree fn
= NULL_TREE
, fntype
;
4358 && DECL_P (fntype_or_decl
))
4359 fn
= fntype_or_decl
;
4360 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
4361 return gen_rtx_REG (TYPE_MODE (valtype
),
4362 ix86_value_regno (natmode
, fn
, fntype
));
4366 /* Return true iff type is returned in memory. */
4368 ix86_return_in_memory (tree type
)
4370 int needed_intregs
, needed_sseregs
, size
;
4371 enum machine_mode mode
= type_natural_mode (type
);
4374 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
4376 if (mode
== BLKmode
)
4379 size
= int_size_in_bytes (type
);
4381 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
4384 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
4386 /* User-created vectors small enough to fit in EAX. */
4390 /* MMX/3dNow values are returned in MM0,
4391 except when it doesn't exits. */
4393 return (TARGET_MMX
? 0 : 1);
4395 /* SSE values are returned in XMM0, except when it doesn't exist. */
4397 return (TARGET_SSE
? 0 : 1);
4411 /* When returning SSE vector types, we have a choice of either
4412 (1) being abi incompatible with a -march switch, or
4413 (2) generating an error.
4414 Given no good solution, I think the safest thing is one warning.
4415 The user won't be able to use -Werror, but....
4417 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4418 called in response to actually generating a caller or callee that
4419 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4420 via aggregate_value_p for general type probing from tree-ssa. */
4423 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
4425 static bool warnedsse
, warnedmmx
;
4429 /* Look at the return type of the function, not the function type. */
4430 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
4432 if (!TARGET_SSE
&& !warnedsse
)
4435 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4438 warning (0, "SSE vector return without SSE enabled "
4443 if (!TARGET_MMX
&& !warnedmmx
)
4445 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4448 warning (0, "MMX vector return without MMX enabled "
4457 /* Define how to find the value returned by a library function
4458 assuming the value has mode MODE. */
4460 ix86_libcall_value (enum machine_mode mode
)
4474 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
4477 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
4481 return gen_rtx_REG (mode
, 0);
4485 return gen_rtx_REG (mode
, ix86_value_regno (mode
, NULL
, NULL
));
4488 /* Given a mode, return the register to use for a return value. */
4491 ix86_value_regno (enum machine_mode mode
, tree func
, tree fntype
)
4493 gcc_assert (!TARGET_64BIT
);
4495 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4496 we normally prevent this case when mmx is not available. However
4497 some ABIs may require the result to be returned like DImode. */
4498 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4499 return TARGET_MMX
? FIRST_MMX_REG
: 0;
4501 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4502 we prevent this case when sse is not available. However some ABIs
4503 may require the result to be returned like integer TImode. */
4504 if (mode
== TImode
|| (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4505 return TARGET_SSE
? FIRST_SSE_REG
: 0;
4507 /* Decimal floating point values can go in %eax, unlike other float modes. */
4508 if (DECIMAL_FLOAT_MODE_P (mode
))
4511 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4512 if (!SCALAR_FLOAT_MODE_P (mode
) || !TARGET_FLOAT_RETURNS_IN_80387
)
4515 /* Floating point return values in %st(0), except for local functions when
4516 SSE math is enabled or for functions with sseregparm attribute. */
4517 if ((func
|| fntype
)
4518 && (mode
== SFmode
|| mode
== DFmode
))
4520 int sse_level
= ix86_function_sseregparm (fntype
, func
);
4521 if ((sse_level
>= 1 && mode
== SFmode
)
4522 || (sse_level
== 2 && mode
== DFmode
))
4523 return FIRST_SSE_REG
;
4526 return FIRST_FLOAT_REG
;
4529 /* Create the va_list data type. */
4532 ix86_build_builtin_va_list (void)
4534 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
4536 /* For i386 we use plain pointer to argument area. */
4538 return build_pointer_type (char_type_node
);
4540 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
4541 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
4543 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
4544 unsigned_type_node
);
4545 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
4546 unsigned_type_node
);
4547 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
4549 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
4552 va_list_gpr_counter_field
= f_gpr
;
4553 va_list_fpr_counter_field
= f_fpr
;
4555 DECL_FIELD_CONTEXT (f_gpr
) = record
;
4556 DECL_FIELD_CONTEXT (f_fpr
) = record
;
4557 DECL_FIELD_CONTEXT (f_ovf
) = record
;
4558 DECL_FIELD_CONTEXT (f_sav
) = record
;
4560 TREE_CHAIN (record
) = type_decl
;
4561 TYPE_NAME (record
) = type_decl
;
4562 TYPE_FIELDS (record
) = f_gpr
;
4563 TREE_CHAIN (f_gpr
) = f_fpr
;
4564 TREE_CHAIN (f_fpr
) = f_ovf
;
4565 TREE_CHAIN (f_ovf
) = f_sav
;
4567 layout_type (record
);
4569 /* The correct type is an array type of one element. */
4570 return build_array_type (record
, build_index_type (size_zero_node
));
4573 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4576 ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4577 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
4580 CUMULATIVE_ARGS next_cum
;
4581 rtx save_area
= NULL_RTX
, mem
;
4594 if (! cfun
->va_list_gpr_size
&& ! cfun
->va_list_fpr_size
)
4597 /* Indicate to allocate space on the stack for varargs save area. */
4598 ix86_save_varrargs_registers
= 1;
4600 cfun
->stack_alignment_needed
= 128;
4602 fntype
= TREE_TYPE (current_function_decl
);
4603 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
4604 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
4605 != void_type_node
));
4607 /* For varargs, we do not want to skip the dummy va_dcl argument.
4608 For stdargs, we do want to skip the last named argument. */
4611 function_arg_advance (&next_cum
, mode
, type
, 1);
4614 save_area
= frame_pointer_rtx
;
4616 set
= get_varargs_alias_set ();
4618 for (i
= next_cum
.regno
;
4620 && i
< next_cum
.regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
4623 mem
= gen_rtx_MEM (Pmode
,
4624 plus_constant (save_area
, i
* UNITS_PER_WORD
));
4625 MEM_NOTRAP_P (mem
) = 1;
4626 set_mem_alias_set (mem
, set
);
4627 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
4628 x86_64_int_parameter_registers
[i
]));
4631 if (next_cum
.sse_nregs
&& cfun
->va_list_fpr_size
)
4633 /* Now emit code to save SSE registers. The AX parameter contains number
4634 of SSE parameter registers used to call this function. We use
4635 sse_prologue_save insn template that produces computed jump across
4636 SSE saves. We need some preparation work to get this working. */
4638 label
= gen_label_rtx ();
4639 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
4641 /* Compute address to jump to :
4642 label - 5*eax + nnamed_sse_arguments*5 */
4643 tmp_reg
= gen_reg_rtx (Pmode
);
4644 nsse_reg
= gen_reg_rtx (Pmode
);
4645 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
4646 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4647 gen_rtx_MULT (Pmode
, nsse_reg
,
4649 if (next_cum
.sse_regno
)
4652 gen_rtx_CONST (DImode
,
4653 gen_rtx_PLUS (DImode
,
4655 GEN_INT (next_cum
.sse_regno
* 4))));
4657 emit_move_insn (nsse_reg
, label_ref
);
4658 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
4660 /* Compute address of memory block we save into. We always use pointer
4661 pointing 127 bytes after first byte to store - this is needed to keep
4662 instruction size limited by 4 bytes. */
4663 tmp_reg
= gen_reg_rtx (Pmode
);
4664 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4665 plus_constant (save_area
,
4666 8 * REGPARM_MAX
+ 127)));
4667 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
4668 MEM_NOTRAP_P (mem
) = 1;
4669 set_mem_alias_set (mem
, set
);
4670 set_mem_align (mem
, BITS_PER_WORD
);
4672 /* And finally do the dirty job! */
4673 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
4674 GEN_INT (next_cum
.sse_regno
), label
));
4679 /* Implement va_start. */
4682 ix86_va_start (tree valist
, rtx nextarg
)
4684 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
4685 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4686 tree gpr
, fpr
, ovf
, sav
, t
;
4689 /* Only 64bit target needs something special. */
4692 std_expand_builtin_va_start (valist
, nextarg
);
4696 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4697 f_fpr
= TREE_CHAIN (f_gpr
);
4698 f_ovf
= TREE_CHAIN (f_fpr
);
4699 f_sav
= TREE_CHAIN (f_ovf
);
4701 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
4702 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4703 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4704 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4705 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4707 /* Count number of gp and fp argument registers used. */
4708 words
= current_function_args_info
.words
;
4709 n_gpr
= current_function_args_info
.regno
;
4710 n_fpr
= current_function_args_info
.sse_regno
;
4712 if (TARGET_DEBUG_ARG
)
4713 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4714 (int) words
, (int) n_gpr
, (int) n_fpr
);
4716 if (cfun
->va_list_gpr_size
)
4718 type
= TREE_TYPE (gpr
);
4719 t
= build2 (GIMPLE_MODIFY_STMT
, type
, gpr
,
4720 build_int_cst (type
, n_gpr
* 8));
4721 TREE_SIDE_EFFECTS (t
) = 1;
4722 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4725 if (cfun
->va_list_fpr_size
)
4727 type
= TREE_TYPE (fpr
);
4728 t
= build2 (GIMPLE_MODIFY_STMT
, type
, fpr
,
4729 build_int_cst (type
, n_fpr
* 16 + 8*REGPARM_MAX
));
4730 TREE_SIDE_EFFECTS (t
) = 1;
4731 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4734 /* Find the overflow area. */
4735 type
= TREE_TYPE (ovf
);
4736 t
= make_tree (type
, virtual_incoming_args_rtx
);
4738 t
= build2 (PLUS_EXPR
, type
, t
,
4739 build_int_cst (type
, words
* UNITS_PER_WORD
));
4740 t
= build2 (GIMPLE_MODIFY_STMT
, type
, ovf
, t
);
4741 TREE_SIDE_EFFECTS (t
) = 1;
4742 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4744 if (cfun
->va_list_gpr_size
|| cfun
->va_list_fpr_size
)
4746 /* Find the register save area.
4747 Prologue of the function save it right above stack frame. */
4748 type
= TREE_TYPE (sav
);
4749 t
= make_tree (type
, frame_pointer_rtx
);
4750 t
= build2 (GIMPLE_MODIFY_STMT
, type
, sav
, t
);
4751 TREE_SIDE_EFFECTS (t
) = 1;
4752 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4756 /* Implement va_arg. */
4759 ix86_gimplify_va_arg (tree valist
, tree type
, tree
*pre_p
, tree
*post_p
)
4761 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
4762 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4763 tree gpr
, fpr
, ovf
, sav
, t
;
4765 tree lab_false
, lab_over
= NULL_TREE
;
4770 enum machine_mode nat_mode
;
4772 /* Only 64bit target needs something special. */
4774 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
4776 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4777 f_fpr
= TREE_CHAIN (f_gpr
);
4778 f_ovf
= TREE_CHAIN (f_fpr
);
4779 f_sav
= TREE_CHAIN (f_ovf
);
4781 valist
= build_va_arg_indirect_ref (valist
);
4782 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4783 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4784 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4785 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4787 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
4789 type
= build_pointer_type (type
);
4790 size
= int_size_in_bytes (type
);
4791 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4793 nat_mode
= type_natural_mode (type
);
4794 container
= construct_container (nat_mode
, TYPE_MODE (type
), type
, 0,
4795 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
4797 /* Pull the value out of the saved registers. */
4799 addr
= create_tmp_var (ptr_type_node
, "addr");
4800 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
4804 int needed_intregs
, needed_sseregs
;
4806 tree int_addr
, sse_addr
;
4808 lab_false
= create_artificial_label ();
4809 lab_over
= create_artificial_label ();
4811 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
4813 need_temp
= (!REG_P (container
)
4814 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
4815 || TYPE_ALIGN (type
) > 128));
4817 /* In case we are passing structure, verify that it is consecutive block
4818 on the register save area. If not we need to do moves. */
4819 if (!need_temp
&& !REG_P (container
))
4821 /* Verify that all registers are strictly consecutive */
4822 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
4826 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4828 rtx slot
= XVECEXP (container
, 0, i
);
4829 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
4830 || INTVAL (XEXP (slot
, 1)) != i
* 16)
4838 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4840 rtx slot
= XVECEXP (container
, 0, i
);
4841 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
4842 || INTVAL (XEXP (slot
, 1)) != i
* 8)
4854 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
4855 DECL_POINTER_ALIAS_SET (int_addr
) = get_varargs_alias_set ();
4856 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
4857 DECL_POINTER_ALIAS_SET (sse_addr
) = get_varargs_alias_set ();
4860 /* First ensure that we fit completely in registers. */
4863 t
= build_int_cst (TREE_TYPE (gpr
),
4864 (REGPARM_MAX
- needed_intregs
+ 1) * 8);
4865 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
4866 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4867 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4868 gimplify_and_add (t
, pre_p
);
4872 t
= build_int_cst (TREE_TYPE (fpr
),
4873 (SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
4875 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
4876 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4877 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4878 gimplify_and_add (t
, pre_p
);
4881 /* Compute index to start of area used for integer regs. */
4884 /* int_addr = gpr + sav; */
4885 t
= fold_convert (ptr_type_node
, gpr
);
4886 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4887 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, int_addr
, t
);
4888 gimplify_and_add (t
, pre_p
);
4892 /* sse_addr = fpr + sav; */
4893 t
= fold_convert (ptr_type_node
, fpr
);
4894 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4895 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, sse_addr
, t
);
4896 gimplify_and_add (t
, pre_p
);
4901 tree temp
= create_tmp_var (type
, "va_arg_tmp");
4904 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
4905 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
4906 gimplify_and_add (t
, pre_p
);
4908 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
4910 rtx slot
= XVECEXP (container
, 0, i
);
4911 rtx reg
= XEXP (slot
, 0);
4912 enum machine_mode mode
= GET_MODE (reg
);
4913 tree piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
4914 tree addr_type
= build_pointer_type (piece_type
);
4917 tree dest_addr
, dest
;
4919 if (SSE_REGNO_P (REGNO (reg
)))
4921 src_addr
= sse_addr
;
4922 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
4926 src_addr
= int_addr
;
4927 src_offset
= REGNO (reg
) * 8;
4929 src_addr
= fold_convert (addr_type
, src_addr
);
4930 src_addr
= fold_build2 (PLUS_EXPR
, addr_type
, src_addr
,
4931 size_int (src_offset
));
4932 src
= build_va_arg_indirect_ref (src_addr
);
4934 dest_addr
= fold_convert (addr_type
, addr
);
4935 dest_addr
= fold_build2 (PLUS_EXPR
, addr_type
, dest_addr
,
4936 size_int (INTVAL (XEXP (slot
, 1))));
4937 dest
= build_va_arg_indirect_ref (dest_addr
);
4939 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, dest
, src
);
4940 gimplify_and_add (t
, pre_p
);
4946 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
4947 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
4948 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (gpr
), gpr
, t
);
4949 gimplify_and_add (t
, pre_p
);
4953 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
4954 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
4955 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (fpr
), fpr
, t
);
4956 gimplify_and_add (t
, pre_p
);
4959 t
= build1 (GOTO_EXPR
, void_type_node
, lab_over
);
4960 gimplify_and_add (t
, pre_p
);
4962 t
= build1 (LABEL_EXPR
, void_type_node
, lab_false
);
4963 append_to_statement_list (t
, pre_p
);
4966 /* ... otherwise out of the overflow area. */
4968 /* Care for on-stack alignment if needed. */
4969 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64
4970 || integer_zerop (TYPE_SIZE (type
)))
4974 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
4975 t
= build2 (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
,
4976 build_int_cst (TREE_TYPE (ovf
), align
- 1));
4977 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
4978 build_int_cst (TREE_TYPE (t
), -align
));
4980 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
4982 t2
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
4983 gimplify_and_add (t2
, pre_p
);
4985 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
4986 build_int_cst (TREE_TYPE (t
), rsize
* UNITS_PER_WORD
));
4987 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (ovf
), ovf
, t
);
4988 gimplify_and_add (t
, pre_p
);
4992 t
= build1 (LABEL_EXPR
, void_type_node
, lab_over
);
4993 append_to_statement_list (t
, pre_p
);
4996 ptrtype
= build_pointer_type (type
);
4997 addr
= fold_convert (ptrtype
, addr
);
5000 addr
= build_va_arg_indirect_ref (addr
);
5001 return build_va_arg_indirect_ref (addr
);
5004 /* Return nonzero if OPNUM's MEM should be matched
5005 in movabs* patterns. */
5008 ix86_check_movabs (rtx insn
, int opnum
)
5012 set
= PATTERN (insn
);
5013 if (GET_CODE (set
) == PARALLEL
)
5014 set
= XVECEXP (set
, 0, 0);
5015 gcc_assert (GET_CODE (set
) == SET
);
5016 mem
= XEXP (set
, opnum
);
5017 while (GET_CODE (mem
) == SUBREG
)
5018 mem
= SUBREG_REG (mem
);
5019 gcc_assert (MEM_P (mem
));
5020 return (volatile_ok
|| !MEM_VOLATILE_P (mem
));
5023 /* Initialize the table of extra 80387 mathematical constants. */
5026 init_ext_80387_constants (void)
5028 static const char * cst
[5] =
5030 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5031 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5032 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5033 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5034 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5038 for (i
= 0; i
< 5; i
++)
5040 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
5041 /* Ensure each constant is rounded to XFmode precision. */
5042 real_convert (&ext_80387_constants_table
[i
],
5043 XFmode
, &ext_80387_constants_table
[i
]);
5046 ext_80387_constants_init
= 1;
5049 /* Return true if the constant is something that can be loaded with
5050 a special instruction. */
5053 standard_80387_constant_p (rtx x
)
5057 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
5060 if (x
== CONST0_RTX (GET_MODE (x
)))
5062 if (x
== CONST1_RTX (GET_MODE (x
)))
5065 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
5067 /* For XFmode constants, try to find a special 80387 instruction when
5068 optimizing for size or on those CPUs that benefit from them. */
5069 if (GET_MODE (x
) == XFmode
5070 && (optimize_size
|| TARGET_EXT_80387_CONSTANTS
))
5074 if (! ext_80387_constants_init
)
5075 init_ext_80387_constants ();
5077 for (i
= 0; i
< 5; i
++)
5078 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
5082 /* Load of the constant -0.0 or -1.0 will be split as
5083 fldz;fchs or fld1;fchs sequence. */
5084 if (real_isnegzero (&r
))
5086 if (real_identical (&r
, &dconstm1
))
5092 /* Return the opcode of the special instruction to be used to load
5096 standard_80387_constant_opcode (rtx x
)
5098 switch (standard_80387_constant_p (x
))
5122 /* Return the CONST_DOUBLE representing the 80387 constant that is
5123 loaded by the specified special instruction. The argument IDX
5124 matches the return value from standard_80387_constant_p. */
5127 standard_80387_constant_rtx (int idx
)
5131 if (! ext_80387_constants_init
)
5132 init_ext_80387_constants ();
5148 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
5152 /* Return 1 if mode is a valid mode for sse. */
5154 standard_sse_mode_p (enum machine_mode mode
)
5171 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
5174 standard_sse_constant_p (rtx x
)
5176 enum machine_mode mode
= GET_MODE (x
);
5178 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
5180 if (vector_all_ones_operand (x
, mode
)
5181 && standard_sse_mode_p (mode
))
5182 return TARGET_SSE2
? 2 : -1;
5187 /* Return the opcode of the special instruction to be used to load
5191 standard_sse_constant_opcode (rtx insn
, rtx x
)
5193 switch (standard_sse_constant_p (x
))
5196 if (get_attr_mode (insn
) == MODE_V4SF
)
5197 return "xorps\t%0, %0";
5198 else if (get_attr_mode (insn
) == MODE_V2DF
)
5199 return "xorpd\t%0, %0";
5201 return "pxor\t%0, %0";
5203 return "pcmpeqd\t%0, %0";
5208 /* Returns 1 if OP contains a symbol reference */
5211 symbolic_reference_mentioned_p (rtx op
)
5216 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
5219 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
5220 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
5226 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
5227 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
5231 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
5238 /* Return 1 if it is appropriate to emit `ret' instructions in the
5239 body of a function. Do this only if the epilogue is simple, needing a
5240 couple of insns. Prior to reloading, we can't tell how many registers
5241 must be saved, so return 0 then. Return 0 if there is no frame
5242 marker to de-allocate. */
5245 ix86_can_use_return_insn_p (void)
5247 struct ix86_frame frame
;
5249 if (! reload_completed
|| frame_pointer_needed
)
5252 /* Don't allow more than 32 pop, since that's all we can do
5253 with one instruction. */
5254 if (current_function_pops_args
5255 && current_function_args_size
>= 32768)
5258 ix86_compute_frame_layout (&frame
);
5259 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
5262 /* Value should be nonzero if functions must have frame pointers.
5263 Zero means the frame pointer need not be set up (and parms may
5264 be accessed via the stack pointer) in functions that seem suitable. */
5267 ix86_frame_pointer_required (void)
5269 /* If we accessed previous frames, then the generated code expects
5270 to be able to access the saved ebp value in our frame. */
5271 if (cfun
->machine
->accesses_prev_frame
)
5274 /* Several x86 os'es need a frame pointer for other reasons,
5275 usually pertaining to setjmp. */
5276 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
5279 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5280 the frame pointer by default. Turn it back on now if we've not
5281 got a leaf function. */
5282 if (TARGET_OMIT_LEAF_FRAME_POINTER
5283 && (!current_function_is_leaf
5284 || ix86_current_function_calls_tls_descriptor
))
5287 if (current_function_profile
)
5293 /* Record that the current function accesses previous call frames. */
5296 ix86_setup_frame_addresses (void)
5298 cfun
->machine
->accesses_prev_frame
= 1;
5301 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5302 # define USE_HIDDEN_LINKONCE 1
5304 # define USE_HIDDEN_LINKONCE 0
5307 static int pic_labels_used
;
5309 /* Fills in the label name that should be used for a pc thunk for
5310 the given register. */
5313 get_pc_thunk_name (char name
[32], unsigned int regno
)
5315 gcc_assert (!TARGET_64BIT
);
5317 if (USE_HIDDEN_LINKONCE
)
5318 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
5320 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
5324 /* This function generates code for -fpic that loads %ebx with
5325 the return address of the caller and then returns. */
5328 ix86_file_end (void)
5333 for (regno
= 0; regno
< 8; ++regno
)
5337 if (! ((pic_labels_used
>> regno
) & 1))
5340 get_pc_thunk_name (name
, regno
);
5345 switch_to_section (darwin_sections
[text_coal_section
]);
5346 fputs ("\t.weak_definition\t", asm_out_file
);
5347 assemble_name (asm_out_file
, name
);
5348 fputs ("\n\t.private_extern\t", asm_out_file
);
5349 assemble_name (asm_out_file
, name
);
5350 fputs ("\n", asm_out_file
);
5351 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5355 if (USE_HIDDEN_LINKONCE
)
5359 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
5361 TREE_PUBLIC (decl
) = 1;
5362 TREE_STATIC (decl
) = 1;
5363 DECL_ONE_ONLY (decl
) = 1;
5365 (*targetm
.asm_out
.unique_section
) (decl
, 0);
5366 switch_to_section (get_named_section (decl
, NULL
, 0));
5368 (*targetm
.asm_out
.globalize_label
) (asm_out_file
, name
);
5369 fputs ("\t.hidden\t", asm_out_file
);
5370 assemble_name (asm_out_file
, name
);
5371 fputc ('\n', asm_out_file
);
5372 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
5376 switch_to_section (text_section
);
5377 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5380 xops
[0] = gen_rtx_REG (SImode
, regno
);
5381 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
5382 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
5383 output_asm_insn ("ret", xops
);
5386 if (NEED_INDICATE_EXEC_STACK
)
5387 file_end_indicate_exec_stack ();
5390 /* Emit code for the SET_GOT patterns. */
5393 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
5399 if (TARGET_VXWORKS_RTP
&& flag_pic
)
5401 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5402 xops
[2] = gen_rtx_MEM (Pmode
,
5403 gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
));
5404 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
5406 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5407 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5408 an unadorned address. */
5409 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
5410 SYMBOL_REF_FLAGS (xops
[2]) |= SYMBOL_FLAG_LOCAL
;
5411 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops
);
5415 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
5417 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
5419 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
5422 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
5424 output_asm_insn ("call\t%a2", xops
);
5427 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5428 is what will be referenced by the Mach-O PIC subsystem. */
5430 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5433 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
5434 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
5437 output_asm_insn ("pop{l}\t%0", xops
);
5442 get_pc_thunk_name (name
, REGNO (dest
));
5443 pic_labels_used
|= 1 << REGNO (dest
);
5445 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
5446 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
5447 output_asm_insn ("call\t%X2", xops
);
5448 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5449 is what will be referenced by the Mach-O PIC subsystem. */
5452 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5454 targetm
.asm_out
.internal_label (asm_out_file
, "L",
5455 CODE_LABEL_NUMBER (label
));
5462 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
5463 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
5465 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops
);
5470 /* Generate an "push" pattern for input ARG. */
5475 return gen_rtx_SET (VOIDmode
,
5477 gen_rtx_PRE_DEC (Pmode
,
5478 stack_pointer_rtx
)),
5482 /* Return >= 0 if there is an unused call-clobbered register available
5483 for the entire function. */
5486 ix86_select_alt_pic_regnum (void)
5488 if (current_function_is_leaf
&& !current_function_profile
5489 && !ix86_current_function_calls_tls_descriptor
)
5492 for (i
= 2; i
>= 0; --i
)
5493 if (!regs_ever_live
[i
])
5497 return INVALID_REGNUM
;
5500 /* Return 1 if we need to save REGNO. */
5502 ix86_save_reg (unsigned int regno
, int maybe_eh_return
)
5504 if (pic_offset_table_rtx
5505 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
5506 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5507 || current_function_profile
5508 || current_function_calls_eh_return
5509 || current_function_uses_const_pool
))
5511 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
5516 if (current_function_calls_eh_return
&& maybe_eh_return
)
5521 unsigned test
= EH_RETURN_DATA_REGNO (i
);
5522 if (test
== INVALID_REGNUM
)
5529 if (cfun
->machine
->force_align_arg_pointer
5530 && regno
== REGNO (cfun
->machine
->force_align_arg_pointer
))
5533 return (regs_ever_live
[regno
]
5534 && !call_used_regs
[regno
]
5535 && !fixed_regs
[regno
]
5536 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
5539 /* Return number of registers to be saved on the stack. */
5542 ix86_nsaved_regs (void)
5547 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
5548 if (ix86_save_reg (regno
, true))
5553 /* Return the offset between two registers, one to be eliminated, and the other
5554 its replacement, at the start of a routine. */
5557 ix86_initial_elimination_offset (int from
, int to
)
5559 struct ix86_frame frame
;
5560 ix86_compute_frame_layout (&frame
);
5562 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
5563 return frame
.hard_frame_pointer_offset
;
5564 else if (from
== FRAME_POINTER_REGNUM
5565 && to
== HARD_FRAME_POINTER_REGNUM
)
5566 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
5569 gcc_assert (to
== STACK_POINTER_REGNUM
);
5571 if (from
== ARG_POINTER_REGNUM
)
5572 return frame
.stack_pointer_offset
;
5574 gcc_assert (from
== FRAME_POINTER_REGNUM
);
5575 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
5579 /* Fill structure ix86_frame about frame of currently computed function. */
5582 ix86_compute_frame_layout (struct ix86_frame
*frame
)
5584 HOST_WIDE_INT total_size
;
5585 unsigned int stack_alignment_needed
;
5586 HOST_WIDE_INT offset
;
5587 unsigned int preferred_alignment
;
5588 HOST_WIDE_INT size
= get_frame_size ();
5590 frame
->nregs
= ix86_nsaved_regs ();
5593 stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
5594 preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
5596 /* During reload iteration the amount of registers saved can change.
5597 Recompute the value as needed. Do not recompute when amount of registers
5598 didn't change as reload does multiple calls to the function and does not
5599 expect the decision to change within single iteration. */
5601 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
5603 int count
= frame
->nregs
;
5605 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
5606 /* The fast prologue uses move instead of push to save registers. This
5607 is significantly longer, but also executes faster as modern hardware
5608 can execute the moves in parallel, but can't do that for push/pop.
5610 Be careful about choosing what prologue to emit: When function takes
5611 many instructions to execute we may use slow version as well as in
5612 case function is known to be outside hot spot (this is known with
5613 feedback only). Weight the size of function by number of registers
5614 to save as it is cheap to use one or two push instructions but very
5615 slow to use many of them. */
5617 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
5618 if (cfun
->function_frequency
< FUNCTION_FREQUENCY_NORMAL
5619 || (flag_branch_probabilities
5620 && cfun
->function_frequency
< FUNCTION_FREQUENCY_HOT
))
5621 cfun
->machine
->use_fast_prologue_epilogue
= false;
5623 cfun
->machine
->use_fast_prologue_epilogue
5624 = !expensive_function_p (count
);
5626 if (TARGET_PROLOGUE_USING_MOVE
5627 && cfun
->machine
->use_fast_prologue_epilogue
)
5628 frame
->save_regs_using_mov
= true;
5630 frame
->save_regs_using_mov
= false;
5633 /* Skip return address and saved base pointer. */
5634 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
5636 frame
->hard_frame_pointer_offset
= offset
;
5638 /* Do some sanity checking of stack_alignment_needed and
5639 preferred_alignment, since i386 port is the only using those features
5640 that may break easily. */
5642 gcc_assert (!size
|| stack_alignment_needed
);
5643 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
5644 gcc_assert (preferred_alignment
<= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5645 gcc_assert (stack_alignment_needed
5646 <= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5648 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
5649 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
5651 /* Register save area */
5652 offset
+= frame
->nregs
* UNITS_PER_WORD
;
5655 if (ix86_save_varrargs_registers
)
5657 offset
+= X86_64_VARARGS_SIZE
;
5658 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
5661 frame
->va_arg_size
= 0;
5663 /* Align start of frame for local function. */
5664 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
5665 & -stack_alignment_needed
) - offset
;
5667 offset
+= frame
->padding1
;
5669 /* Frame pointer points here. */
5670 frame
->frame_pointer_offset
= offset
;
5674 /* Add outgoing arguments area. Can be skipped if we eliminated
5675 all the function calls as dead code.
5676 Skipping is however impossible when function calls alloca. Alloca
5677 expander assumes that last current_function_outgoing_args_size
5678 of stack frame are unused. */
5679 if (ACCUMULATE_OUTGOING_ARGS
5680 && (!current_function_is_leaf
|| current_function_calls_alloca
5681 || ix86_current_function_calls_tls_descriptor
))
5683 offset
+= current_function_outgoing_args_size
;
5684 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
5687 frame
->outgoing_arguments_size
= 0;
5689 /* Align stack boundary. Only needed if we're calling another function
5691 if (!current_function_is_leaf
|| current_function_calls_alloca
5692 || ix86_current_function_calls_tls_descriptor
)
5693 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
5694 & -preferred_alignment
) - offset
;
5696 frame
->padding2
= 0;
5698 offset
+= frame
->padding2
;
5700 /* We've reached end of stack frame. */
5701 frame
->stack_pointer_offset
= offset
;
5703 /* Size prologue needs to allocate. */
5704 frame
->to_allocate
=
5705 (size
+ frame
->padding1
+ frame
->padding2
5706 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
5708 if ((!frame
->to_allocate
&& frame
->nregs
<= 1)
5709 || (TARGET_64BIT
&& frame
->to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
5710 frame
->save_regs_using_mov
= false;
5712 if (TARGET_RED_ZONE
&& current_function_sp_is_unchanging
5713 && current_function_is_leaf
5714 && !ix86_current_function_calls_tls_descriptor
)
5716 frame
->red_zone_size
= frame
->to_allocate
;
5717 if (frame
->save_regs_using_mov
)
5718 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
5719 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
5720 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
5723 frame
->red_zone_size
= 0;
5724 frame
->to_allocate
-= frame
->red_zone_size
;
5725 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
5727 fprintf (stderr
, "\n");
5728 fprintf (stderr
, "nregs: %ld\n", (long)frame
->nregs
);
5729 fprintf (stderr
, "size: %ld\n", (long)size
);
5730 fprintf (stderr
, "alignment1: %ld\n", (long)stack_alignment_needed
);
5731 fprintf (stderr
, "padding1: %ld\n", (long)frame
->padding1
);
5732 fprintf (stderr
, "va_arg: %ld\n", (long)frame
->va_arg_size
);
5733 fprintf (stderr
, "padding2: %ld\n", (long)frame
->padding2
);
5734 fprintf (stderr
, "to_allocate: %ld\n", (long)frame
->to_allocate
);
5735 fprintf (stderr
, "red_zone_size: %ld\n", (long)frame
->red_zone_size
);
5736 fprintf (stderr
, "frame_pointer_offset: %ld\n", (long)frame
->frame_pointer_offset
);
5737 fprintf (stderr
, "hard_frame_pointer_offset: %ld\n",
5738 (long)frame
->hard_frame_pointer_offset
);
5739 fprintf (stderr
, "stack_pointer_offset: %ld\n", (long)frame
->stack_pointer_offset
);
5740 fprintf (stderr
, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf
);
5741 fprintf (stderr
, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca
);
5742 fprintf (stderr
, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor
);
5746 /* Emit code to save registers in the prologue. */
5749 ix86_emit_save_regs (void)
5754 for (regno
= FIRST_PSEUDO_REGISTER
; regno
-- > 0; )
5755 if (ix86_save_reg (regno
, true))
5757 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
5758 RTX_FRAME_RELATED_P (insn
) = 1;
5762 /* Emit code to save registers using MOV insns. First register
5763 is restored from POINTER + OFFSET. */
5765 ix86_emit_save_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
)
5770 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5771 if (ix86_save_reg (regno
, true))
5773 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
5775 gen_rtx_REG (Pmode
, regno
));
5776 RTX_FRAME_RELATED_P (insn
) = 1;
5777 offset
+= UNITS_PER_WORD
;
5781 /* Expand prologue or epilogue stack adjustment.
5782 The pattern exist to put a dependency on all ebp-based memory accesses.
5783 STYLE should be negative if instructions should be marked as frame related,
5784 zero if %r11 register is live and cannot be freely used and positive
5788 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
, int style
)
5793 insn
= emit_insn (gen_pro_epilogue_adjust_stack_1 (dest
, src
, offset
));
5794 else if (x86_64_immediate_operand (offset
, DImode
))
5795 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest
, src
, offset
));
5799 /* r11 is used by indirect sibcall return as well, set before the
5800 epilogue and used after the epilogue. ATM indirect sibcall
5801 shouldn't be used together with huge frame sizes in one
5802 function because of the frame_size check in sibcall.c. */
5804 r11
= gen_rtx_REG (DImode
, R11_REG
);
5805 insn
= emit_insn (gen_rtx_SET (DImode
, r11
, offset
));
5807 RTX_FRAME_RELATED_P (insn
) = 1;
5808 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest
, src
, r11
,
5812 RTX_FRAME_RELATED_P (insn
) = 1;
5815 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5818 ix86_internal_arg_pointer (void)
5820 bool has_force_align_arg_pointer
=
5821 (0 != lookup_attribute (ix86_force_align_arg_pointer_string
,
5822 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))));
5823 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5824 && DECL_NAME (current_function_decl
)
5825 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
5826 && DECL_FILE_SCOPE_P (current_function_decl
))
5827 || ix86_force_align_arg_pointer
5828 || has_force_align_arg_pointer
)
5830 /* Nested functions can't realign the stack due to a register
5832 if (DECL_CONTEXT (current_function_decl
)
5833 && TREE_CODE (DECL_CONTEXT (current_function_decl
)) == FUNCTION_DECL
)
5835 if (ix86_force_align_arg_pointer
)
5836 warning (0, "-mstackrealign ignored for nested functions");
5837 if (has_force_align_arg_pointer
)
5838 error ("%s not supported for nested functions",
5839 ix86_force_align_arg_pointer_string
);
5840 return virtual_incoming_args_rtx
;
5842 cfun
->machine
->force_align_arg_pointer
= gen_rtx_REG (Pmode
, 2);
5843 return copy_to_reg (cfun
->machine
->force_align_arg_pointer
);
5846 return virtual_incoming_args_rtx
;
5849 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5850 This is called from dwarf2out.c to emit call frame instructions
5851 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5853 ix86_dwarf_handle_frame_unspec (const char *label
, rtx pattern
, int index
)
5855 rtx unspec
= SET_SRC (pattern
);
5856 gcc_assert (GET_CODE (unspec
) == UNSPEC
);
5860 case UNSPEC_REG_SAVE
:
5861 dwarf2out_reg_save_reg (label
, XVECEXP (unspec
, 0, 0),
5862 SET_DEST (pattern
));
5864 case UNSPEC_DEF_CFA
:
5865 dwarf2out_def_cfa (label
, REGNO (SET_DEST (pattern
)),
5866 INTVAL (XVECEXP (unspec
, 0, 0)));
5873 /* Expand the prologue into a bunch of separate insns. */
5876 ix86_expand_prologue (void)
5880 struct ix86_frame frame
;
5881 HOST_WIDE_INT allocate
;
5883 ix86_compute_frame_layout (&frame
);
5885 if (cfun
->machine
->force_align_arg_pointer
)
5889 /* Grab the argument pointer. */
5890 x
= plus_constant (stack_pointer_rtx
, 4);
5891 y
= cfun
->machine
->force_align_arg_pointer
;
5892 insn
= emit_insn (gen_rtx_SET (VOIDmode
, y
, x
));
5893 RTX_FRAME_RELATED_P (insn
) = 1;
5895 /* The unwind info consists of two parts: install the fafp as the cfa,
5896 and record the fafp as the "save register" of the stack pointer.
5897 The later is there in order that the unwinder can see where it
5898 should restore the stack pointer across the and insn. */
5899 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
), UNSPEC_DEF_CFA
);
5900 x
= gen_rtx_SET (VOIDmode
, y
, x
);
5901 RTX_FRAME_RELATED_P (x
) = 1;
5902 y
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, stack_pointer_rtx
),
5904 y
= gen_rtx_SET (VOIDmode
, cfun
->machine
->force_align_arg_pointer
, y
);
5905 RTX_FRAME_RELATED_P (y
) = 1;
5906 x
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
));
5907 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5908 REG_NOTES (insn
) = x
;
5910 /* Align the stack. */
5911 emit_insn (gen_andsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
5914 /* And here we cheat like madmen with the unwind info. We force the
5915 cfa register back to sp+4, which is exactly what it was at the
5916 start of the function. Re-pushing the return address results in
5917 the return at the same spot relative to the cfa, and thus is
5918 correct wrt the unwind info. */
5919 x
= cfun
->machine
->force_align_arg_pointer
;
5920 x
= gen_frame_mem (Pmode
, plus_constant (x
, -4));
5921 insn
= emit_insn (gen_push (x
));
5922 RTX_FRAME_RELATED_P (insn
) = 1;
5925 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, x
), UNSPEC_DEF_CFA
);
5926 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
5927 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5928 REG_NOTES (insn
) = x
;
5931 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5932 slower on all targets. Also sdb doesn't like it. */
5934 if (frame_pointer_needed
)
5936 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
5937 RTX_FRAME_RELATED_P (insn
) = 1;
5939 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
5940 RTX_FRAME_RELATED_P (insn
) = 1;
5943 allocate
= frame
.to_allocate
;
5945 if (!frame
.save_regs_using_mov
)
5946 ix86_emit_save_regs ();
5948 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
5950 /* When using red zone we may start register saving before allocating
5951 the stack frame saving one cycle of the prologue. */
5952 if (TARGET_RED_ZONE
&& frame
.save_regs_using_mov
)
5953 ix86_emit_save_regs_using_mov (frame_pointer_needed
? hard_frame_pointer_rtx
5954 : stack_pointer_rtx
,
5955 -frame
.nregs
* UNITS_PER_WORD
);
5959 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
5960 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5961 GEN_INT (-allocate
), -1);
5964 /* Only valid for Win32. */
5965 rtx eax
= gen_rtx_REG (SImode
, 0);
5966 bool eax_live
= ix86_eax_live_at_start_p ();
5969 gcc_assert (!TARGET_64BIT
);
5973 emit_insn (gen_push (eax
));
5977 emit_move_insn (eax
, GEN_INT (allocate
));
5979 insn
= emit_insn (gen_allocate_stack_worker (eax
));
5980 RTX_FRAME_RELATED_P (insn
) = 1;
5981 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (-allocate
));
5982 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
5983 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
5984 t
, REG_NOTES (insn
));
5988 if (frame_pointer_needed
)
5989 t
= plus_constant (hard_frame_pointer_rtx
,
5992 - frame
.nregs
* UNITS_PER_WORD
);
5994 t
= plus_constant (stack_pointer_rtx
, allocate
);
5995 emit_move_insn (eax
, gen_rtx_MEM (SImode
, t
));
5999 if (frame
.save_regs_using_mov
&& !TARGET_RED_ZONE
)
6001 if (!frame_pointer_needed
|| !frame
.to_allocate
)
6002 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
6004 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
6005 -frame
.nregs
* UNITS_PER_WORD
);
6008 pic_reg_used
= false;
6009 if (pic_offset_table_rtx
6010 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
6011 || current_function_profile
))
6013 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
6015 if (alt_pic_reg_used
!= INVALID_REGNUM
)
6016 REGNO (pic_offset_table_rtx
) = alt_pic_reg_used
;
6018 pic_reg_used
= true;
6025 if (ix86_cmodel
== CM_LARGE_PIC
)
6027 rtx tmp_reg
= gen_rtx_REG (DImode
,
6028 FIRST_REX_INT_REG
+ 3 /* R11 */);
6029 rtx label
= gen_label_rtx ();
6031 LABEL_PRESERVE_P (label
) = 1;
6032 gcc_assert (REGNO (pic_offset_table_rtx
) != REGNO (tmp_reg
));
6033 insn
= emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx
, label
));
6034 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
6035 insn
= emit_insn (gen_set_got_offset_rex64 (tmp_reg
, label
));
6036 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
6037 insn
= emit_insn (gen_adddi3 (pic_offset_table_rtx
,
6038 pic_offset_table_rtx
, tmp_reg
));
6041 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
6044 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
6046 /* Even with accurate pre-reload life analysis, we can wind up
6047 deleting all references to the pic register after reload.
6048 Consider if cross-jumping unifies two sides of a branch
6049 controlled by a comparison vs the only read from a global.
6050 In which case, allow the set_got to be deleted, though we're
6051 too late to do anything about the ebx save in the prologue. */
6052 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
6055 /* Prevent function calls from be scheduled before the call to mcount.
6056 In the pic_reg_used case, make sure that the got load isn't deleted. */
6057 if (current_function_profile
)
6058 emit_insn (gen_blockage (pic_reg_used
? pic_offset_table_rtx
: const0_rtx
));
6061 /* Emit code to restore saved registers using MOV insns. First register
6062 is restored from POINTER + OFFSET. */
6064 ix86_emit_restore_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
,
6065 int maybe_eh_return
)
6068 rtx base_address
= gen_rtx_MEM (Pmode
, pointer
);
6070 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6071 if (ix86_save_reg (regno
, maybe_eh_return
))
6073 /* Ensure that adjust_address won't be forced to produce pointer
6074 out of range allowed by x86-64 instruction set. */
6075 if (TARGET_64BIT
&& offset
!= trunc_int_for_mode (offset
, SImode
))
6079 r11
= gen_rtx_REG (DImode
, R11_REG
);
6080 emit_move_insn (r11
, GEN_INT (offset
));
6081 emit_insn (gen_adddi3 (r11
, r11
, pointer
));
6082 base_address
= gen_rtx_MEM (Pmode
, r11
);
6085 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
6086 adjust_address (base_address
, Pmode
, offset
));
6087 offset
+= UNITS_PER_WORD
;
6091 /* Restore function stack, frame, and registers. */
6094 ix86_expand_epilogue (int style
)
6097 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
6098 struct ix86_frame frame
;
6099 HOST_WIDE_INT offset
;
6101 ix86_compute_frame_layout (&frame
);
6103 /* Calculate start of saved registers relative to ebp. Special care
6104 must be taken for the normal return case of a function using
6105 eh_return: the eax and edx registers are marked as saved, but not
6106 restored along this path. */
6107 offset
= frame
.nregs
;
6108 if (current_function_calls_eh_return
&& style
!= 2)
6110 offset
*= -UNITS_PER_WORD
;
6112 /* If we're only restoring one register and sp is not valid then
6113 using a move instruction to restore the register since it's
6114 less work than reloading sp and popping the register.
6116 The default code result in stack adjustment using add/lea instruction,
6117 while this code results in LEAVE instruction (or discrete equivalent),
6118 so it is profitable in some other cases as well. Especially when there
6119 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6120 and there is exactly one register to pop. This heuristic may need some
6121 tuning in future. */
6122 if ((!sp_valid
&& frame
.nregs
<= 1)
6123 || (TARGET_EPILOGUE_USING_MOVE
6124 && cfun
->machine
->use_fast_prologue_epilogue
6125 && (frame
.nregs
> 1 || frame
.to_allocate
))
6126 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
6127 || (frame_pointer_needed
&& TARGET_USE_LEAVE
6128 && cfun
->machine
->use_fast_prologue_epilogue
6129 && frame
.nregs
== 1)
6130 || current_function_calls_eh_return
)
6132 /* Restore registers. We can use ebp or esp to address the memory
6133 locations. If both are available, default to ebp, since offsets
6134 are known to be small. Only exception is esp pointing directly to the
6135 end of block of saved registers, where we may simplify addressing
6138 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
6139 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
6140 frame
.to_allocate
, style
== 2);
6142 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
6143 offset
, style
== 2);
6145 /* eh_return epilogues need %ecx added to the stack pointer. */
6148 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
6150 if (frame_pointer_needed
)
6152 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
6153 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
6154 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
6156 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
6157 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
6159 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
6164 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
6165 tmp
= plus_constant (tmp
, (frame
.to_allocate
6166 + frame
.nregs
* UNITS_PER_WORD
));
6167 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
6170 else if (!frame_pointer_needed
)
6171 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6172 GEN_INT (frame
.to_allocate
6173 + frame
.nregs
* UNITS_PER_WORD
),
6175 /* If not an i386, mov & pop is faster than "leave". */
6176 else if (TARGET_USE_LEAVE
|| optimize_size
6177 || !cfun
->machine
->use_fast_prologue_epilogue
)
6178 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
6181 pro_epilogue_adjust_stack (stack_pointer_rtx
,
6182 hard_frame_pointer_rtx
,
6185 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
6187 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
6192 /* First step is to deallocate the stack frame so that we can
6193 pop the registers. */
6196 gcc_assert (frame_pointer_needed
);
6197 pro_epilogue_adjust_stack (stack_pointer_rtx
,
6198 hard_frame_pointer_rtx
,
6199 GEN_INT (offset
), style
);
6201 else if (frame
.to_allocate
)
6202 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6203 GEN_INT (frame
.to_allocate
), style
);
6205 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6206 if (ix86_save_reg (regno
, false))
6209 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
6211 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
6213 if (frame_pointer_needed
)
6215 /* Leave results in shorter dependency chains on CPUs that are
6216 able to grok it fast. */
6217 if (TARGET_USE_LEAVE
)
6218 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
6219 else if (TARGET_64BIT
)
6220 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
6222 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
6226 if (cfun
->machine
->force_align_arg_pointer
)
6228 emit_insn (gen_addsi3 (stack_pointer_rtx
,
6229 cfun
->machine
->force_align_arg_pointer
,
6233 /* Sibcall epilogues don't want a return instruction. */
6237 if (current_function_pops_args
&& current_function_args_size
)
6239 rtx popc
= GEN_INT (current_function_pops_args
);
6241 /* i386 can only pop 64K bytes. If asked to pop more, pop
6242 return address, do explicit add, and jump indirectly to the
6245 if (current_function_pops_args
>= 65536)
6247 rtx ecx
= gen_rtx_REG (SImode
, 2);
6249 /* There is no "pascal" calling convention in 64bit ABI. */
6250 gcc_assert (!TARGET_64BIT
);
6252 emit_insn (gen_popsi1 (ecx
));
6253 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
6254 emit_jump_insn (gen_return_indirect_internal (ecx
));
6257 emit_jump_insn (gen_return_pop_internal (popc
));
6260 emit_jump_insn (gen_return_internal ());
6263 /* Reset from the function's potential modifications. */
6266 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
6267 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
6269 if (pic_offset_table_rtx
)
6270 REGNO (pic_offset_table_rtx
) = REAL_PIC_OFFSET_TABLE_REGNUM
;
6272 /* Mach-O doesn't support labels at the end of objects, so if
6273 it looks like we might want one, insert a NOP. */
6275 rtx insn
= get_last_insn ();
6278 && NOTE_LINE_NUMBER (insn
) != NOTE_INSN_DELETED_LABEL
)
6279 insn
= PREV_INSN (insn
);
6283 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_DELETED_LABEL
)))
6284 fputs ("\tnop\n", file
);
6290 /* Extract the parts of an RTL expression that is a valid memory address
6291 for an instruction. Return 0 if the structure of the address is
6292 grossly off. Return -1 if the address contains ASHIFT, so it is not
6293 strictly valid, but still used for computing length of lea instruction. */
6296 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
6298 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
6299 rtx base_reg
, index_reg
;
6300 HOST_WIDE_INT scale
= 1;
6301 rtx scale_rtx
= NULL_RTX
;
6303 enum ix86_address_seg seg
= SEG_DEFAULT
;
6305 if (REG_P (addr
) || GET_CODE (addr
) == SUBREG
)
6307 else if (GET_CODE (addr
) == PLUS
)
6317 addends
[n
++] = XEXP (op
, 1);
6320 while (GET_CODE (op
) == PLUS
);
6325 for (i
= n
; i
>= 0; --i
)
6328 switch (GET_CODE (op
))
6333 index
= XEXP (op
, 0);
6334 scale_rtx
= XEXP (op
, 1);
6338 if (XINT (op
, 1) == UNSPEC_TP
6339 && TARGET_TLS_DIRECT_SEG_REFS
6340 && seg
== SEG_DEFAULT
)
6341 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
6370 else if (GET_CODE (addr
) == MULT
)
6372 index
= XEXP (addr
, 0); /* index*scale */
6373 scale_rtx
= XEXP (addr
, 1);
6375 else if (GET_CODE (addr
) == ASHIFT
)
6379 /* We're called for lea too, which implements ashift on occasion. */
6380 index
= XEXP (addr
, 0);
6381 tmp
= XEXP (addr
, 1);
6382 if (!CONST_INT_P (tmp
))
6384 scale
= INTVAL (tmp
);
6385 if ((unsigned HOST_WIDE_INT
) scale
> 3)
6391 disp
= addr
; /* displacement */
6393 /* Extract the integral value of scale. */
6396 if (!CONST_INT_P (scale_rtx
))
6398 scale
= INTVAL (scale_rtx
);
6401 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
6402 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
6404 /* Allow arg pointer and stack pointer as index if there is not scaling. */
6405 if (base_reg
&& index_reg
&& scale
== 1
6406 && (index_reg
== arg_pointer_rtx
6407 || index_reg
== frame_pointer_rtx
6408 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
6411 tmp
= base
, base
= index
, index
= tmp
;
6412 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
6415 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6416 if ((base_reg
== hard_frame_pointer_rtx
6417 || base_reg
== frame_pointer_rtx
6418 || base_reg
== arg_pointer_rtx
) && !disp
)
6421 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6422 Avoid this by transforming to [%esi+0]. */
6423 if (ix86_tune
== PROCESSOR_K6
&& !optimize_size
6424 && base_reg
&& !index_reg
&& !disp
6426 && REGNO_REG_CLASS (REGNO (base_reg
)) == SIREG
)
6429 /* Special case: encode reg+reg instead of reg*2. */
6430 if (!base
&& index
&& scale
&& scale
== 2)
6431 base
= index
, base_reg
= index_reg
, scale
= 1;
6433 /* Special case: scaling cannot be encoded without base or displacement. */
6434 if (!base
&& !disp
&& index
&& scale
!= 1)
6446 /* Return cost of the memory address x.
6447 For i386, it is better to use a complex address than let gcc copy
6448 the address into a reg and make a new pseudo. But not if the address
6449 requires to two regs - that would mean more pseudos with longer
6452 ix86_address_cost (rtx x
)
6454 struct ix86_address parts
;
6456 int ok
= ix86_decompose_address (x
, &parts
);
6460 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
6461 parts
.base
= SUBREG_REG (parts
.base
);
6462 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
6463 parts
.index
= SUBREG_REG (parts
.index
);
6465 /* More complex memory references are better. */
6466 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
6468 if (parts
.seg
!= SEG_DEFAULT
)
6471 /* Attempt to minimize number of registers in the address. */
6473 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
6475 && (!REG_P (parts
.index
)
6476 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
6480 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
6482 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
6483 && parts
.base
!= parts
.index
)
6486 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
6487 since it's predecode logic can't detect the length of instructions
6488 and it degenerates to vector decoded. Increase cost of such
6489 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6490 to split such addresses or even refuse such addresses at all.
6492 Following addressing modes are affected:
6497 The first and last case may be avoidable by explicitly coding the zero in
6498 memory address, but I don't have AMD-K6 machine handy to check this
6502 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6503 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6504 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
6510 /* If X is a machine specific address (i.e. a symbol or label being
6511 referenced as a displacement from the GOT implemented using an
6512 UNSPEC), then return the base term. Otherwise return X. */
6515 ix86_find_base_term (rtx x
)
6521 if (GET_CODE (x
) != CONST
)
6524 if (GET_CODE (term
) == PLUS
6525 && (CONST_INT_P (XEXP (term
, 1))
6526 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
6527 term
= XEXP (term
, 0);
6528 if (GET_CODE (term
) != UNSPEC
6529 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
6532 term
= XVECEXP (term
, 0, 0);
6534 if (GET_CODE (term
) != SYMBOL_REF
6535 && GET_CODE (term
) != LABEL_REF
)
6541 term
= ix86_delegitimize_address (x
);
6543 if (GET_CODE (term
) != SYMBOL_REF
6544 && GET_CODE (term
) != LABEL_REF
)
6550 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6551 this is used for to form addresses to local data when -fPIC is in
6555 darwin_local_data_pic (rtx disp
)
6557 if (GET_CODE (disp
) == MINUS
)
6559 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
6560 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
6561 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
6563 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
6564 if (! strcmp (sym_name
, "<pic base>"))
6572 /* Determine if a given RTX is a valid constant. We already know this
6573 satisfies CONSTANT_P. */
6576 legitimate_constant_p (rtx x
)
6578 switch (GET_CODE (x
))
6583 if (GET_CODE (x
) == PLUS
)
6585 if (!CONST_INT_P (XEXP (x
, 1)))
6590 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
6593 /* Only some unspecs are valid as "constants". */
6594 if (GET_CODE (x
) == UNSPEC
)
6595 switch (XINT (x
, 1))
6600 return TARGET_64BIT
;
6603 x
= XVECEXP (x
, 0, 0);
6604 return (GET_CODE (x
) == SYMBOL_REF
6605 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6607 x
= XVECEXP (x
, 0, 0);
6608 return (GET_CODE (x
) == SYMBOL_REF
6609 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
6614 /* We must have drilled down to a symbol. */
6615 if (GET_CODE (x
) == LABEL_REF
)
6617 if (GET_CODE (x
) != SYMBOL_REF
)
6622 /* TLS symbols are never valid. */
6623 if (SYMBOL_REF_TLS_MODEL (x
))
6628 if (GET_MODE (x
) == TImode
6629 && x
!= CONST0_RTX (TImode
)
6635 if (x
== CONST0_RTX (GET_MODE (x
)))
6643 /* Otherwise we handle everything else in the move patterns. */
6647 /* Determine if it's legal to put X into the constant pool. This
6648 is not possible for the address of thread-local symbols, which
6649 is checked above. */
6652 ix86_cannot_force_const_mem (rtx x
)
6654 /* We can always put integral constants and vectors in memory. */
6655 switch (GET_CODE (x
))
6665 return !legitimate_constant_p (x
);
6668 /* Determine if a given RTX is a valid constant address. */
6671 constant_address_p (rtx x
)
6673 return CONSTANT_P (x
) && legitimate_address_p (Pmode
, x
, 1);
6676 /* Nonzero if the constant value X is a legitimate general operand
6677 when generating PIC code. It is given that flag_pic is on and
6678 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6681 legitimate_pic_operand_p (rtx x
)
6685 switch (GET_CODE (x
))
6688 inner
= XEXP (x
, 0);
6689 if (GET_CODE (inner
) == PLUS
6690 && CONST_INT_P (XEXP (inner
, 1)))
6691 inner
= XEXP (inner
, 0);
6693 /* Only some unspecs are valid as "constants". */
6694 if (GET_CODE (inner
) == UNSPEC
)
6695 switch (XINT (inner
, 1))
6700 return TARGET_64BIT
;
6702 x
= XVECEXP (inner
, 0, 0);
6703 return (GET_CODE (x
) == SYMBOL_REF
6704 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6712 return legitimate_pic_address_disp_p (x
);
6719 /* Determine if a given CONST RTX is a valid memory displacement
6723 legitimate_pic_address_disp_p (rtx disp
)
6727 /* In 64bit mode we can allow direct addresses of symbols and labels
6728 when they are not dynamic symbols. */
6731 rtx op0
= disp
, op1
;
6733 switch (GET_CODE (disp
))
6739 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
6741 op0
= XEXP (XEXP (disp
, 0), 0);
6742 op1
= XEXP (XEXP (disp
, 0), 1);
6743 if (!CONST_INT_P (op1
)
6744 || INTVAL (op1
) >= 16*1024*1024
6745 || INTVAL (op1
) < -16*1024*1024)
6747 if (GET_CODE (op0
) == LABEL_REF
)
6749 if (GET_CODE (op0
) != SYMBOL_REF
)
6754 /* TLS references should always be enclosed in UNSPEC. */
6755 if (SYMBOL_REF_TLS_MODEL (op0
))
6757 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
)
6758 && ix86_cmodel
!= CM_LARGE_PIC
)
6766 if (GET_CODE (disp
) != CONST
)
6768 disp
= XEXP (disp
, 0);
6772 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6773 of GOT tables. We should not need these anyway. */
6774 if (GET_CODE (disp
) != UNSPEC
6775 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
6776 && XINT (disp
, 1) != UNSPEC_GOTOFF
6777 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
6780 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
6781 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
6787 if (GET_CODE (disp
) == PLUS
)
6789 if (!CONST_INT_P (XEXP (disp
, 1)))
6791 disp
= XEXP (disp
, 0);
6795 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
6798 if (GET_CODE (disp
) != UNSPEC
)
6801 switch (XINT (disp
, 1))
6806 /* We need to check for both symbols and labels because VxWorks loads
6807 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
6809 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
6810 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
6812 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6813 While ABI specify also 32bit relocation but we don't produce it in
6814 small PIC model at all. */
6815 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
6816 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
6818 return gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
6820 case UNSPEC_GOTTPOFF
:
6821 case UNSPEC_GOTNTPOFF
:
6822 case UNSPEC_INDNTPOFF
:
6825 disp
= XVECEXP (disp
, 0, 0);
6826 return (GET_CODE (disp
) == SYMBOL_REF
6827 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
6829 disp
= XVECEXP (disp
, 0, 0);
6830 return (GET_CODE (disp
) == SYMBOL_REF
6831 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
6833 disp
= XVECEXP (disp
, 0, 0);
6834 return (GET_CODE (disp
) == SYMBOL_REF
6835 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
6841 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6842 memory address for an instruction. The MODE argument is the machine mode
6843 for the MEM expression that wants to use this address.
6845 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6846 convert common non-canonical forms to canonical form so that they will
6850 legitimate_address_p (enum machine_mode mode
, rtx addr
, int strict
)
6852 struct ix86_address parts
;
6853 rtx base
, index
, disp
;
6854 HOST_WIDE_INT scale
;
6855 const char *reason
= NULL
;
6856 rtx reason_rtx
= NULL_RTX
;
6858 if (TARGET_DEBUG_ADDR
)
6861 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6862 GET_MODE_NAME (mode
), strict
);
6866 if (ix86_decompose_address (addr
, &parts
) <= 0)
6868 reason
= "decomposition failed";
6873 index
= parts
.index
;
6875 scale
= parts
.scale
;
6877 /* Validate base register.
6879 Don't allow SUBREG's that span more than a word here. It can lead to spill
6880 failures when the base is one word out of a two word structure, which is
6881 represented internally as a DImode int. */
6890 else if (GET_CODE (base
) == SUBREG
6891 && REG_P (SUBREG_REG (base
))
6892 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base
)))
6894 reg
= SUBREG_REG (base
);
6897 reason
= "base is not a register";
6901 if (GET_MODE (base
) != Pmode
)
6903 reason
= "base is not in Pmode";
6907 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
6908 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
6910 reason
= "base is not valid";
6915 /* Validate index register.
6917 Don't allow SUBREG's that span more than a word here -- same as above. */
6926 else if (GET_CODE (index
) == SUBREG
6927 && REG_P (SUBREG_REG (index
))
6928 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index
)))
6930 reg
= SUBREG_REG (index
);
6933 reason
= "index is not a register";
6937 if (GET_MODE (index
) != Pmode
)
6939 reason
= "index is not in Pmode";
6943 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
6944 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
6946 reason
= "index is not valid";
6951 /* Validate scale factor. */
6954 reason_rtx
= GEN_INT (scale
);
6957 reason
= "scale without index";
6961 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
6963 reason
= "scale is not a valid multiplier";
6968 /* Validate displacement. */
6973 if (GET_CODE (disp
) == CONST
6974 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
6975 switch (XINT (XEXP (disp
, 0), 1))
6977 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6978 used. While ABI specify also 32bit relocations, we don't produce
6979 them at all and use IP relative instead. */
6982 gcc_assert (flag_pic
);
6984 goto is_legitimate_pic
;
6985 reason
= "64bit address unspec";
6988 case UNSPEC_GOTPCREL
:
6989 gcc_assert (flag_pic
);
6990 goto is_legitimate_pic
;
6992 case UNSPEC_GOTTPOFF
:
6993 case UNSPEC_GOTNTPOFF
:
6994 case UNSPEC_INDNTPOFF
:
7000 reason
= "invalid address unspec";
7004 else if (SYMBOLIC_CONST (disp
)
7008 && MACHOPIC_INDIRECT
7009 && !machopic_operand_p (disp
)
7015 if (TARGET_64BIT
&& (index
|| base
))
7017 /* foo@dtpoff(%rX) is ok. */
7018 if (GET_CODE (disp
) != CONST
7019 || GET_CODE (XEXP (disp
, 0)) != PLUS
7020 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
7021 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
7022 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
7023 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
7025 reason
= "non-constant pic memory reference";
7029 else if (! legitimate_pic_address_disp_p (disp
))
7031 reason
= "displacement is an invalid pic construct";
7035 /* This code used to verify that a symbolic pic displacement
7036 includes the pic_offset_table_rtx register.
7038 While this is good idea, unfortunately these constructs may
7039 be created by "adds using lea" optimization for incorrect
7048 This code is nonsensical, but results in addressing
7049 GOT table with pic_offset_table_rtx base. We can't
7050 just refuse it easily, since it gets matched by
7051 "addsi3" pattern, that later gets split to lea in the
7052 case output register differs from input. While this
7053 can be handled by separate addsi pattern for this case
7054 that never results in lea, this seems to be easier and
7055 correct fix for crash to disable this test. */
7057 else if (GET_CODE (disp
) != LABEL_REF
7058 && !CONST_INT_P (disp
)
7059 && (GET_CODE (disp
) != CONST
7060 || !legitimate_constant_p (disp
))
7061 && (GET_CODE (disp
) != SYMBOL_REF
7062 || !legitimate_constant_p (disp
)))
7064 reason
= "displacement is not constant";
7067 else if (TARGET_64BIT
7068 && !x86_64_immediate_operand (disp
, VOIDmode
))
7070 reason
= "displacement is out of range";
7075 /* Everything looks valid. */
7076 if (TARGET_DEBUG_ADDR
)
7077 fprintf (stderr
, "Success.\n");
7081 if (TARGET_DEBUG_ADDR
)
7083 fprintf (stderr
, "Error: %s\n", reason
);
7084 debug_rtx (reason_rtx
);
7089 /* Return a unique alias set for the GOT. */
7091 static HOST_WIDE_INT
7092 ix86_GOT_alias_set (void)
7094 static HOST_WIDE_INT set
= -1;
7096 set
= new_alias_set ();
7100 /* Return a legitimate reference for ORIG (an address) using the
7101 register REG. If REG is 0, a new pseudo is generated.
7103 There are two types of references that must be handled:
7105 1. Global data references must load the address from the GOT, via
7106 the PIC reg. An insn is emitted to do this load, and the reg is
7109 2. Static data references, constant pool addresses, and code labels
7110 compute the address as an offset from the GOT, whose base is in
7111 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7112 differentiate them from global data objects. The returned
7113 address is the PIC reg + an unspec constant.
7115 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7116 reg also appears in the address. */
7119 legitimize_pic_address (rtx orig
, rtx reg
)
7126 if (TARGET_MACHO
&& !TARGET_64BIT
)
7129 reg
= gen_reg_rtx (Pmode
);
7130 /* Use the generic Mach-O PIC machinery. */
7131 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
7135 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
7137 else if (TARGET_64BIT
7138 && ix86_cmodel
!= CM_SMALL_PIC
7139 && gotoff_operand (addr
, Pmode
))
7142 /* This symbol may be referenced via a displacement from the PIC
7143 base address (@GOTOFF). */
7145 if (reload_in_progress
)
7146 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7147 if (GET_CODE (addr
) == CONST
)
7148 addr
= XEXP (addr
, 0);
7149 if (GET_CODE (addr
) == PLUS
)
7151 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
7152 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
7155 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7156 new = gen_rtx_CONST (Pmode
, new);
7158 tmpreg
= gen_reg_rtx (Pmode
);
7161 emit_move_insn (tmpreg
, new);
7165 new = expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
7166 tmpreg
, 1, OPTAB_DIRECT
);
7169 else new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
7171 else if (!TARGET_64BIT
&& gotoff_operand (addr
, Pmode
))
7173 /* This symbol may be referenced via a displacement from the PIC
7174 base address (@GOTOFF). */
7176 if (reload_in_progress
)
7177 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7178 if (GET_CODE (addr
) == CONST
)
7179 addr
= XEXP (addr
, 0);
7180 if (GET_CODE (addr
) == PLUS
)
7182 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
7183 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
7186 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7187 new = gen_rtx_CONST (Pmode
, new);
7188 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7192 emit_move_insn (reg
, new);
7196 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
7197 /* We can't use @GOTOFF for text labels on VxWorks;
7198 see gotoff_operand. */
7199 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
7201 if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
7203 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
7204 new = gen_rtx_CONST (Pmode
, new);
7205 new = gen_const_mem (Pmode
, new);
7206 set_mem_alias_set (new, ix86_GOT_alias_set ());
7209 reg
= gen_reg_rtx (Pmode
);
7210 /* Use directly gen_movsi, otherwise the address is loaded
7211 into register for CSE. We don't want to CSE this addresses,
7212 instead we CSE addresses from the GOT table, so skip this. */
7213 emit_insn (gen_movsi (reg
, new));
7218 /* This symbol must be referenced via a load from the
7219 Global Offset Table (@GOT). */
7221 if (reload_in_progress
)
7222 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7223 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
7224 new = gen_rtx_CONST (Pmode
, new);
7226 new = force_reg (Pmode
, new);
7227 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7228 new = gen_const_mem (Pmode
, new);
7229 set_mem_alias_set (new, ix86_GOT_alias_set ());
7232 reg
= gen_reg_rtx (Pmode
);
7233 emit_move_insn (reg
, new);
7239 if (CONST_INT_P (addr
)
7240 && !x86_64_immediate_operand (addr
, VOIDmode
))
7244 emit_move_insn (reg
, addr
);
7248 new = force_reg (Pmode
, addr
);
7250 else if (GET_CODE (addr
) == CONST
)
7252 addr
= XEXP (addr
, 0);
7254 /* We must match stuff we generate before. Assume the only
7255 unspecs that can get here are ours. Not that we could do
7256 anything with them anyway.... */
7257 if (GET_CODE (addr
) == UNSPEC
7258 || (GET_CODE (addr
) == PLUS
7259 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
7261 gcc_assert (GET_CODE (addr
) == PLUS
);
7263 if (GET_CODE (addr
) == PLUS
)
7265 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
7267 /* Check first to see if this is a constant offset from a @GOTOFF
7268 symbol reference. */
7269 if (gotoff_operand (op0
, Pmode
)
7270 && CONST_INT_P (op1
))
7274 if (reload_in_progress
)
7275 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7276 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
7278 new = gen_rtx_PLUS (Pmode
, new, op1
);
7279 new = gen_rtx_CONST (Pmode
, new);
7280 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7284 emit_move_insn (reg
, new);
7290 if (INTVAL (op1
) < -16*1024*1024
7291 || INTVAL (op1
) >= 16*1024*1024)
7293 if (!x86_64_immediate_operand (op1
, Pmode
))
7294 op1
= force_reg (Pmode
, op1
);
7295 new = gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
7301 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
7302 new = legitimize_pic_address (XEXP (addr
, 1),
7303 base
== reg
? NULL_RTX
: reg
);
7305 if (CONST_INT_P (new))
7306 new = plus_constant (base
, INTVAL (new));
7309 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
7311 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
7312 new = XEXP (new, 1);
7314 new = gen_rtx_PLUS (Pmode
, base
, new);
7322 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7325 get_thread_pointer (int to_reg
)
7329 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
7333 reg
= gen_reg_rtx (Pmode
);
7334 insn
= gen_rtx_SET (VOIDmode
, reg
, tp
);
7335 insn
= emit_insn (insn
);
7340 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7341 false if we expect this to be used for a memory address and true if
7342 we expect to load the address into a register. */
7345 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
7347 rtx dest
, base
, off
, pic
, tp
;
7352 case TLS_MODEL_GLOBAL_DYNAMIC
:
7353 dest
= gen_reg_rtx (Pmode
);
7354 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7356 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7358 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
7361 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
7362 insns
= get_insns ();
7365 emit_libcall_block (insns
, dest
, rax
, x
);
7367 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7368 emit_insn (gen_tls_global_dynamic_64 (dest
, x
));
7370 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
7372 if (TARGET_GNU2_TLS
)
7374 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
7376 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7380 case TLS_MODEL_LOCAL_DYNAMIC
:
7381 base
= gen_reg_rtx (Pmode
);
7382 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7384 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7386 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
7389 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
7390 insns
= get_insns ();
7393 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
7394 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
7395 emit_libcall_block (insns
, base
, rax
, note
);
7397 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7398 emit_insn (gen_tls_local_dynamic_base_64 (base
));
7400 emit_insn (gen_tls_local_dynamic_base_32 (base
));
7402 if (TARGET_GNU2_TLS
)
7404 rtx x
= ix86_tls_module_base ();
7406 set_unique_reg_note (get_last_insn (), REG_EQUIV
,
7407 gen_rtx_MINUS (Pmode
, x
, tp
));
7410 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
7411 off
= gen_rtx_CONST (Pmode
, off
);
7413 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
7415 if (TARGET_GNU2_TLS
)
7417 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
7419 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7424 case TLS_MODEL_INITIAL_EXEC
:
7428 type
= UNSPEC_GOTNTPOFF
;
7432 if (reload_in_progress
)
7433 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7434 pic
= pic_offset_table_rtx
;
7435 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
7437 else if (!TARGET_ANY_GNU_TLS
)
7439 pic
= gen_reg_rtx (Pmode
);
7440 emit_insn (gen_set_got (pic
));
7441 type
= UNSPEC_GOTTPOFF
;
7446 type
= UNSPEC_INDNTPOFF
;
7449 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
7450 off
= gen_rtx_CONST (Pmode
, off
);
7452 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
7453 off
= gen_const_mem (Pmode
, off
);
7454 set_mem_alias_set (off
, ix86_GOT_alias_set ());
7456 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7458 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7459 off
= force_reg (Pmode
, off
);
7460 return gen_rtx_PLUS (Pmode
, base
, off
);
7464 base
= get_thread_pointer (true);
7465 dest
= gen_reg_rtx (Pmode
);
7466 emit_insn (gen_subsi3 (dest
, base
, off
));
7470 case TLS_MODEL_LOCAL_EXEC
:
7471 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
7472 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7473 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
7474 off
= gen_rtx_CONST (Pmode
, off
);
7476 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7478 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7479 return gen_rtx_PLUS (Pmode
, base
, off
);
7483 base
= get_thread_pointer (true);
7484 dest
= gen_reg_rtx (Pmode
);
7485 emit_insn (gen_subsi3 (dest
, base
, off
));
7496 /* Try machine-dependent ways of modifying an illegitimate address
7497 to be legitimate. If we find one, return the new, valid address.
7498 This macro is used in only one place: `memory_address' in explow.c.
7500 OLDX is the address as it was before break_out_memory_refs was called.
7501 In some cases it is useful to look at this to decide what needs to be done.
7503 MODE and WIN are passed so that this macro can use
7504 GO_IF_LEGITIMATE_ADDRESS.
7506 It is always safe for this macro to do nothing. It exists to recognize
7507 opportunities to optimize the output.
7509 For the 80386, we handle X+REG by loading X into a register R and
7510 using R+REG. R will go in a general reg and indexing will be used.
7511 However, if REG is a broken-out memory address or multiplication,
7512 nothing needs to be done because REG can certainly go in a general reg.
7514 When -fpic is used, special handling is needed for symbolic references.
7515 See comments by legitimize_pic_address in i386.c for details. */
7518 legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
, enum machine_mode mode
)
7523 if (TARGET_DEBUG_ADDR
)
7525 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7526 GET_MODE_NAME (mode
));
7530 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
7532 return legitimize_tls_address (x
, log
, false);
7533 if (GET_CODE (x
) == CONST
7534 && GET_CODE (XEXP (x
, 0)) == PLUS
7535 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
7536 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
7538 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0), log
, false);
7539 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
7542 if (flag_pic
&& SYMBOLIC_CONST (x
))
7543 return legitimize_pic_address (x
, 0);
7545 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7546 if (GET_CODE (x
) == ASHIFT
7547 && CONST_INT_P (XEXP (x
, 1))
7548 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
7551 log
= INTVAL (XEXP (x
, 1));
7552 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
7553 GEN_INT (1 << log
));
7556 if (GET_CODE (x
) == PLUS
)
7558 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7560 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
7561 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
7562 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
7565 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
7566 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
7567 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
7568 GEN_INT (1 << log
));
7571 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
7572 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
7573 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
7576 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
7577 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
7578 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
7579 GEN_INT (1 << log
));
7582 /* Put multiply first if it isn't already. */
7583 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7585 rtx tmp
= XEXP (x
, 0);
7586 XEXP (x
, 0) = XEXP (x
, 1);
7591 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7592 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7593 created by virtual register instantiation, register elimination, and
7594 similar optimizations. */
7595 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
7598 x
= gen_rtx_PLUS (Pmode
,
7599 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
7600 XEXP (XEXP (x
, 1), 0)),
7601 XEXP (XEXP (x
, 1), 1));
7605 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7606 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7607 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
7608 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
7609 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
7610 && CONSTANT_P (XEXP (x
, 1)))
7613 rtx other
= NULL_RTX
;
7615 if (CONST_INT_P (XEXP (x
, 1)))
7617 constant
= XEXP (x
, 1);
7618 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7620 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
7622 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7623 other
= XEXP (x
, 1);
7631 x
= gen_rtx_PLUS (Pmode
,
7632 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
7633 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
7634 plus_constant (other
, INTVAL (constant
)));
7638 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7641 if (GET_CODE (XEXP (x
, 0)) == MULT
)
7644 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
7647 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7650 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
7654 && REG_P (XEXP (x
, 1))
7655 && REG_P (XEXP (x
, 0)))
7658 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
7661 x
= legitimize_pic_address (x
, 0);
7664 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7667 if (REG_P (XEXP (x
, 0)))
7669 rtx temp
= gen_reg_rtx (Pmode
);
7670 rtx val
= force_operand (XEXP (x
, 1), temp
);
7672 emit_move_insn (temp
, val
);
7678 else if (REG_P (XEXP (x
, 1)))
7680 rtx temp
= gen_reg_rtx (Pmode
);
7681 rtx val
= force_operand (XEXP (x
, 0), temp
);
7683 emit_move_insn (temp
, val
);
7693 /* Print an integer constant expression in assembler syntax. Addition
7694 and subtraction are the only arithmetic that may appear in these
7695 expressions. FILE is the stdio stream to write to, X is the rtx, and
7696 CODE is the operand print code from the output string. */
7699 output_pic_addr_const (FILE *file
, rtx x
, int code
)
7703 switch (GET_CODE (x
))
7706 gcc_assert (flag_pic
);
7711 if (! TARGET_MACHO
|| TARGET_64BIT
)
7712 output_addr_const (file
, x
);
7715 const char *name
= XSTR (x
, 0);
7717 /* Mark the decl as referenced so that cgraph will output the function. */
7718 if (SYMBOL_REF_DECL (x
))
7719 mark_decl_referenced (SYMBOL_REF_DECL (x
));
7722 if (MACHOPIC_INDIRECT
7723 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
7724 name
= machopic_indirection_name (x
, /*stub_p=*/true);
7726 assemble_name (file
, name
);
7728 if (!TARGET_MACHO
&& code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
7729 fputs ("@PLT", file
);
7736 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
7737 assemble_name (asm_out_file
, buf
);
7741 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
7745 /* This used to output parentheses around the expression,
7746 but that does not work on the 386 (either ATT or BSD assembler). */
7747 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7751 if (GET_MODE (x
) == VOIDmode
)
7753 /* We can use %d if the number is <32 bits and positive. */
7754 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
7755 fprintf (file
, "0x%lx%08lx",
7756 (unsigned long) CONST_DOUBLE_HIGH (x
),
7757 (unsigned long) CONST_DOUBLE_LOW (x
));
7759 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
7762 /* We can't handle floating point constants;
7763 PRINT_OPERAND must handle them. */
7764 output_operand_lossage ("floating constant misused");
7768 /* Some assemblers need integer constants to appear first. */
7769 if (CONST_INT_P (XEXP (x
, 0)))
7771 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7773 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7777 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
7778 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7780 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7786 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
7787 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7789 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7791 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
7795 gcc_assert (XVECLEN (x
, 0) == 1);
7796 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
7797 switch (XINT (x
, 1))
7800 fputs ("@GOT", file
);
7803 fputs ("@GOTOFF", file
);
7806 fputs ("@PLTOFF", file
);
7808 case UNSPEC_GOTPCREL
:
7809 fputs ("@GOTPCREL(%rip)", file
);
7811 case UNSPEC_GOTTPOFF
:
7812 /* FIXME: This might be @TPOFF in Sun ld too. */
7813 fputs ("@GOTTPOFF", file
);
7816 fputs ("@TPOFF", file
);
7820 fputs ("@TPOFF", file
);
7822 fputs ("@NTPOFF", file
);
7825 fputs ("@DTPOFF", file
);
7827 case UNSPEC_GOTNTPOFF
:
7829 fputs ("@GOTTPOFF(%rip)", file
);
7831 fputs ("@GOTNTPOFF", file
);
7833 case UNSPEC_INDNTPOFF
:
7834 fputs ("@INDNTPOFF", file
);
7837 output_operand_lossage ("invalid UNSPEC as operand");
7843 output_operand_lossage ("invalid expression as operand");
7847 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7848 We need to emit DTP-relative relocations. */
7851 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
7853 fputs (ASM_LONG
, file
);
7854 output_addr_const (file
, x
);
7855 fputs ("@DTPOFF", file
);
7861 fputs (", 0", file
);
7868 /* In the name of slightly smaller debug output, and to cater to
7869 general assembler lossage, recognize PIC+GOTOFF and turn it back
7870 into a direct symbol reference.
7872 On Darwin, this is necessary to avoid a crash, because Darwin
7873 has a different PIC label for each routine but the DWARF debugging
7874 information is not associated with any particular routine, so it's
7875 necessary to remove references to the PIC label from RTL stored by
7876 the DWARF output code. */
7879 ix86_delegitimize_address (rtx orig_x
)
7882 /* reg_addend is NULL or a multiple of some register. */
7883 rtx reg_addend
= NULL_RTX
;
7884 /* const_addend is NULL or a const_int. */
7885 rtx const_addend
= NULL_RTX
;
7886 /* This is the result, or NULL. */
7887 rtx result
= NULL_RTX
;
7894 if (GET_CODE (x
) != CONST
7895 || GET_CODE (XEXP (x
, 0)) != UNSPEC
7896 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
7899 return XVECEXP (XEXP (x
, 0), 0, 0);
7902 if (GET_CODE (x
) != PLUS
7903 || GET_CODE (XEXP (x
, 1)) != CONST
)
7906 if (REG_P (XEXP (x
, 0))
7907 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7908 /* %ebx + GOT/GOTOFF */
7910 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
7912 /* %ebx + %reg * scale + GOT/GOTOFF */
7913 reg_addend
= XEXP (x
, 0);
7914 if (REG_P (XEXP (reg_addend
, 0))
7915 && REGNO (XEXP (reg_addend
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7916 reg_addend
= XEXP (reg_addend
, 1);
7917 else if (REG_P (XEXP (reg_addend
, 1))
7918 && REGNO (XEXP (reg_addend
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
7919 reg_addend
= XEXP (reg_addend
, 0);
7922 if (!REG_P (reg_addend
)
7923 && GET_CODE (reg_addend
) != MULT
7924 && GET_CODE (reg_addend
) != ASHIFT
)
7930 x
= XEXP (XEXP (x
, 1), 0);
7931 if (GET_CODE (x
) == PLUS
7932 && CONST_INT_P (XEXP (x
, 1)))
7934 const_addend
= XEXP (x
, 1);
7938 if (GET_CODE (x
) == UNSPEC
7939 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
))
7940 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))))
7941 result
= XVECEXP (x
, 0, 0);
7943 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
7945 result
= XEXP (x
, 0);
7951 result
= gen_rtx_PLUS (Pmode
, result
, const_addend
);
7953 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
7958 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
7963 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
7965 enum rtx_code second_code
, bypass_code
;
7966 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
7967 gcc_assert (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
);
7968 code
= ix86_fp_compare_code_to_integer (code
);
7972 code
= reverse_condition (code
);
7983 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
7987 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7988 Those same assemblers have the same but opposite lossage on cmov. */
7989 gcc_assert (mode
== CCmode
);
7990 suffix
= fp
? "nbe" : "a";
8010 gcc_assert (mode
== CCmode
);
8032 gcc_assert (mode
== CCmode
);
8033 suffix
= fp
? "nb" : "ae";
8036 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
8040 gcc_assert (mode
== CCmode
);
8044 suffix
= fp
? "u" : "p";
8047 suffix
= fp
? "nu" : "np";
8052 fputs (suffix
, file
);
8055 /* Print the name of register X to FILE based on its machine mode and number.
8056 If CODE is 'w', pretend the mode is HImode.
8057 If CODE is 'b', pretend the mode is QImode.
8058 If CODE is 'k', pretend the mode is SImode.
8059 If CODE is 'q', pretend the mode is DImode.
8060 If CODE is 'h', pretend the reg is the 'high' byte register.
8061 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
8064 print_reg (rtx x
, int code
, FILE *file
)
8066 gcc_assert (REGNO (x
) != ARG_POINTER_REGNUM
8067 && REGNO (x
) != FRAME_POINTER_REGNUM
8068 && REGNO (x
) != FLAGS_REG
8069 && REGNO (x
) != FPSR_REG
8070 && REGNO (x
) != FPCR_REG
);
8072 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
8075 if (code
== 'w' || MMX_REG_P (x
))
8077 else if (code
== 'b')
8079 else if (code
== 'k')
8081 else if (code
== 'q')
8083 else if (code
== 'y')
8085 else if (code
== 'h')
8088 code
= GET_MODE_SIZE (GET_MODE (x
));
8090 /* Irritatingly, AMD extended registers use different naming convention
8091 from the normal registers. */
8092 if (REX_INT_REG_P (x
))
8094 gcc_assert (TARGET_64BIT
);
8098 error ("extended registers have no high halves");
8101 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8104 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8107 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8110 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8113 error ("unsupported operand size for extended register");
8121 if (STACK_TOP_P (x
))
8123 fputs ("st(0)", file
);
8130 if (! ANY_FP_REG_P (x
))
8131 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
8136 fputs (hi_reg_name
[REGNO (x
)], file
);
8139 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
8141 fputs (qi_reg_name
[REGNO (x
)], file
);
8144 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
8146 fputs (qi_high_reg_name
[REGNO (x
)], file
);
8153 /* Locate some local-dynamic symbol still in use by this function
8154 so that we can print its name in some tls_local_dynamic_base
8158 get_some_local_dynamic_name (void)
8162 if (cfun
->machine
->some_ld_name
)
8163 return cfun
->machine
->some_ld_name
;
8165 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
8167 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
8168 return cfun
->machine
->some_ld_name
;
8174 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
8178 if (GET_CODE (x
) == SYMBOL_REF
8179 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
8181 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
8189 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8190 C -- print opcode suffix for set/cmov insn.
8191 c -- like C, but print reversed condition
8192 F,f -- likewise, but for floating-point.
8193 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8195 R -- print the prefix for register names.
8196 z -- print the opcode suffix for the size of the current operand.
8197 * -- print a star (in certain assembler syntax)
8198 A -- print an absolute memory reference.
8199 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8200 s -- print a shift double count, followed by the assemblers argument
8202 b -- print the QImode name of the register for the indicated operand.
8203 %b0 would print %al if operands[0] is reg 0.
8204 w -- likewise, print the HImode name of the register.
8205 k -- likewise, print the SImode name of the register.
8206 q -- likewise, print the DImode name of the register.
8207 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8208 y -- print "st(0)" instead of "st" as a register.
8209 D -- print condition for SSE cmp instruction.
8210 P -- if PIC, print an @PLT suffix.
8211 X -- don't print any sort of PIC '@' suffix for a symbol.
8212 & -- print some in-use local-dynamic symbol name.
8213 H -- print a memory address offset by 8; used for sse high-parts
8217 print_operand (FILE *file
, rtx x
, int code
)
8224 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8229 assemble_name (file
, get_some_local_dynamic_name ());
8233 switch (ASSEMBLER_DIALECT
)
8240 /* Intel syntax. For absolute addresses, registers should not
8241 be surrounded by braces. */
8245 PRINT_OPERAND (file
, x
, 0);
8255 PRINT_OPERAND (file
, x
, 0);
8260 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8265 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8270 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8275 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8280 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8285 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8290 /* 387 opcodes don't get size suffixes if the operands are
8292 if (STACK_REG_P (x
))
8295 /* Likewise if using Intel opcodes. */
8296 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
8299 /* This is the size of op from size of operand. */
8300 switch (GET_MODE_SIZE (GET_MODE (x
)))
8307 #ifdef HAVE_GAS_FILDS_FISTS
8313 if (GET_MODE (x
) == SFmode
)
8328 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
8330 #ifdef GAS_MNEMONICS
8356 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
8358 PRINT_OPERAND (file
, x
, 0);
8364 /* Little bit of braindamage here. The SSE compare instructions
8365 does use completely different names for the comparisons that the
8366 fp conditional moves. */
8367 switch (GET_CODE (x
))
8382 fputs ("unord", file
);
8386 fputs ("neq", file
);
8390 fputs ("nlt", file
);
8394 fputs ("nle", file
);
8397 fputs ("ord", file
);
8404 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8405 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8407 switch (GET_MODE (x
))
8409 case HImode
: putc ('w', file
); break;
8411 case SFmode
: putc ('l', file
); break;
8413 case DFmode
: putc ('q', file
); break;
8414 default: gcc_unreachable ();
8421 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
8424 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8425 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8428 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
8431 /* Like above, but reverse condition */
8433 /* Check to see if argument to %c is really a constant
8434 and not a condition code which needs to be reversed. */
8435 if (!COMPARISON_P (x
))
8437 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8440 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
8443 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8444 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8447 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
8451 /* It doesn't actually matter what mode we use here, as we're
8452 only going to use this for printing. */
8453 x
= adjust_address_nv (x
, DImode
, 8);
8460 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
8463 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
8466 int pred_val
= INTVAL (XEXP (x
, 0));
8468 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
8469 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
8471 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
8472 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
8474 /* Emit hints only in the case default branch prediction
8475 heuristics would fail. */
8476 if (taken
!= cputaken
)
8478 /* We use 3e (DS) prefix for taken branches and
8479 2e (CS) prefix for not taken branches. */
8481 fputs ("ds ; ", file
);
8483 fputs ("cs ; ", file
);
8490 output_operand_lossage ("invalid operand code '%c'", code
);
8495 print_reg (x
, code
, file
);
8499 /* No `byte ptr' prefix for call instructions. */
8500 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
8503 switch (GET_MODE_SIZE (GET_MODE (x
)))
8505 case 1: size
= "BYTE"; break;
8506 case 2: size
= "WORD"; break;
8507 case 4: size
= "DWORD"; break;
8508 case 8: size
= "QWORD"; break;
8509 case 12: size
= "XWORD"; break;
8510 case 16: size
= "XMMWORD"; break;
8515 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8518 else if (code
== 'w')
8520 else if (code
== 'k')
8524 fputs (" PTR ", file
);
8528 /* Avoid (%rip) for call operands. */
8529 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
8530 && !CONST_INT_P (x
))
8531 output_addr_const (file
, x
);
8532 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
8533 output_operand_lossage ("invalid constraints for operand");
8538 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
8543 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8544 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
8546 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8548 fprintf (file
, "0x%08lx", l
);
8551 /* These float cases don't actually occur as immediate operands. */
8552 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
8556 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8557 fprintf (file
, "%s", dstr
);
8560 else if (GET_CODE (x
) == CONST_DOUBLE
8561 && GET_MODE (x
) == XFmode
)
8565 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8566 fprintf (file
, "%s", dstr
);
8571 /* We have patterns that allow zero sets of memory, for instance.
8572 In 64-bit mode, we should probably support all 8-byte vectors,
8573 since we can in fact encode that into an immediate. */
8574 if (GET_CODE (x
) == CONST_VECTOR
)
8576 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
8582 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
8584 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8587 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
8588 || GET_CODE (x
) == LABEL_REF
)
8590 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8593 fputs ("OFFSET FLAT:", file
);
8596 if (CONST_INT_P (x
))
8597 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
8599 output_pic_addr_const (file
, x
, code
);
8601 output_addr_const (file
, x
);
8605 /* Print a memory operand whose address is ADDR. */
8608 print_operand_address (FILE *file
, rtx addr
)
8610 struct ix86_address parts
;
8611 rtx base
, index
, disp
;
8613 int ok
= ix86_decompose_address (addr
, &parts
);
8618 index
= parts
.index
;
8620 scale
= parts
.scale
;
8628 if (USER_LABEL_PREFIX
[0] == 0)
8630 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
8636 if (!base
&& !index
)
8638 /* Displacement only requires special attention. */
8640 if (CONST_INT_P (disp
))
8642 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
8644 if (USER_LABEL_PREFIX
[0] == 0)
8646 fputs ("ds:", file
);
8648 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
8651 output_pic_addr_const (file
, disp
, 0);
8653 output_addr_const (file
, disp
);
8655 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8658 if (GET_CODE (disp
) == CONST
8659 && GET_CODE (XEXP (disp
, 0)) == PLUS
8660 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
8661 disp
= XEXP (XEXP (disp
, 0), 0);
8662 if (GET_CODE (disp
) == LABEL_REF
8663 || (GET_CODE (disp
) == SYMBOL_REF
8664 && SYMBOL_REF_TLS_MODEL (disp
) == 0))
8665 fputs ("(%rip)", file
);
8670 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8675 output_pic_addr_const (file
, disp
, 0);
8676 else if (GET_CODE (disp
) == LABEL_REF
)
8677 output_asm_label (disp
);
8679 output_addr_const (file
, disp
);
8684 print_reg (base
, 0, file
);
8688 print_reg (index
, 0, file
);
8690 fprintf (file
, ",%d", scale
);
8696 rtx offset
= NULL_RTX
;
8700 /* Pull out the offset of a symbol; print any symbol itself. */
8701 if (GET_CODE (disp
) == CONST
8702 && GET_CODE (XEXP (disp
, 0)) == PLUS
8703 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
8705 offset
= XEXP (XEXP (disp
, 0), 1);
8706 disp
= gen_rtx_CONST (VOIDmode
,
8707 XEXP (XEXP (disp
, 0), 0));
8711 output_pic_addr_const (file
, disp
, 0);
8712 else if (GET_CODE (disp
) == LABEL_REF
)
8713 output_asm_label (disp
);
8714 else if (CONST_INT_P (disp
))
8717 output_addr_const (file
, disp
);
8723 print_reg (base
, 0, file
);
8726 if (INTVAL (offset
) >= 0)
8728 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8732 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8739 print_reg (index
, 0, file
);
8741 fprintf (file
, "*%d", scale
);
8749 output_addr_const_extra (FILE *file
, rtx x
)
8753 if (GET_CODE (x
) != UNSPEC
)
8756 op
= XVECEXP (x
, 0, 0);
8757 switch (XINT (x
, 1))
8759 case UNSPEC_GOTTPOFF
:
8760 output_addr_const (file
, op
);
8761 /* FIXME: This might be @TPOFF in Sun ld. */
8762 fputs ("@GOTTPOFF", file
);
8765 output_addr_const (file
, op
);
8766 fputs ("@TPOFF", file
);
8769 output_addr_const (file
, op
);
8771 fputs ("@TPOFF", file
);
8773 fputs ("@NTPOFF", file
);
8776 output_addr_const (file
, op
);
8777 fputs ("@DTPOFF", file
);
8779 case UNSPEC_GOTNTPOFF
:
8780 output_addr_const (file
, op
);
8782 fputs ("@GOTTPOFF(%rip)", file
);
8784 fputs ("@GOTNTPOFF", file
);
8786 case UNSPEC_INDNTPOFF
:
8787 output_addr_const (file
, op
);
8788 fputs ("@INDNTPOFF", file
);
8798 /* Split one or more DImode RTL references into pairs of SImode
8799 references. The RTL can be REG, offsettable MEM, integer constant, or
8800 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8801 split and "num" is its length. lo_half and hi_half are output arrays
8802 that parallel "operands". */
8805 split_di (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
8809 rtx op
= operands
[num
];
8811 /* simplify_subreg refuse to split volatile memory addresses,
8812 but we still have to handle it. */
8815 lo_half
[num
] = adjust_address (op
, SImode
, 0);
8816 hi_half
[num
] = adjust_address (op
, SImode
, 4);
8820 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
8821 GET_MODE (op
) == VOIDmode
8822 ? DImode
: GET_MODE (op
), 0);
8823 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
8824 GET_MODE (op
) == VOIDmode
8825 ? DImode
: GET_MODE (op
), 4);
8829 /* Split one or more TImode RTL references into pairs of DImode
8830 references. The RTL can be REG, offsettable MEM, integer constant, or
8831 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8832 split and "num" is its length. lo_half and hi_half are output arrays
8833 that parallel "operands". */
8836 split_ti (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
8840 rtx op
= operands
[num
];
8842 /* simplify_subreg refuse to split volatile memory addresses, but we
8843 still have to handle it. */
8846 lo_half
[num
] = adjust_address (op
, DImode
, 0);
8847 hi_half
[num
] = adjust_address (op
, DImode
, 8);
8851 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
8852 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
8857 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8858 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8859 is the expression of the binary operation. The output may either be
8860 emitted here, or returned to the caller, like all output_* functions.
8862 There is no guarantee that the operands are the same mode, as they
8863 might be within FLOAT or FLOAT_EXTEND expressions. */
8865 #ifndef SYSV386_COMPAT
8866 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8867 wants to fix the assemblers because that causes incompatibility
8868 with gcc. No-one wants to fix gcc because that causes
8869 incompatibility with assemblers... You can use the option of
8870 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8871 #define SYSV386_COMPAT 1
8875 output_387_binary_op (rtx insn
, rtx
*operands
)
8877 static char buf
[30];
8880 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
8882 #ifdef ENABLE_CHECKING
8883 /* Even if we do not want to check the inputs, this documents input
8884 constraints. Which helps in understanding the following code. */
8885 if (STACK_REG_P (operands
[0])
8886 && ((REG_P (operands
[1])
8887 && REGNO (operands
[0]) == REGNO (operands
[1])
8888 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
8889 || (REG_P (operands
[2])
8890 && REGNO (operands
[0]) == REGNO (operands
[2])
8891 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
8892 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
8895 gcc_assert (is_sse
);
8898 switch (GET_CODE (operands
[3]))
8901 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8902 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8910 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8911 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8919 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8920 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8928 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8929 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8943 if (GET_MODE (operands
[0]) == SFmode
)
8944 strcat (buf
, "ss\t{%2, %0|%0, %2}");
8946 strcat (buf
, "sd\t{%2, %0|%0, %2}");
8951 switch (GET_CODE (operands
[3]))
8955 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
8957 rtx temp
= operands
[2];
8958 operands
[2] = operands
[1];
8962 /* know operands[0] == operands[1]. */
8964 if (MEM_P (operands
[2]))
8970 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8972 if (STACK_TOP_P (operands
[0]))
8973 /* How is it that we are storing to a dead operand[2]?
8974 Well, presumably operands[1] is dead too. We can't
8975 store the result to st(0) as st(0) gets popped on this
8976 instruction. Instead store to operands[2] (which I
8977 think has to be st(1)). st(1) will be popped later.
8978 gcc <= 2.8.1 didn't have this check and generated
8979 assembly code that the Unixware assembler rejected. */
8980 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8982 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8986 if (STACK_TOP_P (operands
[0]))
8987 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8989 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8994 if (MEM_P (operands
[1]))
9000 if (MEM_P (operands
[2]))
9006 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
9009 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9010 derived assemblers, confusingly reverse the direction of
9011 the operation for fsub{r} and fdiv{r} when the
9012 destination register is not st(0). The Intel assembler
9013 doesn't have this brain damage. Read !SYSV386_COMPAT to
9014 figure out what the hardware really does. */
9015 if (STACK_TOP_P (operands
[0]))
9016 p
= "{p\t%0, %2|rp\t%2, %0}";
9018 p
= "{rp\t%2, %0|p\t%0, %2}";
9020 if (STACK_TOP_P (operands
[0]))
9021 /* As above for fmul/fadd, we can't store to st(0). */
9022 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9024 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9029 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
9032 if (STACK_TOP_P (operands
[0]))
9033 p
= "{rp\t%0, %1|p\t%1, %0}";
9035 p
= "{p\t%1, %0|rp\t%0, %1}";
9037 if (STACK_TOP_P (operands
[0]))
9038 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9040 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9045 if (STACK_TOP_P (operands
[0]))
9047 if (STACK_TOP_P (operands
[1]))
9048 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9050 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9053 else if (STACK_TOP_P (operands
[1]))
9056 p
= "{\t%1, %0|r\t%0, %1}";
9058 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9064 p
= "{r\t%2, %0|\t%0, %2}";
9066 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9079 /* Return needed mode for entity in optimize_mode_switching pass. */
9082 ix86_mode_needed (int entity
, rtx insn
)
9084 enum attr_i387_cw mode
;
9086 /* The mode UNINITIALIZED is used to store control word after a
9087 function call or ASM pattern. The mode ANY specify that function
9088 has no requirements on the control word and make no changes in the
9089 bits we are interested in. */
9092 || (NONJUMP_INSN_P (insn
)
9093 && (asm_noperands (PATTERN (insn
)) >= 0
9094 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
9095 return I387_CW_UNINITIALIZED
;
9097 if (recog_memoized (insn
) < 0)
9100 mode
= get_attr_i387_cw (insn
);
9105 if (mode
== I387_CW_TRUNC
)
9110 if (mode
== I387_CW_FLOOR
)
9115 if (mode
== I387_CW_CEIL
)
9120 if (mode
== I387_CW_MASK_PM
)
9131 /* Output code to initialize control word copies used by trunc?f?i and
9132 rounding patterns. CURRENT_MODE is set to current control word,
9133 while NEW_MODE is set to new control word. */
9136 emit_i387_cw_initialization (int mode
)
9138 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
9143 rtx reg
= gen_reg_rtx (HImode
);
9145 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
9146 emit_move_insn (reg
, copy_rtx (stored_mode
));
9148 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
|| optimize_size
)
9153 /* round toward zero (truncate) */
9154 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
9155 slot
= SLOT_CW_TRUNC
;
9159 /* round down toward -oo */
9160 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9161 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
9162 slot
= SLOT_CW_FLOOR
;
9166 /* round up toward +oo */
9167 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9168 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
9169 slot
= SLOT_CW_CEIL
;
9172 case I387_CW_MASK_PM
:
9173 /* mask precision exception for nearbyint() */
9174 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9175 slot
= SLOT_CW_MASK_PM
;
9187 /* round toward zero (truncate) */
9188 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
9189 slot
= SLOT_CW_TRUNC
;
9193 /* round down toward -oo */
9194 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
9195 slot
= SLOT_CW_FLOOR
;
9199 /* round up toward +oo */
9200 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
9201 slot
= SLOT_CW_CEIL
;
9204 case I387_CW_MASK_PM
:
9205 /* mask precision exception for nearbyint() */
9206 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9207 slot
= SLOT_CW_MASK_PM
;
9215 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
9217 new_mode
= assign_386_stack_local (HImode
, slot
);
9218 emit_move_insn (new_mode
, reg
);
9221 /* Output code for INSN to convert a float to a signed int. OPERANDS
9222 are the insn operands. The output may be [HSD]Imode and the input
9223 operand may be [SDX]Fmode. */
9226 output_fix_trunc (rtx insn
, rtx
*operands
, int fisttp
)
9228 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9229 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
9230 int round_mode
= get_attr_i387_cw (insn
);
9232 /* Jump through a hoop or two for DImode, since the hardware has no
9233 non-popping instruction. We used to do this a different way, but
9234 that was somewhat fragile and broke with post-reload splitters. */
9235 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
9236 output_asm_insn ("fld\t%y1", operands
);
9238 gcc_assert (STACK_TOP_P (operands
[1]));
9239 gcc_assert (MEM_P (operands
[0]));
9242 output_asm_insn ("fisttp%z0\t%0", operands
);
9245 if (round_mode
!= I387_CW_ANY
)
9246 output_asm_insn ("fldcw\t%3", operands
);
9247 if (stack_top_dies
|| dimode_p
)
9248 output_asm_insn ("fistp%z0\t%0", operands
);
9250 output_asm_insn ("fist%z0\t%0", operands
);
9251 if (round_mode
!= I387_CW_ANY
)
9252 output_asm_insn ("fldcw\t%2", operands
);
9258 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9259 have the values zero or one, indicates the ffreep insn's operand
9260 from the OPERANDS array. */
9263 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
9265 if (TARGET_USE_FFREEP
)
9266 #if HAVE_AS_IX86_FFREEP
9267 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
9270 static char retval
[] = ".word\t0xc_df";
9271 int regno
= REGNO (operands
[opno
]);
9273 gcc_assert (FP_REGNO_P (regno
));
9275 retval
[9] = '0' + (regno
- FIRST_STACK_REG
);
9280 return opno
? "fstp\t%y1" : "fstp\t%y0";
9284 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9285 should be used. UNORDERED_P is true when fucom should be used. */
9288 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
9291 rtx cmp_op0
, cmp_op1
;
9292 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
9296 cmp_op0
= operands
[0];
9297 cmp_op1
= operands
[1];
9301 cmp_op0
= operands
[1];
9302 cmp_op1
= operands
[2];
9307 if (GET_MODE (operands
[0]) == SFmode
)
9309 return "ucomiss\t{%1, %0|%0, %1}";
9311 return "comiss\t{%1, %0|%0, %1}";
9314 return "ucomisd\t{%1, %0|%0, %1}";
9316 return "comisd\t{%1, %0|%0, %1}";
9319 gcc_assert (STACK_TOP_P (cmp_op0
));
9321 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9323 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
9327 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
9328 return output_387_ffreep (operands
, 1);
9331 return "ftst\n\tfnstsw\t%0";
9334 if (STACK_REG_P (cmp_op1
)
9336 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
9337 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
9339 /* If both the top of the 387 stack dies, and the other operand
9340 is also a stack register that dies, then this must be a
9341 `fcompp' float compare */
9345 /* There is no double popping fcomi variant. Fortunately,
9346 eflags is immune from the fstp's cc clobbering. */
9348 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
9350 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
9351 return output_387_ffreep (operands
, 0);
9356 return "fucompp\n\tfnstsw\t%0";
9358 return "fcompp\n\tfnstsw\t%0";
9363 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9365 static const char * const alt
[16] =
9367 "fcom%z2\t%y2\n\tfnstsw\t%0",
9368 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9369 "fucom%z2\t%y2\n\tfnstsw\t%0",
9370 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9372 "ficom%z2\t%y2\n\tfnstsw\t%0",
9373 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9377 "fcomi\t{%y1, %0|%0, %y1}",
9378 "fcomip\t{%y1, %0|%0, %y1}",
9379 "fucomi\t{%y1, %0|%0, %y1}",
9380 "fucomip\t{%y1, %0|%0, %y1}",
9391 mask
= eflags_p
<< 3;
9392 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
9393 mask
|= unordered_p
<< 1;
9394 mask
|= stack_top_dies
;
9396 gcc_assert (mask
< 16);
9405 ix86_output_addr_vec_elt (FILE *file
, int value
)
9407 const char *directive
= ASM_LONG
;
9411 directive
= ASM_QUAD
;
9413 gcc_assert (!TARGET_64BIT
);
9416 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
9420 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
9422 const char *directive
= ASM_LONG
;
9425 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
9426 directive
= ASM_QUAD
;
9428 gcc_assert (!TARGET_64BIT
);
9430 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9431 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
9432 fprintf (file
, "%s%s%d-%s%d\n",
9433 directive
, LPREFIX
, value
, LPREFIX
, rel
);
9434 else if (HAVE_AS_GOTOFF_IN_DATA
)
9435 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
9437 else if (TARGET_MACHO
)
9439 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
9440 machopic_output_function_base_name (file
);
9441 fprintf(file
, "\n");
9445 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
9446 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
9449 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9453 ix86_expand_clear (rtx dest
)
9457 /* We play register width games, which are only valid after reload. */
9458 gcc_assert (reload_completed
);
9460 /* Avoid HImode and its attendant prefix byte. */
9461 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
9462 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
9464 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
9466 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9467 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
9469 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, 17));
9470 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
9476 /* X is an unchanging MEM. If it is a constant pool reference, return
9477 the constant pool rtx, else NULL. */
9480 maybe_get_pool_constant (rtx x
)
9482 x
= ix86_delegitimize_address (XEXP (x
, 0));
9484 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
9485 return get_pool_constant (x
);
9491 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
9493 int strict
= (reload_in_progress
|| reload_completed
);
9495 enum tls_model model
;
9500 if (GET_CODE (op1
) == SYMBOL_REF
)
9502 model
= SYMBOL_REF_TLS_MODEL (op1
);
9505 op1
= legitimize_tls_address (op1
, model
, true);
9506 op1
= force_operand (op1
, op0
);
9511 else if (GET_CODE (op1
) == CONST
9512 && GET_CODE (XEXP (op1
, 0)) == PLUS
9513 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
9515 model
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1
, 0), 0));
9518 rtx addend
= XEXP (XEXP (op1
, 0), 1);
9519 op1
= legitimize_tls_address (XEXP (XEXP (op1
, 0), 0), model
, true);
9520 op1
= force_operand (op1
, NULL
);
9521 op1
= expand_simple_binop (Pmode
, PLUS
, op1
, addend
,
9522 op0
, 1, OPTAB_DIRECT
);
9528 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
9530 if (TARGET_MACHO
&& !TARGET_64BIT
)
9535 rtx temp
= ((reload_in_progress
9536 || ((op0
&& REG_P (op0
))
9538 ? op0
: gen_reg_rtx (Pmode
));
9539 op1
= machopic_indirect_data_reference (op1
, temp
);
9540 op1
= machopic_legitimize_pic_address (op1
, mode
,
9541 temp
== op1
? 0 : temp
);
9543 else if (MACHOPIC_INDIRECT
)
9544 op1
= machopic_indirect_data_reference (op1
, 0);
9552 op1
= force_reg (Pmode
, op1
);
9553 else if (!TARGET_64BIT
|| !x86_64_movabs_operand (op1
, Pmode
))
9555 rtx reg
= no_new_pseudos
? op0
: NULL_RTX
;
9556 op1
= legitimize_pic_address (op1
, reg
);
9565 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
9566 || !push_operand (op0
, mode
))
9568 op1
= force_reg (mode
, op1
);
9570 if (push_operand (op0
, mode
)
9571 && ! general_no_elim_operand (op1
, mode
))
9572 op1
= copy_to_mode_reg (mode
, op1
);
9574 /* Force large constants in 64bit compilation into register
9575 to get them CSEed. */
9576 if (TARGET_64BIT
&& mode
== DImode
9577 && immediate_operand (op1
, mode
)
9578 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
9579 && !register_operand (op0
, mode
)
9580 && optimize
&& !reload_completed
&& !reload_in_progress
)
9581 op1
= copy_to_mode_reg (mode
, op1
);
9583 if (FLOAT_MODE_P (mode
))
9585 /* If we are loading a floating point constant to a register,
9586 force the value to memory now, since we'll get better code
9587 out the back end. */
9591 else if (GET_CODE (op1
) == CONST_DOUBLE
)
9593 op1
= validize_mem (force_const_mem (mode
, op1
));
9594 if (!register_operand (op0
, mode
))
9596 rtx temp
= gen_reg_rtx (mode
);
9597 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
9598 emit_move_insn (op0
, temp
);
9605 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9609 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
9611 rtx op0
= operands
[0], op1
= operands
[1];
9613 /* Force constants other than zero into memory. We do not know how
9614 the instructions used to build constants modify the upper 64 bits
9615 of the register, once we have that information we may be able
9616 to handle some of them more efficiently. */
9617 if ((reload_in_progress
| reload_completed
) == 0
9618 && register_operand (op0
, mode
)
9620 && standard_sse_constant_p (op1
) <= 0)
9621 op1
= validize_mem (force_const_mem (mode
, op1
));
9623 /* Make operand1 a register if it isn't already. */
9625 && !register_operand (op0
, mode
)
9626 && !register_operand (op1
, mode
))
9628 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
9632 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9635 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9636 straight to ix86_expand_vector_move. */
9637 /* Code generation for scalar reg-reg moves of single and double precision data:
9638 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
9642 if (x86_sse_partial_reg_dependency == true)
9647 Code generation for scalar loads of double precision data:
9648 if (x86_sse_split_regs == true)
9649 movlpd mem, reg (gas syntax)
9653 Code generation for unaligned packed loads of single precision data
9654 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
9655 if (x86_sse_unaligned_move_optimal)
9658 if (x86_sse_partial_reg_dependency == true)
9670 Code generation for unaligned packed loads of double precision data
9671 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
9672 if (x86_sse_unaligned_move_optimal)
9675 if (x86_sse_split_regs == true)
9688 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
9697 /* If we're optimizing for size, movups is the smallest. */
9700 op0
= gen_lowpart (V4SFmode
, op0
);
9701 op1
= gen_lowpart (V4SFmode
, op1
);
9702 emit_insn (gen_sse_movups (op0
, op1
));
9706 /* ??? If we have typed data, then it would appear that using
9707 movdqu is the only way to get unaligned data loaded with
9709 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9711 op0
= gen_lowpart (V16QImode
, op0
);
9712 op1
= gen_lowpart (V16QImode
, op1
);
9713 emit_insn (gen_sse2_movdqu (op0
, op1
));
9717 if (TARGET_SSE2
&& mode
== V2DFmode
)
9721 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
9723 op0
= gen_lowpart (V2DFmode
, op0
);
9724 op1
= gen_lowpart (V2DFmode
, op1
);
9725 emit_insn (gen_sse2_movupd (op0
, op1
));
9729 /* When SSE registers are split into halves, we can avoid
9730 writing to the top half twice. */
9731 if (TARGET_SSE_SPLIT_REGS
)
9733 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9738 /* ??? Not sure about the best option for the Intel chips.
9739 The following would seem to satisfy; the register is
9740 entirely cleared, breaking the dependency chain. We
9741 then store to the upper half, with a dependency depth
9742 of one. A rumor has it that Intel recommends two movsd
9743 followed by an unpacklpd, but this is unconfirmed. And
9744 given that the dependency depth of the unpacklpd would
9745 still be one, I'm not sure why this would be better. */
9746 zero
= CONST0_RTX (V2DFmode
);
9749 m
= adjust_address (op1
, DFmode
, 0);
9750 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
9751 m
= adjust_address (op1
, DFmode
, 8);
9752 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
9756 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
9758 op0
= gen_lowpart (V4SFmode
, op0
);
9759 op1
= gen_lowpart (V4SFmode
, op1
);
9760 emit_insn (gen_sse_movups (op0
, op1
));
9764 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
9765 emit_move_insn (op0
, CONST0_RTX (mode
));
9767 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9769 if (mode
!= V4SFmode
)
9770 op0
= gen_lowpart (V4SFmode
, op0
);
9771 m
= adjust_address (op1
, V2SFmode
, 0);
9772 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
9773 m
= adjust_address (op1
, V2SFmode
, 8);
9774 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
9777 else if (MEM_P (op0
))
9779 /* If we're optimizing for size, movups is the smallest. */
9782 op0
= gen_lowpart (V4SFmode
, op0
);
9783 op1
= gen_lowpart (V4SFmode
, op1
);
9784 emit_insn (gen_sse_movups (op0
, op1
));
9788 /* ??? Similar to above, only less clear because of quote
9789 typeless stores unquote. */
9790 if (TARGET_SSE2
&& !TARGET_SSE_TYPELESS_STORES
9791 && GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9793 op0
= gen_lowpart (V16QImode
, op0
);
9794 op1
= gen_lowpart (V16QImode
, op1
);
9795 emit_insn (gen_sse2_movdqu (op0
, op1
));
9799 if (TARGET_SSE2
&& mode
== V2DFmode
)
9801 m
= adjust_address (op0
, DFmode
, 0);
9802 emit_insn (gen_sse2_storelpd (m
, op1
));
9803 m
= adjust_address (op0
, DFmode
, 8);
9804 emit_insn (gen_sse2_storehpd (m
, op1
));
9808 if (mode
!= V4SFmode
)
9809 op1
= gen_lowpart (V4SFmode
, op1
);
9810 m
= adjust_address (op0
, V2SFmode
, 0);
9811 emit_insn (gen_sse_storelps (m
, op1
));
9812 m
= adjust_address (op0
, V2SFmode
, 8);
9813 emit_insn (gen_sse_storehps (m
, op1
));
9820 /* Expand a push in MODE. This is some mode for which we do not support
9821 proper push instructions, at least from the registers that we expect
9822 the value to live in. */
9825 ix86_expand_push (enum machine_mode mode
, rtx x
)
9829 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
9830 GEN_INT (-GET_MODE_SIZE (mode
)),
9831 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
9832 if (tmp
!= stack_pointer_rtx
)
9833 emit_move_insn (stack_pointer_rtx
, tmp
);
9835 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
9836 emit_move_insn (tmp
, x
);
9839 /* Helper function of ix86_fixup_binary_operands to canonicalize
9840 operand order. Returns true if the operands should be swapped. */
9843 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
9846 rtx dst
= operands
[0];
9847 rtx src1
= operands
[1];
9848 rtx src2
= operands
[2];
9850 /* If the operation is not commutative, we can't do anything. */
9851 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
9854 /* Highest priority is that src1 should match dst. */
9855 if (rtx_equal_p (dst
, src1
))
9857 if (rtx_equal_p (dst
, src2
))
9860 /* Next highest priority is that immediate constants come second. */
9861 if (immediate_operand (src2
, mode
))
9863 if (immediate_operand (src1
, mode
))
9866 /* Lowest priority is that memory references should come second. */
9876 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9877 destination to use for the operation. If different from the true
9878 destination in operands[0], a copy operation will be required. */
9881 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
9884 rtx dst
= operands
[0];
9885 rtx src1
= operands
[1];
9886 rtx src2
= operands
[2];
9888 /* Canonicalize operand order. */
9889 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
9896 /* Both source operands cannot be in memory. */
9897 if (MEM_P (src1
) && MEM_P (src2
))
9899 /* Optimization: Only read from memory once. */
9900 if (rtx_equal_p (src1
, src2
))
9902 src2
= force_reg (mode
, src2
);
9906 src2
= force_reg (mode
, src2
);
9909 /* If the destination is memory, and we do not have matching source
9910 operands, do things in registers. */
9911 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
9912 dst
= gen_reg_rtx (mode
);
9914 /* Source 1 cannot be a constant. */
9915 if (CONSTANT_P (src1
))
9916 src1
= force_reg (mode
, src1
);
9918 /* Source 1 cannot be a non-matching memory. */
9919 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
9920 src1
= force_reg (mode
, src1
);
9927 /* Similarly, but assume that the destination has already been
9931 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
9932 enum machine_mode mode
, rtx operands
[])
9934 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
9935 gcc_assert (dst
== operands
[0]);
9938 /* Attempt to expand a binary operator. Make the expansion closer to the
9939 actual machine, then just general_operand, which will allow 3 separate
9940 memory references (one output, two input) in a single insn. */
9943 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
9946 rtx src1
, src2
, dst
, op
, clob
;
9948 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
9952 /* Emit the instruction. */
9954 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
9955 if (reload_in_progress
)
9957 /* Reload doesn't know about the flags register, and doesn't know that
9958 it doesn't want to clobber it. We can only do this with PLUS. */
9959 gcc_assert (code
== PLUS
);
9964 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9965 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
9968 /* Fix up the destination if needed. */
9969 if (dst
!= operands
[0])
9970 emit_move_insn (operands
[0], dst
);
9973 /* Return TRUE or FALSE depending on whether the binary operator meets the
9974 appropriate constraints. */
9977 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
9980 rtx dst
= operands
[0];
9981 rtx src1
= operands
[1];
9982 rtx src2
= operands
[2];
9984 /* Both source operands cannot be in memory. */
9985 if (MEM_P (src1
) && MEM_P (src2
))
9988 /* Canonicalize operand order for commutative operators. */
9989 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
9996 /* If the destination is memory, we must have a matching source operand. */
9997 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
10000 /* Source 1 cannot be a constant. */
10001 if (CONSTANT_P (src1
))
10004 /* Source 1 cannot be a non-matching memory. */
10005 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
10011 /* Attempt to expand a unary operator. Make the expansion closer to the
10012 actual machine, then just general_operand, which will allow 2 separate
10013 memory references (one output, one input) in a single insn. */
10016 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
10019 int matching_memory
;
10020 rtx src
, dst
, op
, clob
;
10025 /* If the destination is memory, and we do not have matching source
10026 operands, do things in registers. */
10027 matching_memory
= 0;
10030 if (rtx_equal_p (dst
, src
))
10031 matching_memory
= 1;
10033 dst
= gen_reg_rtx (mode
);
10036 /* When source operand is memory, destination must match. */
10037 if (MEM_P (src
) && !matching_memory
)
10038 src
= force_reg (mode
, src
);
10040 /* Emit the instruction. */
10042 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
10043 if (reload_in_progress
|| code
== NOT
)
10045 /* Reload doesn't know about the flags register, and doesn't know that
10046 it doesn't want to clobber it. */
10047 gcc_assert (code
== NOT
);
10052 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10053 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
10056 /* Fix up the destination if needed. */
10057 if (dst
!= operands
[0])
10058 emit_move_insn (operands
[0], dst
);
10061 /* Return TRUE or FALSE depending on whether the unary operator meets the
10062 appropriate constraints. */
10065 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
10066 enum machine_mode mode ATTRIBUTE_UNUSED
,
10067 rtx operands
[2] ATTRIBUTE_UNUSED
)
10069 /* If one of operands is memory, source and destination must match. */
10070 if ((MEM_P (operands
[0])
10071 || MEM_P (operands
[1]))
10072 && ! rtx_equal_p (operands
[0], operands
[1]))
10077 /* Post-reload splitter for converting an SF or DFmode value in an
10078 SSE register into an unsigned SImode. */
10081 ix86_split_convert_uns_si_sse (rtx operands
[])
10083 enum machine_mode vecmode
;
10084 rtx value
, large
, zero_or_two31
, input
, two31
, x
;
10086 large
= operands
[1];
10087 zero_or_two31
= operands
[2];
10088 input
= operands
[3];
10089 two31
= operands
[4];
10090 vecmode
= GET_MODE (large
);
10091 value
= gen_rtx_REG (vecmode
, REGNO (operands
[0]));
10093 /* Load up the value into the low element. We must ensure that the other
10094 elements are valid floats -- zero is the easiest such value. */
10097 if (vecmode
== V4SFmode
)
10098 emit_insn (gen_vec_setv4sf_0 (value
, CONST0_RTX (V4SFmode
), input
));
10100 emit_insn (gen_sse2_loadlpd (value
, CONST0_RTX (V2DFmode
), input
));
10104 input
= gen_rtx_REG (vecmode
, REGNO (input
));
10105 emit_move_insn (value
, CONST0_RTX (vecmode
));
10106 if (vecmode
== V4SFmode
)
10107 emit_insn (gen_sse_movss (value
, value
, input
));
10109 emit_insn (gen_sse2_movsd (value
, value
, input
));
10112 emit_move_insn (large
, two31
);
10113 emit_move_insn (zero_or_two31
, MEM_P (two31
) ? large
: two31
);
10115 x
= gen_rtx_fmt_ee (LE
, vecmode
, large
, value
);
10116 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
10118 x
= gen_rtx_AND (vecmode
, zero_or_two31
, large
);
10119 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
10121 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
10122 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
10124 large
= gen_rtx_REG (V4SImode
, REGNO (large
));
10125 emit_insn (gen_ashlv4si3 (large
, large
, GEN_INT (31)));
10127 x
= gen_rtx_REG (V4SImode
, REGNO (value
));
10128 if (vecmode
== V4SFmode
)
10129 emit_insn (gen_sse2_cvttps2dq (x
, value
));
10131 emit_insn (gen_sse2_cvttpd2dq (x
, value
));
10134 emit_insn (gen_xorv4si3 (value
, value
, large
));
10137 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10138 Expects the 64-bit DImode to be supplied in a pair of integral
10139 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10140 -mfpmath=sse, !optimize_size only. */
10143 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
10145 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
10146 rtx int_xmm
, fp_xmm
;
10147 rtx biases
, exponents
;
10150 int_xmm
= gen_reg_rtx (V4SImode
);
10151 if (TARGET_INTER_UNIT_MOVES
)
10152 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
10153 else if (TARGET_SSE_SPLIT_REGS
)
10155 emit_insn (gen_rtx_CLOBBER (VOIDmode
, int_xmm
));
10156 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
10160 x
= gen_reg_rtx (V2DImode
);
10161 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
10162 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
10165 x
= gen_rtx_CONST_VECTOR (V4SImode
,
10166 gen_rtvec (4, GEN_INT (0x43300000UL
),
10167 GEN_INT (0x45300000UL
),
10168 const0_rtx
, const0_rtx
));
10169 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
10171 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10172 emit_insn (gen_sse2_punpckldq (int_xmm
, int_xmm
, exponents
));
10174 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10175 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10176 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10177 (0x1.0p84 + double(fp_value_hi_xmm)).
10178 Note these exponents differ by 32. */
10180 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
10182 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10183 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10184 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
10185 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
10186 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
10187 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
10188 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
10189 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
10190 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
10192 /* Add the upper and lower DFmode values together. */
10194 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
10197 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
10198 emit_insn (gen_sse2_unpckhpd (fp_xmm
, fp_xmm
, fp_xmm
));
10199 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
10202 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
10205 /* Convert an unsigned SImode value into a DFmode. Only currently used
10206 for SSE, but applicable anywhere. */
10209 ix86_expand_convert_uns_sidf_sse (rtx target
, rtx input
)
10211 REAL_VALUE_TYPE TWO31r
;
10214 x
= expand_simple_binop (SImode
, PLUS
, input
, GEN_INT (-2147483647 - 1),
10215 NULL
, 1, OPTAB_DIRECT
);
10217 fp
= gen_reg_rtx (DFmode
);
10218 emit_insn (gen_floatsidf2 (fp
, x
));
10220 real_ldexp (&TWO31r
, &dconst1
, 31);
10221 x
= const_double_from_real_value (TWO31r
, DFmode
);
10223 x
= expand_simple_binop (DFmode
, PLUS
, fp
, x
, target
, 0, OPTAB_DIRECT
);
10225 emit_move_insn (target
, x
);
10228 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10229 32-bit mode; otherwise we have a direct convert instruction. */
10232 ix86_expand_convert_sign_didf_sse (rtx target
, rtx input
)
10234 REAL_VALUE_TYPE TWO32r
;
10235 rtx fp_lo
, fp_hi
, x
;
10237 fp_lo
= gen_reg_rtx (DFmode
);
10238 fp_hi
= gen_reg_rtx (DFmode
);
10240 emit_insn (gen_floatsidf2 (fp_hi
, gen_highpart (SImode
, input
)));
10242 real_ldexp (&TWO32r
, &dconst1
, 32);
10243 x
= const_double_from_real_value (TWO32r
, DFmode
);
10244 fp_hi
= expand_simple_binop (DFmode
, MULT
, fp_hi
, x
, fp_hi
, 0, OPTAB_DIRECT
);
10246 ix86_expand_convert_uns_sidf_sse (fp_lo
, gen_lowpart (SImode
, input
));
10248 x
= expand_simple_binop (DFmode
, PLUS
, fp_hi
, fp_lo
, target
,
10251 emit_move_insn (target
, x
);
10254 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10255 For x86_32, -mfpmath=sse, !optimize_size only. */
10257 ix86_expand_convert_uns_sisf_sse (rtx target
, rtx input
)
10259 REAL_VALUE_TYPE ONE16r
;
10260 rtx fp_hi
, fp_lo
, int_hi
, int_lo
, x
;
10262 real_ldexp (&ONE16r
, &dconst1
, 16);
10263 x
= const_double_from_real_value (ONE16r
, SFmode
);
10264 int_lo
= expand_simple_binop (SImode
, AND
, input
, GEN_INT(0xffff),
10265 NULL
, 0, OPTAB_DIRECT
);
10266 int_hi
= expand_simple_binop (SImode
, LSHIFTRT
, input
, GEN_INT(16),
10267 NULL
, 0, OPTAB_DIRECT
);
10268 fp_hi
= gen_reg_rtx (SFmode
);
10269 fp_lo
= gen_reg_rtx (SFmode
);
10270 emit_insn (gen_floatsisf2 (fp_hi
, int_hi
));
10271 emit_insn (gen_floatsisf2 (fp_lo
, int_lo
));
10272 fp_hi
= expand_simple_binop (SFmode
, MULT
, fp_hi
, x
, fp_hi
,
10274 fp_hi
= expand_simple_binop (SFmode
, PLUS
, fp_hi
, fp_lo
, target
,
10276 if (!rtx_equal_p (target
, fp_hi
))
10277 emit_move_insn (target
, fp_hi
);
10280 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
10281 then replicate the value for all elements of the vector
10285 ix86_build_const_vector (enum machine_mode mode
, bool vect
, rtx value
)
10292 v
= gen_rtvec (4, value
, value
, value
, value
);
10294 v
= gen_rtvec (4, value
, CONST0_RTX (SFmode
),
10295 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
10296 return gen_rtx_CONST_VECTOR (V4SFmode
, v
);
10300 v
= gen_rtvec (2, value
, value
);
10302 v
= gen_rtvec (2, value
, CONST0_RTX (DFmode
));
10303 return gen_rtx_CONST_VECTOR (V2DFmode
, v
);
10306 gcc_unreachable ();
10310 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
10311 Create a mask for the sign bit in MODE for an SSE register. If VECT is
10312 true, then replicate the mask for all elements of the vector register.
10313 If INVERT is true, then create a mask excluding the sign bit. */
10316 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
10318 enum machine_mode vec_mode
;
10319 HOST_WIDE_INT hi
, lo
;
10324 /* Find the sign bit, sign extended to 2*HWI. */
10325 if (mode
== SFmode
)
10326 lo
= 0x80000000, hi
= lo
< 0;
10327 else if (HOST_BITS_PER_WIDE_INT
>= 64)
10328 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
10330 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
10333 lo
= ~lo
, hi
= ~hi
;
10335 /* Force this value into the low part of a fp vector constant. */
10336 mask
= immed_double_const (lo
, hi
, mode
== SFmode
? SImode
: DImode
);
10337 mask
= gen_lowpart (mode
, mask
);
10339 v
= ix86_build_const_vector (mode
, vect
, mask
);
10340 vec_mode
= (mode
== SFmode
) ? V4SFmode
: V2DFmode
;
10341 return force_reg (vec_mode
, v
);
10344 /* Generate code for floating point ABS or NEG. */
10347 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
10350 rtx mask
, set
, use
, clob
, dst
, src
;
10351 bool matching_memory
;
10352 bool use_sse
= false;
10353 bool vector_mode
= VECTOR_MODE_P (mode
);
10354 enum machine_mode elt_mode
= mode
;
10358 elt_mode
= GET_MODE_INNER (mode
);
10361 else if (TARGET_SSE_MATH
)
10362 use_sse
= SSE_FLOAT_MODE_P (mode
);
10364 /* NEG and ABS performed with SSE use bitwise mask operations.
10365 Create the appropriate mask now. */
10367 mask
= ix86_build_signbit_mask (elt_mode
, vector_mode
, code
== ABS
);
10374 /* If the destination is memory, and we don't have matching source
10375 operands or we're using the x87, do things in registers. */
10376 matching_memory
= false;
10379 if (use_sse
&& rtx_equal_p (dst
, src
))
10380 matching_memory
= true;
10382 dst
= gen_reg_rtx (mode
);
10384 if (MEM_P (src
) && !matching_memory
)
10385 src
= force_reg (mode
, src
);
10389 set
= gen_rtx_fmt_ee (code
== NEG
? XOR
: AND
, mode
, src
, mask
);
10390 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
10395 set
= gen_rtx_fmt_e (code
, mode
, src
);
10396 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
10399 use
= gen_rtx_USE (VOIDmode
, mask
);
10400 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10401 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
10402 gen_rtvec (3, set
, use
, clob
)));
10408 if (dst
!= operands
[0])
10409 emit_move_insn (operands
[0], dst
);
10412 /* Expand a copysign operation. Special case operand 0 being a constant. */
10415 ix86_expand_copysign (rtx operands
[])
10417 enum machine_mode mode
, vmode
;
10418 rtx dest
, op0
, op1
, mask
, nmask
;
10420 dest
= operands
[0];
10424 mode
= GET_MODE (dest
);
10425 vmode
= mode
== SFmode
? V4SFmode
: V2DFmode
;
10427 if (GET_CODE (op0
) == CONST_DOUBLE
)
10431 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
10432 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
10434 if (op0
== CONST0_RTX (mode
))
10435 op0
= CONST0_RTX (vmode
);
10438 if (mode
== SFmode
)
10439 v
= gen_rtvec (4, op0
, CONST0_RTX (SFmode
),
10440 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
10442 v
= gen_rtvec (2, op0
, CONST0_RTX (DFmode
));
10443 op0
= force_reg (vmode
, gen_rtx_CONST_VECTOR (vmode
, v
));
10446 mask
= ix86_build_signbit_mask (mode
, 0, 0);
10448 if (mode
== SFmode
)
10449 emit_insn (gen_copysignsf3_const (dest
, op0
, op1
, mask
));
10451 emit_insn (gen_copysigndf3_const (dest
, op0
, op1
, mask
));
10455 nmask
= ix86_build_signbit_mask (mode
, 0, 1);
10456 mask
= ix86_build_signbit_mask (mode
, 0, 0);
10458 if (mode
== SFmode
)
10459 emit_insn (gen_copysignsf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
10461 emit_insn (gen_copysigndf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
10465 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10466 be a constant, and so has already been expanded into a vector constant. */
10469 ix86_split_copysign_const (rtx operands
[])
10471 enum machine_mode mode
, vmode
;
10472 rtx dest
, op0
, op1
, mask
, x
;
10474 dest
= operands
[0];
10477 mask
= operands
[3];
10479 mode
= GET_MODE (dest
);
10480 vmode
= GET_MODE (mask
);
10482 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
10483 x
= gen_rtx_AND (vmode
, dest
, mask
);
10484 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10486 if (op0
!= CONST0_RTX (vmode
))
10488 x
= gen_rtx_IOR (vmode
, dest
, op0
);
10489 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10493 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10494 so we have to do two masks. */
10497 ix86_split_copysign_var (rtx operands
[])
10499 enum machine_mode mode
, vmode
;
10500 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
10502 dest
= operands
[0];
10503 scratch
= operands
[1];
10506 nmask
= operands
[4];
10507 mask
= operands
[5];
10509 mode
= GET_MODE (dest
);
10510 vmode
= GET_MODE (mask
);
10512 if (rtx_equal_p (op0
, op1
))
10514 /* Shouldn't happen often (it's useless, obviously), but when it does
10515 we'd generate incorrect code if we continue below. */
10516 emit_move_insn (dest
, op0
);
10520 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
10522 gcc_assert (REGNO (op1
) == REGNO (scratch
));
10524 x
= gen_rtx_AND (vmode
, scratch
, mask
);
10525 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
10528 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10529 x
= gen_rtx_NOT (vmode
, dest
);
10530 x
= gen_rtx_AND (vmode
, x
, op0
);
10531 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10535 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
10537 x
= gen_rtx_AND (vmode
, scratch
, mask
);
10539 else /* alternative 2,4 */
10541 gcc_assert (REGNO (mask
) == REGNO (scratch
));
10542 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
10543 x
= gen_rtx_AND (vmode
, scratch
, op1
);
10545 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
10547 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
10549 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10550 x
= gen_rtx_AND (vmode
, dest
, nmask
);
10552 else /* alternative 3,4 */
10554 gcc_assert (REGNO (nmask
) == REGNO (dest
));
10556 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10557 x
= gen_rtx_AND (vmode
, dest
, op0
);
10559 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10562 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
10563 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10566 /* Return TRUE or FALSE depending on whether the first SET in INSN
10567 has source and destination with matching CC modes, and that the
10568 CC mode is at least as constrained as REQ_MODE. */
10571 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
10574 enum machine_mode set_mode
;
10576 set
= PATTERN (insn
);
10577 if (GET_CODE (set
) == PARALLEL
)
10578 set
= XVECEXP (set
, 0, 0);
10579 gcc_assert (GET_CODE (set
) == SET
);
10580 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
10582 set_mode
= GET_MODE (SET_DEST (set
));
10586 if (req_mode
!= CCNOmode
10587 && (req_mode
!= CCmode
10588 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
10592 if (req_mode
== CCGCmode
)
10596 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
10600 if (req_mode
== CCZmode
)
10607 gcc_unreachable ();
10610 return (GET_MODE (SET_SRC (set
)) == set_mode
);
10613 /* Generate insn patterns to do an integer compare of OPERANDS. */
10616 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
10618 enum machine_mode cmpmode
;
10621 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
10622 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
10624 /* This is very simple, but making the interface the same as in the
10625 FP case makes the rest of the code easier. */
10626 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
10627 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
10629 /* Return the test that should be put into the flags user, i.e.
10630 the bcc, scc, or cmov instruction. */
10631 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
10634 /* Figure out whether to use ordered or unordered fp comparisons.
10635 Return the appropriate mode to use. */
10638 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
10640 /* ??? In order to make all comparisons reversible, we do all comparisons
10641 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10642 all forms trapping and nontrapping comparisons, we can make inequality
10643 comparisons trapping again, since it results in better code when using
10644 FCOM based compares. */
10645 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
10649 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
10651 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
10652 return ix86_fp_compare_mode (code
);
10655 /* Only zero flag is needed. */
10656 case EQ
: /* ZF=0 */
10657 case NE
: /* ZF!=0 */
10659 /* Codes needing carry flag. */
10660 case GEU
: /* CF=0 */
10661 case GTU
: /* CF=0 & ZF=0 */
10662 case LTU
: /* CF=1 */
10663 case LEU
: /* CF=1 | ZF=1 */
10665 /* Codes possibly doable only with sign flag when
10666 comparing against zero. */
10667 case GE
: /* SF=OF or SF=0 */
10668 case LT
: /* SF<>OF or SF=1 */
10669 if (op1
== const0_rtx
)
10672 /* For other cases Carry flag is not required. */
10674 /* Codes doable only with sign flag when comparing
10675 against zero, but we miss jump instruction for it
10676 so we need to use relational tests against overflow
10677 that thus needs to be zero. */
10678 case GT
: /* ZF=0 & SF=OF */
10679 case LE
: /* ZF=1 | SF<>OF */
10680 if (op1
== const0_rtx
)
10684 /* strcmp pattern do (use flags) and combine may ask us for proper
10689 gcc_unreachable ();
10693 /* Return the fixed registers used for condition codes. */
10696 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
10703 /* If two condition code modes are compatible, return a condition code
10704 mode which is compatible with both. Otherwise, return
10707 static enum machine_mode
10708 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
10713 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
10716 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
10717 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
10723 gcc_unreachable ();
10745 /* These are only compatible with themselves, which we already
10751 /* Return true if we should use an FCOMI instruction for this fp comparison. */
10754 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED
)
10756 enum rtx_code swapped_code
= swap_condition (code
);
10757 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
10758 || (ix86_fp_comparison_cost (swapped_code
)
10759 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
10762 /* Swap, force into registers, or otherwise massage the two operands
10763 to a fp comparison. The operands are updated in place; the new
10764 comparison code is returned. */
10766 static enum rtx_code
10767 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
10769 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
10770 rtx op0
= *pop0
, op1
= *pop1
;
10771 enum machine_mode op_mode
= GET_MODE (op0
);
10772 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
10774 /* All of the unordered compare instructions only work on registers.
10775 The same is true of the fcomi compare instructions. The XFmode
10776 compare instructions require registers except when comparing
10777 against zero or when converting operand 1 from fixed point to
10781 && (fpcmp_mode
== CCFPUmode
10782 || (op_mode
== XFmode
10783 && ! (standard_80387_constant_p (op0
) == 1
10784 || standard_80387_constant_p (op1
) == 1)
10785 && GET_CODE (op1
) != FLOAT
)
10786 || ix86_use_fcomi_compare (code
)))
10788 op0
= force_reg (op_mode
, op0
);
10789 op1
= force_reg (op_mode
, op1
);
10793 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10794 things around if they appear profitable, otherwise force op0
10795 into a register. */
10797 if (standard_80387_constant_p (op0
) == 0
10799 && ! (standard_80387_constant_p (op1
) == 0
10803 tmp
= op0
, op0
= op1
, op1
= tmp
;
10804 code
= swap_condition (code
);
10808 op0
= force_reg (op_mode
, op0
);
10810 if (CONSTANT_P (op1
))
10812 int tmp
= standard_80387_constant_p (op1
);
10814 op1
= validize_mem (force_const_mem (op_mode
, op1
));
10818 op1
= force_reg (op_mode
, op1
);
10821 op1
= force_reg (op_mode
, op1
);
10825 /* Try to rearrange the comparison to make it cheaper. */
10826 if (ix86_fp_comparison_cost (code
)
10827 > ix86_fp_comparison_cost (swap_condition (code
))
10828 && (REG_P (op1
) || !no_new_pseudos
))
10831 tmp
= op0
, op0
= op1
, op1
= tmp
;
10832 code
= swap_condition (code
);
10834 op0
= force_reg (op_mode
, op0
);
10842 /* Convert comparison codes we use to represent FP comparison to integer
10843 code that will result in proper branch. Return UNKNOWN if no such code
10847 ix86_fp_compare_code_to_integer (enum rtx_code code
)
10876 /* Split comparison code CODE into comparisons we can do using branch
10877 instructions. BYPASS_CODE is comparison code for branch that will
10878 branch around FIRST_CODE and SECOND_CODE. If some of branches
10879 is not required, set value to UNKNOWN.
10880 We never require more than two branches. */
10883 ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*bypass_code
,
10884 enum rtx_code
*first_code
,
10885 enum rtx_code
*second_code
)
10887 *first_code
= code
;
10888 *bypass_code
= UNKNOWN
;
10889 *second_code
= UNKNOWN
;
10891 /* The fcomi comparison sets flags as follows:
10901 case GT
: /* GTU - CF=0 & ZF=0 */
10902 case GE
: /* GEU - CF=0 */
10903 case ORDERED
: /* PF=0 */
10904 case UNORDERED
: /* PF=1 */
10905 case UNEQ
: /* EQ - ZF=1 */
10906 case UNLT
: /* LTU - CF=1 */
10907 case UNLE
: /* LEU - CF=1 | ZF=1 */
10908 case LTGT
: /* EQ - ZF=0 */
10910 case LT
: /* LTU - CF=1 - fails on unordered */
10911 *first_code
= UNLT
;
10912 *bypass_code
= UNORDERED
;
10914 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
10915 *first_code
= UNLE
;
10916 *bypass_code
= UNORDERED
;
10918 case EQ
: /* EQ - ZF=1 - fails on unordered */
10919 *first_code
= UNEQ
;
10920 *bypass_code
= UNORDERED
;
10922 case NE
: /* NE - ZF=0 - fails on unordered */
10923 *first_code
= LTGT
;
10924 *second_code
= UNORDERED
;
10926 case UNGE
: /* GEU - CF=0 - fails on unordered */
10928 *second_code
= UNORDERED
;
10930 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
10932 *second_code
= UNORDERED
;
10935 gcc_unreachable ();
10937 if (!TARGET_IEEE_FP
)
10939 *second_code
= UNKNOWN
;
10940 *bypass_code
= UNKNOWN
;
10944 /* Return cost of comparison done fcom + arithmetics operations on AX.
10945 All following functions do use number of instructions as a cost metrics.
10946 In future this should be tweaked to compute bytes for optimize_size and
10947 take into account performance of various instructions on various CPUs. */
10949 ix86_fp_comparison_arithmetics_cost (enum rtx_code code
)
10951 if (!TARGET_IEEE_FP
)
10953 /* The cost of code output by ix86_expand_fp_compare. */
10977 gcc_unreachable ();
10981 /* Return cost of comparison done using fcomi operation.
10982 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10984 ix86_fp_comparison_fcomi_cost (enum rtx_code code
)
10986 enum rtx_code bypass_code
, first_code
, second_code
;
10987 /* Return arbitrarily high cost when instruction is not supported - this
10988 prevents gcc from using it. */
10991 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10992 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 2;
10995 /* Return cost of comparison done using sahf operation.
10996 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10998 ix86_fp_comparison_sahf_cost (enum rtx_code code
)
11000 enum rtx_code bypass_code
, first_code
, second_code
;
11001 /* Return arbitrarily high cost when instruction is not preferred - this
11002 avoids gcc from using it. */
11003 if (!(TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_size
)))
11005 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11006 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 3;
11009 /* Compute cost of the comparison done using any method.
11010 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11012 ix86_fp_comparison_cost (enum rtx_code code
)
11014 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
11017 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
11018 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
11020 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
11021 if (min
> sahf_cost
)
11023 if (min
> fcomi_cost
)
11028 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11031 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
,
11032 rtx
*second_test
, rtx
*bypass_test
)
11034 enum machine_mode fpcmp_mode
, intcmp_mode
;
11036 int cost
= ix86_fp_comparison_cost (code
);
11037 enum rtx_code bypass_code
, first_code
, second_code
;
11039 fpcmp_mode
= ix86_fp_compare_mode (code
);
11040 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
11043 *second_test
= NULL_RTX
;
11045 *bypass_test
= NULL_RTX
;
11047 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11049 /* Do fcomi/sahf based test when profitable. */
11050 if ((TARGET_CMOVE
|| TARGET_SAHF
)
11051 && (bypass_code
== UNKNOWN
|| bypass_test
)
11052 && (second_code
== UNKNOWN
|| second_test
)
11053 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
11057 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
11058 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
11064 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
11065 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
11067 scratch
= gen_reg_rtx (HImode
);
11068 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
11069 emit_insn (gen_x86_sahf_1 (scratch
));
11072 /* The FP codes work out to act like unsigned. */
11073 intcmp_mode
= fpcmp_mode
;
11075 if (bypass_code
!= UNKNOWN
)
11076 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
11077 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11079 if (second_code
!= UNKNOWN
)
11080 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
11081 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11086 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11087 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
11088 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
11090 scratch
= gen_reg_rtx (HImode
);
11091 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
11093 /* In the unordered case, we have to check C2 for NaN's, which
11094 doesn't happen to work out to anything nice combination-wise.
11095 So do some bit twiddling on the value we've got in AH to come
11096 up with an appropriate set of condition codes. */
11098 intcmp_mode
= CCNOmode
;
11103 if (code
== GT
|| !TARGET_IEEE_FP
)
11105 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
11110 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11111 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
11112 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
11113 intcmp_mode
= CCmode
;
11119 if (code
== LT
&& TARGET_IEEE_FP
)
11121 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11122 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
11123 intcmp_mode
= CCmode
;
11128 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
11134 if (code
== GE
|| !TARGET_IEEE_FP
)
11136 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
11141 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11142 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
11149 if (code
== LE
&& TARGET_IEEE_FP
)
11151 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11152 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
11153 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
11154 intcmp_mode
= CCmode
;
11159 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
11165 if (code
== EQ
&& TARGET_IEEE_FP
)
11167 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11168 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
11169 intcmp_mode
= CCmode
;
11174 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
11181 if (code
== NE
&& TARGET_IEEE_FP
)
11183 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11184 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
11190 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
11196 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
11200 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
11205 gcc_unreachable ();
11209 /* Return the test that should be put into the flags user, i.e.
11210 the bcc, scc, or cmov instruction. */
11211 return gen_rtx_fmt_ee (code
, VOIDmode
,
11212 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11217 ix86_expand_compare (enum rtx_code code
, rtx
*second_test
, rtx
*bypass_test
)
11220 op0
= ix86_compare_op0
;
11221 op1
= ix86_compare_op1
;
11224 *second_test
= NULL_RTX
;
11226 *bypass_test
= NULL_RTX
;
11228 if (ix86_compare_emitted
)
11230 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, ix86_compare_emitted
, const0_rtx
);
11231 ix86_compare_emitted
= NULL_RTX
;
11233 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
11234 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
11235 second_test
, bypass_test
);
11237 ret
= ix86_expand_int_compare (code
, op0
, op1
);
11242 /* Return true if the CODE will result in nontrivial jump sequence. */
11244 ix86_fp_jump_nontrivial_p (enum rtx_code code
)
11246 enum rtx_code bypass_code
, first_code
, second_code
;
11249 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11250 return bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
;
11254 ix86_expand_branch (enum rtx_code code
, rtx label
)
11258 /* If we have emitted a compare insn, go straight to simple.
11259 ix86_expand_compare won't emit anything if ix86_compare_emitted
11261 if (ix86_compare_emitted
)
11264 switch (GET_MODE (ix86_compare_op0
))
11270 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
11271 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11272 gen_rtx_LABEL_REF (VOIDmode
, label
),
11274 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
11283 enum rtx_code bypass_code
, first_code
, second_code
;
11285 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
11286 &ix86_compare_op1
);
11288 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11290 /* Check whether we will use the natural sequence with one jump. If
11291 so, we can expand jump early. Otherwise delay expansion by
11292 creating compound insn to not confuse optimizers. */
11293 if (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
11296 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
11297 gen_rtx_LABEL_REF (VOIDmode
, label
),
11298 pc_rtx
, NULL_RTX
, NULL_RTX
);
11302 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
11303 ix86_compare_op0
, ix86_compare_op1
);
11304 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11305 gen_rtx_LABEL_REF (VOIDmode
, label
),
11307 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
11309 use_fcomi
= ix86_use_fcomi_compare (code
);
11310 vec
= rtvec_alloc (3 + !use_fcomi
);
11311 RTVEC_ELT (vec
, 0) = tmp
;
11313 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
11315 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
11318 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
11320 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
11329 /* Expand DImode branch into multiple compare+branch. */
11331 rtx lo
[2], hi
[2], label2
;
11332 enum rtx_code code1
, code2
, code3
;
11333 enum machine_mode submode
;
11335 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
11337 tmp
= ix86_compare_op0
;
11338 ix86_compare_op0
= ix86_compare_op1
;
11339 ix86_compare_op1
= tmp
;
11340 code
= swap_condition (code
);
11342 if (GET_MODE (ix86_compare_op0
) == DImode
)
11344 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
11345 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
11350 split_ti (&ix86_compare_op0
, 1, lo
+0, hi
+0);
11351 split_ti (&ix86_compare_op1
, 1, lo
+1, hi
+1);
11355 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11356 avoid two branches. This costs one extra insn, so disable when
11357 optimizing for size. */
11359 if ((code
== EQ
|| code
== NE
)
11361 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
11366 if (hi
[1] != const0_rtx
)
11367 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
11368 NULL_RTX
, 0, OPTAB_WIDEN
);
11371 if (lo
[1] != const0_rtx
)
11372 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
11373 NULL_RTX
, 0, OPTAB_WIDEN
);
11375 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
11376 NULL_RTX
, 0, OPTAB_WIDEN
);
11378 ix86_compare_op0
= tmp
;
11379 ix86_compare_op1
= const0_rtx
;
11380 ix86_expand_branch (code
, label
);
11384 /* Otherwise, if we are doing less-than or greater-or-equal-than,
11385 op1 is a constant and the low word is zero, then we can just
11386 examine the high word. */
11388 if (CONST_INT_P (hi
[1]) && lo
[1] == const0_rtx
)
11391 case LT
: case LTU
: case GE
: case GEU
:
11392 ix86_compare_op0
= hi
[0];
11393 ix86_compare_op1
= hi
[1];
11394 ix86_expand_branch (code
, label
);
11400 /* Otherwise, we need two or three jumps. */
11402 label2
= gen_label_rtx ();
11405 code2
= swap_condition (code
);
11406 code3
= unsigned_condition (code
);
11410 case LT
: case GT
: case LTU
: case GTU
:
11413 case LE
: code1
= LT
; code2
= GT
; break;
11414 case GE
: code1
= GT
; code2
= LT
; break;
11415 case LEU
: code1
= LTU
; code2
= GTU
; break;
11416 case GEU
: code1
= GTU
; code2
= LTU
; break;
11418 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
11419 case NE
: code2
= UNKNOWN
; break;
11422 gcc_unreachable ();
11427 * if (hi(a) < hi(b)) goto true;
11428 * if (hi(a) > hi(b)) goto false;
11429 * if (lo(a) < lo(b)) goto true;
11433 ix86_compare_op0
= hi
[0];
11434 ix86_compare_op1
= hi
[1];
11436 if (code1
!= UNKNOWN
)
11437 ix86_expand_branch (code1
, label
);
11438 if (code2
!= UNKNOWN
)
11439 ix86_expand_branch (code2
, label2
);
11441 ix86_compare_op0
= lo
[0];
11442 ix86_compare_op1
= lo
[1];
11443 ix86_expand_branch (code3
, label
);
11445 if (code2
!= UNKNOWN
)
11446 emit_label (label2
);
11451 gcc_unreachable ();
11455 /* Split branch based on floating point condition. */
11457 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
11458 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
11460 rtx second
, bypass
;
11461 rtx label
= NULL_RTX
;
11463 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
11466 if (target2
!= pc_rtx
)
11469 code
= reverse_condition_maybe_unordered (code
);
11474 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
11475 tmp
, &second
, &bypass
);
11477 /* Remove pushed operand from stack. */
11479 ix86_free_from_memory (GET_MODE (pushed
));
11481 if (split_branch_probability
>= 0)
11483 /* Distribute the probabilities across the jumps.
11484 Assume the BYPASS and SECOND to be always test
11486 probability
= split_branch_probability
;
11488 /* Value of 1 is low enough to make no need for probability
11489 to be updated. Later we may run some experiments and see
11490 if unordered values are more frequent in practice. */
11492 bypass_probability
= 1;
11494 second_probability
= 1;
11496 if (bypass
!= NULL_RTX
)
11498 label
= gen_label_rtx ();
11499 i
= emit_jump_insn (gen_rtx_SET
11501 gen_rtx_IF_THEN_ELSE (VOIDmode
,
11503 gen_rtx_LABEL_REF (VOIDmode
,
11506 if (bypass_probability
>= 0)
11508 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11509 GEN_INT (bypass_probability
),
11512 i
= emit_jump_insn (gen_rtx_SET
11514 gen_rtx_IF_THEN_ELSE (VOIDmode
,
11515 condition
, target1
, target2
)));
11516 if (probability
>= 0)
11518 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11519 GEN_INT (probability
),
11521 if (second
!= NULL_RTX
)
11523 i
= emit_jump_insn (gen_rtx_SET
11525 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
11527 if (second_probability
>= 0)
11529 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11530 GEN_INT (second_probability
),
11533 if (label
!= NULL_RTX
)
11534 emit_label (label
);
11538 ix86_expand_setcc (enum rtx_code code
, rtx dest
)
11540 rtx ret
, tmp
, tmpreg
, equiv
;
11541 rtx second_test
, bypass_test
;
11543 if (GET_MODE (ix86_compare_op0
) == (TARGET_64BIT
? TImode
: DImode
))
11544 return 0; /* FAIL */
11546 gcc_assert (GET_MODE (dest
) == QImode
);
11548 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11549 PUT_MODE (ret
, QImode
);
11554 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
11555 if (bypass_test
|| second_test
)
11557 rtx test
= second_test
;
11559 rtx tmp2
= gen_reg_rtx (QImode
);
11562 gcc_assert (!second_test
);
11563 test
= bypass_test
;
11565 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
11567 PUT_MODE (test
, QImode
);
11568 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
11571 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
11573 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
11576 /* Attach a REG_EQUAL note describing the comparison result. */
11577 if (ix86_compare_op0
&& ix86_compare_op1
)
11579 equiv
= simplify_gen_relational (code
, QImode
,
11580 GET_MODE (ix86_compare_op0
),
11581 ix86_compare_op0
, ix86_compare_op1
);
11582 set_unique_reg_note (get_last_insn (), REG_EQUAL
, equiv
);
11585 return 1; /* DONE */
11588 /* Expand comparison setting or clearing carry flag. Return true when
11589 successful and set pop for the operation. */
11591 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
11593 enum machine_mode mode
=
11594 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
11596 /* Do not handle DImode compares that go through special path. Also we can't
11597 deal with FP compares yet. This is possible to add. */
11598 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
11600 if (FLOAT_MODE_P (mode
))
11602 rtx second_test
= NULL
, bypass_test
= NULL
;
11603 rtx compare_op
, compare_seq
;
11605 /* Shortcut: following common codes never translate into carry flag compares. */
11606 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
11607 || code
== ORDERED
|| code
== UNORDERED
)
11610 /* These comparisons require zero flag; swap operands so they won't. */
11611 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
11612 && !TARGET_IEEE_FP
)
11617 code
= swap_condition (code
);
11620 /* Try to expand the comparison and verify that we end up with carry flag
11621 based comparison. This is fails to be true only when we decide to expand
11622 comparison using arithmetic that is not too common scenario. */
11624 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
11625 &second_test
, &bypass_test
);
11626 compare_seq
= get_insns ();
11629 if (second_test
|| bypass_test
)
11631 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
11632 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
11633 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
11635 code
= GET_CODE (compare_op
);
11636 if (code
!= LTU
&& code
!= GEU
)
11638 emit_insn (compare_seq
);
11642 if (!INTEGRAL_MODE_P (mode
))
11650 /* Convert a==0 into (unsigned)a<1. */
11653 if (op1
!= const0_rtx
)
11656 code
= (code
== EQ
? LTU
: GEU
);
11659 /* Convert a>b into b<a or a>=b-1. */
11662 if (CONST_INT_P (op1
))
11664 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
11665 /* Bail out on overflow. We still can swap operands but that
11666 would force loading of the constant into register. */
11667 if (op1
== const0_rtx
11668 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
11670 code
= (code
== GTU
? GEU
: LTU
);
11677 code
= (code
== GTU
? LTU
: GEU
);
11681 /* Convert a>=0 into (unsigned)a<0x80000000. */
11684 if (mode
== DImode
|| op1
!= const0_rtx
)
11686 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
11687 code
= (code
== LT
? GEU
: LTU
);
11691 if (mode
== DImode
|| op1
!= constm1_rtx
)
11693 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
11694 code
= (code
== LE
? GEU
: LTU
);
11700 /* Swapping operands may cause constant to appear as first operand. */
11701 if (!nonimmediate_operand (op0
, VOIDmode
))
11703 if (no_new_pseudos
)
11705 op0
= force_reg (mode
, op0
);
11707 ix86_compare_op0
= op0
;
11708 ix86_compare_op1
= op1
;
11709 *pop
= ix86_expand_compare (code
, NULL
, NULL
);
11710 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
11715 ix86_expand_int_movcc (rtx operands
[])
11717 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
11718 rtx compare_seq
, compare_op
;
11719 rtx second_test
, bypass_test
;
11720 enum machine_mode mode
= GET_MODE (operands
[0]);
11721 bool sign_bit_compare_p
= false;;
11724 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11725 compare_seq
= get_insns ();
11728 compare_code
= GET_CODE (compare_op
);
11730 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
11731 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
11732 sign_bit_compare_p
= true;
11734 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11735 HImode insns, we'd be swallowed in word prefix ops. */
11737 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
11738 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
11739 && CONST_INT_P (operands
[2])
11740 && CONST_INT_P (operands
[3]))
11742 rtx out
= operands
[0];
11743 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
11744 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
11745 HOST_WIDE_INT diff
;
11748 /* Sign bit compares are better done using shifts than we do by using
11750 if (sign_bit_compare_p
11751 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
11752 ix86_compare_op1
, &compare_op
))
11754 /* Detect overlap between destination and compare sources. */
11757 if (!sign_bit_compare_p
)
11759 bool fpcmp
= false;
11761 compare_code
= GET_CODE (compare_op
);
11763 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
11764 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
11767 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
11770 /* To simplify rest of code, restrict to the GEU case. */
11771 if (compare_code
== LTU
)
11773 HOST_WIDE_INT tmp
= ct
;
11776 compare_code
= reverse_condition (compare_code
);
11777 code
= reverse_condition (code
);
11782 PUT_CODE (compare_op
,
11783 reverse_condition_maybe_unordered
11784 (GET_CODE (compare_op
)));
11786 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
11790 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
11791 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
11792 tmp
= gen_reg_rtx (mode
);
11794 if (mode
== DImode
)
11795 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
11797 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
11801 if (code
== GT
|| code
== GE
)
11802 code
= reverse_condition (code
);
11805 HOST_WIDE_INT tmp
= ct
;
11810 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
11811 ix86_compare_op1
, VOIDmode
, 0, -1);
11824 tmp
= expand_simple_binop (mode
, PLUS
,
11826 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11837 tmp
= expand_simple_binop (mode
, IOR
,
11839 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11841 else if (diff
== -1 && ct
)
11851 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
11853 tmp
= expand_simple_binop (mode
, PLUS
,
11854 copy_rtx (tmp
), GEN_INT (cf
),
11855 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11863 * andl cf - ct, dest
11873 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
11876 tmp
= expand_simple_binop (mode
, AND
,
11878 gen_int_mode (cf
- ct
, mode
),
11879 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11881 tmp
= expand_simple_binop (mode
, PLUS
,
11882 copy_rtx (tmp
), GEN_INT (ct
),
11883 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11886 if (!rtx_equal_p (tmp
, out
))
11887 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
11889 return 1; /* DONE */
11895 tmp
= ct
, ct
= cf
, cf
= tmp
;
11897 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
11899 /* We may be reversing unordered compare to normal compare, that
11900 is not valid in general (we may convert non-trapping condition
11901 to trapping one), however on i386 we currently emit all
11902 comparisons unordered. */
11903 compare_code
= reverse_condition_maybe_unordered (compare_code
);
11904 code
= reverse_condition_maybe_unordered (code
);
11908 compare_code
= reverse_condition (compare_code
);
11909 code
= reverse_condition (code
);
11913 compare_code
= UNKNOWN
;
11914 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
11915 && CONST_INT_P (ix86_compare_op1
))
11917 if (ix86_compare_op1
== const0_rtx
11918 && (code
== LT
|| code
== GE
))
11919 compare_code
= code
;
11920 else if (ix86_compare_op1
== constm1_rtx
)
11924 else if (code
== GT
)
11929 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11930 if (compare_code
!= UNKNOWN
11931 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
11932 && (cf
== -1 || ct
== -1))
11934 /* If lea code below could be used, only optimize
11935 if it results in a 2 insn sequence. */
11937 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
11938 || diff
== 3 || diff
== 5 || diff
== 9)
11939 || (compare_code
== LT
&& ct
== -1)
11940 || (compare_code
== GE
&& cf
== -1))
11943 * notl op1 (if necessary)
11951 code
= reverse_condition (code
);
11954 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11955 ix86_compare_op1
, VOIDmode
, 0, -1);
11957 out
= expand_simple_binop (mode
, IOR
,
11959 out
, 1, OPTAB_DIRECT
);
11960 if (out
!= operands
[0])
11961 emit_move_insn (operands
[0], out
);
11963 return 1; /* DONE */
11968 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
11969 || diff
== 3 || diff
== 5 || diff
== 9)
11970 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
11972 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
11978 * lea cf(dest*(ct-cf)),dest
11982 * This also catches the degenerate setcc-only case.
11988 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11989 ix86_compare_op1
, VOIDmode
, 0, 1);
11992 /* On x86_64 the lea instruction operates on Pmode, so we need
11993 to get arithmetics done in proper mode to match. */
11995 tmp
= copy_rtx (out
);
11999 out1
= copy_rtx (out
);
12000 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
12004 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
12010 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
12013 if (!rtx_equal_p (tmp
, out
))
12016 out
= force_operand (tmp
, copy_rtx (out
));
12018 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
12020 if (!rtx_equal_p (out
, operands
[0]))
12021 emit_move_insn (operands
[0], copy_rtx (out
));
12023 return 1; /* DONE */
12027 * General case: Jumpful:
12028 * xorl dest,dest cmpl op1, op2
12029 * cmpl op1, op2 movl ct, dest
12030 * setcc dest jcc 1f
12031 * decl dest movl cf, dest
12032 * andl (cf-ct),dest 1:
12035 * Size 20. Size 14.
12037 * This is reasonably steep, but branch mispredict costs are
12038 * high on modern cpus, so consider failing only if optimizing
12042 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
12043 && BRANCH_COST
>= 2)
12049 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
12050 /* We may be reversing unordered compare to normal compare,
12051 that is not valid in general (we may convert non-trapping
12052 condition to trapping one), however on i386 we currently
12053 emit all comparisons unordered. */
12054 code
= reverse_condition_maybe_unordered (code
);
12057 code
= reverse_condition (code
);
12058 if (compare_code
!= UNKNOWN
)
12059 compare_code
= reverse_condition (compare_code
);
12063 if (compare_code
!= UNKNOWN
)
12065 /* notl op1 (if needed)
12070 For x < 0 (resp. x <= -1) there will be no notl,
12071 so if possible swap the constants to get rid of the
12073 True/false will be -1/0 while code below (store flag
12074 followed by decrement) is 0/-1, so the constants need
12075 to be exchanged once more. */
12077 if (compare_code
== GE
|| !cf
)
12079 code
= reverse_condition (code
);
12084 HOST_WIDE_INT tmp
= cf
;
12089 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12090 ix86_compare_op1
, VOIDmode
, 0, -1);
12094 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12095 ix86_compare_op1
, VOIDmode
, 0, 1);
12097 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
12098 copy_rtx (out
), 1, OPTAB_DIRECT
);
12101 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
12102 gen_int_mode (cf
- ct
, mode
),
12103 copy_rtx (out
), 1, OPTAB_DIRECT
);
12105 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
12106 copy_rtx (out
), 1, OPTAB_DIRECT
);
12107 if (!rtx_equal_p (out
, operands
[0]))
12108 emit_move_insn (operands
[0], copy_rtx (out
));
12110 return 1; /* DONE */
12114 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
12116 /* Try a few things more with specific constants and a variable. */
12119 rtx var
, orig_out
, out
, tmp
;
12121 if (BRANCH_COST
<= 2)
12122 return 0; /* FAIL */
12124 /* If one of the two operands is an interesting constant, load a
12125 constant with the above and mask it in with a logical operation. */
12127 if (CONST_INT_P (operands
[2]))
12130 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
12131 operands
[3] = constm1_rtx
, op
= and_optab
;
12132 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
12133 operands
[3] = const0_rtx
, op
= ior_optab
;
12135 return 0; /* FAIL */
12137 else if (CONST_INT_P (operands
[3]))
12140 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
12141 operands
[2] = constm1_rtx
, op
= and_optab
;
12142 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
12143 operands
[2] = const0_rtx
, op
= ior_optab
;
12145 return 0; /* FAIL */
12148 return 0; /* FAIL */
12150 orig_out
= operands
[0];
12151 tmp
= gen_reg_rtx (mode
);
12154 /* Recurse to get the constant loaded. */
12155 if (ix86_expand_int_movcc (operands
) == 0)
12156 return 0; /* FAIL */
12158 /* Mask in the interesting variable. */
12159 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
12161 if (!rtx_equal_p (out
, orig_out
))
12162 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
12164 return 1; /* DONE */
12168 * For comparison with above,
12178 if (! nonimmediate_operand (operands
[2], mode
))
12179 operands
[2] = force_reg (mode
, operands
[2]);
12180 if (! nonimmediate_operand (operands
[3], mode
))
12181 operands
[3] = force_reg (mode
, operands
[3]);
12183 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
12185 rtx tmp
= gen_reg_rtx (mode
);
12186 emit_move_insn (tmp
, operands
[3]);
12189 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
12191 rtx tmp
= gen_reg_rtx (mode
);
12192 emit_move_insn (tmp
, operands
[2]);
12196 if (! register_operand (operands
[2], VOIDmode
)
12198 || ! register_operand (operands
[3], VOIDmode
)))
12199 operands
[2] = force_reg (mode
, operands
[2]);
12202 && ! register_operand (operands
[3], VOIDmode
))
12203 operands
[3] = force_reg (mode
, operands
[3]);
12205 emit_insn (compare_seq
);
12206 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12207 gen_rtx_IF_THEN_ELSE (mode
,
12208 compare_op
, operands
[2],
12211 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
12212 gen_rtx_IF_THEN_ELSE (mode
,
12214 copy_rtx (operands
[3]),
12215 copy_rtx (operands
[0]))));
12217 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
12218 gen_rtx_IF_THEN_ELSE (mode
,
12220 copy_rtx (operands
[2]),
12221 copy_rtx (operands
[0]))));
12223 return 1; /* DONE */
12226 /* Swap, force into registers, or otherwise massage the two operands
12227 to an sse comparison with a mask result. Thus we differ a bit from
12228 ix86_prepare_fp_compare_args which expects to produce a flags result.
12230 The DEST operand exists to help determine whether to commute commutative
12231 operators. The POP0/POP1 operands are updated in place. The new
12232 comparison code is returned, or UNKNOWN if not implementable. */
12234 static enum rtx_code
12235 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
12236 rtx
*pop0
, rtx
*pop1
)
12244 /* We have no LTGT as an operator. We could implement it with
12245 NE & ORDERED, but this requires an extra temporary. It's
12246 not clear that it's worth it. */
12253 /* These are supported directly. */
12260 /* For commutative operators, try to canonicalize the destination
12261 operand to be first in the comparison - this helps reload to
12262 avoid extra moves. */
12263 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
12271 /* These are not supported directly. Swap the comparison operands
12272 to transform into something that is supported. */
12276 code
= swap_condition (code
);
12280 gcc_unreachable ();
12286 /* Detect conditional moves that exactly match min/max operational
12287 semantics. Note that this is IEEE safe, as long as we don't
12288 interchange the operands.
12290 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12291 and TRUE if the operation is successful and instructions are emitted. */
12294 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
12295 rtx cmp_op1
, rtx if_true
, rtx if_false
)
12297 enum machine_mode mode
;
12303 else if (code
== UNGE
)
12306 if_true
= if_false
;
12312 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
12314 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
12319 mode
= GET_MODE (dest
);
12321 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12322 but MODE may be a vector mode and thus not appropriate. */
12323 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
12325 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
12328 if_true
= force_reg (mode
, if_true
);
12329 v
= gen_rtvec (2, if_true
, if_false
);
12330 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
12334 code
= is_min
? SMIN
: SMAX
;
12335 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
12338 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
12342 /* Expand an sse vector comparison. Return the register with the result. */
12345 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
12346 rtx op_true
, rtx op_false
)
12348 enum machine_mode mode
= GET_MODE (dest
);
12351 cmp_op0
= force_reg (mode
, cmp_op0
);
12352 if (!nonimmediate_operand (cmp_op1
, mode
))
12353 cmp_op1
= force_reg (mode
, cmp_op1
);
12356 || reg_overlap_mentioned_p (dest
, op_true
)
12357 || reg_overlap_mentioned_p (dest
, op_false
))
12358 dest
= gen_reg_rtx (mode
);
12360 x
= gen_rtx_fmt_ee (code
, mode
, cmp_op0
, cmp_op1
);
12361 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12366 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12367 operations. This is used for both scalar and vector conditional moves. */
12370 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
12372 enum machine_mode mode
= GET_MODE (dest
);
12375 if (op_false
== CONST0_RTX (mode
))
12377 op_true
= force_reg (mode
, op_true
);
12378 x
= gen_rtx_AND (mode
, cmp
, op_true
);
12379 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12381 else if (op_true
== CONST0_RTX (mode
))
12383 op_false
= force_reg (mode
, op_false
);
12384 x
= gen_rtx_NOT (mode
, cmp
);
12385 x
= gen_rtx_AND (mode
, x
, op_false
);
12386 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12390 op_true
= force_reg (mode
, op_true
);
12391 op_false
= force_reg (mode
, op_false
);
12393 t2
= gen_reg_rtx (mode
);
12395 t3
= gen_reg_rtx (mode
);
12399 x
= gen_rtx_AND (mode
, op_true
, cmp
);
12400 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
12402 x
= gen_rtx_NOT (mode
, cmp
);
12403 x
= gen_rtx_AND (mode
, x
, op_false
);
12404 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
12406 x
= gen_rtx_IOR (mode
, t3
, t2
);
12407 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12411 /* Expand a floating-point conditional move. Return true if successful. */
12414 ix86_expand_fp_movcc (rtx operands
[])
12416 enum machine_mode mode
= GET_MODE (operands
[0]);
12417 enum rtx_code code
= GET_CODE (operands
[1]);
12418 rtx tmp
, compare_op
, second_test
, bypass_test
;
12420 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
12422 enum machine_mode cmode
;
12424 /* Since we've no cmove for sse registers, don't force bad register
12425 allocation just to gain access to it. Deny movcc when the
12426 comparison mode doesn't match the move mode. */
12427 cmode
= GET_MODE (ix86_compare_op0
);
12428 if (cmode
== VOIDmode
)
12429 cmode
= GET_MODE (ix86_compare_op1
);
12433 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
12435 &ix86_compare_op1
);
12436 if (code
== UNKNOWN
)
12439 if (ix86_expand_sse_fp_minmax (operands
[0], code
, ix86_compare_op0
,
12440 ix86_compare_op1
, operands
[2],
12444 tmp
= ix86_expand_sse_cmp (operands
[0], code
, ix86_compare_op0
,
12445 ix86_compare_op1
, operands
[2], operands
[3]);
12446 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
12450 /* The floating point conditional move instructions don't directly
12451 support conditions resulting from a signed integer comparison. */
12453 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12455 /* The floating point conditional move instructions don't directly
12456 support signed integer comparisons. */
12458 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
12460 gcc_assert (!second_test
&& !bypass_test
);
12461 tmp
= gen_reg_rtx (QImode
);
12462 ix86_expand_setcc (code
, tmp
);
12464 ix86_compare_op0
= tmp
;
12465 ix86_compare_op1
= const0_rtx
;
12466 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12468 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
12470 tmp
= gen_reg_rtx (mode
);
12471 emit_move_insn (tmp
, operands
[3]);
12474 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
12476 tmp
= gen_reg_rtx (mode
);
12477 emit_move_insn (tmp
, operands
[2]);
12481 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12482 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
12483 operands
[2], operands
[3])));
12485 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12486 gen_rtx_IF_THEN_ELSE (mode
, bypass_test
,
12487 operands
[3], operands
[0])));
12489 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12490 gen_rtx_IF_THEN_ELSE (mode
, second_test
,
12491 operands
[2], operands
[0])));
12496 /* Expand a floating-point vector conditional move; a vcond operation
12497 rather than a movcc operation. */
12500 ix86_expand_fp_vcond (rtx operands
[])
12502 enum rtx_code code
= GET_CODE (operands
[3]);
12505 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
12506 &operands
[4], &operands
[5]);
12507 if (code
== UNKNOWN
)
12510 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
12511 operands
[5], operands
[1], operands
[2]))
12514 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
12515 operands
[1], operands
[2]);
12516 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
12520 /* Expand a signed integral vector conditional move. */
12523 ix86_expand_int_vcond (rtx operands
[])
12525 enum machine_mode mode
= GET_MODE (operands
[0]);
12526 enum rtx_code code
= GET_CODE (operands
[3]);
12527 bool negate
= false;
12530 cop0
= operands
[4];
12531 cop1
= operands
[5];
12533 /* Canonicalize the comparison to EQ, GT, GTU. */
12544 code
= reverse_condition (code
);
12550 code
= reverse_condition (code
);
12556 code
= swap_condition (code
);
12557 x
= cop0
, cop0
= cop1
, cop1
= x
;
12561 gcc_unreachable ();
12564 /* Unsigned parallel compare is not supported by the hardware. Play some
12565 tricks to turn this into a signed comparison against 0. */
12568 cop0
= force_reg (mode
, cop0
);
12576 /* Perform a parallel modulo subtraction. */
12577 t1
= gen_reg_rtx (mode
);
12578 emit_insn (gen_subv4si3 (t1
, cop0
, cop1
));
12580 /* Extract the original sign bit of op0. */
12581 mask
= GEN_INT (-0x80000000);
12582 mask
= gen_rtx_CONST_VECTOR (mode
,
12583 gen_rtvec (4, mask
, mask
, mask
, mask
));
12584 mask
= force_reg (mode
, mask
);
12585 t2
= gen_reg_rtx (mode
);
12586 emit_insn (gen_andv4si3 (t2
, cop0
, mask
));
12588 /* XOR it back into the result of the subtraction. This results
12589 in the sign bit set iff we saw unsigned underflow. */
12590 x
= gen_reg_rtx (mode
);
12591 emit_insn (gen_xorv4si3 (x
, t1
, t2
));
12599 /* Perform a parallel unsigned saturating subtraction. */
12600 x
= gen_reg_rtx (mode
);
12601 emit_insn (gen_rtx_SET (VOIDmode
, x
,
12602 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
12609 gcc_unreachable ();
12613 cop1
= CONST0_RTX (mode
);
12616 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
12617 operands
[1+negate
], operands
[2-negate
]);
12619 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
12620 operands
[2-negate
]);
12624 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12625 true if we should do zero extension, else sign extension. HIGH_P is
12626 true if we want the N/2 high elements, else the low elements. */
12629 ix86_expand_sse_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
12631 enum machine_mode imode
= GET_MODE (operands
[1]);
12632 rtx (*unpack
)(rtx
, rtx
, rtx
);
12639 unpack
= gen_vec_interleave_highv16qi
;
12641 unpack
= gen_vec_interleave_lowv16qi
;
12645 unpack
= gen_vec_interleave_highv8hi
;
12647 unpack
= gen_vec_interleave_lowv8hi
;
12651 unpack
= gen_vec_interleave_highv4si
;
12653 unpack
= gen_vec_interleave_lowv4si
;
12656 gcc_unreachable ();
12659 dest
= gen_lowpart (imode
, operands
[0]);
12662 se
= force_reg (imode
, CONST0_RTX (imode
));
12664 se
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
12665 operands
[1], pc_rtx
, pc_rtx
);
12667 emit_insn (unpack (dest
, operands
[1], se
));
12670 /* Expand conditional increment or decrement using adb/sbb instructions.
12671 The default case using setcc followed by the conditional move can be
12672 done by generic code. */
12674 ix86_expand_int_addcc (rtx operands
[])
12676 enum rtx_code code
= GET_CODE (operands
[1]);
12678 rtx val
= const0_rtx
;
12679 bool fpcmp
= false;
12680 enum machine_mode mode
= GET_MODE (operands
[0]);
12682 if (operands
[3] != const1_rtx
12683 && operands
[3] != constm1_rtx
)
12685 if (!ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
12686 ix86_compare_op1
, &compare_op
))
12688 code
= GET_CODE (compare_op
);
12690 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
12691 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
12694 code
= ix86_fp_compare_code_to_integer (code
);
12701 PUT_CODE (compare_op
,
12702 reverse_condition_maybe_unordered
12703 (GET_CODE (compare_op
)));
12705 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
12707 PUT_MODE (compare_op
, mode
);
12709 /* Construct either adc or sbb insn. */
12710 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
12712 switch (GET_MODE (operands
[0]))
12715 emit_insn (gen_subqi3_carry (operands
[0], operands
[2], val
, compare_op
));
12718 emit_insn (gen_subhi3_carry (operands
[0], operands
[2], val
, compare_op
));
12721 emit_insn (gen_subsi3_carry (operands
[0], operands
[2], val
, compare_op
));
12724 emit_insn (gen_subdi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
12727 gcc_unreachable ();
12732 switch (GET_MODE (operands
[0]))
12735 emit_insn (gen_addqi3_carry (operands
[0], operands
[2], val
, compare_op
));
12738 emit_insn (gen_addhi3_carry (operands
[0], operands
[2], val
, compare_op
));
12741 emit_insn (gen_addsi3_carry (operands
[0], operands
[2], val
, compare_op
));
12744 emit_insn (gen_adddi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
12747 gcc_unreachable ();
12750 return 1; /* DONE */
12754 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12755 works for floating pointer parameters and nonoffsetable memories.
12756 For pushes, it returns just stack offsets; the values will be saved
12757 in the right order. Maximally three parts are generated. */
12760 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
12765 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
12767 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
12769 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
12770 gcc_assert (size
>= 2 && size
<= 3);
12772 /* Optimize constant pool reference to immediates. This is used by fp
12773 moves, that force all constants to memory to allow combining. */
12774 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
12776 rtx tmp
= maybe_get_pool_constant (operand
);
12781 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
12783 /* The only non-offsetable memories we handle are pushes. */
12784 int ok
= push_operand (operand
, VOIDmode
);
12788 operand
= copy_rtx (operand
);
12789 PUT_MODE (operand
, Pmode
);
12790 parts
[0] = parts
[1] = parts
[2] = operand
;
12794 if (GET_CODE (operand
) == CONST_VECTOR
)
12796 enum machine_mode imode
= int_mode_for_mode (mode
);
12797 /* Caution: if we looked through a constant pool memory above,
12798 the operand may actually have a different mode now. That's
12799 ok, since we want to pun this all the way back to an integer. */
12800 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
12801 gcc_assert (operand
!= NULL
);
12807 if (mode
== DImode
)
12808 split_di (&operand
, 1, &parts
[0], &parts
[1]);
12811 if (REG_P (operand
))
12813 gcc_assert (reload_completed
);
12814 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
12815 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
12817 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
12819 else if (offsettable_memref_p (operand
))
12821 operand
= adjust_address (operand
, SImode
, 0);
12822 parts
[0] = operand
;
12823 parts
[1] = adjust_address (operand
, SImode
, 4);
12825 parts
[2] = adjust_address (operand
, SImode
, 8);
12827 else if (GET_CODE (operand
) == CONST_DOUBLE
)
12832 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
12836 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
12837 parts
[2] = gen_int_mode (l
[2], SImode
);
12840 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
12843 gcc_unreachable ();
12845 parts
[1] = gen_int_mode (l
[1], SImode
);
12846 parts
[0] = gen_int_mode (l
[0], SImode
);
12849 gcc_unreachable ();
12854 if (mode
== TImode
)
12855 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
12856 if (mode
== XFmode
|| mode
== TFmode
)
12858 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
12859 if (REG_P (operand
))
12861 gcc_assert (reload_completed
);
12862 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
12863 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
12865 else if (offsettable_memref_p (operand
))
12867 operand
= adjust_address (operand
, DImode
, 0);
12868 parts
[0] = operand
;
12869 parts
[1] = adjust_address (operand
, upper_mode
, 8);
12871 else if (GET_CODE (operand
) == CONST_DOUBLE
)
12876 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
12877 real_to_target (l
, &r
, mode
);
12879 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12880 if (HOST_BITS_PER_WIDE_INT
>= 64)
12883 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
12884 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
12887 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
12889 if (upper_mode
== SImode
)
12890 parts
[1] = gen_int_mode (l
[2], SImode
);
12891 else if (HOST_BITS_PER_WIDE_INT
>= 64)
12894 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
12895 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
12898 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
12901 gcc_unreachable ();
12908 /* Emit insns to perform a move or push of DI, DF, and XF values.
12909 Return false when normal moves are needed; true when all required
12910 insns have been emitted. Operands 2-4 contain the input values
12911 int the correct order; operands 5-7 contain the output values. */
12914 ix86_split_long_move (rtx operands
[])
12919 int collisions
= 0;
12920 enum machine_mode mode
= GET_MODE (operands
[0]);
12922 /* The DFmode expanders may ask us to move double.
12923 For 64bit target this is single move. By hiding the fact
12924 here we simplify i386.md splitters. */
12925 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
12927 /* Optimize constant pool reference to immediates. This is used by
12928 fp moves, that force all constants to memory to allow combining. */
12930 if (MEM_P (operands
[1])
12931 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
12932 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
12933 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
12934 if (push_operand (operands
[0], VOIDmode
))
12936 operands
[0] = copy_rtx (operands
[0]);
12937 PUT_MODE (operands
[0], Pmode
);
12940 operands
[0] = gen_lowpart (DImode
, operands
[0]);
12941 operands
[1] = gen_lowpart (DImode
, operands
[1]);
12942 emit_move_insn (operands
[0], operands
[1]);
12946 /* The only non-offsettable memory we handle is push. */
12947 if (push_operand (operands
[0], VOIDmode
))
12950 gcc_assert (!MEM_P (operands
[0])
12951 || offsettable_memref_p (operands
[0]));
12953 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
12954 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
12956 /* When emitting push, take care for source operands on the stack. */
12957 if (push
&& MEM_P (operands
[1])
12958 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
12961 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
12962 XEXP (part
[1][2], 0));
12963 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
12964 XEXP (part
[1][1], 0));
12967 /* We need to do copy in the right order in case an address register
12968 of the source overlaps the destination. */
12969 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
12971 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
12973 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
12976 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
12979 /* Collision in the middle part can be handled by reordering. */
12980 if (collisions
== 1 && nparts
== 3
12981 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
12984 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
12985 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
12988 /* If there are more collisions, we can't handle it by reordering.
12989 Do an lea to the last part and use only one colliding move. */
12990 else if (collisions
> 1)
12996 base
= part
[0][nparts
- 1];
12998 /* Handle the case when the last part isn't valid for lea.
12999 Happens in 64-bit mode storing the 12-byte XFmode. */
13000 if (GET_MODE (base
) != Pmode
)
13001 base
= gen_rtx_REG (Pmode
, REGNO (base
));
13003 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
13004 part
[1][0] = replace_equiv_address (part
[1][0], base
);
13005 part
[1][1] = replace_equiv_address (part
[1][1],
13006 plus_constant (base
, UNITS_PER_WORD
));
13008 part
[1][2] = replace_equiv_address (part
[1][2],
13009 plus_constant (base
, 8));
13019 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
13020 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, GEN_INT (-4)));
13021 emit_move_insn (part
[0][2], part
[1][2]);
13026 /* In 64bit mode we don't have 32bit push available. In case this is
13027 register, it is OK - we will just use larger counterpart. We also
13028 retype memory - these comes from attempt to avoid REX prefix on
13029 moving of second half of TFmode value. */
13030 if (GET_MODE (part
[1][1]) == SImode
)
13032 switch (GET_CODE (part
[1][1]))
13035 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
13039 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
13043 gcc_unreachable ();
13046 if (GET_MODE (part
[1][0]) == SImode
)
13047 part
[1][0] = part
[1][1];
13050 emit_move_insn (part
[0][1], part
[1][1]);
13051 emit_move_insn (part
[0][0], part
[1][0]);
13055 /* Choose correct order to not overwrite the source before it is copied. */
13056 if ((REG_P (part
[0][0])
13057 && REG_P (part
[1][1])
13058 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
13060 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
13062 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
13066 operands
[2] = part
[0][2];
13067 operands
[3] = part
[0][1];
13068 operands
[4] = part
[0][0];
13069 operands
[5] = part
[1][2];
13070 operands
[6] = part
[1][1];
13071 operands
[7] = part
[1][0];
13075 operands
[2] = part
[0][1];
13076 operands
[3] = part
[0][0];
13077 operands
[5] = part
[1][1];
13078 operands
[6] = part
[1][0];
13085 operands
[2] = part
[0][0];
13086 operands
[3] = part
[0][1];
13087 operands
[4] = part
[0][2];
13088 operands
[5] = part
[1][0];
13089 operands
[6] = part
[1][1];
13090 operands
[7] = part
[1][2];
13094 operands
[2] = part
[0][0];
13095 operands
[3] = part
[0][1];
13096 operands
[5] = part
[1][0];
13097 operands
[6] = part
[1][1];
13101 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
13104 if (CONST_INT_P (operands
[5])
13105 && operands
[5] != const0_rtx
13106 && REG_P (operands
[2]))
13108 if (CONST_INT_P (operands
[6])
13109 && INTVAL (operands
[6]) == INTVAL (operands
[5]))
13110 operands
[6] = operands
[2];
13113 && CONST_INT_P (operands
[7])
13114 && INTVAL (operands
[7]) == INTVAL (operands
[5]))
13115 operands
[7] = operands
[2];
13119 && CONST_INT_P (operands
[6])
13120 && operands
[6] != const0_rtx
13121 && REG_P (operands
[3])
13122 && CONST_INT_P (operands
[7])
13123 && INTVAL (operands
[7]) == INTVAL (operands
[6]))
13124 operands
[7] = operands
[3];
13127 emit_move_insn (operands
[2], operands
[5]);
13128 emit_move_insn (operands
[3], operands
[6]);
13130 emit_move_insn (operands
[4], operands
[7]);
13135 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13136 left shift by a constant, either using a single shift or
13137 a sequence of add instructions. */
13140 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
13144 emit_insn ((mode
== DImode
13146 : gen_adddi3
) (operand
, operand
, operand
));
13148 else if (!optimize_size
13149 && count
* ix86_cost
->add
<= ix86_cost
->shift_const
)
13152 for (i
=0; i
<count
; i
++)
13154 emit_insn ((mode
== DImode
13156 : gen_adddi3
) (operand
, operand
, operand
));
13160 emit_insn ((mode
== DImode
13162 : gen_ashldi3
) (operand
, operand
, GEN_INT (count
)));
13166 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13168 rtx low
[2], high
[2];
13170 const int single_width
= mode
== DImode
? 32 : 64;
13172 if (CONST_INT_P (operands
[2]))
13174 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13175 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13177 if (count
>= single_width
)
13179 emit_move_insn (high
[0], low
[1]);
13180 emit_move_insn (low
[0], const0_rtx
);
13182 if (count
> single_width
)
13183 ix86_expand_ashl_const (high
[0], count
- single_width
, mode
);
13187 if (!rtx_equal_p (operands
[0], operands
[1]))
13188 emit_move_insn (operands
[0], operands
[1]);
13189 emit_insn ((mode
== DImode
13191 : gen_x86_64_shld
) (high
[0], low
[0], GEN_INT (count
)));
13192 ix86_expand_ashl_const (low
[0], count
, mode
);
13197 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13199 if (operands
[1] == const1_rtx
)
13201 /* Assuming we've chosen a QImode capable registers, then 1 << N
13202 can be done with two 32/64-bit shifts, no branches, no cmoves. */
13203 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
13205 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
13207 ix86_expand_clear (low
[0]);
13208 ix86_expand_clear (high
[0]);
13209 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (single_width
)));
13211 d
= gen_lowpart (QImode
, low
[0]);
13212 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
13213 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
13214 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
13216 d
= gen_lowpart (QImode
, high
[0]);
13217 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
13218 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
13219 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
13222 /* Otherwise, we can get the same results by manually performing
13223 a bit extract operation on bit 5/6, and then performing the two
13224 shifts. The two methods of getting 0/1 into low/high are exactly
13225 the same size. Avoiding the shift in the bit extract case helps
13226 pentium4 a bit; no one else seems to care much either way. */
13231 if (TARGET_PARTIAL_REG_STALL
&& !optimize_size
)
13232 x
= gen_rtx_ZERO_EXTEND (mode
== DImode
? SImode
: DImode
, operands
[2]);
13234 x
= gen_lowpart (mode
== DImode
? SImode
: DImode
, operands
[2]);
13235 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
13237 emit_insn ((mode
== DImode
13239 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (mode
== DImode
? 5 : 6)));
13240 emit_insn ((mode
== DImode
13242 : gen_anddi3
) (high
[0], high
[0], GEN_INT (1)));
13243 emit_move_insn (low
[0], high
[0]);
13244 emit_insn ((mode
== DImode
13246 : gen_xordi3
) (low
[0], low
[0], GEN_INT (1)));
13249 emit_insn ((mode
== DImode
13251 : gen_ashldi3
) (low
[0], low
[0], operands
[2]));
13252 emit_insn ((mode
== DImode
13254 : gen_ashldi3
) (high
[0], high
[0], operands
[2]));
13258 if (operands
[1] == constm1_rtx
)
13260 /* For -1 << N, we can avoid the shld instruction, because we
13261 know that we're shifting 0...31/63 ones into a -1. */
13262 emit_move_insn (low
[0], constm1_rtx
);
13264 emit_move_insn (high
[0], low
[0]);
13266 emit_move_insn (high
[0], constm1_rtx
);
13270 if (!rtx_equal_p (operands
[0], operands
[1]))
13271 emit_move_insn (operands
[0], operands
[1]);
13273 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13274 emit_insn ((mode
== DImode
13276 : gen_x86_64_shld
) (high
[0], low
[0], operands
[2]));
13279 emit_insn ((mode
== DImode
? gen_ashlsi3
: gen_ashldi3
) (low
[0], low
[0], operands
[2]));
13281 if (TARGET_CMOVE
&& scratch
)
13283 ix86_expand_clear (scratch
);
13284 emit_insn ((mode
== DImode
13285 ? gen_x86_shift_adj_1
13286 : gen_x86_64_shift_adj
) (high
[0], low
[0], operands
[2], scratch
));
13289 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
13293 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13295 rtx low
[2], high
[2];
13297 const int single_width
= mode
== DImode
? 32 : 64;
13299 if (CONST_INT_P (operands
[2]))
13301 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13302 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13304 if (count
== single_width
* 2 - 1)
13306 emit_move_insn (high
[0], high
[1]);
13307 emit_insn ((mode
== DImode
13309 : gen_ashrdi3
) (high
[0], high
[0],
13310 GEN_INT (single_width
- 1)));
13311 emit_move_insn (low
[0], high
[0]);
13314 else if (count
>= single_width
)
13316 emit_move_insn (low
[0], high
[1]);
13317 emit_move_insn (high
[0], low
[0]);
13318 emit_insn ((mode
== DImode
13320 : gen_ashrdi3
) (high
[0], high
[0],
13321 GEN_INT (single_width
- 1)));
13322 if (count
> single_width
)
13323 emit_insn ((mode
== DImode
13325 : gen_ashrdi3
) (low
[0], low
[0],
13326 GEN_INT (count
- single_width
)));
13330 if (!rtx_equal_p (operands
[0], operands
[1]))
13331 emit_move_insn (operands
[0], operands
[1]);
13332 emit_insn ((mode
== DImode
13334 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
13335 emit_insn ((mode
== DImode
13337 : gen_ashrdi3
) (high
[0], high
[0], GEN_INT (count
)));
13342 if (!rtx_equal_p (operands
[0], operands
[1]))
13343 emit_move_insn (operands
[0], operands
[1]);
13345 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13347 emit_insn ((mode
== DImode
13349 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
13350 emit_insn ((mode
== DImode
13352 : gen_ashrdi3
) (high
[0], high
[0], operands
[2]));
13354 if (TARGET_CMOVE
&& scratch
)
13356 emit_move_insn (scratch
, high
[0]);
13357 emit_insn ((mode
== DImode
13359 : gen_ashrdi3
) (scratch
, scratch
,
13360 GEN_INT (single_width
- 1)));
13361 emit_insn ((mode
== DImode
13362 ? gen_x86_shift_adj_1
13363 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
13367 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
13372 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13374 rtx low
[2], high
[2];
13376 const int single_width
= mode
== DImode
? 32 : 64;
13378 if (CONST_INT_P (operands
[2]))
13380 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13381 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13383 if (count
>= single_width
)
13385 emit_move_insn (low
[0], high
[1]);
13386 ix86_expand_clear (high
[0]);
13388 if (count
> single_width
)
13389 emit_insn ((mode
== DImode
13391 : gen_lshrdi3
) (low
[0], low
[0],
13392 GEN_INT (count
- single_width
)));
13396 if (!rtx_equal_p (operands
[0], operands
[1]))
13397 emit_move_insn (operands
[0], operands
[1]);
13398 emit_insn ((mode
== DImode
13400 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
13401 emit_insn ((mode
== DImode
13403 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (count
)));
13408 if (!rtx_equal_p (operands
[0], operands
[1]))
13409 emit_move_insn (operands
[0], operands
[1]);
13411 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13413 emit_insn ((mode
== DImode
13415 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
13416 emit_insn ((mode
== DImode
13418 : gen_lshrdi3
) (high
[0], high
[0], operands
[2]));
13420 /* Heh. By reversing the arguments, we can reuse this pattern. */
13421 if (TARGET_CMOVE
&& scratch
)
13423 ix86_expand_clear (scratch
);
13424 emit_insn ((mode
== DImode
13425 ? gen_x86_shift_adj_1
13426 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
13430 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
13434 /* Predict just emitted jump instruction to be taken with probability PROB. */
13436 predict_jump (int prob
)
13438 rtx insn
= get_last_insn ();
13439 gcc_assert (JUMP_P (insn
));
13441 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
13446 /* Helper function for the string operations below. Dest VARIABLE whether
13447 it is aligned to VALUE bytes. If true, jump to the label. */
13449 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
13451 rtx label
= gen_label_rtx ();
13452 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
13453 if (GET_MODE (variable
) == DImode
)
13454 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
13456 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
13457 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
13460 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
13462 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
13466 /* Adjust COUNTER by the VALUE. */
13468 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
13470 if (GET_MODE (countreg
) == DImode
)
13471 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
13473 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
13476 /* Zero extend possibly SImode EXP to Pmode register. */
13478 ix86_zero_extend_to_Pmode (rtx exp
)
13481 if (GET_MODE (exp
) == VOIDmode
)
13482 return force_reg (Pmode
, exp
);
13483 if (GET_MODE (exp
) == Pmode
)
13484 return copy_to_mode_reg (Pmode
, exp
);
13485 r
= gen_reg_rtx (Pmode
);
13486 emit_insn (gen_zero_extendsidi2 (r
, exp
));
13490 /* Divide COUNTREG by SCALE. */
13492 scale_counter (rtx countreg
, int scale
)
13495 rtx piece_size_mask
;
13499 if (CONST_INT_P (countreg
))
13500 return GEN_INT (INTVAL (countreg
) / scale
);
13501 gcc_assert (REG_P (countreg
));
13503 piece_size_mask
= GEN_INT (scale
- 1);
13504 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
13505 GEN_INT (exact_log2 (scale
)),
13506 NULL
, 1, OPTAB_DIRECT
);
13510 /* Return mode for the memcpy/memset loop counter. Preffer SImode over DImode
13511 for constant loop counts. */
13513 static enum machine_mode
13514 counter_mode (rtx count_exp
)
13516 if (GET_MODE (count_exp
) != VOIDmode
)
13517 return GET_MODE (count_exp
);
13518 if (GET_CODE (count_exp
) != CONST_INT
)
13520 if (TARGET_64BIT
&& (INTVAL (count_exp
) & ~0xffffffff))
13525 /* When SRCPTR is non-NULL, output simple loop to move memory
13526 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
13527 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
13528 equivalent loop to set memory by VALUE (supposed to be in MODE).
13530 The size is rounded down to whole number of chunk size moved at once.
13531 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
13535 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
13536 rtx destptr
, rtx srcptr
, rtx value
,
13537 rtx count
, enum machine_mode mode
, int unroll
,
13540 rtx out_label
, top_label
, iter
, tmp
;
13541 enum machine_mode iter_mode
= counter_mode (count
);
13542 rtx piece_size
= GEN_INT (GET_MODE_SIZE (mode
) * unroll
);
13543 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
13549 top_label
= gen_label_rtx ();
13550 out_label
= gen_label_rtx ();
13551 iter
= gen_reg_rtx (iter_mode
);
13553 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
13554 NULL
, 1, OPTAB_DIRECT
);
13555 /* Those two should combine. */
13556 if (piece_size
== const1_rtx
)
13558 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
13560 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
13562 emit_move_insn (iter
, const0_rtx
);
13564 emit_label (top_label
);
13566 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
13567 x_addr
= gen_rtx_PLUS (Pmode
, destptr
, tmp
);
13568 destmem
= change_address (destmem
, mode
, x_addr
);
13572 y_addr
= gen_rtx_PLUS (Pmode
, srcptr
, copy_rtx (tmp
));
13573 srcmem
= change_address (srcmem
, mode
, y_addr
);
13575 /* When unrolling for chips that reorder memory reads and writes,
13576 we can save registers by using single temporary.
13577 Also using 4 temporaries is overkill in 32bit mode. */
13578 if (!TARGET_64BIT
&& 0)
13580 for (i
= 0; i
< unroll
; i
++)
13585 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13587 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
13589 emit_move_insn (destmem
, srcmem
);
13595 gcc_assert (unroll
<= 4);
13596 for (i
= 0; i
< unroll
; i
++)
13598 tmpreg
[i
] = gen_reg_rtx (mode
);
13602 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
13604 emit_move_insn (tmpreg
[i
], srcmem
);
13606 for (i
= 0; i
< unroll
; i
++)
13611 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13613 emit_move_insn (destmem
, tmpreg
[i
]);
13618 for (i
= 0; i
< unroll
; i
++)
13622 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13623 emit_move_insn (destmem
, value
);
13626 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
13627 true, OPTAB_LIB_WIDEN
);
13629 emit_move_insn (iter
, tmp
);
13631 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
13633 if (expected_size
!= -1)
13635 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
13636 if (expected_size
== 0)
13638 else if (expected_size
> REG_BR_PROB_BASE
)
13639 predict_jump (REG_BR_PROB_BASE
- 1);
13641 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
13644 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
13645 iter
= ix86_zero_extend_to_Pmode (iter
);
13646 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
13647 true, OPTAB_LIB_WIDEN
);
13648 if (tmp
!= destptr
)
13649 emit_move_insn (destptr
, tmp
);
13652 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
13653 true, OPTAB_LIB_WIDEN
);
13655 emit_move_insn (srcptr
, tmp
);
13657 emit_label (out_label
);
13660 /* Output "rep; mov" instruction.
13661 Arguments have same meaning as for previous function */
13663 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
13664 rtx destptr
, rtx srcptr
,
13666 enum machine_mode mode
)
13672 /* If the size is known, it is shorter to use rep movs. */
13673 if (mode
== QImode
&& CONST_INT_P (count
)
13674 && !(INTVAL (count
) & 3))
13677 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
13678 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
13679 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
13680 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
13681 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
13682 if (mode
!= QImode
)
13684 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13685 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13686 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
13687 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13688 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13689 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
13693 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
13694 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
13696 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
13700 /* Output "rep; stos" instruction.
13701 Arguments have same meaning as for previous function */
13703 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
13705 enum machine_mode mode
)
13710 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
13711 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
13712 value
= force_reg (mode
, gen_lowpart (mode
, value
));
13713 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
13714 if (mode
!= QImode
)
13716 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13717 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13718 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
13721 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
13722 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
13726 emit_strmov (rtx destmem
, rtx srcmem
,
13727 rtx destptr
, rtx srcptr
, enum machine_mode mode
, int offset
)
13729 rtx src
= adjust_automodify_address_nv (srcmem
, mode
, srcptr
, offset
);
13730 rtx dest
= adjust_automodify_address_nv (destmem
, mode
, destptr
, offset
);
13731 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13734 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
13736 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
13737 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
13740 if (CONST_INT_P (count
))
13742 HOST_WIDE_INT countval
= INTVAL (count
);
13745 if ((countval
& 0x10) && max_size
> 16)
13749 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
13750 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
+ 8);
13753 gcc_unreachable ();
13756 if ((countval
& 0x08) && max_size
> 8)
13759 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
13762 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
13763 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
+ 4);
13767 if ((countval
& 0x04) && max_size
> 4)
13769 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
13772 if ((countval
& 0x02) && max_size
> 2)
13774 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, HImode
, offset
);
13777 if ((countval
& 0x01) && max_size
> 1)
13779 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, QImode
, offset
);
13786 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
13787 count
, 1, OPTAB_DIRECT
);
13788 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
13789 count
, QImode
, 1, 4);
13793 /* When there are stringops, we can cheaply increase dest and src pointers.
13794 Otherwise we save code size by maintaining offset (zero is readily
13795 available from preceding rep operation) and using x86 addressing modes.
13797 if (TARGET_SINGLE_STRINGOP
)
13801 rtx label
= ix86_expand_aligntest (count
, 4, true);
13802 src
= change_address (srcmem
, SImode
, srcptr
);
13803 dest
= change_address (destmem
, SImode
, destptr
);
13804 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13805 emit_label (label
);
13806 LABEL_NUSES (label
) = 1;
13810 rtx label
= ix86_expand_aligntest (count
, 2, true);
13811 src
= change_address (srcmem
, HImode
, srcptr
);
13812 dest
= change_address (destmem
, HImode
, destptr
);
13813 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13814 emit_label (label
);
13815 LABEL_NUSES (label
) = 1;
13819 rtx label
= ix86_expand_aligntest (count
, 1, true);
13820 src
= change_address (srcmem
, QImode
, srcptr
);
13821 dest
= change_address (destmem
, QImode
, destptr
);
13822 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13823 emit_label (label
);
13824 LABEL_NUSES (label
) = 1;
13829 rtx offset
= force_reg (Pmode
, const0_rtx
);
13834 rtx label
= ix86_expand_aligntest (count
, 4, true);
13835 src
= change_address (srcmem
, SImode
, srcptr
);
13836 dest
= change_address (destmem
, SImode
, destptr
);
13837 emit_move_insn (dest
, src
);
13838 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
13839 true, OPTAB_LIB_WIDEN
);
13841 emit_move_insn (offset
, tmp
);
13842 emit_label (label
);
13843 LABEL_NUSES (label
) = 1;
13847 rtx label
= ix86_expand_aligntest (count
, 2, true);
13848 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
13849 src
= change_address (srcmem
, HImode
, tmp
);
13850 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
13851 dest
= change_address (destmem
, HImode
, tmp
);
13852 emit_move_insn (dest
, src
);
13853 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
13854 true, OPTAB_LIB_WIDEN
);
13856 emit_move_insn (offset
, tmp
);
13857 emit_label (label
);
13858 LABEL_NUSES (label
) = 1;
13862 rtx label
= ix86_expand_aligntest (count
, 1, true);
13863 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
13864 src
= change_address (srcmem
, QImode
, tmp
);
13865 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
13866 dest
= change_address (destmem
, QImode
, tmp
);
13867 emit_move_insn (dest
, src
);
13868 emit_label (label
);
13869 LABEL_NUSES (label
) = 1;
13874 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
13876 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
13877 rtx count
, int max_size
)
13880 expand_simple_binop (counter_mode (count
), AND
, count
,
13881 GEN_INT (max_size
- 1), count
, 1, OPTAB_DIRECT
);
13882 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
13883 gen_lowpart (QImode
, value
), count
, QImode
,
13887 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
13889 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx count
, int max_size
)
13893 if (CONST_INT_P (count
))
13895 HOST_WIDE_INT countval
= INTVAL (count
);
13898 if ((countval
& 0x10) && max_size
> 16)
13902 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
13903 emit_insn (gen_strset (destptr
, dest
, value
));
13904 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
+ 8);
13905 emit_insn (gen_strset (destptr
, dest
, value
));
13908 gcc_unreachable ();
13911 if ((countval
& 0x08) && max_size
> 8)
13915 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
13916 emit_insn (gen_strset (destptr
, dest
, value
));
13920 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
13921 emit_insn (gen_strset (destptr
, dest
, value
));
13922 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
+ 4);
13923 emit_insn (gen_strset (destptr
, dest
, value
));
13927 if ((countval
& 0x04) && max_size
> 4)
13929 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
13930 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
13933 if ((countval
& 0x02) && max_size
> 2)
13935 dest
= adjust_automodify_address_nv (destmem
, HImode
, destptr
, offset
);
13936 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
13939 if ((countval
& 0x01) && max_size
> 1)
13941 dest
= adjust_automodify_address_nv (destmem
, QImode
, destptr
, offset
);
13942 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
13949 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
13954 rtx label
= ix86_expand_aligntest (count
, 16, true);
13957 dest
= change_address (destmem
, DImode
, destptr
);
13958 emit_insn (gen_strset (destptr
, dest
, value
));
13959 emit_insn (gen_strset (destptr
, dest
, value
));
13963 dest
= change_address (destmem
, SImode
, destptr
);
13964 emit_insn (gen_strset (destptr
, dest
, value
));
13965 emit_insn (gen_strset (destptr
, dest
, value
));
13966 emit_insn (gen_strset (destptr
, dest
, value
));
13967 emit_insn (gen_strset (destptr
, dest
, value
));
13969 emit_label (label
);
13970 LABEL_NUSES (label
) = 1;
13974 rtx label
= ix86_expand_aligntest (count
, 8, true);
13977 dest
= change_address (destmem
, DImode
, destptr
);
13978 emit_insn (gen_strset (destptr
, dest
, value
));
13982 dest
= change_address (destmem
, SImode
, destptr
);
13983 emit_insn (gen_strset (destptr
, dest
, value
));
13984 emit_insn (gen_strset (destptr
, dest
, value
));
13986 emit_label (label
);
13987 LABEL_NUSES (label
) = 1;
13991 rtx label
= ix86_expand_aligntest (count
, 4, true);
13992 dest
= change_address (destmem
, SImode
, destptr
);
13993 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
13994 emit_label (label
);
13995 LABEL_NUSES (label
) = 1;
13999 rtx label
= ix86_expand_aligntest (count
, 2, true);
14000 dest
= change_address (destmem
, HImode
, destptr
);
14001 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
14002 emit_label (label
);
14003 LABEL_NUSES (label
) = 1;
14007 rtx label
= ix86_expand_aligntest (count
, 1, true);
14008 dest
= change_address (destmem
, QImode
, destptr
);
14009 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
14010 emit_label (label
);
14011 LABEL_NUSES (label
) = 1;
14015 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
14016 DESIRED_ALIGNMENT. */
14018 expand_movmem_prologue (rtx destmem
, rtx srcmem
,
14019 rtx destptr
, rtx srcptr
, rtx count
,
14020 int align
, int desired_alignment
)
14022 if (align
<= 1 && desired_alignment
> 1)
14024 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
14025 srcmem
= change_address (srcmem
, QImode
, srcptr
);
14026 destmem
= change_address (destmem
, QImode
, destptr
);
14027 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
14028 ix86_adjust_counter (count
, 1);
14029 emit_label (label
);
14030 LABEL_NUSES (label
) = 1;
14032 if (align
<= 2 && desired_alignment
> 2)
14034 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
14035 srcmem
= change_address (srcmem
, HImode
, srcptr
);
14036 destmem
= change_address (destmem
, HImode
, destptr
);
14037 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
14038 ix86_adjust_counter (count
, 2);
14039 emit_label (label
);
14040 LABEL_NUSES (label
) = 1;
14042 if (align
<= 4 && desired_alignment
> 4)
14044 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
14045 srcmem
= change_address (srcmem
, SImode
, srcptr
);
14046 destmem
= change_address (destmem
, SImode
, destptr
);
14047 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
14048 ix86_adjust_counter (count
, 4);
14049 emit_label (label
);
14050 LABEL_NUSES (label
) = 1;
14052 gcc_assert (desired_alignment
<= 8);
14055 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
14056 DESIRED_ALIGNMENT. */
14058 expand_setmem_prologue (rtx destmem
, rtx destptr
, rtx value
, rtx count
,
14059 int align
, int desired_alignment
)
14061 if (align
<= 1 && desired_alignment
> 1)
14063 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
14064 destmem
= change_address (destmem
, QImode
, destptr
);
14065 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (QImode
, value
)));
14066 ix86_adjust_counter (count
, 1);
14067 emit_label (label
);
14068 LABEL_NUSES (label
) = 1;
14070 if (align
<= 2 && desired_alignment
> 2)
14072 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
14073 destmem
= change_address (destmem
, HImode
, destptr
);
14074 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (HImode
, value
)));
14075 ix86_adjust_counter (count
, 2);
14076 emit_label (label
);
14077 LABEL_NUSES (label
) = 1;
14079 if (align
<= 4 && desired_alignment
> 4)
14081 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
14082 destmem
= change_address (destmem
, SImode
, destptr
);
14083 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
14084 ix86_adjust_counter (count
, 4);
14085 emit_label (label
);
14086 LABEL_NUSES (label
) = 1;
14088 gcc_assert (desired_alignment
<= 8);
14091 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
14092 static enum stringop_alg
14093 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
, bool memset
,
14094 int *dynamic_check
)
14096 const struct stringop_algs
* algs
;
14098 *dynamic_check
= -1;
14100 algs
= &ix86_cost
->memset
[TARGET_64BIT
!= 0];
14102 algs
= &ix86_cost
->memcpy
[TARGET_64BIT
!= 0];
14103 if (stringop_alg
!= no_stringop
)
14104 return stringop_alg
;
14105 /* rep; movq or rep; movl is the smallest variant. */
14106 else if (optimize_size
)
14108 if (!count
|| (count
& 3))
14109 return rep_prefix_1_byte
;
14111 return rep_prefix_4_byte
;
14113 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
14115 else if (expected_size
!= -1 && expected_size
< 4)
14116 return loop_1_byte
;
14117 else if (expected_size
!= -1)
14120 enum stringop_alg alg
= libcall
;
14121 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
14123 gcc_assert (algs
->size
[i
].max
);
14124 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
14126 if (algs
->size
[i
].alg
!= libcall
)
14127 alg
= algs
->size
[i
].alg
;
14128 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14129 last non-libcall inline algorithm. */
14130 if (TARGET_INLINE_ALL_STRINGOPS
)
14132 /* When the current size is best to be copied by a libcall,
14133 but we are still forced to inline, run the heuristic bellow
14134 that will pick code for medium sized blocks. */
14135 if (alg
!= libcall
)
14140 return algs
->size
[i
].alg
;
14143 gcc_assert (TARGET_INLINE_ALL_STRINGOPS
);
14145 /* When asked to inline the call anyway, try to pick meaningful choice.
14146 We look for maximal size of block that is faster to copy by hand and
14147 take blocks of at most of that size guessing that average size will
14148 be roughly half of the block.
14150 If this turns out to be bad, we might simply specify the preferred
14151 choice in ix86_costs. */
14152 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
14153 && algs
->unknown_size
== libcall
)
14156 enum stringop_alg alg
;
14159 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
14160 if (algs
->size
[i
].alg
!= libcall
&& algs
->size
[i
].alg
)
14161 max
= algs
->size
[i
].max
;
14164 alg
= decide_alg (count
, max
/ 2, memset
, dynamic_check
);
14165 gcc_assert (*dynamic_check
== -1);
14166 gcc_assert (alg
!= libcall
);
14167 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
14168 *dynamic_check
= max
;
14171 return algs
->unknown_size
;
14174 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14175 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
14177 decide_alignment (int align
,
14178 enum stringop_alg alg
,
14181 int desired_align
= 0;
14185 gcc_unreachable ();
14187 case unrolled_loop
:
14188 desired_align
= GET_MODE_SIZE (Pmode
);
14190 case rep_prefix_8_byte
:
14193 case rep_prefix_4_byte
:
14194 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
14195 copying whole cacheline at once. */
14196 if (TARGET_PENTIUMPRO
)
14201 case rep_prefix_1_byte
:
14202 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
14203 copying whole cacheline at once. */
14204 if (TARGET_PENTIUMPRO
)
14218 if (desired_align
< align
)
14219 desired_align
= align
;
14220 if (expected_size
!= -1 && expected_size
< 4)
14221 desired_align
= align
;
14222 return desired_align
;
14225 /* Return the smallest power of 2 greater than VAL. */
14227 smallest_pow2_greater_than (int val
)
14235 /* Expand string move (memcpy) operation. Use i386 string operations when
14236 profitable. expand_clrmem contains similar code. The code depends upon
14237 architecture, block size and alignment, but always has the same
14240 1) Prologue guard: Conditional that jumps up to epilogues for small
14241 blocks that can be handled by epilogue alone. This is faster but
14242 also needed for correctness, since prologue assume the block is larger
14243 than the desired alignment.
14245 Optional dynamic check for size and libcall for large
14246 blocks is emitted here too, with -minline-stringops-dynamically.
14248 2) Prologue: copy first few bytes in order to get destination aligned
14249 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14250 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14251 We emit either a jump tree on power of two sized blocks, or a byte loop.
14253 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14254 with specified algorithm.
14256 4) Epilogue: code copying tail of the block that is too small to be
14257 handled by main body (or up to size guarded by prologue guard). */
14260 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
14261 rtx expected_align_exp
, rtx expected_size_exp
)
14267 rtx jump_around_label
= NULL
;
14268 HOST_WIDE_INT align
= 1;
14269 unsigned HOST_WIDE_INT count
= 0;
14270 HOST_WIDE_INT expected_size
= -1;
14271 int size_needed
= 0, epilogue_size_needed
;
14272 int desired_align
= 0;
14273 enum stringop_alg alg
;
14276 if (CONST_INT_P (align_exp
))
14277 align
= INTVAL (align_exp
);
14278 /* i386 can do misaligned access on reasonably increased cost. */
14279 if (CONST_INT_P (expected_align_exp
)
14280 && INTVAL (expected_align_exp
) > align
)
14281 align
= INTVAL (expected_align_exp
);
14282 if (CONST_INT_P (count_exp
))
14283 count
= expected_size
= INTVAL (count_exp
);
14284 if (CONST_INT_P (expected_size_exp
) && count
== 0)
14285 expected_size
= INTVAL (expected_size_exp
);
14287 /* Step 0: Decide on preferred algorithm, desired alignment and
14288 size of chunks to be copied by main loop. */
14290 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
);
14291 desired_align
= decide_alignment (align
, alg
, expected_size
);
14293 if (!TARGET_ALIGN_STRINGOPS
)
14294 align
= desired_align
;
14296 if (alg
== libcall
)
14298 gcc_assert (alg
!= no_stringop
);
14300 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
14301 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
14302 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
14307 gcc_unreachable ();
14309 size_needed
= GET_MODE_SIZE (Pmode
);
14311 case unrolled_loop
:
14312 size_needed
= GET_MODE_SIZE (Pmode
) * (TARGET_64BIT
? 4 : 2);
14314 case rep_prefix_8_byte
:
14317 case rep_prefix_4_byte
:
14320 case rep_prefix_1_byte
:
14326 epilogue_size_needed
= size_needed
;
14328 /* Step 1: Prologue guard. */
14330 /* Alignment code needs count to be in register. */
14331 if (CONST_INT_P (count_exp
) && desired_align
> align
)
14333 enum machine_mode mode
= SImode
;
14334 if (TARGET_64BIT
&& (count
& ~0xffffffff))
14336 count_exp
= force_reg (mode
, count_exp
);
14338 gcc_assert (desired_align
>= 1 && align
>= 1);
14340 /* Ensure that alignment prologue won't copy past end of block. */
14341 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
14343 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
14344 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14345 Make sure it is power of 2. */
14346 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
14348 label
= gen_label_rtx ();
14349 emit_cmp_and_jump_insns (count_exp
,
14350 GEN_INT (epilogue_size_needed
),
14351 LTU
, 0, counter_mode (count_exp
), 1, label
);
14352 if (GET_CODE (count_exp
) == CONST_INT
)
14354 else if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
14355 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
14357 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
14359 /* Emit code to decide on runtime whether library call or inline should be
14361 if (dynamic_check
!= -1)
14363 rtx hot_label
= gen_label_rtx ();
14364 jump_around_label
= gen_label_rtx ();
14365 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
14366 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
14367 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
14368 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
14369 emit_jump (jump_around_label
);
14370 emit_label (hot_label
);
14373 /* Step 2: Alignment prologue. */
14375 if (desired_align
> align
)
14377 /* Except for the first move in epilogue, we no longer know
14378 constant offset in aliasing info. It don't seems to worth
14379 the pain to maintain it for the first move, so throw away
14381 src
= change_address (src
, BLKmode
, srcreg
);
14382 dst
= change_address (dst
, BLKmode
, destreg
);
14383 expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
14386 if (label
&& size_needed
== 1)
14388 emit_label (label
);
14389 LABEL_NUSES (label
) = 1;
14393 /* Step 3: Main loop. */
14399 gcc_unreachable ();
14401 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14402 count_exp
, QImode
, 1, expected_size
);
14405 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14406 count_exp
, Pmode
, 1, expected_size
);
14408 case unrolled_loop
:
14409 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
14410 registers for 4 temporaries anyway. */
14411 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14412 count_exp
, Pmode
, TARGET_64BIT
? 4 : 2,
14415 case rep_prefix_8_byte
:
14416 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14419 case rep_prefix_4_byte
:
14420 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14423 case rep_prefix_1_byte
:
14424 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14428 /* Adjust properly the offset of src and dest memory for aliasing. */
14429 if (CONST_INT_P (count_exp
))
14431 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
14432 (count
/ size_needed
) * size_needed
);
14433 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
14434 (count
/ size_needed
) * size_needed
);
14438 src
= change_address (src
, BLKmode
, srcreg
);
14439 dst
= change_address (dst
, BLKmode
, destreg
);
14442 /* Step 4: Epilogue to copy the remaining bytes. */
14446 /* When the main loop is done, COUNT_EXP might hold original count,
14447 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14448 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14449 bytes. Compensate if needed. */
14451 if (size_needed
< epilogue_size_needed
)
14454 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
14455 GEN_INT (size_needed
- 1), count_exp
, 1,
14457 if (tmp
!= count_exp
)
14458 emit_move_insn (count_exp
, tmp
);
14460 emit_label (label
);
14461 LABEL_NUSES (label
) = 1;
14464 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
14465 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
14466 epilogue_size_needed
);
14467 if (jump_around_label
)
14468 emit_label (jump_around_label
);
14472 /* Helper function for memcpy. For QImode value 0xXY produce
14473 0xXYXYXYXY of wide specified by MODE. This is essentially
14474 a * 0x10101010, but we can do slightly better than
14475 synth_mult by unwinding the sequence by hand on CPUs with
14478 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
14480 enum machine_mode valmode
= GET_MODE (val
);
14482 int nops
= mode
== DImode
? 3 : 2;
14484 gcc_assert (mode
== SImode
|| mode
== DImode
);
14485 if (val
== const0_rtx
)
14486 return copy_to_mode_reg (mode
, const0_rtx
);
14487 if (CONST_INT_P (val
))
14489 HOST_WIDE_INT v
= INTVAL (val
) & 255;
14493 if (mode
== DImode
)
14494 v
|= (v
<< 16) << 16;
14495 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
14498 if (valmode
== VOIDmode
)
14500 if (valmode
!= QImode
)
14501 val
= gen_lowpart (QImode
, val
);
14502 if (mode
== QImode
)
14504 if (!TARGET_PARTIAL_REG_STALL
)
14506 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
14507 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
14508 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
14509 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
14511 rtx reg
= convert_modes (mode
, QImode
, val
, true);
14512 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
14513 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
14518 rtx reg
= convert_modes (mode
, QImode
, val
, true);
14520 if (!TARGET_PARTIAL_REG_STALL
)
14521 if (mode
== SImode
)
14522 emit_insn (gen_movsi_insv_1 (reg
, reg
));
14524 emit_insn (gen_movdi_insv_1_rex64 (reg
, reg
));
14527 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
14528 NULL
, 1, OPTAB_DIRECT
);
14530 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14532 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
14533 NULL
, 1, OPTAB_DIRECT
);
14534 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14535 if (mode
== SImode
)
14537 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
14538 NULL
, 1, OPTAB_DIRECT
);
14539 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14544 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
14545 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
14546 alignment from ALIGN to DESIRED_ALIGN. */
14548 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
, int align
)
14553 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
14554 promoted_val
= promote_duplicated_reg (DImode
, val
);
14555 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
14556 promoted_val
= promote_duplicated_reg (SImode
, val
);
14557 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
14558 promoted_val
= promote_duplicated_reg (HImode
, val
);
14560 promoted_val
= val
;
14562 return promoted_val
;
14565 /* Expand string clear operation (bzero). Use i386 string operations when
14566 profitable. See expand_movmem comment for explanation of individual
14567 steps performed. */
14569 ix86_expand_setmem (rtx dst
, rtx count_exp
, rtx val_exp
, rtx align_exp
,
14570 rtx expected_align_exp
, rtx expected_size_exp
)
14575 rtx jump_around_label
= NULL
;
14576 HOST_WIDE_INT align
= 1;
14577 unsigned HOST_WIDE_INT count
= 0;
14578 HOST_WIDE_INT expected_size
= -1;
14579 int size_needed
= 0, epilogue_size_needed
;
14580 int desired_align
= 0;
14581 enum stringop_alg alg
;
14582 rtx promoted_val
= NULL
;
14583 bool force_loopy_epilogue
= false;
14586 if (CONST_INT_P (align_exp
))
14587 align
= INTVAL (align_exp
);
14588 /* i386 can do misaligned access on reasonably increased cost. */
14589 if (CONST_INT_P (expected_align_exp
)
14590 && INTVAL (expected_align_exp
) > align
)
14591 align
= INTVAL (expected_align_exp
);
14592 if (CONST_INT_P (count_exp
))
14593 count
= expected_size
= INTVAL (count_exp
);
14594 if (CONST_INT_P (expected_size_exp
) && count
== 0)
14595 expected_size
= INTVAL (expected_size_exp
);
14597 /* Step 0: Decide on preferred algorithm, desired alignment and
14598 size of chunks to be copied by main loop. */
14600 alg
= decide_alg (count
, expected_size
, true, &dynamic_check
);
14601 desired_align
= decide_alignment (align
, alg
, expected_size
);
14603 if (!TARGET_ALIGN_STRINGOPS
)
14604 align
= desired_align
;
14606 if (alg
== libcall
)
14608 gcc_assert (alg
!= no_stringop
);
14610 count_exp
= copy_to_mode_reg (counter_mode (count_exp
), count_exp
);
14611 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
14616 gcc_unreachable ();
14618 size_needed
= GET_MODE_SIZE (Pmode
);
14620 case unrolled_loop
:
14621 size_needed
= GET_MODE_SIZE (Pmode
) * 4;
14623 case rep_prefix_8_byte
:
14626 case rep_prefix_4_byte
:
14629 case rep_prefix_1_byte
:
14634 epilogue_size_needed
= size_needed
;
14636 /* Step 1: Prologue guard. */
14638 /* Alignment code needs count to be in register. */
14639 if (CONST_INT_P (count_exp
) && desired_align
> align
)
14641 enum machine_mode mode
= SImode
;
14642 if (TARGET_64BIT
&& (count
& ~0xffffffff))
14644 count_exp
= force_reg (mode
, count_exp
);
14646 /* Do the cheap promotion to allow better CSE across the
14647 main loop and epilogue (ie one load of the big constant in the
14648 front of all code. */
14649 if (CONST_INT_P (val_exp
))
14650 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
14651 desired_align
, align
);
14652 /* Ensure that alignment prologue won't copy past end of block. */
14653 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
14655 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
14656 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14657 Make sure it is power of 2. */
14658 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
14660 /* To improve performance of small blocks, we jump around the VAL
14661 promoting mode. This mean that if the promoted VAL is not constant,
14662 we might not use it in the epilogue and have to use byte
14664 if (epilogue_size_needed
> 2 && !promoted_val
)
14665 force_loopy_epilogue
= true;
14666 label
= gen_label_rtx ();
14667 emit_cmp_and_jump_insns (count_exp
,
14668 GEN_INT (epilogue_size_needed
),
14669 LTU
, 0, counter_mode (count_exp
), 1, label
);
14670 if (GET_CODE (count_exp
) == CONST_INT
)
14672 else if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
14673 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
14675 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
14677 if (dynamic_check
!= -1)
14679 rtx hot_label
= gen_label_rtx ();
14680 jump_around_label
= gen_label_rtx ();
14681 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
14682 LEU
, 0, counter_mode (count_exp
), 1, hot_label
);
14683 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
14684 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
14685 emit_jump (jump_around_label
);
14686 emit_label (hot_label
);
14689 /* Step 2: Alignment prologue. */
14691 /* Do the expensive promotion once we branched off the small blocks. */
14693 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
14694 desired_align
, align
);
14695 gcc_assert (desired_align
>= 1 && align
>= 1);
14697 if (desired_align
> align
)
14699 /* Except for the first move in epilogue, we no longer know
14700 constant offset in aliasing info. It don't seems to worth
14701 the pain to maintain it for the first move, so throw away
14703 dst
= change_address (dst
, BLKmode
, destreg
);
14704 expand_setmem_prologue (dst
, destreg
, promoted_val
, count_exp
, align
,
14707 if (label
&& size_needed
== 1)
14709 emit_label (label
);
14710 LABEL_NUSES (label
) = 1;
14714 /* Step 3: Main loop. */
14720 gcc_unreachable ();
14722 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14723 count_exp
, QImode
, 1, expected_size
);
14726 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14727 count_exp
, Pmode
, 1, expected_size
);
14729 case unrolled_loop
:
14730 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14731 count_exp
, Pmode
, 4, expected_size
);
14733 case rep_prefix_8_byte
:
14734 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14737 case rep_prefix_4_byte
:
14738 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14741 case rep_prefix_1_byte
:
14742 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14746 /* Adjust properly the offset of src and dest memory for aliasing. */
14747 if (CONST_INT_P (count_exp
))
14748 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
14749 (count
/ size_needed
) * size_needed
);
14751 dst
= change_address (dst
, BLKmode
, destreg
);
14753 /* Step 4: Epilogue to copy the remaining bytes. */
14757 /* When the main loop is done, COUNT_EXP might hold original count,
14758 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14759 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14760 bytes. Compensate if needed. */
14762 if (size_needed
< desired_align
- align
)
14765 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
14766 GEN_INT (size_needed
- 1), count_exp
, 1,
14768 size_needed
= desired_align
- align
+ 1;
14769 if (tmp
!= count_exp
)
14770 emit_move_insn (count_exp
, tmp
);
14772 emit_label (label
);
14773 LABEL_NUSES (label
) = 1;
14775 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
14777 if (force_loopy_epilogue
)
14778 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
14781 expand_setmem_epilogue (dst
, destreg
, promoted_val
, count_exp
,
14784 if (jump_around_label
)
14785 emit_label (jump_around_label
);
14789 /* Expand strlen. */
14791 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
14793 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
14795 /* The generic case of strlen expander is long. Avoid it's
14796 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
14798 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
14799 && !TARGET_INLINE_ALL_STRINGOPS
14801 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
14804 addr
= force_reg (Pmode
, XEXP (src
, 0));
14805 scratch1
= gen_reg_rtx (Pmode
);
14807 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
14810 /* Well it seems that some optimizer does not combine a call like
14811 foo(strlen(bar), strlen(bar));
14812 when the move and the subtraction is done here. It does calculate
14813 the length just once when these instructions are done inside of
14814 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
14815 often used and I use one fewer register for the lifetime of
14816 output_strlen_unroll() this is better. */
14818 emit_move_insn (out
, addr
);
14820 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
14822 /* strlensi_unroll_1 returns the address of the zero at the end of
14823 the string, like memchr(), so compute the length by subtracting
14824 the start address. */
14826 emit_insn (gen_subdi3 (out
, out
, addr
));
14828 emit_insn (gen_subsi3 (out
, out
, addr
));
14833 scratch2
= gen_reg_rtx (Pmode
);
14834 scratch3
= gen_reg_rtx (Pmode
);
14835 scratch4
= force_reg (Pmode
, constm1_rtx
);
14837 emit_move_insn (scratch3
, addr
);
14838 eoschar
= force_reg (QImode
, eoschar
);
14840 src
= replace_equiv_address_nv (src
, scratch3
);
14842 /* If .md starts supporting :P, this can be done in .md. */
14843 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
14844 scratch4
), UNSPEC_SCAS
);
14845 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
14848 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
14849 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
14853 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
14854 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
14860 /* Expand the appropriate insns for doing strlen if not just doing
14863 out = result, initialized with the start address
14864 align_rtx = alignment of the address.
14865 scratch = scratch register, initialized with the startaddress when
14866 not aligned, otherwise undefined
14868 This is just the body. It needs the initializations mentioned above and
14869 some address computing at the end. These things are done in i386.md. */
14872 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
14876 rtx align_2_label
= NULL_RTX
;
14877 rtx align_3_label
= NULL_RTX
;
14878 rtx align_4_label
= gen_label_rtx ();
14879 rtx end_0_label
= gen_label_rtx ();
14881 rtx tmpreg
= gen_reg_rtx (SImode
);
14882 rtx scratch
= gen_reg_rtx (SImode
);
14886 if (CONST_INT_P (align_rtx
))
14887 align
= INTVAL (align_rtx
);
14889 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14891 /* Is there a known alignment and is it less than 4? */
14894 rtx scratch1
= gen_reg_rtx (Pmode
);
14895 emit_move_insn (scratch1
, out
);
14896 /* Is there a known alignment and is it not 2? */
14899 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
14900 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
14902 /* Leave just the 3 lower bits. */
14903 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
14904 NULL_RTX
, 0, OPTAB_WIDEN
);
14906 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
14907 Pmode
, 1, align_4_label
);
14908 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
14909 Pmode
, 1, align_2_label
);
14910 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
14911 Pmode
, 1, align_3_label
);
14915 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14916 check if is aligned to 4 - byte. */
14918 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
14919 NULL_RTX
, 0, OPTAB_WIDEN
);
14921 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
14922 Pmode
, 1, align_4_label
);
14925 mem
= change_address (src
, QImode
, out
);
14927 /* Now compare the bytes. */
14929 /* Compare the first n unaligned byte on a byte per byte basis. */
14930 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
14931 QImode
, 1, end_0_label
);
14933 /* Increment the address. */
14935 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14937 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14939 /* Not needed with an alignment of 2 */
14942 emit_label (align_2_label
);
14944 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
14948 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14950 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14952 emit_label (align_3_label
);
14955 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
14959 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14961 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14964 /* Generate loop to check 4 bytes at a time. It is not a good idea to
14965 align this loop. It gives only huge programs, but does not help to
14967 emit_label (align_4_label
);
14969 mem
= change_address (src
, SImode
, out
);
14970 emit_move_insn (scratch
, mem
);
14972 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
14974 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
14976 /* This formula yields a nonzero result iff one of the bytes is zero.
14977 This saves three branches inside loop and many cycles. */
14979 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
14980 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
14981 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
14982 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
14983 gen_int_mode (0x80808080, SImode
)));
14984 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
14989 rtx reg
= gen_reg_rtx (SImode
);
14990 rtx reg2
= gen_reg_rtx (Pmode
);
14991 emit_move_insn (reg
, tmpreg
);
14992 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
14994 /* If zero is not in the first two bytes, move two bytes forward. */
14995 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
14996 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
14997 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
14998 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
14999 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
15002 /* Emit lea manually to avoid clobbering of flags. */
15003 emit_insn (gen_rtx_SET (SImode
, reg2
,
15004 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
15006 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
15007 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
15008 emit_insn (gen_rtx_SET (VOIDmode
, out
,
15009 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
15016 rtx end_2_label
= gen_label_rtx ();
15017 /* Is zero in the first two bytes? */
15019 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
15020 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
15021 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
15022 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
15023 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
15025 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
15026 JUMP_LABEL (tmp
) = end_2_label
;
15028 /* Not in the first two. Move two bytes forward. */
15029 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
15031 emit_insn (gen_adddi3 (out
, out
, const2_rtx
));
15033 emit_insn (gen_addsi3 (out
, out
, const2_rtx
));
15035 emit_label (end_2_label
);
15039 /* Avoid branch in fixing the byte. */
15040 tmpreg
= gen_lowpart (QImode
, tmpreg
);
15041 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
15042 cmp
= gen_rtx_LTU (Pmode
, gen_rtx_REG (CCmode
, 17), const0_rtx
);
15044 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3), cmp
));
15046 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3), cmp
));
15048 emit_label (end_0_label
);
15051 /* For given symbol (function) construct code to compute address of it's PLT
15052 entry in large x86-64 PIC model. */
15054 construct_plt_address (rtx symbol
)
15056 rtx tmp
= gen_reg_rtx (Pmode
);
15057 rtx unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, symbol
), UNSPEC_PLTOFF
);
15059 gcc_assert (GET_CODE (symbol
) == SYMBOL_REF
);
15060 gcc_assert (ix86_cmodel
== CM_LARGE_PIC
);
15062 emit_move_insn (tmp
, gen_rtx_CONST (Pmode
, unspec
));
15063 emit_insn (gen_adddi3 (tmp
, tmp
, pic_offset_table_rtx
));
15068 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
15069 rtx callarg2 ATTRIBUTE_UNUSED
,
15070 rtx pop
, int sibcall
)
15072 rtx use
= NULL
, call
;
15074 if (pop
== const0_rtx
)
15076 gcc_assert (!TARGET_64BIT
|| !pop
);
15078 if (TARGET_MACHO
&& !TARGET_64BIT
)
15081 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
15082 fnaddr
= machopic_indirect_call_target (fnaddr
);
15087 /* Static functions and indirect calls don't need the pic register. */
15088 if (flag_pic
&& (!TARGET_64BIT
|| ix86_cmodel
== CM_LARGE_PIC
)
15089 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
15090 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
15091 use_reg (&use
, pic_offset_table_rtx
);
15094 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
15096 rtx al
= gen_rtx_REG (QImode
, 0);
15097 emit_move_insn (al
, callarg2
);
15098 use_reg (&use
, al
);
15101 if (ix86_cmodel
== CM_LARGE_PIC
15102 && GET_CODE (fnaddr
) == MEM
15103 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
15104 && !local_symbolic_operand (XEXP (fnaddr
, 0), VOIDmode
))
15105 fnaddr
= gen_rtx_MEM (QImode
, construct_plt_address (XEXP (fnaddr
, 0)));
15106 else if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
15108 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
15109 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
15111 if (sibcall
&& TARGET_64BIT
15112 && !constant_call_address_operand (XEXP (fnaddr
, 0), Pmode
))
15115 addr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
15116 fnaddr
= gen_rtx_REG (Pmode
, R11_REG
);
15117 emit_move_insn (fnaddr
, addr
);
15118 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
15121 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
15123 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
15126 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
15127 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
15128 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
15131 call
= emit_call_insn (call
);
15133 CALL_INSN_FUNCTION_USAGE (call
) = use
;
15137 /* Clear stack slot assignments remembered from previous functions.
15138 This is called from INIT_EXPANDERS once before RTL is emitted for each
15141 static struct machine_function
*
15142 ix86_init_machine_status (void)
15144 struct machine_function
*f
;
15146 f
= ggc_alloc_cleared (sizeof (struct machine_function
));
15147 f
->use_fast_prologue_epilogue_nregs
= -1;
15148 f
->tls_descriptor_call_expanded_p
= 0;
15153 /* Return a MEM corresponding to a stack slot with mode MODE.
15154 Allocate a new slot if necessary.
15156 The RTL for a function can have several slots available: N is
15157 which slot to use. */
15160 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
15162 struct stack_local_entry
*s
;
15164 gcc_assert (n
< MAX_386_STACK_LOCALS
);
15166 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
15167 if (s
->mode
== mode
&& s
->n
== n
)
15168 return copy_rtx (s
->rtl
);
15170 s
= (struct stack_local_entry
*)
15171 ggc_alloc (sizeof (struct stack_local_entry
));
15174 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
15176 s
->next
= ix86_stack_locals
;
15177 ix86_stack_locals
= s
;
15181 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15183 static GTY(()) rtx ix86_tls_symbol
;
15185 ix86_tls_get_addr (void)
15188 if (!ix86_tls_symbol
)
15190 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
15191 (TARGET_ANY_GNU_TLS
15193 ? "___tls_get_addr"
15194 : "__tls_get_addr");
15197 return ix86_tls_symbol
;
15200 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15202 static GTY(()) rtx ix86_tls_module_base_symbol
;
15204 ix86_tls_module_base (void)
15207 if (!ix86_tls_module_base_symbol
)
15209 ix86_tls_module_base_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
15210 "_TLS_MODULE_BASE_");
15211 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
15212 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
15215 return ix86_tls_module_base_symbol
;
15218 /* Calculate the length of the memory address in the instruction
15219 encoding. Does not include the one-byte modrm, opcode, or prefix. */
15222 memory_address_length (rtx addr
)
15224 struct ix86_address parts
;
15225 rtx base
, index
, disp
;
15229 if (GET_CODE (addr
) == PRE_DEC
15230 || GET_CODE (addr
) == POST_INC
15231 || GET_CODE (addr
) == PRE_MODIFY
15232 || GET_CODE (addr
) == POST_MODIFY
)
15235 ok
= ix86_decompose_address (addr
, &parts
);
15238 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
15239 parts
.base
= SUBREG_REG (parts
.base
);
15240 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
15241 parts
.index
= SUBREG_REG (parts
.index
);
15244 index
= parts
.index
;
15249 - esp as the base always wants an index,
15250 - ebp as the base always wants a displacement. */
15252 /* Register Indirect. */
15253 if (base
&& !index
&& !disp
)
15255 /* esp (for its index) and ebp (for its displacement) need
15256 the two-byte modrm form. */
15257 if (addr
== stack_pointer_rtx
15258 || addr
== arg_pointer_rtx
15259 || addr
== frame_pointer_rtx
15260 || addr
== hard_frame_pointer_rtx
)
15264 /* Direct Addressing. */
15265 else if (disp
&& !base
&& !index
)
15270 /* Find the length of the displacement constant. */
15273 if (base
&& satisfies_constraint_K (disp
))
15278 /* ebp always wants a displacement. */
15279 else if (base
== hard_frame_pointer_rtx
)
15282 /* An index requires the two-byte modrm form.... */
15284 /* ...like esp, which always wants an index. */
15285 || base
== stack_pointer_rtx
15286 || base
== arg_pointer_rtx
15287 || base
== frame_pointer_rtx
)
15294 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15295 is set, expect that insn have 8bit immediate alternative. */
15297 ix86_attr_length_immediate_default (rtx insn
, int shortform
)
15301 extract_insn_cached (insn
);
15302 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15303 if (CONSTANT_P (recog_data
.operand
[i
]))
15306 if (shortform
&& satisfies_constraint_K (recog_data
.operand
[i
]))
15310 switch (get_attr_mode (insn
))
15321 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
15326 fatal_insn ("unknown insn mode", insn
);
15332 /* Compute default value for "length_address" attribute. */
15334 ix86_attr_length_address_default (rtx insn
)
15338 if (get_attr_type (insn
) == TYPE_LEA
)
15340 rtx set
= PATTERN (insn
);
15342 if (GET_CODE (set
) == PARALLEL
)
15343 set
= XVECEXP (set
, 0, 0);
15345 gcc_assert (GET_CODE (set
) == SET
);
15347 return memory_address_length (SET_SRC (set
));
15350 extract_insn_cached (insn
);
15351 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15352 if (MEM_P (recog_data
.operand
[i
]))
15354 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
15360 /* Return the maximum number of instructions a cpu can issue. */
15363 ix86_issue_rate (void)
15367 case PROCESSOR_PENTIUM
:
15371 case PROCESSOR_PENTIUMPRO
:
15372 case PROCESSOR_PENTIUM4
:
15373 case PROCESSOR_ATHLON
:
15375 case PROCESSOR_AMDFAM10
:
15376 case PROCESSOR_NOCONA
:
15377 case PROCESSOR_GENERIC32
:
15378 case PROCESSOR_GENERIC64
:
15381 case PROCESSOR_CORE2
:
15389 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15390 by DEP_INSN and nothing set by DEP_INSN. */
15393 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
15397 /* Simplify the test for uninteresting insns. */
15398 if (insn_type
!= TYPE_SETCC
15399 && insn_type
!= TYPE_ICMOV
15400 && insn_type
!= TYPE_FCMOV
15401 && insn_type
!= TYPE_IBR
)
15404 if ((set
= single_set (dep_insn
)) != 0)
15406 set
= SET_DEST (set
);
15409 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
15410 && XVECLEN (PATTERN (dep_insn
), 0) == 2
15411 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
15412 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
15414 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
15415 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
15420 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
15423 /* This test is true if the dependent insn reads the flags but
15424 not any other potentially set register. */
15425 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
15428 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
15434 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15435 address with operands set by DEP_INSN. */
15438 ix86_agi_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
15442 if (insn_type
== TYPE_LEA
15445 addr
= PATTERN (insn
);
15447 if (GET_CODE (addr
) == PARALLEL
)
15448 addr
= XVECEXP (addr
, 0, 0);
15450 gcc_assert (GET_CODE (addr
) == SET
);
15452 addr
= SET_SRC (addr
);
15457 extract_insn_cached (insn
);
15458 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15459 if (MEM_P (recog_data
.operand
[i
]))
15461 addr
= XEXP (recog_data
.operand
[i
], 0);
15468 return modified_in_p (addr
, dep_insn
);
15472 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
15474 enum attr_type insn_type
, dep_insn_type
;
15475 enum attr_memory memory
;
15477 int dep_insn_code_number
;
15479 /* Anti and output dependencies have zero cost on all CPUs. */
15480 if (REG_NOTE_KIND (link
) != 0)
15483 dep_insn_code_number
= recog_memoized (dep_insn
);
15485 /* If we can't recognize the insns, we can't really do anything. */
15486 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
15489 insn_type
= get_attr_type (insn
);
15490 dep_insn_type
= get_attr_type (dep_insn
);
15494 case PROCESSOR_PENTIUM
:
15495 /* Address Generation Interlock adds a cycle of latency. */
15496 if (ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15499 /* ??? Compares pair with jump/setcc. */
15500 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
15503 /* Floating point stores require value to be ready one cycle earlier. */
15504 if (insn_type
== TYPE_FMOV
15505 && get_attr_memory (insn
) == MEMORY_STORE
15506 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15510 case PROCESSOR_PENTIUMPRO
:
15511 memory
= get_attr_memory (insn
);
15513 /* INT->FP conversion is expensive. */
15514 if (get_attr_fp_int_src (dep_insn
))
15517 /* There is one cycle extra latency between an FP op and a store. */
15518 if (insn_type
== TYPE_FMOV
15519 && (set
= single_set (dep_insn
)) != NULL_RTX
15520 && (set2
= single_set (insn
)) != NULL_RTX
15521 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
15522 && MEM_P (SET_DEST (set2
)))
15525 /* Show ability of reorder buffer to hide latency of load by executing
15526 in parallel with previous instruction in case
15527 previous instruction is not needed to compute the address. */
15528 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15529 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15531 /* Claim moves to take one cycle, as core can issue one load
15532 at time and the next load can start cycle later. */
15533 if (dep_insn_type
== TYPE_IMOV
15534 || dep_insn_type
== TYPE_FMOV
)
15542 memory
= get_attr_memory (insn
);
15544 /* The esp dependency is resolved before the instruction is really
15546 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
15547 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
15550 /* INT->FP conversion is expensive. */
15551 if (get_attr_fp_int_src (dep_insn
))
15554 /* Show ability of reorder buffer to hide latency of load by executing
15555 in parallel with previous instruction in case
15556 previous instruction is not needed to compute the address. */
15557 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15558 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15560 /* Claim moves to take one cycle, as core can issue one load
15561 at time and the next load can start cycle later. */
15562 if (dep_insn_type
== TYPE_IMOV
15563 || dep_insn_type
== TYPE_FMOV
)
15572 case PROCESSOR_ATHLON
:
15574 case PROCESSOR_AMDFAM10
:
15575 case PROCESSOR_GENERIC32
:
15576 case PROCESSOR_GENERIC64
:
15577 memory
= get_attr_memory (insn
);
15579 /* Show ability of reorder buffer to hide latency of load by executing
15580 in parallel with previous instruction in case
15581 previous instruction is not needed to compute the address. */
15582 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15583 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15585 enum attr_unit unit
= get_attr_unit (insn
);
15588 /* Because of the difference between the length of integer and
15589 floating unit pipeline preparation stages, the memory operands
15590 for floating point are cheaper.
15592 ??? For Athlon it the difference is most probably 2. */
15593 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
15596 loadcost
= TARGET_ATHLON
? 2 : 0;
15598 if (cost
>= loadcost
)
15611 /* How many alternative schedules to try. This should be as wide as the
15612 scheduling freedom in the DFA, but no wider. Making this value too
15613 large results extra work for the scheduler. */
15616 ia32_multipass_dfa_lookahead (void)
15618 if (ix86_tune
== PROCESSOR_PENTIUM
)
15621 if (ix86_tune
== PROCESSOR_PENTIUMPRO
15622 || ix86_tune
== PROCESSOR_K6
)
15630 /* Compute the alignment given to a constant that is being placed in memory.
15631 EXP is the constant and ALIGN is the alignment that the object would
15633 The value of this function is used instead of that alignment to align
15637 ix86_constant_alignment (tree exp
, int align
)
15639 if (TREE_CODE (exp
) == REAL_CST
)
15641 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
15643 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
15646 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
15647 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
15648 return BITS_PER_WORD
;
15653 /* Compute the alignment for a static variable.
15654 TYPE is the data type, and ALIGN is the alignment that
15655 the object would ordinarily have. The value of this function is used
15656 instead of that alignment to align the object. */
15659 ix86_data_alignment (tree type
, int align
)
15661 int max_align
= optimize_size
? BITS_PER_WORD
: MIN (256, MAX_OFILE_ALIGNMENT
);
15663 if (AGGREGATE_TYPE_P (type
)
15664 && TYPE_SIZE (type
)
15665 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15666 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
15667 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
15668 && align
< max_align
)
15671 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15672 to 16byte boundary. */
15675 if (AGGREGATE_TYPE_P (type
)
15676 && TYPE_SIZE (type
)
15677 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15678 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
15679 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
15683 if (TREE_CODE (type
) == ARRAY_TYPE
)
15685 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
15687 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
15690 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
15693 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
15695 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
15698 else if ((TREE_CODE (type
) == RECORD_TYPE
15699 || TREE_CODE (type
) == UNION_TYPE
15700 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
15701 && TYPE_FIELDS (type
))
15703 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
15705 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
15708 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
15709 || TREE_CODE (type
) == INTEGER_TYPE
)
15711 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
15713 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
15720 /* Compute the alignment for a local variable.
15721 TYPE is the data type, and ALIGN is the alignment that
15722 the object would ordinarily have. The value of this macro is used
15723 instead of that alignment to align the object. */
15726 ix86_local_alignment (tree type
, int align
)
15728 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15729 to 16byte boundary. */
15732 if (AGGREGATE_TYPE_P (type
)
15733 && TYPE_SIZE (type
)
15734 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15735 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
15736 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
15739 if (TREE_CODE (type
) == ARRAY_TYPE
)
15741 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
15743 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
15746 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
15748 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
15750 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
15753 else if ((TREE_CODE (type
) == RECORD_TYPE
15754 || TREE_CODE (type
) == UNION_TYPE
15755 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
15756 && TYPE_FIELDS (type
))
15758 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
15760 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
15763 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
15764 || TREE_CODE (type
) == INTEGER_TYPE
)
15767 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
15769 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
15775 /* Emit RTL insns to initialize the variable parts of a trampoline.
15776 FNADDR is an RTX for the address of the function's pure code.
15777 CXT is an RTX for the static chain value for the function. */
15779 x86_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
15783 /* Compute offset from the end of the jmp to the target function. */
15784 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
15785 plus_constant (tramp
, 10),
15786 NULL_RTX
, 1, OPTAB_DIRECT
);
15787 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
15788 gen_int_mode (0xb9, QImode
));
15789 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
15790 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
15791 gen_int_mode (0xe9, QImode
));
15792 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
15797 /* Try to load address using shorter movl instead of movabs.
15798 We may want to support movq for kernel mode, but kernel does not use
15799 trampolines at the moment. */
15800 if (x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
15802 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
15803 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15804 gen_int_mode (0xbb41, HImode
));
15805 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
15806 gen_lowpart (SImode
, fnaddr
));
15811 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15812 gen_int_mode (0xbb49, HImode
));
15813 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
15817 /* Load static chain using movabs to r10. */
15818 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15819 gen_int_mode (0xba49, HImode
));
15820 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
15823 /* Jump to the r11 */
15824 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15825 gen_int_mode (0xff49, HImode
));
15826 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
15827 gen_int_mode (0xe3, QImode
));
15829 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
15832 #ifdef ENABLE_EXECUTE_STACK
15833 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
15834 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
/* Codes for all the SSE/MMX builtins.  */
enum ix86_builtins
{
  IX86_BUILTIN_ADDPS,
  IX86_BUILTIN_ADDSS,
  IX86_BUILTIN_DIVPS,
  IX86_BUILTIN_DIVSS,
  IX86_BUILTIN_MULPS,
  IX86_BUILTIN_MULSS,
  IX86_BUILTIN_SUBPS,
  IX86_BUILTIN_SUBSS,

  IX86_BUILTIN_CMPEQPS,
  IX86_BUILTIN_CMPLTPS,
  IX86_BUILTIN_CMPLEPS,
  IX86_BUILTIN_CMPGTPS,
  IX86_BUILTIN_CMPGEPS,
  IX86_BUILTIN_CMPNEQPS,
  IX86_BUILTIN_CMPNLTPS,
  IX86_BUILTIN_CMPNLEPS,
  IX86_BUILTIN_CMPNGTPS,
  IX86_BUILTIN_CMPNGEPS,
  IX86_BUILTIN_CMPORDPS,
  IX86_BUILTIN_CMPUNORDPS,
  IX86_BUILTIN_CMPEQSS,
  IX86_BUILTIN_CMPLTSS,
  IX86_BUILTIN_CMPLESS,
  IX86_BUILTIN_CMPNEQSS,
  IX86_BUILTIN_CMPNLTSS,
  IX86_BUILTIN_CMPNLESS,
  IX86_BUILTIN_CMPNGTSS,
  IX86_BUILTIN_CMPNGESS,
  IX86_BUILTIN_CMPORDSS,
  IX86_BUILTIN_CMPUNORDSS,

  IX86_BUILTIN_COMIEQSS,
  IX86_BUILTIN_COMILTSS,
  IX86_BUILTIN_COMILESS,
  IX86_BUILTIN_COMIGTSS,
  IX86_BUILTIN_COMIGESS,
  IX86_BUILTIN_COMINEQSS,
  IX86_BUILTIN_UCOMIEQSS,
  IX86_BUILTIN_UCOMILTSS,
  IX86_BUILTIN_UCOMILESS,
  IX86_BUILTIN_UCOMIGTSS,
  IX86_BUILTIN_UCOMIGESS,
  IX86_BUILTIN_UCOMINEQSS,

  IX86_BUILTIN_CVTPI2PS,
  IX86_BUILTIN_CVTPS2PI,
  IX86_BUILTIN_CVTSI2SS,
  IX86_BUILTIN_CVTSI642SS,
  IX86_BUILTIN_CVTSS2SI,
  IX86_BUILTIN_CVTSS2SI64,
  IX86_BUILTIN_CVTTPS2PI,
  IX86_BUILTIN_CVTTSS2SI,
  IX86_BUILTIN_CVTTSS2SI64,

  IX86_BUILTIN_MAXPS,
  IX86_BUILTIN_MAXSS,
  IX86_BUILTIN_MINPS,
  IX86_BUILTIN_MINSS,

  IX86_BUILTIN_LOADUPS,
  IX86_BUILTIN_STOREUPS,
  IX86_BUILTIN_MOVSS,

  IX86_BUILTIN_MOVHLPS,
  IX86_BUILTIN_MOVLHPS,
  IX86_BUILTIN_LOADHPS,
  IX86_BUILTIN_LOADLPS,
  IX86_BUILTIN_STOREHPS,
  IX86_BUILTIN_STORELPS,

  IX86_BUILTIN_MASKMOVQ,
  IX86_BUILTIN_MOVMSKPS,
  IX86_BUILTIN_PMOVMSKB,

  IX86_BUILTIN_MOVNTPS,
  IX86_BUILTIN_MOVNTQ,

  IX86_BUILTIN_LOADDQU,
  IX86_BUILTIN_STOREDQU,

  IX86_BUILTIN_PACKSSWB,
  IX86_BUILTIN_PACKSSDW,
  IX86_BUILTIN_PACKUSWB,

  IX86_BUILTIN_PADDB,
  IX86_BUILTIN_PADDW,
  IX86_BUILTIN_PADDD,
  IX86_BUILTIN_PADDQ,
  IX86_BUILTIN_PADDSB,
  IX86_BUILTIN_PADDSW,
  IX86_BUILTIN_PADDUSB,
  IX86_BUILTIN_PADDUSW,
  IX86_BUILTIN_PSUBB,
  IX86_BUILTIN_PSUBW,
  IX86_BUILTIN_PSUBD,
  IX86_BUILTIN_PSUBQ,
  IX86_BUILTIN_PSUBSB,
  IX86_BUILTIN_PSUBSW,
  IX86_BUILTIN_PSUBUSB,
  IX86_BUILTIN_PSUBUSW,

  IX86_BUILTIN_PAND,
  IX86_BUILTIN_PANDN,
  IX86_BUILTIN_POR,
  IX86_BUILTIN_PXOR,

  IX86_BUILTIN_PAVGB,
  IX86_BUILTIN_PAVGW,

  IX86_BUILTIN_PCMPEQB,
  IX86_BUILTIN_PCMPEQW,
  IX86_BUILTIN_PCMPEQD,
  IX86_BUILTIN_PCMPGTB,
  IX86_BUILTIN_PCMPGTW,
  IX86_BUILTIN_PCMPGTD,

  IX86_BUILTIN_PMADDWD,

  IX86_BUILTIN_PMAXSW,
  IX86_BUILTIN_PMAXUB,
  IX86_BUILTIN_PMINSW,
  IX86_BUILTIN_PMINUB,

  IX86_BUILTIN_PMULHUW,
  IX86_BUILTIN_PMULHW,
  IX86_BUILTIN_PMULLW,

  IX86_BUILTIN_PSADBW,
  IX86_BUILTIN_PSHUFW,

  IX86_BUILTIN_PSLLW,
  IX86_BUILTIN_PSLLD,
  IX86_BUILTIN_PSLLQ,
  IX86_BUILTIN_PSRAW,
  IX86_BUILTIN_PSRAD,
  IX86_BUILTIN_PSRLW,
  IX86_BUILTIN_PSRLD,
  IX86_BUILTIN_PSRLQ,
  IX86_BUILTIN_PSLLWI,
  IX86_BUILTIN_PSLLDI,
  IX86_BUILTIN_PSLLQI,
  IX86_BUILTIN_PSRAWI,
  IX86_BUILTIN_PSRADI,
  IX86_BUILTIN_PSRLWI,
  IX86_BUILTIN_PSRLDI,
  IX86_BUILTIN_PSRLQI,

  IX86_BUILTIN_PUNPCKHBW,
  IX86_BUILTIN_PUNPCKHWD,
  IX86_BUILTIN_PUNPCKHDQ,
  IX86_BUILTIN_PUNPCKLBW,
  IX86_BUILTIN_PUNPCKLWD,
  IX86_BUILTIN_PUNPCKLDQ,

  IX86_BUILTIN_SHUFPS,

  IX86_BUILTIN_RCPPS,
  IX86_BUILTIN_RCPSS,
  IX86_BUILTIN_RSQRTPS,
  IX86_BUILTIN_RSQRTSS,
  IX86_BUILTIN_SQRTPS,
  IX86_BUILTIN_SQRTSS,

  IX86_BUILTIN_UNPCKHPS,
  IX86_BUILTIN_UNPCKLPS,

  IX86_BUILTIN_ANDPS,
  IX86_BUILTIN_ANDNPS,
  IX86_BUILTIN_ORPS,
  IX86_BUILTIN_XORPS,

  IX86_BUILTIN_LDMXCSR,
  IX86_BUILTIN_STMXCSR,
  IX86_BUILTIN_SFENCE,

  /* 3DNow! Original */
  IX86_BUILTIN_FEMMS,
  IX86_BUILTIN_PAVGUSB,
  IX86_BUILTIN_PF2ID,
  IX86_BUILTIN_PFACC,
  IX86_BUILTIN_PFADD,
  IX86_BUILTIN_PFCMPEQ,
  IX86_BUILTIN_PFCMPGE,
  IX86_BUILTIN_PFCMPGT,
  IX86_BUILTIN_PFMAX,
  IX86_BUILTIN_PFMIN,
  IX86_BUILTIN_PFMUL,
  IX86_BUILTIN_PFRCP,
  IX86_BUILTIN_PFRCPIT1,
  IX86_BUILTIN_PFRCPIT2,
  IX86_BUILTIN_PFRSQIT1,
  IX86_BUILTIN_PFRSQRT,
  IX86_BUILTIN_PFSUB,
  IX86_BUILTIN_PFSUBR,
  IX86_BUILTIN_PI2FD,
  IX86_BUILTIN_PMULHRW,

  /* 3DNow! Athlon Extensions */
  IX86_BUILTIN_PF2IW,
  IX86_BUILTIN_PFNACC,
  IX86_BUILTIN_PFPNACC,
  IX86_BUILTIN_PI2FW,
  IX86_BUILTIN_PSWAPDSI,
  IX86_BUILTIN_PSWAPDSF,

  /* SSE2 */
  IX86_BUILTIN_ADDPD,
  IX86_BUILTIN_ADDSD,
  IX86_BUILTIN_DIVPD,
  IX86_BUILTIN_DIVSD,
  IX86_BUILTIN_MULPD,
  IX86_BUILTIN_MULSD,
  IX86_BUILTIN_SUBPD,
  IX86_BUILTIN_SUBSD,

  IX86_BUILTIN_CMPEQPD,
  IX86_BUILTIN_CMPLTPD,
  IX86_BUILTIN_CMPLEPD,
  IX86_BUILTIN_CMPGTPD,
  IX86_BUILTIN_CMPGEPD,
  IX86_BUILTIN_CMPNEQPD,
  IX86_BUILTIN_CMPNLTPD,
  IX86_BUILTIN_CMPNLEPD,
  IX86_BUILTIN_CMPNGTPD,
  IX86_BUILTIN_CMPNGEPD,
  IX86_BUILTIN_CMPORDPD,
  IX86_BUILTIN_CMPUNORDPD,
  IX86_BUILTIN_CMPNEPD,
  IX86_BUILTIN_CMPEQSD,
  IX86_BUILTIN_CMPLTSD,
  IX86_BUILTIN_CMPLESD,
  IX86_BUILTIN_CMPNEQSD,
  IX86_BUILTIN_CMPNLTSD,
  IX86_BUILTIN_CMPNLESD,
  IX86_BUILTIN_CMPORDSD,
  IX86_BUILTIN_CMPUNORDSD,
  IX86_BUILTIN_CMPNESD,

  IX86_BUILTIN_COMIEQSD,
  IX86_BUILTIN_COMILTSD,
  IX86_BUILTIN_COMILESD,
  IX86_BUILTIN_COMIGTSD,
  IX86_BUILTIN_COMIGESD,
  IX86_BUILTIN_COMINEQSD,
  IX86_BUILTIN_UCOMIEQSD,
  IX86_BUILTIN_UCOMILTSD,
  IX86_BUILTIN_UCOMILESD,
  IX86_BUILTIN_UCOMIGTSD,
  IX86_BUILTIN_UCOMIGESD,
  IX86_BUILTIN_UCOMINEQSD,

  IX86_BUILTIN_MAXPD,
  IX86_BUILTIN_MAXSD,
  IX86_BUILTIN_MINPD,
  IX86_BUILTIN_MINSD,

  IX86_BUILTIN_ANDPD,
  IX86_BUILTIN_ANDNPD,
  IX86_BUILTIN_ORPD,
  IX86_BUILTIN_XORPD,

  IX86_BUILTIN_SQRTPD,
  IX86_BUILTIN_SQRTSD,

  IX86_BUILTIN_UNPCKHPD,
  IX86_BUILTIN_UNPCKLPD,

  IX86_BUILTIN_SHUFPD,

  IX86_BUILTIN_LOADUPD,
  IX86_BUILTIN_STOREUPD,
  IX86_BUILTIN_MOVSD,

  IX86_BUILTIN_LOADHPD,
  IX86_BUILTIN_LOADLPD,

  IX86_BUILTIN_CVTDQ2PD,
  IX86_BUILTIN_CVTDQ2PS,

  IX86_BUILTIN_CVTPD2DQ,
  IX86_BUILTIN_CVTPD2PI,
  IX86_BUILTIN_CVTPD2PS,
  IX86_BUILTIN_CVTTPD2DQ,
  IX86_BUILTIN_CVTTPD2PI,

  IX86_BUILTIN_CVTPI2PD,
  IX86_BUILTIN_CVTSI2SD,
  IX86_BUILTIN_CVTSI642SD,

  IX86_BUILTIN_CVTSD2SI,
  IX86_BUILTIN_CVTSD2SI64,
  IX86_BUILTIN_CVTSD2SS,
  IX86_BUILTIN_CVTSS2SD,
  IX86_BUILTIN_CVTTSD2SI,
  IX86_BUILTIN_CVTTSD2SI64,

  IX86_BUILTIN_CVTPS2DQ,
  IX86_BUILTIN_CVTPS2PD,
  IX86_BUILTIN_CVTTPS2DQ,

  IX86_BUILTIN_MOVNTI,
  IX86_BUILTIN_MOVNTPD,
  IX86_BUILTIN_MOVNTDQ,

  /* SSE2 MMX */
  IX86_BUILTIN_MASKMOVDQU,
  IX86_BUILTIN_MOVMSKPD,
  IX86_BUILTIN_PMOVMSKB128,

  IX86_BUILTIN_PACKSSWB128,
  IX86_BUILTIN_PACKSSDW128,
  IX86_BUILTIN_PACKUSWB128,

  IX86_BUILTIN_PADDB128,
  IX86_BUILTIN_PADDW128,
  IX86_BUILTIN_PADDD128,
  IX86_BUILTIN_PADDQ128,
  IX86_BUILTIN_PADDSB128,
  IX86_BUILTIN_PADDSW128,
  IX86_BUILTIN_PADDUSB128,
  IX86_BUILTIN_PADDUSW128,
  IX86_BUILTIN_PSUBB128,
  IX86_BUILTIN_PSUBW128,
  IX86_BUILTIN_PSUBD128,
  IX86_BUILTIN_PSUBQ128,
  IX86_BUILTIN_PSUBSB128,
  IX86_BUILTIN_PSUBSW128,
  IX86_BUILTIN_PSUBUSB128,
  IX86_BUILTIN_PSUBUSW128,

  IX86_BUILTIN_PAND128,
  IX86_BUILTIN_PANDN128,
  IX86_BUILTIN_POR128,
  IX86_BUILTIN_PXOR128,

  IX86_BUILTIN_PAVGB128,
  IX86_BUILTIN_PAVGW128,

  IX86_BUILTIN_PCMPEQB128,
  IX86_BUILTIN_PCMPEQW128,
  IX86_BUILTIN_PCMPEQD128,
  IX86_BUILTIN_PCMPGTB128,
  IX86_BUILTIN_PCMPGTW128,
  IX86_BUILTIN_PCMPGTD128,

  IX86_BUILTIN_PMADDWD128,

  IX86_BUILTIN_PMAXSW128,
  IX86_BUILTIN_PMAXUB128,
  IX86_BUILTIN_PMINSW128,
  IX86_BUILTIN_PMINUB128,

  IX86_BUILTIN_PMULUDQ,
  IX86_BUILTIN_PMULUDQ128,
  IX86_BUILTIN_PMULHUW128,
  IX86_BUILTIN_PMULHW128,
  IX86_BUILTIN_PMULLW128,

  IX86_BUILTIN_PSADBW128,
  IX86_BUILTIN_PSHUFHW,
  IX86_BUILTIN_PSHUFLW,
  IX86_BUILTIN_PSHUFD,

  IX86_BUILTIN_PSLLW128,
  IX86_BUILTIN_PSLLD128,
  IX86_BUILTIN_PSLLQ128,
  IX86_BUILTIN_PSRAW128,
  IX86_BUILTIN_PSRAD128,
  IX86_BUILTIN_PSRLW128,
  IX86_BUILTIN_PSRLD128,
  IX86_BUILTIN_PSRLQ128,
  IX86_BUILTIN_PSLLDQI128,
  IX86_BUILTIN_PSLLWI128,
  IX86_BUILTIN_PSLLDI128,
  IX86_BUILTIN_PSLLQI128,
  IX86_BUILTIN_PSRAWI128,
  IX86_BUILTIN_PSRADI128,
  IX86_BUILTIN_PSRLDQI128,
  IX86_BUILTIN_PSRLWI128,
  IX86_BUILTIN_PSRLDI128,
  IX86_BUILTIN_PSRLQI128,

  IX86_BUILTIN_PUNPCKHBW128,
  IX86_BUILTIN_PUNPCKHWD128,
  IX86_BUILTIN_PUNPCKHDQ128,
  IX86_BUILTIN_PUNPCKHQDQ128,
  IX86_BUILTIN_PUNPCKLBW128,
  IX86_BUILTIN_PUNPCKLWD128,
  IX86_BUILTIN_PUNPCKLDQ128,
  IX86_BUILTIN_PUNPCKLQDQ128,

  IX86_BUILTIN_CLFLUSH,
  IX86_BUILTIN_MFENCE,
  IX86_BUILTIN_LFENCE,

  /* Prescott New Instructions.  */
  IX86_BUILTIN_ADDSUBPS,
  IX86_BUILTIN_HADDPS,
  IX86_BUILTIN_HSUBPS,
  IX86_BUILTIN_MOVSHDUP,
  IX86_BUILTIN_MOVSLDUP,
  IX86_BUILTIN_ADDSUBPD,
  IX86_BUILTIN_HADDPD,
  IX86_BUILTIN_HSUBPD,
  IX86_BUILTIN_LDDQU,

  IX86_BUILTIN_MONITOR,
  IX86_BUILTIN_MWAIT,

  /* SSSE3.  */
  IX86_BUILTIN_PHADDW,
  IX86_BUILTIN_PHADDD,
  IX86_BUILTIN_PHADDSW,
  IX86_BUILTIN_PHSUBW,
  IX86_BUILTIN_PHSUBD,
  IX86_BUILTIN_PHSUBSW,
  IX86_BUILTIN_PMADDUBSW,
  IX86_BUILTIN_PMULHRSW,
  IX86_BUILTIN_PSHUFB,
  IX86_BUILTIN_PSIGNB,
  IX86_BUILTIN_PSIGNW,
  IX86_BUILTIN_PSIGND,
  IX86_BUILTIN_PALIGNR,
  IX86_BUILTIN_PABSB,
  IX86_BUILTIN_PABSW,
  IX86_BUILTIN_PABSD,

  IX86_BUILTIN_PHADDW128,
  IX86_BUILTIN_PHADDD128,
  IX86_BUILTIN_PHADDSW128,
  IX86_BUILTIN_PHSUBW128,
  IX86_BUILTIN_PHSUBD128,
  IX86_BUILTIN_PHSUBSW128,
  IX86_BUILTIN_PMADDUBSW128,
  IX86_BUILTIN_PMULHRSW128,
  IX86_BUILTIN_PSHUFB128,
  IX86_BUILTIN_PSIGNB128,
  IX86_BUILTIN_PSIGNW128,
  IX86_BUILTIN_PSIGND128,
  IX86_BUILTIN_PALIGNR128,
  IX86_BUILTIN_PABSB128,
  IX86_BUILTIN_PABSW128,
  IX86_BUILTIN_PABSD128,

  /* AMDFAM10 - SSE4A New Instructions.  */
  IX86_BUILTIN_MOVNTSD,
  IX86_BUILTIN_MOVNTSS,
  IX86_BUILTIN_EXTRQI,
  IX86_BUILTIN_EXTRQ,
  IX86_BUILTIN_INSERTQI,
  IX86_BUILTIN_INSERTQ,

  IX86_BUILTIN_VEC_INIT_V2SI,
  IX86_BUILTIN_VEC_INIT_V4HI,
  IX86_BUILTIN_VEC_INIT_V8QI,
  IX86_BUILTIN_VEC_EXT_V2DF,
  IX86_BUILTIN_VEC_EXT_V2DI,
  IX86_BUILTIN_VEC_EXT_V4SF,
  IX86_BUILTIN_VEC_EXT_V4SI,
  IX86_BUILTIN_VEC_EXT_V8HI,
  IX86_BUILTIN_VEC_EXT_V2SI,
  IX86_BUILTIN_VEC_EXT_V4HI,
  IX86_BUILTIN_VEC_SET_V8HI,
  IX86_BUILTIN_VEC_SET_V4HI,

  /* Must stay last: used to size the ix86_builtins decl table.  */
  IX86_BUILTIN_MAX
};
16311 /* Table for the ix86 builtin decls. */
16312 static GTY(()) tree ix86_builtins
[(int) IX86_BUILTIN_MAX
];
16314 /* Add a ix86 target builtin function with CODE, NAME and TYPE. Do so,
16315 * if the target_flags include one of MASK. Stores the function decl
16316 * in the ix86_builtins array.
16317 * Returns the function decl or NULL_TREE, if the builtin was not added. */
16320 def_builtin (int mask
, const char *name
, tree type
, enum ix86_builtins code
)
16322 tree decl
= NULL_TREE
;
16324 if (mask
& target_flags
16325 && (!(mask
& MASK_64BIT
) || TARGET_64BIT
))
16327 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
16329 ix86_builtins
[(int) code
] = decl
;
16335 /* Like def_builtin, but also marks the function decl "const". */
16338 def_builtin_const (int mask
, const char *name
, tree type
,
16339 enum ix86_builtins code
)
16341 tree decl
= def_builtin (mask
, name
, type
, code
);
16343 TREE_READONLY (decl
) = 1;
16347 /* Bits for builtin_description.flag. */
16349 /* Set when we don't support the comparison natively, and should
16350 swap_comparison in order to support it. */
16351 #define BUILTIN_DESC_SWAP_OPERANDS 1
16353 struct builtin_description
16355 const unsigned int mask
;
16356 const enum insn_code icode
;
16357 const char *const name
;
16358 const enum ix86_builtins code
;
16359 const enum rtx_code comparison
;
16360 const unsigned int flag
;
16363 static const struct builtin_description bdesc_comi
[] =
16365 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
16366 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
16367 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
16368 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
16369 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
16370 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
16371 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
16372 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
16373 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
16374 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
16375 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
16376 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
16377 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
16378 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
16379 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
16380 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
16381 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
16382 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
16383 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
16384 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
16385 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
16386 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
16387 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
16388 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
16391 static const struct builtin_description bdesc_2arg
[] =
16394 { MASK_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
16395 { MASK_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
16396 { MASK_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
16397 { MASK_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
16398 { MASK_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
16399 { MASK_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
16400 { MASK_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
16401 { MASK_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
16403 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
16404 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
16405 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
16406 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
,
16407 BUILTIN_DESC_SWAP_OPERANDS
},
16408 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
,
16409 BUILTIN_DESC_SWAP_OPERANDS
},
16410 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
16411 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, 0 },
16412 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, 0 },
16413 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, 0 },
16414 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
,
16415 BUILTIN_DESC_SWAP_OPERANDS
},
16416 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
,
16417 BUILTIN_DESC_SWAP_OPERANDS
},
16418 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, 0 },
16419 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
16420 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
16421 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
16422 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
16423 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, 0 },
16424 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, 0 },
16425 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, 0 },
16426 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
,
16427 BUILTIN_DESC_SWAP_OPERANDS
},
16428 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
,
16429 BUILTIN_DESC_SWAP_OPERANDS
},
16430 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
16432 { MASK_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
16433 { MASK_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
16434 { MASK_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
16435 { MASK_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
16437 { MASK_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
16438 { MASK_SSE
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
16439 { MASK_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
16440 { MASK_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
16442 { MASK_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
16443 { MASK_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
16444 { MASK_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
16445 { MASK_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
16446 { MASK_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
16449 { MASK_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
16450 { MASK_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
16451 { MASK_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
16452 { MASK_SSE2
, CODE_FOR_mmx_adddi3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, 0, 0 },
16453 { MASK_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
16454 { MASK_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
16455 { MASK_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
16456 { MASK_SSE2
, CODE_FOR_mmx_subdi3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, 0, 0 },
16458 { MASK_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
16459 { MASK_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
16460 { MASK_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
16461 { MASK_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
16462 { MASK_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
16463 { MASK_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
16464 { MASK_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
16465 { MASK_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
16467 { MASK_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
16468 { MASK_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
16469 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
16471 { MASK_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
16472 { MASK_MMX
, CODE_FOR_mmx_nandv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
16473 { MASK_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
16474 { MASK_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
16476 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
16477 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
16479 { MASK_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
16480 { MASK_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
16481 { MASK_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
16482 { MASK_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
16483 { MASK_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
16484 { MASK_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
16486 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
16487 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
16488 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
16489 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
16491 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
16492 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
16493 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
16494 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
16495 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
16496 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
16499 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
16500 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
16501 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
16503 { MASK_SSE
, CODE_FOR_sse_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
16504 { MASK_SSE
, CODE_FOR_sse_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
16505 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtsi2ssq
, 0, IX86_BUILTIN_CVTSI642SS
, 0, 0 },
16507 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
16508 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
16509 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
16510 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
16511 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
16512 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
16514 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
16515 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
16516 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
16517 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
16518 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
16519 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
16521 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
16522 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
16523 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
16524 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
16526 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
16527 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
16530 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
16531 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
16532 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
16533 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
16534 { MASK_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
16535 { MASK_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
16536 { MASK_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
16537 { MASK_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
16539 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
16540 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
16541 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
16542 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
,
16543 BUILTIN_DESC_SWAP_OPERANDS
},
16544 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
,
16545 BUILTIN_DESC_SWAP_OPERANDS
},
16546 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
16547 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, 0 },
16548 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, 0 },
16549 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, 0 },
16550 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
,
16551 BUILTIN_DESC_SWAP_OPERANDS
},
16552 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
,
16553 BUILTIN_DESC_SWAP_OPERANDS
},
16554 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, 0 },
16555 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
16556 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
16557 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
16558 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
16559 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, 0 },
16560 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, 0 },
16561 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, 0 },
16562 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, 0 },
16564 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
16565 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
16566 { MASK_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
16567 { MASK_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
16569 { MASK_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
16570 { MASK_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
16571 { MASK_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
16572 { MASK_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
16574 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
16575 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
16576 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
16579 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
16580 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
16581 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
16582 { MASK_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
16583 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
16584 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
16585 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
16586 { MASK_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
16588 { MASK_MMX
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
16589 { MASK_MMX
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
16590 { MASK_MMX
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
16591 { MASK_MMX
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
16592 { MASK_MMX
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
16593 { MASK_MMX
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
16594 { MASK_MMX
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
16595 { MASK_MMX
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
16597 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
16598 { MASK_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
16600 { MASK_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
16601 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
16602 { MASK_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
16603 { MASK_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
16605 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
16606 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
16608 { MASK_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
16609 { MASK_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
16610 { MASK_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
16611 { MASK_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
16612 { MASK_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
16613 { MASK_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
16615 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
16616 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
16617 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
16618 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
16620 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
16621 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
16622 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
16623 { MASK_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, 0, 0 },
16624 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
16625 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
16626 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
16627 { MASK_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, 0, 0 },
16629 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
16630 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
16631 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
16633 { MASK_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
16634 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
16636 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, 0, IX86_BUILTIN_PMULUDQ
, 0, 0 },
16637 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, 0, IX86_BUILTIN_PMULUDQ128
, 0, 0 },
16639 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
16640 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
16641 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
16643 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
16644 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
16645 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
16647 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
16648 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
16650 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
16652 { MASK_SSE2
, CODE_FOR_sse2_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
16653 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, 0, IX86_BUILTIN_CVTSI642SD
, 0, 0 },
16654 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
16655 { MASK_SSE2
, CODE_FOR_sse2_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 },
16658 { MASK_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, 0, 0 },
16659 { MASK_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, 0, 0 },
16660 { MASK_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, 0, 0 },
16661 { MASK_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, 0, 0 },
16662 { MASK_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, 0, 0 },
16663 { MASK_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, 0, 0 },
16666 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, 0, 0 },
16667 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, 0, 0 },
16668 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, 0, 0 },
16669 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, 0, 0 },
16670 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, 0, 0 },
16671 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, 0, 0 },
16672 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, 0, 0 },
16673 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, 0, 0 },
16674 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, 0, 0 },
16675 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, 0, 0 },
16676 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, 0, 0 },
16677 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, 0, 0 },
16678 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv8hi3
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, 0, 0 },
16679 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv4hi3
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, 0, 0 },
16680 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, 0, 0 },
16681 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, 0, 0 },
16682 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, 0, 0 },
16683 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, 0, 0 },
16684 { MASK_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, 0, 0 },
16685 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, 0, 0 },
16686 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, 0, 0 },
16687 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, 0, 0 },
16688 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, 0, 0 },
16689 { MASK_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, 0, 0 }
16692 static const struct builtin_description bdesc_1arg
[] =
16694 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
16695 { MASK_SSE
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
16697 { MASK_SSE
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
16698 { MASK_SSE
, CODE_FOR_sse_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
16699 { MASK_SSE
, CODE_FOR_sse_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
16701 { MASK_SSE
, CODE_FOR_sse_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
16702 { MASK_SSE
, CODE_FOR_sse_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
16703 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtss2siq
, 0, IX86_BUILTIN_CVTSS2SI64
, 0, 0 },
16704 { MASK_SSE
, CODE_FOR_sse_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
16705 { MASK_SSE
, CODE_FOR_sse_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 },
16706 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvttss2siq
, 0, IX86_BUILTIN_CVTTSS2SI64
, 0, 0 },
16708 { MASK_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, 0, 0 },
16709 { MASK_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, 0, 0 },
16711 { MASK_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, 0, 0 },
16713 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, 0, 0 },
16714 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, 0, 0 },
16716 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, 0, 0 },
16717 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, 0, 0 },
16718 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, 0, 0 },
16719 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, 0, 0 },
16720 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, 0, 0 },
16722 { MASK_SSE2
, CODE_FOR_sse2_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, 0, 0 },
16724 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, 0, 0 },
16725 { MASK_SSE2
, CODE_FOR_sse2_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, 0, 0 },
16726 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsd2siq
, 0, IX86_BUILTIN_CVTSD2SI64
, 0, 0 },
16727 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvttsd2siq
, 0, IX86_BUILTIN_CVTTSD2SI64
, 0, 0 },
16729 { MASK_SSE2
, CODE_FOR_sse2_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, 0, 0 },
16730 { MASK_SSE2
, CODE_FOR_sse2_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, 0, 0 },
16731 { MASK_SSE2
, CODE_FOR_sse2_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, 0, 0 },
16734 { MASK_SSE3
, CODE_FOR_sse3_movshdup
, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP
, 0, 0 },
16735 { MASK_SSE3
, CODE_FOR_sse3_movsldup
, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP
, 0, 0 },
16738 { MASK_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, 0, 0 },
16739 { MASK_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, 0, 0 },
16740 { MASK_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, 0, 0 },
16741 { MASK_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, 0, 0 },
16742 { MASK_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, 0, 0 },
16743 { MASK_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, 0, 0 },
16747 ix86_init_builtins (void)
16750 ix86_init_mmx_sse_builtins ();
16753 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
16754 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
16757 ix86_init_mmx_sse_builtins (void)
16759 const struct builtin_description
* d
;
16762 tree V16QI_type_node
= build_vector_type_for_mode (char_type_node
, V16QImode
);
16763 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
16764 tree V2SF_type_node
= build_vector_type_for_mode (float_type_node
, V2SFmode
);
16765 tree V2DI_type_node
16766 = build_vector_type_for_mode (long_long_integer_type_node
, V2DImode
);
16767 tree V2DF_type_node
= build_vector_type_for_mode (double_type_node
, V2DFmode
);
16768 tree V4SF_type_node
= build_vector_type_for_mode (float_type_node
, V4SFmode
);
16769 tree V4SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V4SImode
);
16770 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
16771 tree V8QI_type_node
= build_vector_type_for_mode (char_type_node
, V8QImode
);
16772 tree V8HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V8HImode
);
16774 tree pchar_type_node
= build_pointer_type (char_type_node
);
16775 tree pcchar_type_node
= build_pointer_type (
16776 build_type_variant (char_type_node
, 1, 0));
16777 tree pfloat_type_node
= build_pointer_type (float_type_node
);
16778 tree pcfloat_type_node
= build_pointer_type (
16779 build_type_variant (float_type_node
, 1, 0));
16780 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
16781 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
16782 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
16785 tree int_ftype_v4sf_v4sf
16786 = build_function_type_list (integer_type_node
,
16787 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16788 tree v4si_ftype_v4sf_v4sf
16789 = build_function_type_list (V4SI_type_node
,
16790 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16791 /* MMX/SSE/integer conversions. */
16792 tree int_ftype_v4sf
16793 = build_function_type_list (integer_type_node
,
16794 V4SF_type_node
, NULL_TREE
);
16795 tree int64_ftype_v4sf
16796 = build_function_type_list (long_long_integer_type_node
,
16797 V4SF_type_node
, NULL_TREE
);
16798 tree int_ftype_v8qi
16799 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
16800 tree v4sf_ftype_v4sf_int
16801 = build_function_type_list (V4SF_type_node
,
16802 V4SF_type_node
, integer_type_node
, NULL_TREE
);
16803 tree v4sf_ftype_v4sf_int64
16804 = build_function_type_list (V4SF_type_node
,
16805 V4SF_type_node
, long_long_integer_type_node
,
16807 tree v4sf_ftype_v4sf_v2si
16808 = build_function_type_list (V4SF_type_node
,
16809 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
16811 /* Miscellaneous. */
16812 tree v8qi_ftype_v4hi_v4hi
16813 = build_function_type_list (V8QI_type_node
,
16814 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16815 tree v4hi_ftype_v2si_v2si
16816 = build_function_type_list (V4HI_type_node
,
16817 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16818 tree v4sf_ftype_v4sf_v4sf_int
16819 = build_function_type_list (V4SF_type_node
,
16820 V4SF_type_node
, V4SF_type_node
,
16821 integer_type_node
, NULL_TREE
);
16822 tree v2si_ftype_v4hi_v4hi
16823 = build_function_type_list (V2SI_type_node
,
16824 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16825 tree v4hi_ftype_v4hi_int
16826 = build_function_type_list (V4HI_type_node
,
16827 V4HI_type_node
, integer_type_node
, NULL_TREE
);
16828 tree v4hi_ftype_v4hi_di
16829 = build_function_type_list (V4HI_type_node
,
16830 V4HI_type_node
, long_long_unsigned_type_node
,
16832 tree v2si_ftype_v2si_di
16833 = build_function_type_list (V2SI_type_node
,
16834 V2SI_type_node
, long_long_unsigned_type_node
,
16836 tree void_ftype_void
16837 = build_function_type (void_type_node
, void_list_node
);
16838 tree void_ftype_unsigned
16839 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
16840 tree void_ftype_unsigned_unsigned
16841 = build_function_type_list (void_type_node
, unsigned_type_node
,
16842 unsigned_type_node
, NULL_TREE
);
16843 tree void_ftype_pcvoid_unsigned_unsigned
16844 = build_function_type_list (void_type_node
, const_ptr_type_node
,
16845 unsigned_type_node
, unsigned_type_node
,
16847 tree unsigned_ftype_void
16848 = build_function_type (unsigned_type_node
, void_list_node
);
16849 tree v2si_ftype_v4sf
16850 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
16851 /* Loads/stores. */
16852 tree void_ftype_v8qi_v8qi_pchar
16853 = build_function_type_list (void_type_node
,
16854 V8QI_type_node
, V8QI_type_node
,
16855 pchar_type_node
, NULL_TREE
);
16856 tree v4sf_ftype_pcfloat
16857 = build_function_type_list (V4SF_type_node
, pcfloat_type_node
, NULL_TREE
);
16858 /* @@@ the type is bogus */
16859 tree v4sf_ftype_v4sf_pv2si
16860 = build_function_type_list (V4SF_type_node
,
16861 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
16862 tree void_ftype_pv2si_v4sf
16863 = build_function_type_list (void_type_node
,
16864 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
16865 tree void_ftype_pfloat_v4sf
16866 = build_function_type_list (void_type_node
,
16867 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
16868 tree void_ftype_pdi_di
16869 = build_function_type_list (void_type_node
,
16870 pdi_type_node
, long_long_unsigned_type_node
,
16872 tree void_ftype_pv2di_v2di
16873 = build_function_type_list (void_type_node
,
16874 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
16875 /* Normal vector unops. */
16876 tree v4sf_ftype_v4sf
16877 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16878 tree v16qi_ftype_v16qi
16879 = build_function_type_list (V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
16880 tree v8hi_ftype_v8hi
16881 = build_function_type_list (V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
16882 tree v4si_ftype_v4si
16883 = build_function_type_list (V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
16884 tree v8qi_ftype_v8qi
16885 = build_function_type_list (V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
16886 tree v4hi_ftype_v4hi
16887 = build_function_type_list (V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16889 /* Normal vector binops. */
16890 tree v4sf_ftype_v4sf_v4sf
16891 = build_function_type_list (V4SF_type_node
,
16892 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16893 tree v8qi_ftype_v8qi_v8qi
16894 = build_function_type_list (V8QI_type_node
,
16895 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
16896 tree v4hi_ftype_v4hi_v4hi
16897 = build_function_type_list (V4HI_type_node
,
16898 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16899 tree v2si_ftype_v2si_v2si
16900 = build_function_type_list (V2SI_type_node
,
16901 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16902 tree di_ftype_di_di
16903 = build_function_type_list (long_long_unsigned_type_node
,
16904 long_long_unsigned_type_node
,
16905 long_long_unsigned_type_node
, NULL_TREE
);
16907 tree di_ftype_di_di_int
16908 = build_function_type_list (long_long_unsigned_type_node
,
16909 long_long_unsigned_type_node
,
16910 long_long_unsigned_type_node
,
16911 integer_type_node
, NULL_TREE
);
16913 tree v2si_ftype_v2sf
16914 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
16915 tree v2sf_ftype_v2si
16916 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
16917 tree v2si_ftype_v2si
16918 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16919 tree v2sf_ftype_v2sf
16920 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
16921 tree v2sf_ftype_v2sf_v2sf
16922 = build_function_type_list (V2SF_type_node
,
16923 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
16924 tree v2si_ftype_v2sf_v2sf
16925 = build_function_type_list (V2SI_type_node
,
16926 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
16927 tree pint_type_node
= build_pointer_type (integer_type_node
);
16928 tree pdouble_type_node
= build_pointer_type (double_type_node
);
16929 tree pcdouble_type_node
= build_pointer_type (
16930 build_type_variant (double_type_node
, 1, 0));
16931 tree int_ftype_v2df_v2df
16932 = build_function_type_list (integer_type_node
,
16933 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16935 tree void_ftype_pcvoid
16936 = build_function_type_list (void_type_node
, const_ptr_type_node
, NULL_TREE
);
16937 tree v4sf_ftype_v4si
16938 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
16939 tree v4si_ftype_v4sf
16940 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
16941 tree v2df_ftype_v4si
16942 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
16943 tree v4si_ftype_v2df
16944 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
16945 tree v2si_ftype_v2df
16946 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
16947 tree v4sf_ftype_v2df
16948 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
16949 tree v2df_ftype_v2si
16950 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
16951 tree v2df_ftype_v4sf
16952 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
16953 tree int_ftype_v2df
16954 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
16955 tree int64_ftype_v2df
16956 = build_function_type_list (long_long_integer_type_node
,
16957 V2DF_type_node
, NULL_TREE
);
16958 tree v2df_ftype_v2df_int
16959 = build_function_type_list (V2DF_type_node
,
16960 V2DF_type_node
, integer_type_node
, NULL_TREE
);
16961 tree v2df_ftype_v2df_int64
16962 = build_function_type_list (V2DF_type_node
,
16963 V2DF_type_node
, long_long_integer_type_node
,
16965 tree v4sf_ftype_v4sf_v2df
16966 = build_function_type_list (V4SF_type_node
,
16967 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
16968 tree v2df_ftype_v2df_v4sf
16969 = build_function_type_list (V2DF_type_node
,
16970 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
16971 tree v2df_ftype_v2df_v2df_int
16972 = build_function_type_list (V2DF_type_node
,
16973 V2DF_type_node
, V2DF_type_node
,
16976 tree v2df_ftype_v2df_pcdouble
16977 = build_function_type_list (V2DF_type_node
,
16978 V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
16979 tree void_ftype_pdouble_v2df
16980 = build_function_type_list (void_type_node
,
16981 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
16982 tree void_ftype_pint_int
16983 = build_function_type_list (void_type_node
,
16984 pint_type_node
, integer_type_node
, NULL_TREE
);
16985 tree void_ftype_v16qi_v16qi_pchar
16986 = build_function_type_list (void_type_node
,
16987 V16QI_type_node
, V16QI_type_node
,
16988 pchar_type_node
, NULL_TREE
);
16989 tree v2df_ftype_pcdouble
16990 = build_function_type_list (V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
16991 tree v2df_ftype_v2df_v2df
16992 = build_function_type_list (V2DF_type_node
,
16993 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16994 tree v16qi_ftype_v16qi_v16qi
16995 = build_function_type_list (V16QI_type_node
,
16996 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
16997 tree v8hi_ftype_v8hi_v8hi
16998 = build_function_type_list (V8HI_type_node
,
16999 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
17000 tree v4si_ftype_v4si_v4si
17001 = build_function_type_list (V4SI_type_node
,
17002 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
17003 tree v2di_ftype_v2di_v2di
17004 = build_function_type_list (V2DI_type_node
,
17005 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
17006 tree v2di_ftype_v2df_v2df
17007 = build_function_type_list (V2DI_type_node
,
17008 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
17009 tree v2df_ftype_v2df
17010 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
17011 tree v2di_ftype_v2di_int
17012 = build_function_type_list (V2DI_type_node
,
17013 V2DI_type_node
, integer_type_node
, NULL_TREE
);
17014 tree v2di_ftype_v2di_v2di_int
17015 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
17016 V2DI_type_node
, integer_type_node
, NULL_TREE
);
17017 tree v4si_ftype_v4si_int
17018 = build_function_type_list (V4SI_type_node
,
17019 V4SI_type_node
, integer_type_node
, NULL_TREE
);
17020 tree v8hi_ftype_v8hi_int
17021 = build_function_type_list (V8HI_type_node
,
17022 V8HI_type_node
, integer_type_node
, NULL_TREE
);
17023 tree v8hi_ftype_v8hi_v2di
17024 = build_function_type_list (V8HI_type_node
,
17025 V8HI_type_node
, V2DI_type_node
, NULL_TREE
);
17026 tree v4si_ftype_v4si_v2di
17027 = build_function_type_list (V4SI_type_node
,
17028 V4SI_type_node
, V2DI_type_node
, NULL_TREE
);
17029 tree v4si_ftype_v8hi_v8hi
17030 = build_function_type_list (V4SI_type_node
,
17031 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
17032 tree di_ftype_v8qi_v8qi
17033 = build_function_type_list (long_long_unsigned_type_node
,
17034 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
17035 tree di_ftype_v2si_v2si
17036 = build_function_type_list (long_long_unsigned_type_node
,
17037 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
17038 tree v2di_ftype_v16qi_v16qi
17039 = build_function_type_list (V2DI_type_node
,
17040 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
17041 tree v2di_ftype_v4si_v4si
17042 = build_function_type_list (V2DI_type_node
,
17043 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
17044 tree int_ftype_v16qi
17045 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
17046 tree v16qi_ftype_pcchar
17047 = build_function_type_list (V16QI_type_node
, pcchar_type_node
, NULL_TREE
);
17048 tree void_ftype_pchar_v16qi
17049 = build_function_type_list (void_type_node
,
17050 pchar_type_node
, V16QI_type_node
, NULL_TREE
);
17052 tree v2di_ftype_v2di_unsigned_unsigned
17053 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
17054 unsigned_type_node
, unsigned_type_node
,
17056 tree v2di_ftype_v2di_v2di_unsigned_unsigned
17057 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V2DI_type_node
,
17058 unsigned_type_node
, unsigned_type_node
,
17060 tree v2di_ftype_v2di_v16qi
17061 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V16QI_type_node
,
17065 tree float128_type
;
17068 /* The __float80 type. */
17069 if (TYPE_MODE (long_double_type_node
) == XFmode
)
17070 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
17074 /* The __float80 type. */
17075 float80_type
= make_node (REAL_TYPE
);
17076 TYPE_PRECISION (float80_type
) = 80;
17077 layout_type (float80_type
);
17078 (*lang_hooks
.types
.register_builtin_type
) (float80_type
, "__float80");
17083 float128_type
= make_node (REAL_TYPE
);
17084 TYPE_PRECISION (float128_type
) = 128;
17085 layout_type (float128_type
);
17086 (*lang_hooks
.types
.register_builtin_type
) (float128_type
, "__float128");
17089 /* Add all builtins that are more or less simple operations on two
17091 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
17093 /* Use one of the operands; the target can have a different mode for
17094 mask-generating compares. */
17095 enum machine_mode mode
;
17100 mode
= insn_data
[d
->icode
].operand
[1].mode
;
17105 type
= v16qi_ftype_v16qi_v16qi
;
17108 type
= v8hi_ftype_v8hi_v8hi
;
17111 type
= v4si_ftype_v4si_v4si
;
17114 type
= v2di_ftype_v2di_v2di
;
17117 type
= v2df_ftype_v2df_v2df
;
17120 type
= v4sf_ftype_v4sf_v4sf
;
17123 type
= v8qi_ftype_v8qi_v8qi
;
17126 type
= v4hi_ftype_v4hi_v4hi
;
17129 type
= v2si_ftype_v2si_v2si
;
17132 type
= di_ftype_di_di
;
17136 gcc_unreachable ();
17139 /* Override for comparisons. */
17140 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
17141 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
)
17142 type
= v4si_ftype_v4sf_v4sf
;
17144 if (d
->icode
== CODE_FOR_sse2_maskcmpv2df3
17145 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
17146 type
= v2di_ftype_v2df_v2df
;
17148 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
17151 /* Add all builtins that are more or less simple operations on 1 operand. */
17152 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
17154 enum machine_mode mode
;
17159 mode
= insn_data
[d
->icode
].operand
[1].mode
;
17164 type
= v16qi_ftype_v16qi
;
17167 type
= v8hi_ftype_v8hi
;
17170 type
= v4si_ftype_v4si
;
17173 type
= v2df_ftype_v2df
;
17176 type
= v4sf_ftype_v4sf
;
17179 type
= v8qi_ftype_v8qi
;
17182 type
= v4hi_ftype_v4hi
;
17185 type
= v2si_ftype_v2si
;
17192 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
17195 /* Add the remaining MMX insns with somewhat more complicated types. */
17196 def_builtin (MASK_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
17197 def_builtin (MASK_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
17198 def_builtin (MASK_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
17199 def_builtin (MASK_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
17201 def_builtin (MASK_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
17202 def_builtin (MASK_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
17203 def_builtin (MASK_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
17205 def_builtin (MASK_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
17206 def_builtin (MASK_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
17208 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
17209 def_builtin (MASK_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
17211 /* comi/ucomi insns. */
17212 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
17213 if (d
->mask
== MASK_SSE2
)
17214 def_builtin (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
17216 def_builtin (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
17218 def_builtin (MASK_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
17219 def_builtin (MASK_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
17220 def_builtin (MASK_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
17222 def_builtin (MASK_SSE
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
17223 def_builtin (MASK_SSE
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
17224 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
17225 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
17226 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
17227 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64
, IX86_BUILTIN_CVTSI642SS
);
17228 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
17229 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI64
);
17230 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
17231 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
17232 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI64
);
17234 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
17236 def_builtin (MASK_SSE
, "__builtin_ia32_loadups", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADUPS
);
17237 def_builtin (MASK_SSE
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
17239 def_builtin (MASK_SSE
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
17240 def_builtin (MASK_SSE
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
17241 def_builtin (MASK_SSE
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
17242 def_builtin (MASK_SSE
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
17244 def_builtin (MASK_SSE
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
17245 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
17246 def_builtin (MASK_SSE
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
17247 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
17249 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
17251 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
17253 def_builtin (MASK_SSE
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
17254 def_builtin (MASK_SSE
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
17255 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
17256 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
17257 def_builtin_const (MASK_SSE
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
17258 def_builtin_const (MASK_SSE
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
17260 def_builtin (MASK_SSE
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
17262 /* Original 3DNow! */
17263 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
17264 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
17265 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
17266 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
17267 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
17268 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
17269 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
17270 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
17271 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
17272 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
17273 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
17274 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
17275 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
17276 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
17277 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
17278 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
17279 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
17280 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
17281 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
17282 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
17284 /* 3DNow! extension as used in the Athlon CPU. */
17285 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
17286 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
17287 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
17288 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
17289 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
17290 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
17293 def_builtin (MASK_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
17295 def_builtin (MASK_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADUPD
);
17296 def_builtin (MASK_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
17298 def_builtin (MASK_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADHPD
);
17299 def_builtin (MASK_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADLPD
);
17301 def_builtin (MASK_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
17302 def_builtin (MASK_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
17303 def_builtin (MASK_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
17304 def_builtin (MASK_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
17305 def_builtin (MASK_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
17307 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
17308 def_builtin (MASK_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
17309 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
17310 def_builtin (MASK_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
17312 def_builtin_const (MASK_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
17313 def_builtin_const (MASK_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
17315 def_builtin (MASK_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
17317 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
17318 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
17320 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
17321 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
17322 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
17323 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
17324 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
17326 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
17328 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
17329 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
17330 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTSD2SI64
);
17331 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI64
);
17333 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
17334 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
17335 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
17337 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
17338 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64
, IX86_BUILTIN_CVTSI642SD
);
17339 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
17340 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
17342 def_builtin (MASK_SSE2
, "__builtin_ia32_clflush", void_ftype_pcvoid
, IX86_BUILTIN_CLFLUSH
);
17343 def_builtin (MASK_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
17344 def_builtin (MASK_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
17346 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQU
);
17347 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQU
);
17349 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si
, IX86_BUILTIN_PMULUDQ
);
17350 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si
, IX86_BUILTIN_PMULUDQ128
);
17352 def_builtin (MASK_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSLLW128
);
17353 def_builtin (MASK_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSLLD128
);
17354 def_builtin (MASK_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
17356 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRLW128
);
17357 def_builtin (MASK_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRLD128
);
17358 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
17360 def_builtin (MASK_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRAW128
);
17361 def_builtin (MASK_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRAD128
);
17363 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLDQI128
);
17364 def_builtin (MASK_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
17365 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
17366 def_builtin (MASK_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
17368 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
17369 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
17370 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
17371 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
17373 def_builtin (MASK_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
17374 def_builtin (MASK_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
17376 def_builtin (MASK_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
17378 /* Prescott New Instructions. */
17379 def_builtin (MASK_SSE3
, "__builtin_ia32_monitor",
17380 void_ftype_pcvoid_unsigned_unsigned
,
17381 IX86_BUILTIN_MONITOR
);
17382 def_builtin (MASK_SSE3
, "__builtin_ia32_mwait",
17383 void_ftype_unsigned_unsigned
,
17384 IX86_BUILTIN_MWAIT
);
17385 def_builtin (MASK_SSE3
, "__builtin_ia32_lddqu",
17386 v16qi_ftype_pcchar
, IX86_BUILTIN_LDDQU
);
17389 def_builtin (MASK_SSSE3
, "__builtin_ia32_palignr128",
17390 v2di_ftype_v2di_v2di_int
, IX86_BUILTIN_PALIGNR128
);
17391 def_builtin (MASK_SSSE3
, "__builtin_ia32_palignr", di_ftype_di_di_int
,
17392 IX86_BUILTIN_PALIGNR
);
17394 /* AMDFAM10 SSE4A New built-ins */
17395 def_builtin (MASK_SSE4A
, "__builtin_ia32_movntsd",
17396 void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTSD
);
17397 def_builtin (MASK_SSE4A
, "__builtin_ia32_movntss",
17398 void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTSS
);
17399 def_builtin (MASK_SSE4A
, "__builtin_ia32_extrqi",
17400 v2di_ftype_v2di_unsigned_unsigned
, IX86_BUILTIN_EXTRQI
);
17401 def_builtin (MASK_SSE4A
, "__builtin_ia32_extrq",
17402 v2di_ftype_v2di_v16qi
, IX86_BUILTIN_EXTRQ
);
17403 def_builtin (MASK_SSE4A
, "__builtin_ia32_insertqi",
17404 v2di_ftype_v2di_v2di_unsigned_unsigned
, IX86_BUILTIN_INSERTQI
);
17405 def_builtin (MASK_SSE4A
, "__builtin_ia32_insertq",
17406 v2di_ftype_v2di_v2di
, IX86_BUILTIN_INSERTQ
);
17408 /* Access to the vec_init patterns. */
17409 ftype
= build_function_type_list (V2SI_type_node
, integer_type_node
,
17410 integer_type_node
, NULL_TREE
);
17411 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v2si",
17412 ftype
, IX86_BUILTIN_VEC_INIT_V2SI
);
17414 ftype
= build_function_type_list (V4HI_type_node
, short_integer_type_node
,
17415 short_integer_type_node
,
17416 short_integer_type_node
,
17417 short_integer_type_node
, NULL_TREE
);
17418 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v4hi",
17419 ftype
, IX86_BUILTIN_VEC_INIT_V4HI
);
17421 ftype
= build_function_type_list (V8QI_type_node
, char_type_node
,
17422 char_type_node
, char_type_node
,
17423 char_type_node
, char_type_node
,
17424 char_type_node
, char_type_node
,
17425 char_type_node
, NULL_TREE
);
17426 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v8qi",
17427 ftype
, IX86_BUILTIN_VEC_INIT_V8QI
);
17429 /* Access to the vec_extract patterns. */
17430 ftype
= build_function_type_list (double_type_node
, V2DF_type_node
,
17431 integer_type_node
, NULL_TREE
);
17432 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v2df",
17433 ftype
, IX86_BUILTIN_VEC_EXT_V2DF
);
17435 ftype
= build_function_type_list (long_long_integer_type_node
,
17436 V2DI_type_node
, integer_type_node
,
17438 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v2di",
17439 ftype
, IX86_BUILTIN_VEC_EXT_V2DI
);
17441 ftype
= build_function_type_list (float_type_node
, V4SF_type_node
,
17442 integer_type_node
, NULL_TREE
);
17443 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v4sf",
17444 ftype
, IX86_BUILTIN_VEC_EXT_V4SF
);
17446 ftype
= build_function_type_list (intSI_type_node
, V4SI_type_node
,
17447 integer_type_node
, NULL_TREE
);
17448 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v4si",
17449 ftype
, IX86_BUILTIN_VEC_EXT_V4SI
);
17451 ftype
= build_function_type_list (intHI_type_node
, V8HI_type_node
,
17452 integer_type_node
, NULL_TREE
);
17453 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v8hi",
17454 ftype
, IX86_BUILTIN_VEC_EXT_V8HI
);
17456 ftype
= build_function_type_list (intHI_type_node
, V4HI_type_node
,
17457 integer_type_node
, NULL_TREE
);
17458 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_vec_ext_v4hi",
17459 ftype
, IX86_BUILTIN_VEC_EXT_V4HI
);
17461 ftype
= build_function_type_list (intSI_type_node
, V2SI_type_node
,
17462 integer_type_node
, NULL_TREE
);
17463 def_builtin (MASK_MMX
, "__builtin_ia32_vec_ext_v2si",
17464 ftype
, IX86_BUILTIN_VEC_EXT_V2SI
);
17466 /* Access to the vec_set patterns. */
17467 ftype
= build_function_type_list (V8HI_type_node
, V8HI_type_node
,
17469 integer_type_node
, NULL_TREE
);
17470 def_builtin (MASK_SSE
, "__builtin_ia32_vec_set_v8hi",
17471 ftype
, IX86_BUILTIN_VEC_SET_V8HI
);
17473 ftype
= build_function_type_list (V4HI_type_node
, V4HI_type_node
,
17475 integer_type_node
, NULL_TREE
);
17476 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_vec_set_v4hi",
17477 ftype
, IX86_BUILTIN_VEC_SET_V4HI
);
17480 /* Errors in the source file can cause expand_expr to return const0_rtx
17481 where we expect a vector. To avoid crashing, use one of the vector
17482 clear instructions. */
17484 safe_vector_operand (rtx x
, enum machine_mode mode
)
17486 if (x
== const0_rtx
)
17487 x
= CONST0_RTX (mode
);
17491 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
17494 ix86_expand_binop_builtin (enum insn_code icode
, tree exp
, rtx target
)
17497 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17498 tree arg1
= CALL_EXPR_ARG (exp
, 1);
17499 rtx op0
= expand_normal (arg0
);
17500 rtx op1
= expand_normal (arg1
);
17501 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17502 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
17503 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
17505 if (VECTOR_MODE_P (mode0
))
17506 op0
= safe_vector_operand (op0
, mode0
);
17507 if (VECTOR_MODE_P (mode1
))
17508 op1
= safe_vector_operand (op1
, mode1
);
17510 if (optimize
|| !target
17511 || GET_MODE (target
) != tmode
17512 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17513 target
= gen_reg_rtx (tmode
);
17515 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
17517 rtx x
= gen_reg_rtx (V4SImode
);
17518 emit_insn (gen_sse2_loadd (x
, op1
));
17519 op1
= gen_lowpart (TImode
, x
);
17522 /* The insn must want input operands in the same modes as the
17524 gcc_assert ((GET_MODE (op0
) == mode0
|| GET_MODE (op0
) == VOIDmode
)
17525 && (GET_MODE (op1
) == mode1
|| GET_MODE (op1
) == VOIDmode
));
17527 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17528 op0
= copy_to_mode_reg (mode0
, op0
);
17529 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
17530 op1
= copy_to_mode_reg (mode1
, op1
);
17532 /* ??? Using ix86_fixup_binary_operands is problematic when
17533 we've got mismatched modes. Fake it. */
17539 if (tmode
== mode0
&& tmode
== mode1
)
17541 target
= ix86_fixup_binary_operands (UNKNOWN
, tmode
, xops
);
17545 else if (optimize
|| !ix86_binary_operator_ok (UNKNOWN
, tmode
, xops
))
17547 op0
= force_reg (mode0
, op0
);
17548 op1
= force_reg (mode1
, op1
);
17549 target
= gen_reg_rtx (tmode
);
17552 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17559 /* Subroutine of ix86_expand_builtin to take care of stores. */
17562 ix86_expand_store_builtin (enum insn_code icode
, tree exp
)
17565 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17566 tree arg1
= CALL_EXPR_ARG (exp
, 1);
17567 rtx op0
= expand_normal (arg0
);
17568 rtx op1
= expand_normal (arg1
);
17569 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
17570 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
17572 if (VECTOR_MODE_P (mode1
))
17573 op1
= safe_vector_operand (op1
, mode1
);
17575 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
17576 op1
= copy_to_mode_reg (mode1
, op1
);
17578 pat
= GEN_FCN (icode
) (op0
, op1
);
17584 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
17587 ix86_expand_unop_builtin (enum insn_code icode
, tree exp
,
17588 rtx target
, int do_load
)
17591 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17592 rtx op0
= expand_normal (arg0
);
17593 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17594 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
17596 if (optimize
|| !target
17597 || GET_MODE (target
) != tmode
17598 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17599 target
= gen_reg_rtx (tmode
);
17601 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
17604 if (VECTOR_MODE_P (mode0
))
17605 op0
= safe_vector_operand (op0
, mode0
);
17607 if ((optimize
&& !register_operand (op0
, mode0
))
17608 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17609 op0
= copy_to_mode_reg (mode0
, op0
);
17612 pat
= GEN_FCN (icode
) (target
, op0
);
17619 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
17620 sqrtss, rsqrtss, rcpss. */
17623 ix86_expand_unop1_builtin (enum insn_code icode
, tree exp
, rtx target
)
17626 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17627 rtx op1
, op0
= expand_normal (arg0
);
17628 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17629 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
17631 if (optimize
|| !target
17632 || GET_MODE (target
) != tmode
17633 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17634 target
= gen_reg_rtx (tmode
);
17636 if (VECTOR_MODE_P (mode0
))
17637 op0
= safe_vector_operand (op0
, mode0
);
17639 if ((optimize
&& !register_operand (op0
, mode0
))
17640 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17641 op0
= copy_to_mode_reg (mode0
, op0
);
17644 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
17645 op1
= copy_to_mode_reg (mode0
, op1
);
17647 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17654 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
17657 ix86_expand_sse_compare (const struct builtin_description
*d
, tree exp
,
17661 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17662 tree arg1
= CALL_EXPR_ARG (exp
, 1);
17663 rtx op0
= expand_normal (arg0
);
17664 rtx op1
= expand_normal (arg1
);
17666 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
17667 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
17668 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
17669 enum rtx_code comparison
= d
->comparison
;
17671 if (VECTOR_MODE_P (mode0
))
17672 op0
= safe_vector_operand (op0
, mode0
);
17673 if (VECTOR_MODE_P (mode1
))
17674 op1
= safe_vector_operand (op1
, mode1
);
17676 /* Swap operands if we have a comparison that isn't available in
17678 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
17680 rtx tmp
= gen_reg_rtx (mode1
);
17681 emit_move_insn (tmp
, op1
);
17686 if (optimize
|| !target
17687 || GET_MODE (target
) != tmode
17688 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
17689 target
= gen_reg_rtx (tmode
);
17691 if ((optimize
&& !register_operand (op0
, mode0
))
17692 || ! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
17693 op0
= copy_to_mode_reg (mode0
, op0
);
17694 if ((optimize
&& !register_operand (op1
, mode1
))
17695 || ! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
17696 op1
= copy_to_mode_reg (mode1
, op1
);
17698 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
17699 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
17706 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
17709 ix86_expand_sse_comi (const struct builtin_description
*d
, tree exp
,
17713 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17714 tree arg1
= CALL_EXPR_ARG (exp
, 1);
17715 rtx op0
= expand_normal (arg0
);
17716 rtx op1
= expand_normal (arg1
);
17718 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
17719 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
17720 enum rtx_code comparison
= d
->comparison
;
17722 if (VECTOR_MODE_P (mode0
))
17723 op0
= safe_vector_operand (op0
, mode0
);
17724 if (VECTOR_MODE_P (mode1
))
17725 op1
= safe_vector_operand (op1
, mode1
);
17727 /* Swap operands if we have a comparison that isn't available in
17729 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
17736 target
= gen_reg_rtx (SImode
);
17737 emit_move_insn (target
, const0_rtx
);
17738 target
= gen_rtx_SUBREG (QImode
, target
, 0);
17740 if ((optimize
&& !register_operand (op0
, mode0
))
17741 || !(*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
17742 op0
= copy_to_mode_reg (mode0
, op0
);
17743 if ((optimize
&& !register_operand (op1
, mode1
))
17744 || !(*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
17745 op1
= copy_to_mode_reg (mode1
, op1
);
17747 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
17748 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
17752 emit_insn (gen_rtx_SET (VOIDmode
,
17753 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
17754 gen_rtx_fmt_ee (comparison
, QImode
,
17758 return SUBREG_REG (target
);
17761 /* Return the integer constant in ARG. Constrain it to be in the range
17762 of the subparts of VEC_TYPE; issue an error if not. */
17765 get_element_number (tree vec_type
, tree arg
)
17767 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
17769 if (!host_integerp (arg
, 1)
17770 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
17772 error ("selector must be an integer constant in the range 0..%wi", max
);
17779 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17780 ix86_expand_vector_init. We DO have language-level syntax for this, in
17781 the form of (type){ init-list }. Except that since we can't place emms
17782 instructions from inside the compiler, we can't allow the use of MMX
17783 registers unless the user explicitly asks for it. So we do *not* define
17784 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
17785 we have builtins invoked by mmintrin.h that gives us license to emit
17786 these sorts of instructions. */
17789 ix86_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
17791 enum machine_mode tmode
= TYPE_MODE (type
);
17792 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
17793 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
17794 rtvec v
= rtvec_alloc (n_elt
);
17796 gcc_assert (VECTOR_MODE_P (tmode
));
17797 gcc_assert (call_expr_nargs (exp
) == n_elt
);
17799 for (i
= 0; i
< n_elt
; ++i
)
17801 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
17802 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
17805 if (!target
|| !register_operand (target
, tmode
))
17806 target
= gen_reg_rtx (tmode
);
17808 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
17812 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17813 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
17814 had a language-level syntax for referencing vector elements. */
17817 ix86_expand_vec_ext_builtin (tree exp
, rtx target
)
17819 enum machine_mode tmode
, mode0
;
17824 arg0
= CALL_EXPR_ARG (exp
, 0);
17825 arg1
= CALL_EXPR_ARG (exp
, 1);
17827 op0
= expand_normal (arg0
);
17828 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
17830 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
17831 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
17832 gcc_assert (VECTOR_MODE_P (mode0
));
17834 op0
= force_reg (mode0
, op0
);
17836 if (optimize
|| !target
|| !register_operand (target
, tmode
))
17837 target
= gen_reg_rtx (tmode
);
17839 ix86_expand_vector_extract (true, target
, op0
, elt
);
17844 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17845 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
17846 a language-level syntax for referencing vector elements. */
17849 ix86_expand_vec_set_builtin (tree exp
)
17851 enum machine_mode tmode
, mode1
;
17852 tree arg0
, arg1
, arg2
;
17856 arg0
= CALL_EXPR_ARG (exp
, 0);
17857 arg1
= CALL_EXPR_ARG (exp
, 1);
17858 arg2
= CALL_EXPR_ARG (exp
, 2);
17860 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
17861 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
17862 gcc_assert (VECTOR_MODE_P (tmode
));
17864 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, 0);
17865 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, 0);
17866 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
17868 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
17869 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
17871 op0
= force_reg (tmode
, op0
);
17872 op1
= force_reg (mode1
, op1
);
17874 ix86_expand_vector_set (true, op0
, op1
, elt
);
17879 /* Expand an expression EXP that calls a built-in function,
17880 with result going to TARGET if that's convenient
17881 (and in mode MODE if that's convenient).
17882 SUBTARGET may be used as the target for computing one of EXP's operands.
17883 IGNORE is nonzero if the value is to be ignored. */
17886 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
17887 enum machine_mode mode ATTRIBUTE_UNUSED
,
17888 int ignore ATTRIBUTE_UNUSED
)
17890 const struct builtin_description
*d
;
17892 enum insn_code icode
;
17893 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
17894 tree arg0
, arg1
, arg2
, arg3
;
17895 rtx op0
, op1
, op2
, op3
, pat
;
17896 enum machine_mode tmode
, mode0
, mode1
, mode2
, mode3
, mode4
;
17897 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
17901 case IX86_BUILTIN_EMMS
:
17902 emit_insn (gen_mmx_emms ());
17905 case IX86_BUILTIN_SFENCE
:
17906 emit_insn (gen_sse_sfence ());
17909 case IX86_BUILTIN_MASKMOVQ
:
17910 case IX86_BUILTIN_MASKMOVDQU
:
17911 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
17912 ? CODE_FOR_mmx_maskmovq
17913 : CODE_FOR_sse2_maskmovdqu
);
17914 /* Note the arg order is different from the operand order. */
17915 arg1
= CALL_EXPR_ARG (exp
, 0);
17916 arg2
= CALL_EXPR_ARG (exp
, 1);
17917 arg0
= CALL_EXPR_ARG (exp
, 2);
17918 op0
= expand_normal (arg0
);
17919 op1
= expand_normal (arg1
);
17920 op2
= expand_normal (arg2
);
17921 mode0
= insn_data
[icode
].operand
[0].mode
;
17922 mode1
= insn_data
[icode
].operand
[1].mode
;
17923 mode2
= insn_data
[icode
].operand
[2].mode
;
17925 op0
= force_reg (Pmode
, op0
);
17926 op0
= gen_rtx_MEM (mode1
, op0
);
17928 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
17929 op0
= copy_to_mode_reg (mode0
, op0
);
17930 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
17931 op1
= copy_to_mode_reg (mode1
, op1
);
17932 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
17933 op2
= copy_to_mode_reg (mode2
, op2
);
17934 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
17940 case IX86_BUILTIN_SQRTSS
:
17941 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2
, exp
, target
);
17942 case IX86_BUILTIN_RSQRTSS
:
17943 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2
, exp
, target
);
17944 case IX86_BUILTIN_RCPSS
:
17945 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2
, exp
, target
);
17947 case IX86_BUILTIN_LOADUPS
:
17948 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, exp
, target
, 1);
17950 case IX86_BUILTIN_STOREUPS
:
17951 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, exp
);
17953 case IX86_BUILTIN_LOADHPS
:
17954 case IX86_BUILTIN_LOADLPS
:
17955 case IX86_BUILTIN_LOADHPD
:
17956 case IX86_BUILTIN_LOADLPD
:
17957 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_loadhps
17958 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_loadlps
17959 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_loadhpd
17960 : CODE_FOR_sse2_loadlpd
);
17961 arg0
= CALL_EXPR_ARG (exp
, 0);
17962 arg1
= CALL_EXPR_ARG (exp
, 1);
17963 op0
= expand_normal (arg0
);
17964 op1
= expand_normal (arg1
);
17965 tmode
= insn_data
[icode
].operand
[0].mode
;
17966 mode0
= insn_data
[icode
].operand
[1].mode
;
17967 mode1
= insn_data
[icode
].operand
[2].mode
;
17969 op0
= force_reg (mode0
, op0
);
17970 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
17971 if (optimize
|| target
== 0
17972 || GET_MODE (target
) != tmode
17973 || !register_operand (target
, tmode
))
17974 target
= gen_reg_rtx (tmode
);
17975 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17981 case IX86_BUILTIN_STOREHPS
:
17982 case IX86_BUILTIN_STORELPS
:
17983 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_storehps
17984 : CODE_FOR_sse_storelps
);
17985 arg0
= CALL_EXPR_ARG (exp
, 0);
17986 arg1
= CALL_EXPR_ARG (exp
, 1);
17987 op0
= expand_normal (arg0
);
17988 op1
= expand_normal (arg1
);
17989 mode0
= insn_data
[icode
].operand
[0].mode
;
17990 mode1
= insn_data
[icode
].operand
[1].mode
;
17992 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
17993 op1
= force_reg (mode1
, op1
);
17995 pat
= GEN_FCN (icode
) (op0
, op1
);
18001 case IX86_BUILTIN_MOVNTPS
:
18002 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, exp
);
18003 case IX86_BUILTIN_MOVNTQ
:
18004 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, exp
);
18006 case IX86_BUILTIN_LDMXCSR
:
18007 op0
= expand_normal (CALL_EXPR_ARG (exp
, 0));
18008 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
18009 emit_move_insn (target
, op0
);
18010 emit_insn (gen_sse_ldmxcsr (target
));
18013 case IX86_BUILTIN_STMXCSR
:
18014 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
18015 emit_insn (gen_sse_stmxcsr (target
));
18016 return copy_to_mode_reg (SImode
, target
);
18018 case IX86_BUILTIN_SHUFPS
:
18019 case IX86_BUILTIN_SHUFPD
:
18020 icode
= (fcode
== IX86_BUILTIN_SHUFPS
18021 ? CODE_FOR_sse_shufps
18022 : CODE_FOR_sse2_shufpd
);
18023 arg0
= CALL_EXPR_ARG (exp
, 0);
18024 arg1
= CALL_EXPR_ARG (exp
, 1);
18025 arg2
= CALL_EXPR_ARG (exp
, 2);
18026 op0
= expand_normal (arg0
);
18027 op1
= expand_normal (arg1
);
18028 op2
= expand_normal (arg2
);
18029 tmode
= insn_data
[icode
].operand
[0].mode
;
18030 mode0
= insn_data
[icode
].operand
[1].mode
;
18031 mode1
= insn_data
[icode
].operand
[2].mode
;
18032 mode2
= insn_data
[icode
].operand
[3].mode
;
18034 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
18035 op0
= copy_to_mode_reg (mode0
, op0
);
18036 if ((optimize
&& !register_operand (op1
, mode1
))
18037 || !(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
18038 op1
= copy_to_mode_reg (mode1
, op1
);
18039 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
18041 /* @@@ better error message */
18042 error ("mask must be an immediate");
18043 return gen_reg_rtx (tmode
);
18045 if (optimize
|| target
== 0
18046 || GET_MODE (target
) != tmode
18047 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18048 target
= gen_reg_rtx (tmode
);
18049 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
18055 case IX86_BUILTIN_PSHUFW
:
18056 case IX86_BUILTIN_PSHUFD
:
18057 case IX86_BUILTIN_PSHUFHW
:
18058 case IX86_BUILTIN_PSHUFLW
:
18059 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
18060 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
18061 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
18062 : CODE_FOR_mmx_pshufw
);
18063 arg0
= CALL_EXPR_ARG (exp
, 0);
18064 arg1
= CALL_EXPR_ARG (exp
, 1);
18065 op0
= expand_normal (arg0
);
18066 op1
= expand_normal (arg1
);
18067 tmode
= insn_data
[icode
].operand
[0].mode
;
18068 mode1
= insn_data
[icode
].operand
[1].mode
;
18069 mode2
= insn_data
[icode
].operand
[2].mode
;
18071 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18072 op0
= copy_to_mode_reg (mode1
, op0
);
18073 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18075 /* @@@ better error message */
18076 error ("mask must be an immediate");
18080 || GET_MODE (target
) != tmode
18081 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18082 target
= gen_reg_rtx (tmode
);
18083 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18089 case IX86_BUILTIN_PSLLDQI128
:
18090 case IX86_BUILTIN_PSRLDQI128
:
18091 icode
= ( fcode
== IX86_BUILTIN_PSLLDQI128
? CODE_FOR_sse2_ashlti3
18092 : CODE_FOR_sse2_lshrti3
);
18093 arg0
= CALL_EXPR_ARG (exp
, 0);
18094 arg1
= CALL_EXPR_ARG (exp
, 1);
18095 op0
= expand_normal (arg0
);
18096 op1
= expand_normal (arg1
);
18097 tmode
= insn_data
[icode
].operand
[0].mode
;
18098 mode1
= insn_data
[icode
].operand
[1].mode
;
18099 mode2
= insn_data
[icode
].operand
[2].mode
;
18101 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18103 op0
= copy_to_reg (op0
);
18104 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
18106 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18108 error ("shift must be an immediate");
18111 target
= gen_reg_rtx (V2DImode
);
18112 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, V2DImode
, 0), op0
, op1
);
18118 case IX86_BUILTIN_FEMMS
:
18119 emit_insn (gen_mmx_femms ());
18122 case IX86_BUILTIN_PAVGUSB
:
18123 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3
, exp
, target
);
18125 case IX86_BUILTIN_PF2ID
:
18126 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id
, exp
, target
, 0);
18128 case IX86_BUILTIN_PFACC
:
18129 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3
, exp
, target
);
18131 case IX86_BUILTIN_PFADD
:
18132 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3
, exp
, target
);
18134 case IX86_BUILTIN_PFCMPEQ
:
18135 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3
, exp
, target
);
18137 case IX86_BUILTIN_PFCMPGE
:
18138 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3
, exp
, target
);
18140 case IX86_BUILTIN_PFCMPGT
:
18141 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3
, exp
, target
);
18143 case IX86_BUILTIN_PFMAX
:
18144 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3
, exp
, target
);
18146 case IX86_BUILTIN_PFMIN
:
18147 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3
, exp
, target
);
18149 case IX86_BUILTIN_PFMUL
:
18150 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3
, exp
, target
);
18152 case IX86_BUILTIN_PFRCP
:
18153 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2
, exp
, target
, 0);
18155 case IX86_BUILTIN_PFRCPIT1
:
18156 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3
, exp
, target
);
18158 case IX86_BUILTIN_PFRCPIT2
:
18159 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3
, exp
, target
);
18161 case IX86_BUILTIN_PFRSQIT1
:
18162 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3
, exp
, target
);
18164 case IX86_BUILTIN_PFRSQRT
:
18165 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2
, exp
, target
, 0);
18167 case IX86_BUILTIN_PFSUB
:
18168 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3
, exp
, target
);
18170 case IX86_BUILTIN_PFSUBR
:
18171 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3
, exp
, target
);
18173 case IX86_BUILTIN_PI2FD
:
18174 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2
, exp
, target
, 0);
18176 case IX86_BUILTIN_PMULHRW
:
18177 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3
, exp
, target
);
18179 case IX86_BUILTIN_PF2IW
:
18180 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw
, exp
, target
, 0);
18182 case IX86_BUILTIN_PFNACC
:
18183 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3
, exp
, target
);
18185 case IX86_BUILTIN_PFPNACC
:
18186 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3
, exp
, target
);
18188 case IX86_BUILTIN_PI2FW
:
18189 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw
, exp
, target
, 0);
18191 case IX86_BUILTIN_PSWAPDSI
:
18192 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2
, exp
, target
, 0);
18194 case IX86_BUILTIN_PSWAPDSF
:
18195 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2
, exp
, target
, 0);
18197 case IX86_BUILTIN_SQRTSD
:
18198 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2
, exp
, target
);
18199 case IX86_BUILTIN_LOADUPD
:
18200 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, exp
, target
, 1);
18201 case IX86_BUILTIN_STOREUPD
:
18202 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, exp
);
18204 case IX86_BUILTIN_MFENCE
:
18205 emit_insn (gen_sse2_mfence ());
18207 case IX86_BUILTIN_LFENCE
:
18208 emit_insn (gen_sse2_lfence ());
18211 case IX86_BUILTIN_CLFLUSH
:
18212 arg0
= CALL_EXPR_ARG (exp
, 0);
18213 op0
= expand_normal (arg0
);
18214 icode
= CODE_FOR_sse2_clflush
;
18215 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, Pmode
))
18216 op0
= copy_to_mode_reg (Pmode
, op0
);
18218 emit_insn (gen_sse2_clflush (op0
));
18221 case IX86_BUILTIN_MOVNTPD
:
18222 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, exp
);
18223 case IX86_BUILTIN_MOVNTDQ
:
18224 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, exp
);
18225 case IX86_BUILTIN_MOVNTI
:
18226 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, exp
);
18228 case IX86_BUILTIN_LOADDQU
:
18229 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu
, exp
, target
, 1);
18230 case IX86_BUILTIN_STOREDQU
:
18231 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu
, exp
);
18233 case IX86_BUILTIN_MONITOR
:
18234 arg0
= CALL_EXPR_ARG (exp
, 0);
18235 arg1
= CALL_EXPR_ARG (exp
, 1);
18236 arg2
= CALL_EXPR_ARG (exp
, 2);
18237 op0
= expand_normal (arg0
);
18238 op1
= expand_normal (arg1
);
18239 op2
= expand_normal (arg2
);
18241 op0
= copy_to_mode_reg (Pmode
, op0
);
18243 op1
= copy_to_mode_reg (SImode
, op1
);
18245 op2
= copy_to_mode_reg (SImode
, op2
);
18247 emit_insn (gen_sse3_monitor (op0
, op1
, op2
));
18249 emit_insn (gen_sse3_monitor64 (op0
, op1
, op2
));
18252 case IX86_BUILTIN_MWAIT
:
18253 arg0
= CALL_EXPR_ARG (exp
, 0);
18254 arg1
= CALL_EXPR_ARG (exp
, 1);
18255 op0
= expand_normal (arg0
);
18256 op1
= expand_normal (arg1
);
18258 op0
= copy_to_mode_reg (SImode
, op0
);
18260 op1
= copy_to_mode_reg (SImode
, op1
);
18261 emit_insn (gen_sse3_mwait (op0
, op1
));
18264 case IX86_BUILTIN_LDDQU
:
18265 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu
, exp
,
18268 case IX86_BUILTIN_PALIGNR
:
18269 case IX86_BUILTIN_PALIGNR128
:
18270 if (fcode
== IX86_BUILTIN_PALIGNR
)
18272 icode
= CODE_FOR_ssse3_palignrdi
;
18277 icode
= CODE_FOR_ssse3_palignrti
;
18280 arg0
= CALL_EXPR_ARG (exp
, 0);
18281 arg1
= CALL_EXPR_ARG (exp
, 1);
18282 arg2
= CALL_EXPR_ARG (exp
, 2);
18283 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
18284 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
18285 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
18286 tmode
= insn_data
[icode
].operand
[0].mode
;
18287 mode1
= insn_data
[icode
].operand
[1].mode
;
18288 mode2
= insn_data
[icode
].operand
[2].mode
;
18289 mode3
= insn_data
[icode
].operand
[3].mode
;
18291 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18293 op0
= copy_to_reg (op0
);
18294 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
18296 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18298 op1
= copy_to_reg (op1
);
18299 op1
= simplify_gen_subreg (mode2
, op1
, GET_MODE (op1
), 0);
18301 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
18303 error ("shift must be an immediate");
18306 target
= gen_reg_rtx (mode
);
18307 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, mode
, 0),
18314 case IX86_BUILTIN_MOVNTSD
:
18315 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df
, exp
);
18317 case IX86_BUILTIN_MOVNTSS
:
18318 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf
, exp
);
18320 case IX86_BUILTIN_INSERTQ
:
18321 case IX86_BUILTIN_EXTRQ
:
18322 icode
= (fcode
== IX86_BUILTIN_EXTRQ
18323 ? CODE_FOR_sse4a_extrq
18324 : CODE_FOR_sse4a_insertq
);
18325 arg0
= CALL_EXPR_ARG (exp
, 0);
18326 arg1
= CALL_EXPR_ARG (exp
, 1);
18327 op0
= expand_normal (arg0
);
18328 op1
= expand_normal (arg1
);
18329 tmode
= insn_data
[icode
].operand
[0].mode
;
18330 mode1
= insn_data
[icode
].operand
[1].mode
;
18331 mode2
= insn_data
[icode
].operand
[2].mode
;
18332 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18333 op0
= copy_to_mode_reg (mode1
, op0
);
18334 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18335 op1
= copy_to_mode_reg (mode2
, op1
);
18336 if (optimize
|| target
== 0
18337 || GET_MODE (target
) != tmode
18338 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18339 target
= gen_reg_rtx (tmode
);
18340 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18346 case IX86_BUILTIN_EXTRQI
:
18347 icode
= CODE_FOR_sse4a_extrqi
;
18348 arg0
= CALL_EXPR_ARG (exp
, 0);
18349 arg1
= CALL_EXPR_ARG (exp
, 1);
18350 arg2
= CALL_EXPR_ARG (exp
, 2);
18351 op0
= expand_normal (arg0
);
18352 op1
= expand_normal (arg1
);
18353 op2
= expand_normal (arg2
);
18354 tmode
= insn_data
[icode
].operand
[0].mode
;
18355 mode1
= insn_data
[icode
].operand
[1].mode
;
18356 mode2
= insn_data
[icode
].operand
[2].mode
;
18357 mode3
= insn_data
[icode
].operand
[3].mode
;
18358 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18359 op0
= copy_to_mode_reg (mode1
, op0
);
18360 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18362 error ("index mask must be an immediate");
18363 return gen_reg_rtx (tmode
);
18365 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
18367 error ("length mask must be an immediate");
18368 return gen_reg_rtx (tmode
);
18370 if (optimize
|| target
== 0
18371 || GET_MODE (target
) != tmode
18372 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18373 target
= gen_reg_rtx (tmode
);
18374 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
18380 case IX86_BUILTIN_INSERTQI
:
18381 icode
= CODE_FOR_sse4a_insertqi
;
18382 arg0
= CALL_EXPR_ARG (exp
, 0);
18383 arg1
= CALL_EXPR_ARG (exp
, 1);
18384 arg2
= CALL_EXPR_ARG (exp
, 2);
18385 arg3
= CALL_EXPR_ARG (exp
, 3);
18386 op0
= expand_normal (arg0
);
18387 op1
= expand_normal (arg1
);
18388 op2
= expand_normal (arg2
);
18389 op3
= expand_normal (arg3
);
18390 tmode
= insn_data
[icode
].operand
[0].mode
;
18391 mode1
= insn_data
[icode
].operand
[1].mode
;
18392 mode2
= insn_data
[icode
].operand
[2].mode
;
18393 mode3
= insn_data
[icode
].operand
[3].mode
;
18394 mode4
= insn_data
[icode
].operand
[4].mode
;
18396 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18397 op0
= copy_to_mode_reg (mode1
, op0
);
18399 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18400 op1
= copy_to_mode_reg (mode2
, op1
);
18402 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
18404 error ("index mask must be an immediate");
18405 return gen_reg_rtx (tmode
);
18407 if (! (*insn_data
[icode
].operand
[4].predicate
) (op3
, mode4
))
18409 error ("length mask must be an immediate");
18410 return gen_reg_rtx (tmode
);
18412 if (optimize
|| target
== 0
18413 || GET_MODE (target
) != tmode
18414 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18415 target
= gen_reg_rtx (tmode
);
18416 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
, op3
);
18422 case IX86_BUILTIN_VEC_INIT_V2SI
:
18423 case IX86_BUILTIN_VEC_INIT_V4HI
:
18424 case IX86_BUILTIN_VEC_INIT_V8QI
:
18425 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
18427 case IX86_BUILTIN_VEC_EXT_V2DF
:
18428 case IX86_BUILTIN_VEC_EXT_V2DI
:
18429 case IX86_BUILTIN_VEC_EXT_V4SF
:
18430 case IX86_BUILTIN_VEC_EXT_V4SI
:
18431 case IX86_BUILTIN_VEC_EXT_V8HI
:
18432 case IX86_BUILTIN_VEC_EXT_V2SI
:
18433 case IX86_BUILTIN_VEC_EXT_V4HI
:
18434 return ix86_expand_vec_ext_builtin (exp
, target
);
18436 case IX86_BUILTIN_VEC_SET_V8HI
:
18437 case IX86_BUILTIN_VEC_SET_V4HI
:
18438 return ix86_expand_vec_set_builtin (exp
);
18444 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
18445 if (d
->code
== fcode
)
18447 /* Compares are treated specially. */
18448 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
18449 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
18450 || d
->icode
== CODE_FOR_sse2_maskcmpv2df3
18451 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
18452 return ix86_expand_sse_compare (d
, exp
, target
);
18454 return ix86_expand_binop_builtin (d
->icode
, exp
, target
);
18457 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
18458 if (d
->code
== fcode
)
18459 return ix86_expand_unop_builtin (d
->icode
, exp
, target
, 0);
18461 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
18462 if (d
->code
== fcode
)
18463 return ix86_expand_sse_comi (d
, exp
, target
);
18465 gcc_unreachable ();
18468 /* Returns a function decl for a vectorized version of the builtin function
18469 with builtin function code FN and the result vector type TYPE, or NULL_TREE
18470 if it is not available. */
18473 ix86_builtin_vectorized_function (enum built_in_function fn
, tree type_out
,
18476 enum machine_mode in_mode
, out_mode
;
18479 if (TREE_CODE (type_out
) != VECTOR_TYPE
18480 || TREE_CODE (type_in
) != VECTOR_TYPE
)
18483 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
18484 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
18485 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
18486 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
18490 case BUILT_IN_SQRT
:
18491 if (out_mode
== DFmode
&& out_n
== 2
18492 && in_mode
== DFmode
&& in_n
== 2)
18493 return ix86_builtins
[IX86_BUILTIN_SQRTPD
];
18496 case BUILT_IN_SQRTF
:
18497 if (out_mode
== SFmode
&& out_n
== 4
18498 && in_mode
== SFmode
&& in_n
== 4)
18499 return ix86_builtins
[IX86_BUILTIN_SQRTPS
];
18502 case BUILT_IN_LRINTF
:
18503 if (out_mode
== SImode
&& out_n
== 4
18504 && in_mode
== SFmode
&& in_n
== 4)
18505 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ
];
18515 /* Returns a decl of a function that implements conversion of the
18516 input vector of type TYPE, or NULL_TREE if it is not available. */
18519 ix86_builtin_conversion (enum tree_code code
, tree type
)
18521 if (TREE_CODE (type
) != VECTOR_TYPE
)
18527 switch (TYPE_MODE (type
))
18530 return ix86_builtins
[IX86_BUILTIN_CVTDQ2PS
];
18535 case FIX_TRUNC_EXPR
:
18536 switch (TYPE_MODE (type
))
18539 return ix86_builtins
[IX86_BUILTIN_CVTTPS2DQ
];
18549 /* Store OPERAND to the memory after reload is completed. This means
18550 that we can't easily use assign_stack_local. */
18552 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
18556 gcc_assert (reload_completed
);
18557 if (TARGET_RED_ZONE
)
18559 result
= gen_rtx_MEM (mode
,
18560 gen_rtx_PLUS (Pmode
,
18562 GEN_INT (-RED_ZONE_SIZE
)));
18563 emit_move_insn (result
, operand
);
18565 else if (!TARGET_RED_ZONE
&& TARGET_64BIT
)
18571 operand
= gen_lowpart (DImode
, operand
);
18575 gen_rtx_SET (VOIDmode
,
18576 gen_rtx_MEM (DImode
,
18577 gen_rtx_PRE_DEC (DImode
,
18578 stack_pointer_rtx
)),
18582 gcc_unreachable ();
18584 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
18593 split_di (&operand
, 1, operands
, operands
+ 1);
18595 gen_rtx_SET (VOIDmode
,
18596 gen_rtx_MEM (SImode
,
18597 gen_rtx_PRE_DEC (Pmode
,
18598 stack_pointer_rtx
)),
18601 gen_rtx_SET (VOIDmode
,
18602 gen_rtx_MEM (SImode
,
18603 gen_rtx_PRE_DEC (Pmode
,
18604 stack_pointer_rtx
)),
18609 /* Store HImodes as SImodes. */
18610 operand
= gen_lowpart (SImode
, operand
);
18614 gen_rtx_SET (VOIDmode
,
18615 gen_rtx_MEM (GET_MODE (operand
),
18616 gen_rtx_PRE_DEC (SImode
,
18617 stack_pointer_rtx
)),
18621 gcc_unreachable ();
18623 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
18628 /* Free operand from the memory. */
18630 ix86_free_from_memory (enum machine_mode mode
)
18632 if (!TARGET_RED_ZONE
)
18636 if (mode
== DImode
|| TARGET_64BIT
)
18640 /* Use LEA to deallocate stack space. In peephole2 it will be converted
18641 to pop or add instruction if registers are available. */
18642 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
18643 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
18648 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
18649 QImode must go into class Q_REGS.
18650 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
18651 movdf to do mem-to-mem moves through integer regs. */
18653 ix86_preferred_reload_class (rtx x
, enum reg_class
class)
18655 enum machine_mode mode
= GET_MODE (x
);
18657 /* We're only allowed to return a subclass of CLASS. Many of the
18658 following checks fail for NO_REGS, so eliminate that early. */
18659 if (class == NO_REGS
)
18662 /* All classes can load zeros. */
18663 if (x
== CONST0_RTX (mode
))
18666 /* Force constants into memory if we are loading a (nonzero) constant into
18667 an MMX or SSE register. This is because there are no MMX/SSE instructions
18668 to load from a constant. */
18670 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
18673 /* Prefer SSE regs only, if we can use them for math. */
18674 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
18675 return SSE_CLASS_P (class) ? class : NO_REGS
;
18677 /* Floating-point constants need more complex checks. */
18678 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
18680 /* General regs can load everything. */
18681 if (reg_class_subset_p (class, GENERAL_REGS
))
18684 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18685 zero above. We only want to wind up preferring 80387 registers if
18686 we plan on doing computation with them. */
18688 && standard_80387_constant_p (x
))
18690 /* Limit class to non-sse. */
18691 if (class == FLOAT_SSE_REGS
)
18693 if (class == FP_TOP_SSE_REGS
)
18695 if (class == FP_SECOND_SSE_REGS
)
18696 return FP_SECOND_REG
;
18697 if (class == FLOAT_INT_REGS
|| class == FLOAT_REGS
)
18704 /* Generally when we see PLUS here, it's the function invariant
18705 (plus soft-fp const_int). Which can only be computed into general
18707 if (GET_CODE (x
) == PLUS
)
18708 return reg_class_subset_p (class, GENERAL_REGS
) ? class : NO_REGS
;
18710 /* QImode constants are easy to load, but non-constant QImode data
18711 must go into Q_REGS. */
18712 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
18714 if (reg_class_subset_p (class, Q_REGS
))
18716 if (reg_class_subset_p (Q_REGS
, class))
18724 /* Discourage putting floating-point values in SSE registers unless
18725 SSE math is being used, and likewise for the 387 registers. */
18727 ix86_preferred_output_reload_class (rtx x
, enum reg_class
class)
18729 enum machine_mode mode
= GET_MODE (x
);
18731 /* Restrict the output reload class to the register bank that we are doing
18732 math on. If we would like not to return a subset of CLASS, reject this
18733 alternative: if reload cannot do this, it will still use its choice. */
18734 mode
= GET_MODE (x
);
18735 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
18736 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS
: NO_REGS
;
18738 if (TARGET_80387
&& SCALAR_FLOAT_MODE_P (mode
))
18740 if (class == FP_TOP_SSE_REGS
)
18742 else if (class == FP_SECOND_SSE_REGS
)
18743 return FP_SECOND_REG
;
18745 return FLOAT_CLASS_P (class) ? class : NO_REGS
;
18751 /* If we are copying between general and FP registers, we need a memory
18752 location. The same is true for SSE and MMX registers.
18754 The macro can't work reliably when one of the CLASSES is class containing
18755 registers from multiple units (SSE, MMX, integer). We avoid this by never
18756 combining those units in single alternative in the machine description.
18757 Ensure that this constraint holds to avoid unexpected surprises.
18759 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
18760 enforce these sanity checks. */
18763 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
18764 enum machine_mode mode
, int strict
)
18766 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
18767 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
18768 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
18769 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
18770 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
18771 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
18773 gcc_assert (!strict
);
18777 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
18780 /* ??? This is a lie. We do have moves between mmx/general, and for
18781 mmx/sse2. But by saying we need secondary memory we discourage the
18782 register allocator from using the mmx registers unless needed. */
18783 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
18786 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
18788 /* SSE1 doesn't have any direct moves from other classes. */
18792 /* If the target says that inter-unit moves are more expensive
18793 than moving through memory, then don't generate them. */
18794 if (!TARGET_INTER_UNIT_MOVES
)
18797 /* Between SSE and general, we have moves no larger than word size. */
18798 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
18805 /* Return true if the registers in CLASS cannot represent the change from
18806 modes FROM to TO. */
18809 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
18810 enum reg_class
class)
18815 /* x87 registers can't do subreg at all, as all values are reformatted
18816 to extended precision. */
18817 if (MAYBE_FLOAT_CLASS_P (class))
18820 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
18822 /* Vector registers do not support QI or HImode loads. If we don't
18823 disallow a change to these modes, reload will assume it's ok to
18824 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
18825 the vec_dupv4hi pattern. */
18826 if (GET_MODE_SIZE (from
) < 4)
18829 /* Vector registers do not support subreg with nonzero offsets, which
18830 are otherwise valid for integer registers. Since we can't see
18831 whether we have a nonzero offset from here, prohibit all
18832 nonparadoxical subregs changing size. */
18833 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
18840 /* Return the cost of moving data from a register in class CLASS1 to
18841 one in class CLASS2.
18843 It is not required that the cost always equal 2 when FROM is the same as TO;
18844 on some machines it is expensive to move between registers if they are not
18845 general registers. */
18848 ix86_register_move_cost (enum machine_mode mode
, enum reg_class class1
,
18849 enum reg_class class2
)
18851 /* In case we require secondary memory, compute cost of the store followed
18852 by load. In order to avoid bad register allocation choices, we need
18853 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
18855 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
18859 cost
+= MAX (MEMORY_MOVE_COST (mode
, class1
, 0),
18860 MEMORY_MOVE_COST (mode
, class1
, 1));
18861 cost
+= MAX (MEMORY_MOVE_COST (mode
, class2
, 0),
18862 MEMORY_MOVE_COST (mode
, class2
, 1));
18864 /* In case of copying from general_purpose_register we may emit multiple
18865 stores followed by single load causing memory size mismatch stall.
18866 Count this as arbitrarily high cost of 20. */
18867 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
18870 /* In the case of FP/MMX moves, the registers actually overlap, and we
18871 have to switch modes in order to treat them differently. */
18872 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
18873 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
18879 /* Moves between SSE/MMX and integer unit are expensive. */
18880 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
18881 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
18882 return ix86_cost
->mmxsse_to_integer
;
18883 if (MAYBE_FLOAT_CLASS_P (class1
))
18884 return ix86_cost
->fp_move
;
18885 if (MAYBE_SSE_CLASS_P (class1
))
18886 return ix86_cost
->sse_move
;
18887 if (MAYBE_MMX_CLASS_P (class1
))
18888 return ix86_cost
->mmx_move
;
18892 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
18895 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
18897 /* Flags and only flags can only hold CCmode values. */
18898 if (CC_REGNO_P (regno
))
18899 return GET_MODE_CLASS (mode
) == MODE_CC
;
18900 if (GET_MODE_CLASS (mode
) == MODE_CC
18901 || GET_MODE_CLASS (mode
) == MODE_RANDOM
18902 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
18904 if (FP_REGNO_P (regno
))
18905 return VALID_FP_MODE_P (mode
);
18906 if (SSE_REGNO_P (regno
))
18908 /* We implement the move patterns for all vector modes into and
18909 out of SSE registers, even when no operation instructions
18911 return (VALID_SSE_REG_MODE (mode
)
18912 || VALID_SSE2_REG_MODE (mode
)
18913 || VALID_MMX_REG_MODE (mode
)
18914 || VALID_MMX_REG_MODE_3DNOW (mode
));
18916 if (MMX_REGNO_P (regno
))
18918 /* We implement the move patterns for 3DNOW modes even in MMX mode,
18919 so if the register is available at all, then we can move data of
18920 the given mode into or out of it. */
18921 return (VALID_MMX_REG_MODE (mode
)
18922 || VALID_MMX_REG_MODE_3DNOW (mode
));
18925 if (mode
== QImode
)
18927 /* Take care for QImode values - they can be in non-QI regs,
18928 but then they do cause partial register stalls. */
18929 if (regno
< 4 || TARGET_64BIT
)
18931 if (!TARGET_PARTIAL_REG_STALL
)
18933 return reload_in_progress
|| reload_completed
;
18935 /* We handle both integer and floats in the general purpose registers. */
18936 else if (VALID_INT_MODE_P (mode
))
18938 else if (VALID_FP_MODE_P (mode
))
18940 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
18941 on to use that value in smaller contexts, this can easily force a
18942 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
18943 supporting DImode, allow it. */
18944 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
18950 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
18951 tieable integer mode. */
18954 ix86_tieable_integer_mode_p (enum machine_mode mode
)
18963 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
18966 return TARGET_64BIT
;
18973 /* Return true if MODE1 is accessible in a register that can hold MODE2
18974 without copying. That is, all register classes that can hold MODE2
18975 can also hold MODE1. */
18978 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
18980 if (mode1
== mode2
)
18983 if (ix86_tieable_integer_mode_p (mode1
)
18984 && ix86_tieable_integer_mode_p (mode2
))
18987 /* MODE2 being XFmode implies fp stack or general regs, which means we
18988 can tie any smaller floating point modes to it. Note that we do not
18989 tie this with TFmode. */
18990 if (mode2
== XFmode
)
18991 return mode1
== SFmode
|| mode1
== DFmode
;
18993 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
18994 that we can tie it with SFmode. */
18995 if (mode2
== DFmode
)
18996 return mode1
== SFmode
;
18998 /* If MODE2 is only appropriate for an SSE register, then tie with
18999 any other mode acceptable to SSE registers. */
19000 if (GET_MODE_SIZE (mode2
) == 16
19001 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
19002 return (GET_MODE_SIZE (mode1
) == 16
19003 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
19005 /* If MODE2 is appropriate for an MMX register, then tie
19006 with any other mode acceptable to MMX registers. */
19007 if (GET_MODE_SIZE (mode2
) == 8
19008 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
19009 return (GET_MODE_SIZE (mode1
) == 8
19010 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
));
19015 /* Return the cost of moving data of mode M between a
19016 register and memory. A value of 2 is the default; this cost is
19017 relative to those in `REGISTER_MOVE_COST'.
19019 If moving between registers and memory is more expensive than
19020 between two registers, you should define this macro to express the
19023 Model also increased moving costs of QImode registers in non
19027 ix86_memory_move_cost (enum machine_mode mode
, enum reg_class
class, int in
)
19029 if (FLOAT_CLASS_P (class))
19046 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
19048 if (SSE_CLASS_P (class))
19051 switch (GET_MODE_SIZE (mode
))
19065 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
19067 if (MMX_CLASS_P (class))
19070 switch (GET_MODE_SIZE (mode
))
19081 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
19083 switch (GET_MODE_SIZE (mode
))
19087 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
19088 : ix86_cost
->movzbl_load
);
19090 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
19091 : ix86_cost
->int_store
[0] + 4);
19094 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
19096 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
19097 if (mode
== TFmode
)
19099 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
19100 * (((int) GET_MODE_SIZE (mode
)
19101 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
19105 /* Compute a (partial) cost for rtx X. Return true if the complete
19106 cost has been computed, and false if subexpressions should be
19107 scanned. In either case, *TOTAL contains the cost result. */
19110 ix86_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
19112 enum machine_mode mode
= GET_MODE (x
);
19120 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
19122 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
19124 else if (flag_pic
&& SYMBOLIC_CONST (x
)
19126 || (!GET_CODE (x
) != LABEL_REF
19127 && (GET_CODE (x
) != SYMBOL_REF
19128 || !SYMBOL_REF_LOCAL_P (x
)))))
19135 if (mode
== VOIDmode
)
19138 switch (standard_80387_constant_p (x
))
19143 default: /* Other constants */
19148 /* Start with (MEM (SYMBOL_REF)), since that's where
19149 it'll probably end up. Add a penalty for size. */
19150 *total
= (COSTS_N_INSNS (1)
19151 + (flag_pic
!= 0 && !TARGET_64BIT
)
19152 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
19158 /* The zero extensions is often completely free on x86_64, so make
19159 it as cheap as possible. */
19160 if (TARGET_64BIT
&& mode
== DImode
19161 && GET_MODE (XEXP (x
, 0)) == SImode
)
19163 else if (TARGET_ZERO_EXTEND_WITH_AND
)
19164 *total
= ix86_cost
->add
;
19166 *total
= ix86_cost
->movzx
;
19170 *total
= ix86_cost
->movsx
;
19174 if (CONST_INT_P (XEXP (x
, 1))
19175 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
19177 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
19180 *total
= ix86_cost
->add
;
19183 if ((value
== 2 || value
== 3)
19184 && ix86_cost
->lea
<= ix86_cost
->shift_const
)
19186 *total
= ix86_cost
->lea
;
19196 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
19198 if (CONST_INT_P (XEXP (x
, 1)))
19200 if (INTVAL (XEXP (x
, 1)) > 32)
19201 *total
= ix86_cost
->shift_const
+ COSTS_N_INSNS (2);
19203 *total
= ix86_cost
->shift_const
* 2;
19207 if (GET_CODE (XEXP (x
, 1)) == AND
)
19208 *total
= ix86_cost
->shift_var
* 2;
19210 *total
= ix86_cost
->shift_var
* 6 + COSTS_N_INSNS (2);
19215 if (CONST_INT_P (XEXP (x
, 1)))
19216 *total
= ix86_cost
->shift_const
;
19218 *total
= ix86_cost
->shift_var
;
19223 if (FLOAT_MODE_P (mode
))
19225 *total
= ix86_cost
->fmul
;
19230 rtx op0
= XEXP (x
, 0);
19231 rtx op1
= XEXP (x
, 1);
19233 if (CONST_INT_P (XEXP (x
, 1)))
19235 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
19236 for (nbits
= 0; value
!= 0; value
&= value
- 1)
19240 /* This is arbitrary. */
19243 /* Compute costs correctly for widening multiplication. */
19244 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op1
) == ZERO_EXTEND
)
19245 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
19246 == GET_MODE_SIZE (mode
))
19248 int is_mulwiden
= 0;
19249 enum machine_mode inner_mode
= GET_MODE (op0
);
19251 if (GET_CODE (op0
) == GET_CODE (op1
))
19252 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
19253 else if (CONST_INT_P (op1
))
19255 if (GET_CODE (op0
) == SIGN_EXTEND
)
19256 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
19259 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
19263 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
19266 *total
= (ix86_cost
->mult_init
[MODE_INDEX (mode
)]
19267 + nbits
* ix86_cost
->mult_bit
19268 + rtx_cost (op0
, outer_code
) + rtx_cost (op1
, outer_code
));
19277 if (FLOAT_MODE_P (mode
))
19278 *total
= ix86_cost
->fdiv
;
19280 *total
= ix86_cost
->divide
[MODE_INDEX (mode
)];
19284 if (FLOAT_MODE_P (mode
))
19285 *total
= ix86_cost
->fadd
;
19286 else if (GET_MODE_CLASS (mode
) == MODE_INT
19287 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
19289 if (GET_CODE (XEXP (x
, 0)) == PLUS
19290 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
19291 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
19292 && CONSTANT_P (XEXP (x
, 1)))
19294 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
19295 if (val
== 2 || val
== 4 || val
== 8)
19297 *total
= ix86_cost
->lea
;
19298 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
19299 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
19301 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
19305 else if (GET_CODE (XEXP (x
, 0)) == MULT
19306 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
19308 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
19309 if (val
== 2 || val
== 4 || val
== 8)
19311 *total
= ix86_cost
->lea
;
19312 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
19313 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
19317 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
19319 *total
= ix86_cost
->lea
;
19320 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
19321 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
19322 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
19329 if (FLOAT_MODE_P (mode
))
19331 *total
= ix86_cost
->fadd
;
19339 if (!TARGET_64BIT
&& mode
== DImode
)
19341 *total
= (ix86_cost
->add
* 2
19342 + (rtx_cost (XEXP (x
, 0), outer_code
)
19343 << (GET_MODE (XEXP (x
, 0)) != DImode
))
19344 + (rtx_cost (XEXP (x
, 1), outer_code
)
19345 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
19351 if (FLOAT_MODE_P (mode
))
19353 *total
= ix86_cost
->fchs
;
19359 if (!TARGET_64BIT
&& mode
== DImode
)
19360 *total
= ix86_cost
->add
* 2;
19362 *total
= ix86_cost
->add
;
19366 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
19367 && XEXP (XEXP (x
, 0), 1) == const1_rtx
19368 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
19369 && XEXP (x
, 1) == const0_rtx
)
19371 /* This kind of construct is implemented using test[bwl].
19372 Treat it as if we had an AND. */
19373 *total
= (ix86_cost
->add
19374 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
)
19375 + rtx_cost (const1_rtx
, outer_code
));
19381 if (!TARGET_SSE_MATH
19383 || (mode
== DFmode
&& !TARGET_SSE2
))
19388 if (FLOAT_MODE_P (mode
))
19389 *total
= ix86_cost
->fabs
;
19393 if (FLOAT_MODE_P (mode
))
19394 *total
= ix86_cost
->fsqrt
;
19398 if (XINT (x
, 1) == UNSPEC_TP
)
19409 static int current_machopic_label_num
;
19411 /* Given a symbol name and its associated stub, write out the
19412 definition of the stub. */
19415 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
19417 unsigned int length
;
19418 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
19419 int label
= ++current_machopic_label_num
;
19421 /* For 64-bit we shouldn't get here. */
19422 gcc_assert (!TARGET_64BIT
);
19424 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19425 symb
= (*targetm
.strip_name_encoding
) (symb
);
19427 length
= strlen (stub
);
19428 binder_name
= alloca (length
+ 32);
19429 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
19431 length
= strlen (symb
);
19432 symbol_name
= alloca (length
+ 32);
19433 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
19435 sprintf (lazy_ptr_name
, "L%d$lz", label
);
19438 switch_to_section (darwin_sections
[machopic_picsymbol_stub_section
]);
19440 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
19442 fprintf (file
, "%s:\n", stub
);
19443 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
19447 fprintf (file
, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label
, label
);
19448 fprintf (file
, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name
, label
);
19449 fprintf (file
, "\tjmp\t*%%edx\n");
19452 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
19454 fprintf (file
, "%s:\n", binder_name
);
19458 fprintf (file
, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name
, label
);
19459 fprintf (file
, "\tpushl\t%%eax\n");
19462 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
19464 fprintf (file
, "\tjmp\tdyld_stub_binding_helper\n");
19466 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
19467 fprintf (file
, "%s:\n", lazy_ptr_name
);
19468 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
19469 fprintf (file
, "\t.long %s\n", binder_name
);
/* End-of-file hook for Darwin/x86: run the generic Darwin epilogue,
   then the i386 one.  (NOTE(review): second call reconstructed from a
   dropped line — confirm against upstream.)  */

static void
darwin_x86_file_end (void)
{
  darwin_file_end ();
  ix86_file_end ();
}
19478 #endif /* TARGET_MACHO */
19480 /* Order the registers for register allocator. */
19483 x86_order_regs_for_local_alloc (void)
19488 /* First allocate the local general purpose registers. */
19489 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
19490 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
19491 reg_alloc_order
[pos
++] = i
;
19493 /* Global general purpose registers. */
19494 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
19495 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
19496 reg_alloc_order
[pos
++] = i
;
19498 /* x87 registers come first in case we are doing FP math
19500 if (!TARGET_SSE_MATH
)
19501 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
19502 reg_alloc_order
[pos
++] = i
;
19504 /* SSE registers. */
19505 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
19506 reg_alloc_order
[pos
++] = i
;
19507 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
19508 reg_alloc_order
[pos
++] = i
;
19510 /* x87 registers. */
19511 if (TARGET_SSE_MATH
)
19512 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
19513 reg_alloc_order
[pos
++] = i
;
19515 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
19516 reg_alloc_order
[pos
++] = i
;
19518 /* Initialize the rest of array as we do not allocate some registers
19520 while (pos
< FIRST_PSEUDO_REGISTER
)
19521 reg_alloc_order
[pos
++] = 0;
19524 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19525 struct attribute_spec.handler. */
19527 ix86_handle_struct_attribute (tree
*node
, tree name
,
19528 tree args ATTRIBUTE_UNUSED
,
19529 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
19532 if (DECL_P (*node
))
19534 if (TREE_CODE (*node
) == TYPE_DECL
)
19535 type
= &TREE_TYPE (*node
);
19540 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
19541 || TREE_CODE (*type
) == UNION_TYPE
)))
19543 warning (OPT_Wattributes
, "%qs attribute ignored",
19544 IDENTIFIER_POINTER (name
));
19545 *no_add_attrs
= true;
19548 else if ((is_attribute_p ("ms_struct", name
)
19549 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
19550 || ((is_attribute_p ("gcc_struct", name
)
19551 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
19553 warning (OPT_Wattributes
, "%qs incompatible attribute ignored",
19554 IDENTIFIER_POINTER (name
));
19555 *no_add_attrs
= true;
19562 ix86_ms_bitfield_layout_p (tree record_type
)
19564 return (TARGET_MS_BITFIELD_LAYOUT
&&
19565 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
19566 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
19569 /* Returns an expression indicating where the this parameter is
19570 located on entry to the FUNCTION. */
19573 x86_this_parameter (tree function
)
19575 tree type
= TREE_TYPE (function
);
19579 int n
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
19580 return gen_rtx_REG (DImode
, x86_64_int_parameter_registers
[n
]);
19583 if (ix86_function_regparm (type
, function
) > 0)
19587 parm
= TYPE_ARG_TYPES (type
);
19588 /* Figure out whether or not the function has a variable number of
19590 for (; parm
; parm
= TREE_CHAIN (parm
))
19591 if (TREE_VALUE (parm
) == void_type_node
)
19593 /* If not, the this parameter is in the first argument. */
19597 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
19599 return gen_rtx_REG (SImode
, regno
);
19603 if (aggregate_value_p (TREE_TYPE (type
), type
))
19604 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 8));
19606 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 4));
19609 /* Determine whether x86_output_mi_thunk can succeed. */
19612 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED
,
19613 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
19614 HOST_WIDE_INT vcall_offset
, tree function
)
19616 /* 64-bit can handle anything. */
19620 /* For 32-bit, everything's fine if we have one free register. */
19621 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
19624 /* Need a free register for vcall_offset. */
19628 /* Need a free register for GOT references. */
19629 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
19632 /* Otherwise ok. */
19636 /* Output the assembler code for a thunk function. THUNK_DECL is the
19637 declaration for the thunk function itself, FUNCTION is the decl for
19638 the target function. DELTA is an immediate constant offset to be
19639 added to THIS. If VCALL_OFFSET is nonzero, the word at
19640 *(*this + vcall_offset) should be added to THIS. */
19643 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED
,
19644 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
19645 HOST_WIDE_INT vcall_offset
, tree function
)
19648 rtx
this = x86_this_parameter (function
);
19651 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
19652 pull it in now and let DELTA benefit. */
19655 else if (vcall_offset
)
19657 /* Put the this parameter into %eax. */
19659 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, 0);
19660 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
19663 this_reg
= NULL_RTX
;
19665 /* Adjust the this parameter by a fixed constant. */
19668 xops
[0] = GEN_INT (delta
);
19669 xops
[1] = this_reg
? this_reg
: this;
19672 if (!x86_64_general_operand (xops
[0], DImode
))
19674 tmp
= gen_rtx_REG (DImode
, R10_REG
);
19676 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
19680 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
19683 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
19686 /* Adjust the this parameter by a value stored in the vtable. */
19690 tmp
= gen_rtx_REG (DImode
, R10_REG
);
19693 int tmp_regno
= 2 /* ECX */;
19694 if (lookup_attribute ("fastcall",
19695 TYPE_ATTRIBUTES (TREE_TYPE (function
))))
19696 tmp_regno
= 0 /* EAX */;
19697 tmp
= gen_rtx_REG (SImode
, tmp_regno
);
19700 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
19703 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
19705 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
19707 /* Adjust the this parameter. */
19708 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
19709 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
19711 rtx tmp2
= gen_rtx_REG (DImode
, R11_REG
);
19712 xops
[0] = GEN_INT (vcall_offset
);
19714 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
19715 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
19717 xops
[1] = this_reg
;
19719 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
19721 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
19724 /* If necessary, drop THIS back to its stack slot. */
19725 if (this_reg
&& this_reg
!= this)
19727 xops
[0] = this_reg
;
19729 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
19732 xops
[0] = XEXP (DECL_RTL (function
), 0);
19735 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
19736 output_asm_insn ("jmp\t%P0", xops
);
19739 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, xops
[0]), UNSPEC_GOTPCREL
);
19740 tmp
= gen_rtx_CONST (Pmode
, tmp
);
19741 tmp
= gen_rtx_MEM (QImode
, tmp
);
19743 output_asm_insn ("jmp\t%A0", xops
);
19748 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
19749 output_asm_insn ("jmp\t%P0", xops
);
19754 rtx sym_ref
= XEXP (DECL_RTL (function
), 0);
19755 tmp
= (gen_rtx_SYMBOL_REF
19757 machopic_indirection_name (sym_ref
, /*stub_p=*/true)));
19758 tmp
= gen_rtx_MEM (QImode
, tmp
);
19760 output_asm_insn ("jmp\t%0", xops
);
19763 #endif /* TARGET_MACHO */
19765 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
19766 output_set_got (tmp
, NULL_RTX
);
19769 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
19770 output_asm_insn ("jmp\t{*}%1", xops
);
19776 x86_file_start (void)
19778 default_file_start ();
19780 darwin_file_start ();
19782 if (X86_FILE_START_VERSION_DIRECTIVE
)
19783 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
19784 if (X86_FILE_START_FLTUSED
)
19785 fputs ("\t.global\t__fltused\n", asm_out_file
);
19786 if (ix86_asm_dialect
== ASM_INTEL
)
19787 fputs ("\t.intel_syntax\n", asm_out_file
);
19791 x86_field_alignment (tree field
, int computed
)
19793 enum machine_mode mode
;
19794 tree type
= TREE_TYPE (field
);
19796 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
19798 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
19799 ? get_inner_array_type (type
) : type
);
19800 if (mode
== DFmode
|| mode
== DCmode
19801 || GET_MODE_CLASS (mode
) == MODE_INT
19802 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
19803 return MIN (32, computed
);
19807 /* Output assembler code to FILE to increment profiler label # LABELNO
19808 for profiling a function entry. */
19810 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
19815 #ifndef NO_PROFILE_COUNTERS
19816 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
19818 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
19822 #ifndef NO_PROFILE_COUNTERS
19823 fprintf (file
, "\tmovq\t$%sP%d,%%r11\n", LPREFIX
, labelno
);
19825 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
19829 #ifndef NO_PROFILE_COUNTERS
19830 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
19831 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
19833 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
19837 #ifndef NO_PROFILE_COUNTERS
19838 fprintf (file
, "\tmovl\t$%sP%d,%%%s\n", LPREFIX
, labelno
,
19839 PROFILE_COUNT_REGISTER
);
19841 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
19845 /* We don't have exact information about the insn sizes, but we may assume
19846 quite safely that we are informed about all 1 byte insns and memory
19847 address sizes. This is enough to eliminate unnecessary padding in
19851 min_insn_size (rtx insn
)
19855 if (!INSN_P (insn
) || !active_insn_p (insn
))
19858 /* Discard alignments we've emit and jump instructions. */
19859 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
19860 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
19863 && (GET_CODE (PATTERN (insn
)) == ADDR_VEC
19864 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
))
19867 /* Important case - calls are always 5 bytes.
19868 It is common to have many calls in the row. */
19870 && symbolic_reference_mentioned_p (PATTERN (insn
))
19871 && !SIBLING_CALL_P (insn
))
19873 if (get_attr_length (insn
) <= 1)
19876 /* For normal instructions we may rely on the sizes of addresses
19877 and the presence of symbol to require 4 bytes of encoding.
19878 This is not the case for jumps where references are PC relative. */
19879 if (!JUMP_P (insn
))
19881 l
= get_attr_length_address (insn
);
19882 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
19891 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
19895 ix86_avoid_jump_misspredicts (void)
19897 rtx insn
, start
= get_insns ();
19898 int nbytes
= 0, njumps
= 0;
19901 /* Look for all minimal intervals of instructions containing 4 jumps.
19902 The intervals are bounded by START and INSN. NBYTES is the total
19903 size of instructions in the interval including INSN and not including
19904 START. When the NBYTES is smaller than 16 bytes, it is possible
19905 that the end of START and INSN ends up in the same 16byte page.
19907 The smallest offset in the page INSN can start is the case where START
19908 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
19909 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
19911 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
19914 nbytes
+= min_insn_size (insn
);
19916 fprintf(dump_file
, "Insn %i estimated to %i bytes\n",
19917 INSN_UID (insn
), min_insn_size (insn
));
19919 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
19920 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
19928 start
= NEXT_INSN (start
);
19929 if ((JUMP_P (start
)
19930 && GET_CODE (PATTERN (start
)) != ADDR_VEC
19931 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
19933 njumps
--, isjump
= 1;
19936 nbytes
-= min_insn_size (start
);
19938 gcc_assert (njumps
>= 0);
19940 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
19941 INSN_UID (start
), INSN_UID (insn
), nbytes
);
19943 if (njumps
== 3 && isjump
&& nbytes
< 16)
19945 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
19948 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
19949 INSN_UID (insn
), padsize
);
19950 emit_insn_before (gen_align (GEN_INT (padsize
)), insn
);
19955 /* AMD Athlon works faster
19956 when RET is not destination of conditional jump or directly preceded
19957 by other jump instruction. We avoid the penalty by inserting NOP just
19958 before the RET instructions in such cases. */
19960 ix86_pad_returns (void)
19965 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
19967 basic_block bb
= e
->src
;
19968 rtx ret
= BB_END (bb
);
19970 bool replace
= false;
19972 if (!JUMP_P (ret
) || GET_CODE (PATTERN (ret
)) != RETURN
19973 || !maybe_hot_bb_p (bb
))
19975 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
19976 if (active_insn_p (prev
) || LABEL_P (prev
))
19978 if (prev
&& LABEL_P (prev
))
19983 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
19984 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
19985 && !(e
->flags
& EDGE_FALLTHRU
))
19990 prev
= prev_active_insn (ret
);
19992 && ((JUMP_P (prev
) && any_condjump_p (prev
))
19995 /* Empty functions get branch mispredict even when the jump destination
19996 is not visible to us. */
19997 if (!prev
&& cfun
->function_frequency
> FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
20002 emit_insn_before (gen_return_internal_long (), ret
);
20008 /* Implement machine specific optimizations. We implement padding of returns
20009 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
20013 if (TARGET_PAD_RETURNS
&& optimize
&& !optimize_size
)
20014 ix86_pad_returns ();
20015 if (TARGET_FOUR_JUMP_LIMIT
&& optimize
&& !optimize_size
)
20016 ix86_avoid_jump_misspredicts ();
20019 /* Return nonzero when QImode register that must be represented via REX prefix
20022 x86_extended_QIreg_mentioned_p (rtx insn
)
20025 extract_insn_cached (insn
);
20026 for (i
= 0; i
< recog_data
.n_operands
; i
++)
20027 if (REG_P (recog_data
.operand
[i
])
20028 && REGNO (recog_data
.operand
[i
]) >= 4)
20033 /* Return nonzero when P points to register encoded via REX prefix.
20034 Called via for_each_rtx. */
20036 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
20038 unsigned int regno
;
20041 regno
= REGNO (*p
);
20042 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
20045 /* Return true when INSN mentions register that must be encoded using REX
20048 x86_extended_reg_mentioned_p (rtx insn
)
20050 return for_each_rtx (&PATTERN (insn
), extended_reg_mentioned_1
, NULL
);
20053 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
20054 optabs would emit if we didn't have TFmode patterns. */
20057 x86_emit_floatuns (rtx operands
[2])
20059 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
20060 enum machine_mode mode
, inmode
;
20062 inmode
= GET_MODE (operands
[1]);
20063 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
20066 in
= force_reg (inmode
, operands
[1]);
20067 mode
= GET_MODE (out
);
20068 neglab
= gen_label_rtx ();
20069 donelab
= gen_label_rtx ();
20070 f0
= gen_reg_rtx (mode
);
20072 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
20074 expand_float (out
, in
, 0);
20076 emit_jump_insn (gen_jump (donelab
));
20079 emit_label (neglab
);
20081 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
20083 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
20085 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
20087 expand_float (f0
, i0
, 0);
20089 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
20091 emit_label (donelab
);
20094 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20095 with all elements equal to VAR. Return true if successful. */
20098 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
20099 rtx target
, rtx val
)
20101 enum machine_mode smode
, wsmode
, wvmode
;
20116 val
= force_reg (GET_MODE_INNER (mode
), val
);
20117 x
= gen_rtx_VEC_DUPLICATE (mode
, val
);
20118 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
20124 if (TARGET_SSE
|| TARGET_3DNOW_A
)
20126 val
= gen_lowpart (SImode
, val
);
20127 x
= gen_rtx_TRUNCATE (HImode
, val
);
20128 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
20129 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
20151 /* Extend HImode to SImode using a paradoxical SUBREG. */
20152 tmp1
= gen_reg_rtx (SImode
);
20153 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
20154 /* Insert the SImode value as low element of V4SImode vector. */
20155 tmp2
= gen_reg_rtx (V4SImode
);
20156 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
20157 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
20158 CONST0_RTX (V4SImode
),
20160 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
20161 /* Cast the V4SImode vector back to a V8HImode vector. */
20162 tmp1
= gen_reg_rtx (V8HImode
);
20163 emit_move_insn (tmp1
, gen_lowpart (V8HImode
, tmp2
));
20164 /* Duplicate the low short through the whole low SImode word. */
20165 emit_insn (gen_sse2_punpcklwd (tmp1
, tmp1
, tmp1
));
20166 /* Cast the V8HImode vector back to a V4SImode vector. */
20167 tmp2
= gen_reg_rtx (V4SImode
);
20168 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
20169 /* Replicate the low element of the V4SImode vector. */
20170 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
20171 /* Cast the V2SImode back to V8HImode, and store in target. */
20172 emit_move_insn (target
, gen_lowpart (V8HImode
, tmp2
));
20183 /* Extend QImode to SImode using a paradoxical SUBREG. */
20184 tmp1
= gen_reg_rtx (SImode
);
20185 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
20186 /* Insert the SImode value as low element of V4SImode vector. */
20187 tmp2
= gen_reg_rtx (V4SImode
);
20188 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
20189 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
20190 CONST0_RTX (V4SImode
),
20192 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
20193 /* Cast the V4SImode vector back to a V16QImode vector. */
20194 tmp1
= gen_reg_rtx (V16QImode
);
20195 emit_move_insn (tmp1
, gen_lowpart (V16QImode
, tmp2
));
20196 /* Duplicate the low byte through the whole low SImode word. */
20197 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
20198 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
20199 /* Cast the V16QImode vector back to a V4SImode vector. */
20200 tmp2
= gen_reg_rtx (V4SImode
);
20201 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
20202 /* Replicate the low element of the V4SImode vector. */
20203 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
20204 /* Cast the V2SImode back to V16QImode, and store in target. */
20205 emit_move_insn (target
, gen_lowpart (V16QImode
, tmp2
));
20213 /* Replicate the value once into the next wider mode and recurse. */
20214 val
= convert_modes (wsmode
, smode
, val
, true);
20215 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
20216 GEN_INT (GET_MODE_BITSIZE (smode
)),
20217 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
20218 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
20220 x
= gen_reg_rtx (wvmode
);
20221 if (!ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
))
20222 gcc_unreachable ();
20223 emit_move_insn (target
, gen_lowpart (mode
, x
));
/* NOTE(review): lossy extraction — original GCC line numbers are embedded in
   the text and several lines (braces, case labels, returns) were elided;
   verify against upstream config/i386/i386.c before trusting any edit.  */
/* Build, in TARGET, a vector of MODE whose element ONE_VAR is VAR and all
   other elements are zero.  Returns a success flag (return statements elided
   in this extraction).  */
20231 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20232 whose ONE_VAR element is VAR, and other elements are zero. Return true
20236 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
20237 rtx target
, rtx var
, int one_var
)
20239 enum machine_mode vsimode
;
/* Two-element case: concat VAR with a zero of the inner mode.  */
20255 var
= force_reg (GET_MODE_INNER (mode
), var
);
20256 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
20257 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
/* Work in a fresh pseudo if TARGET is a hard register, so the shuffle
   below can be applied freely.  */
20262 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
20263 new_target
= gen_reg_rtx (mode
);
20265 new_target
= target
;
20266 var
= force_reg (GET_MODE_INNER (mode
), var
);
/* Broadcast VAR, then merge element 0 with a zero vector.  */
20267 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
20268 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
20269 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
20272 /* We need to shuffle the value to the correct position, so
20273 create a new pseudo to store the intermediate result. */
20275 /* With SSE2, we can use the integer shuffle insns. */
20276 if (mode
!= V4SFmode
&& TARGET_SSE2
)
/* pshufd: move the nonzero element from slot 0 to slot ONE_VAR.  */
20278 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
20280 GEN_INT (one_var
== 1 ? 0 : 1),
20281 GEN_INT (one_var
== 2 ? 0 : 1),
20282 GEN_INT (one_var
== 3 ? 0 : 1)));
20283 if (target
!= new_target
)
20284 emit_move_insn (target
, new_target
);
20288 /* Otherwise convert the intermediate result to V4SFmode and
20289 use the SSE1 shuffle instructions. */
20290 if (mode
!= V4SFmode
)
20292 tmp
= gen_reg_rtx (V4SFmode
);
20293 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
/* shufps equivalent of the pshufd above (high-half selectors are +4).  */
20298 emit_insn (gen_sse_shufps_1 (tmp
, tmp
, tmp
,
20300 GEN_INT (one_var
== 1 ? 0 : 1),
20301 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
20302 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
20304 if (mode
!= V4SFmode
)
20305 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
20306 else if (tmp
!= target
)
20307 emit_move_insn (target
, tmp
);
20309 else if (target
!= new_target
)
20310 emit_move_insn (target
, new_target
);
/* Narrow-element (QI/HI) cases: pick the SImode vector of the same width,
   presumably V4SImode for 16-byte and V2SImode for 8-byte modes —
   the case labels were elided here.  */
20315 vsimode
= V4SImode
;
20321 vsimode
= V2SImode
;
20327 /* Zero extend the variable element to SImode and recurse. */
20328 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
20330 x
= gen_reg_rtx (vsimode
);
20331 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
20333 gcc_unreachable ();
20335 emit_move_insn (target
, gen_lowpart (mode
, x
));
/* NOTE(review): lossy extraction — embedded original line numbers, elided
   lines (braces, switch cases, returns); verify against upstream i386.c.  */
/* Initialize TARGET from VALS where every element except ONE_VAR is a
   constant: load the constant vector (with slot ONE_VAR zeroed) and then
   insert the variable element with ix86_expand_vector_set.  */
20343 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20344 consisting of the values in VALS. It is known that all elements
20345 except ONE_VAR are constants. Return true if successful. */
20348 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
20349 rtx target
, rtx vals
, int one_var
)
20351 rtx var
= XVECEXP (vals
, 0, one_var
);
20352 enum machine_mode wmode
;
/* Copy VALS and zero out the variable slot to get a pure-constant vector.  */
20355 const_vec
= copy_rtx (vals
);
20356 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
20357 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
20365 /* For the two element vectors, it's just as easy to use
20366 the general case. */
20382 /* There's no way to set one QImode entry easily. Combine
20383 the variable value with its adjacent constant value, and
20384 promote to an HImode set. */
/* x is the constant neighbour of VAR within the same HImode pair.  */
20385 x
= XVECEXP (vals
, 0, one_var
^ 1);
/* One arm: VAR occupies the high byte (shift left by 8), neighbour the
   low byte; the other arm is the mirror image.  The branch condition
   itself was elided (presumably tests one_var & 1).  */
20388 var
= convert_modes (HImode
, QImode
, var
, true);
20389 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
20390 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
20391 x
= GEN_INT (INTVAL (x
) & 0xff);
20395 var
= convert_modes (HImode
, QImode
, var
, true);
20396 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
/* OR in the constant neighbour unless it is zero.  */
20398 if (x
!= const0_rtx
)
20399 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
20400 1, OPTAB_LIB_WIDEN
);
/* Do the insertion in the wider (HImode-element) vector mode.  */
20402 x
= gen_reg_rtx (wmode
);
20403 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
20404 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
20406 emit_move_insn (target
, gen_lowpart (mode
, x
));
/* Default path: load the constant pool vector, then overwrite slot
   ONE_VAR with the variable value.  */
20413 emit_move_insn (target
, const_vec
);
20414 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
/* NOTE(review): lossy extraction — embedded original line numbers and elided
   lines (case labels, braces, some statements); verify against upstream.  */
/* Most general vector init: all elements variable and distinct.  Either
   builds a VEC_CONCAT of halves, or assembles word_mode words by shifting
   and ORing elements, then moves the words into TARGET.  */
20418 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
20419 all values variable, and none identical. */
20422 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
20423 rtx target
, rtx vals
)
20425 enum machine_mode half_mode
= GET_MODE_INNER (mode
);
20426 rtx op0
= NULL
, op1
= NULL
;
20427 bool use_vec_concat
= false;
20433 if (!mmx_ok
&& !TARGET_SSE
)
20439 /* For the two element vectors, we always implement VEC_CONCAT. */
20440 op0
= XVECEXP (vals
, 0, 0);
20441 op1
= XVECEXP (vals
, 0, 1);
20442 use_vec_concat
= true;
/* Four-element cases: half_mode is the matching two-element mode
   (case labels elided in this extraction).  */
20446 half_mode
= V2SFmode
;
20449 half_mode
= V2SImode
;
20455 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
20456 Recurse to load the two halves. */
20458 op0
= gen_reg_rtx (half_mode
);
20459 v
= gen_rtvec (2, XVECEXP (vals
, 0, 0), XVECEXP (vals
, 0, 1));
20460 ix86_expand_vector_init (false, op0
, gen_rtx_PARALLEL (half_mode
, v
));
20462 op1
= gen_reg_rtx (half_mode
);
20463 v
= gen_rtvec (2, XVECEXP (vals
, 0, 2), XVECEXP (vals
, 0, 3));
20464 ix86_expand_vector_init (false, op1
, gen_rtx_PARALLEL (half_mode
, v
));
20466 use_vec_concat
= true;
20477 gcc_unreachable ();
20480 if (use_vec_concat
)
/* Emit target = (vec_concat op0 op1); force operands into registers.  */
20482 if (!register_operand (op0
, half_mode
))
20483 op0
= force_reg (half_mode
, op0
);
20484 if (!register_operand (op1
, half_mode
))
20485 op1
= force_reg (half_mode
, op1
);
20487 emit_insn (gen_rtx_SET (VOIDmode
, target
,
20488 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
/* Fallback: pack the elements into word_mode words.  */
20492 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
20493 enum machine_mode inner_mode
;
20494 rtx words
[4], shift
;
20496 inner_mode
= GET_MODE_INNER (mode
);
20497 n_elts
= GET_MODE_NUNITS (mode
);
20498 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
20499 n_elt_per_word
= n_elts
/ n_words
;
20500 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
20502 for (i
= 0; i
< n_words
; ++i
)
20504 rtx word
= NULL_RTX
;
/* Elements are folded in from the highest slot of the word downward,
   so each shift/IOR pass leaves earlier elements in higher bits.  */
20506 for (j
= 0; j
< n_elt_per_word
; ++j
)
20508 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
20509 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
20515 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
20516 word
, 1, OPTAB_LIB_WIDEN
);
20517 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
20518 word
, 1, OPTAB_LIB_WIDEN
);
/* Move the assembled words into TARGET; 1, 2 or 4 words.  */
20526 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
20527 else if (n_words
== 2)
20529 rtx tmp
= gen_reg_rtx (mode
);
/* Clobber first so the part-writes below don't read uninit bits.  */
20530 emit_insn (gen_rtx_CLOBBER (VOIDmode
, tmp
));
20531 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
20532 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
20533 emit_move_insn (target
, tmp
);
20535 else if (n_words
== 4)
/* Recurse through V4SImode for a four-word (16-byte) vector.  */
20537 rtx tmp
= gen_reg_rtx (V4SImode
);
20538 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
20539 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
20540 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
20543 gcc_unreachable ();
/* NOTE(review): lossy extraction — some lines (returns, braces) elided;
   verify against upstream i386.c.  */
/* Entry point: initialize vector TARGET from PARALLEL VALS, dispatching to
   the constant-pool / duplicate / one-nonzero / one-var / general helpers
   in order of decreasing preference.  */
20547 /* Initialize vector TARGET via VALS. Suppress the use of MMX
20548 instructions unless MMX_OK is true. */
20551 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
20553 enum machine_mode mode
= GET_MODE (target
);
20554 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
20555 int n_elts
= GET_MODE_NUNITS (mode
);
20556 int n_var
= 0, one_var
= -1;
20557 bool all_same
= true, all_const_zero
= true;
/* Classify the elements: count non-constants, remember the last one,
   and track the all-same / all-zero properties.  */
20561 for (i
= 0; i
< n_elts
; ++i
)
20563 x
= XVECEXP (vals
, 0, i
);
20564 if (!CONSTANT_P (x
))
20565 n_var
++, one_var
= i
;
20566 else if (x
!= CONST0_RTX (inner_mode
))
20567 all_const_zero
= false;
20568 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
20572 /* Constants are best loaded from the constant pool. */
20575 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
20579 /* If all values are identical, broadcast the value. */
20581 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
20582 XVECEXP (vals
, 0, 0)))
20585 /* Values where only one field is non-constant are best loaded from
20586 the pool and overwritten via move later. */
20590 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
20591 XVECEXP (vals
, 0, one_var
),
20595 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
/* Last resort: fully general expansion.  */
20599 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
/* NOTE(review): lossy extraction — the switch's case labels and several
   statements are elided; verify against upstream i386.c.  */
/* Store scalar VAL into element ELT of vector TARGET, choosing per-mode
   strategies (VEC_CONCAT, shufps/pshufd shuffles, VEC_MERGE) and falling
   back to a stack-temporary store.  */
20603 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
20605 enum machine_mode mode
= GET_MODE (target
);
20606 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
20607 bool use_vec_merge
= false;
/* Two-element MMX case: extract the other element, then concat in the
   right order.  */
20616 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
20617 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
20619 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
20621 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
20622 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20632 /* For the two element vectors, we implement a VEC_CONCAT with
20633 the extraction of the other element. */
20635 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
20636 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
/* Order the concat operands so VAL lands in slot ELT.  */
20639 op0
= val
, op1
= tmp
;
20641 op0
= tmp
, op1
= val
;
20643 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
20644 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20652 use_vec_merge
= true;
/* V4SFmode insertions for elt 1..3 via unpcklps/shufps dances; the
   surrounding case/elt dispatch was elided.  */
20656 /* tmp = target = A B C D */
20657 tmp
= copy_to_reg (target
);
20658 /* target = A A B B */
20659 emit_insn (gen_sse_unpcklps (target
, target
, target
));
20660 /* target = X A B B */
20661 ix86_expand_vector_set (false, target
, val
, 0);
20662 /* target = A X C D */
20663 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
20664 GEN_INT (1), GEN_INT (0),
20665 GEN_INT (2+4), GEN_INT (3+4)));
20669 /* tmp = target = A B C D */
20670 tmp
= copy_to_reg (target
);
20671 /* tmp = X B C D */
20672 ix86_expand_vector_set (false, tmp
, val
, 0);
20673 /* target = A B X D */
20674 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
20675 GEN_INT (0), GEN_INT (1),
20676 GEN_INT (0+4), GEN_INT (3+4)));
20680 /* tmp = target = A B C D */
20681 tmp
= copy_to_reg (target
);
20682 /* tmp = X B C D */
20683 ix86_expand_vector_set (false, tmp
, val
, 0);
20684 /* target = A B X D */
/* NOTE(review): comments above say "A B X D" for two different shuffles;
   the selector constants differ (slot 2 vs slot 3) — looks like the
   second should read "A B C X"; confirm against upstream.  */
20685 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
20686 GEN_INT (0), GEN_INT (1),
20687 GEN_INT (2+4), GEN_INT (0+4)));
20691 gcc_unreachable ();
20696 /* Element 0 handled by vec_merge below. */
20699 use_vec_merge
= true;
20705 /* With SSE2, use integer shuffles to swap element 0 and ELT,
20706 store into element 0, then shuffle them back. */
20710 order
[0] = GEN_INT (elt
);
20711 order
[1] = const1_rtx
;
20712 order
[2] = const2_rtx
;
20713 order
[3] = GEN_INT (3);
20714 order
[elt
] = const0_rtx
;
/* First pshufd brings element ELT into slot 0.  */
20716 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
20717 order
[1], order
[2], order
[3]));
20719 ix86_expand_vector_set (false, target
, val
, 0);
/* Same permutation is its own inverse: swap back.  */
20721 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
20722 order
[1], order
[2], order
[3]));
20726 /* For SSE1, we have to reuse the V4SF code. */
20727 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
20728 gen_lowpart (SFmode
, val
), elt
);
/* HImode elements need SSE2 pinsrw; QI-sized needs MMX/SSE/3DNow!A.  */
20733 use_vec_merge
= TARGET_SSE2
;
20736 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
/* Generic vec_merge path: duplicate VAL and merge one lane.  */
20747 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
20748 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
20749 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
/* Fallback: spill to a stack temp, store the element, reload.  */
20753 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
20755 emit_move_insn (mem
, target
);
20757 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
20758 emit_move_insn (tmp
, val
);
20760 emit_move_insn (target
, mem
);
/* NOTE(review): lossy extraction — case labels and several statements are
   elided; verify against upstream i386.c.  */
/* Extract element ELT of vector VEC into scalar TARGET, via vec_select
   where the ISA supports it, shuffles to bring the element to slot 0
   otherwise, or a stack-temporary load as last resort.  */
20765 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
20767 enum machine_mode mode
= GET_MODE (vec
);
20768 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
20769 bool use_vec_extr
= false;
20782 use_vec_extr
= true;
/* V4SF path: shuffle the wanted element into slot 0 of a temp.  */
20794 tmp
= gen_reg_rtx (mode
);
20795 emit_insn (gen_sse_shufps_1 (tmp
, vec
, vec
,
20796 GEN_INT (elt
), GEN_INT (elt
),
20797 GEN_INT (elt
+4), GEN_INT (elt
+4)));
/* Alternative: unpckhps moves the high pair down.  */
20801 tmp
= gen_reg_rtx (mode
);
20802 emit_insn (gen_sse_unpckhps (tmp
, vec
, vec
));
20806 gcc_unreachable ();
20809 use_vec_extr
= true;
/* V4SI path: pshufd broadcast of element ELT into a temp.  */
20824 tmp
= gen_reg_rtx (mode
);
20825 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
20826 GEN_INT (elt
), GEN_INT (elt
),
20827 GEN_INT (elt
), GEN_INT (elt
)));
20831 tmp
= gen_reg_rtx (mode
);
20832 emit_insn (gen_sse2_punpckhdq (tmp
, vec
, vec
));
20836 gcc_unreachable ();
20839 use_vec_extr
= true;
20844 /* For SSE1, we have to reuse the V4SF code. */
20845 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
20846 gen_lowpart (V4SFmode
, vec
), elt
);
/* HImode extract needs SSE2 pextrw; QI-sized needs MMX/SSE/3DNow!A.  */
20852 use_vec_extr
= TARGET_SSE2
;
20855 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
20860 /* ??? Could extract the appropriate HImode element and shift. */
/* vec_select path.  */
20867 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
20868 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
20870 /* Let the rtl optimizers know about the zero extension performed. */
20871 if (inner_mode
== HImode
)
20873 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
20874 target
= gen_lowpart (SImode
, target
);
20877 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
/* Fallback: spill VEC to a stack temp and load the element.  */
20881 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
20883 emit_move_insn (mem
, vec
);
20885 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
20886 emit_move_insn (target
, tmp
);
/* NOTE(review): lossy extraction — the return-type line and braces were
   elided; verify against upstream i386.c.  */
/* Horizontal reduction of IN with binary op FN: movhlps folds the high
   pair onto the low pair, then a shufps exposes lane 1 so a final FN
   combines the remaining two partial results into DEST.  */
20890 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
20891 pattern to reduce; DEST is the destination; IN is the input vector. */
20894 ix86_expand_reduc_v4sf (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
20896 rtx tmp1
, tmp2
, tmp3
;
20898 tmp1
= gen_reg_rtx (V4SFmode
);
20899 tmp2
= gen_reg_rtx (V4SFmode
);
20900 tmp3
= gen_reg_rtx (V4SFmode
);
/* tmp1 = { in[2], in[3], ... }; tmp2 = fn (tmp1, in).  */
20902 emit_insn (gen_sse_movhlps (tmp1
, in
, in
));
20903 emit_insn (fn (tmp2
, tmp1
, in
));
/* Broadcast lane 1 of tmp2 so the last fn pairs lanes 0 and 1.  */
20905 emit_insn (gen_sse_shufps_1 (tmp3
, tmp2
, tmp2
,
20906 GEN_INT (1), GEN_INT (1),
20907 GEN_INT (1+4), GEN_INT (1+4)));
20908 emit_insn (fn (dest
, tmp2
, tmp3
));
/* NOTE(review): lossy extraction — return-type line and the DFP `return`
   arm were elided; verify against upstream.  */
/* TARGET_SCALAR_MODE_SUPPORTED_P: accept decimal FP modes specially,
   otherwise defer to the default hook.  */
20911 /* Target hook for scalar_mode_supported_p. */
20913 ix86_scalar_mode_supported_p (enum machine_mode mode
)
20915 if (DECIMAL_FLOAT_MODE_P (mode
))
20918 return default_scalar_mode_supported_p (mode
);
/* NOTE(review): lossy extraction — the `return true` / final `return false`
   lines were elided; verify against upstream.  */
/* TARGET_VECTOR_MODE_SUPPORTED_P: a vector mode is supported when the
   matching ISA feature (SSE, SSE2, MMX, 3DNow!) is enabled.  */
20921 /* Implements target hook vector_mode_supported_p. */
20923 ix86_vector_mode_supported_p (enum machine_mode mode
)
20925 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
20927 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
20929 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
20931 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
/* NOTE(review): lossy extraction — parameter list tail, cons tails and the
   return were elided; verify against upstream.  */
/* TARGET_MD_ASM_CLOBBERS: implicitly clobber "flags" and "fpsr" for every
   asm statement, for compatibility with the old cc0 back end.  */
20936 /* Worker function for TARGET_MD_ASM_CLOBBERS.
20938 We do this in the new i386 backend to maintain source compatibility
20939 with the old cc0-based compiler. */
20942 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
20943 tree inputs ATTRIBUTE_UNUSED
,
20946 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
20948 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
/* NOTE(review): lossy extraction — return statements elided; verify against
   upstream.  The header comment says "small data" but the body tests the
   medium code models and .ldata/.lbss, i.e. LARGE data — upstream's comment
   reads differently; confirm.  */
/* Predicate: should EXP be placed in the large data/bss sections?  Only
   relevant for the medium code models; decided by explicit section name or
   by size against ix86_section_threshold.  */
20953 /* Return true if this goes in small data/bss. */
20956 ix86_in_large_data_p (tree exp
)
20958 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
20961 /* Functions are never large data. */
20962 if (TREE_CODE (exp
) == FUNCTION_DECL
)
/* Honor an explicit section placement in .ldata/.lbss.  */
20965 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
20967 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
20968 if (strcmp (section
, ".ldata") == 0
20969 || strcmp (section
, ".lbss") == 0)
/* Otherwise decide by object size vs. -mlarge-data-threshold.  */
20975 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
20977 /* If this is an incomplete type with size 0, then we can't put it
20978 in data because it might be too big when completed. */
20979 if (!size
|| size
> ix86_section_threshold
)
/* NOTE(review): lossy extraction — return-type line elided; verify against
   upstream.  */
/* TARGET_ENCODE_SECTION_INFO: run the default hook, then tag static or
   external variables that live in large data with SYMBOL_FLAG_FAR_ADDR so
   addressing code knows they need far (64-bit) addresses.  */
20986 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
20988 default_encode_section_info (decl
, rtl
, first
);
20990 if (TREE_CODE (decl
) == VAR_DECL
20991 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
20992 && ix86_in_large_data_p (decl
))
20993 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
/* Reverse a comparison CODE for mode MODE; FP compare modes must use the
   maybe-unordered variant since plain reversal is wrong in the presence of
   NaNs.  (Return-type line elided in this extraction.)  */
20996 /* Worker function for REVERSE_CONDITION. */
20999 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
21001 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
21002 ? reverse_condition (code
)
21003 : reverse_condition_maybe_unordered (code
));
/* NOTE(review): lossy extraction — the trailing fallback `return` (the
   non-stack-top store case) was elided; verify against upstream.  */
/* Return the assembler template for an x87 register-to-register move from
   OPERANDS[1] to OPERANDS[0].  If the source dies in this insn a pop form
   (ffreep / fstp) is used to keep the x87 stack balanced.  */
21006 /* Output code to perform an x87 FP register move, from OPERANDS[1]
21010 output_387_reg_move (rtx insn
, rtx
*operands
)
21012 if (REG_P (operands
[1])
21013 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
/* Source dies: pop it.  Popping into st(0) is just an ffreep.  */
21015 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
21016 return output_387_ffreep (operands
, 0);
21017 return "fstp\t%y0";
21019 if (STACK_TOP_P (operands
[0]))
21020 return "fld%z1\t%y1";
/* NOTE(review): lossy extraction — braces/else structure elided; verify
   against upstream.  */
/* Emit a conditional jump to LABEL taken when the x87 C2 status flag is
   set: fnstsw into a register, then either sahf + unordered branch (when
   SAHF is available/profitable) or a testb of bit 0x04 + NE branch.  */
21024 /* Output code to perform a conditional jump to LABEL, if C2 flag in
21025 FP status register is set. */
21028 ix86_emit_fp_unordered_jump (rtx label
)
21030 rtx reg
= gen_reg_rtx (HImode
);
21033 emit_insn (gen_x86_fnstsw_1 (reg
));
21035 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_size
))
/* sahf path: load AH into EFLAGS and branch on unordered.  */
21037 emit_insn (gen_x86_sahf_1 (reg
));
21039 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
21040 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
/* test path: C2 is bit 2 of the high status byte (mask 0x04).  */
21044 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
21046 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
21047 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
21050 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
21051 gen_rtx_LABEL_REF (VOIDmode
, label
),
21053 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
21055 emit_jump_insn (temp
);
/* Branch predicted mostly not taken (10%).  */
21056 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
/* NOTE(review): lossy extraction — compare operand tail and braces elided;
   verify against upstream.  */
/* Emit x87 code for log1p(op1) into op0.  For |op1| below the fyl2xp1
   accuracy bound (~0.29289 = 1 - sqrt(2)/2) use fyl2xp1 directly;
   otherwise compute fyl2x(1 + op1).  Both paths scale by fldln2.  */
21059 /* Output code to perform a log1p XFmode calculation. */
21061 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
21063 rtx label1
= gen_label_rtx ();
21064 rtx label2
= gen_label_rtx ();
21066 rtx tmp
= gen_reg_rtx (XFmode
);
21067 rtx tmp2
= gen_reg_rtx (XFmode
);
/* if (|op1| >= 0.29289...) goto label1 (the fyl2x path).  */
21069 emit_insn (gen_absxf2 (tmp
, op1
));
21070 emit_insn (gen_cmpxf (tmp
,
21071 CONST_DOUBLE_FROM_REAL_VALUE (
21072 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
21074 emit_jump_insn (gen_bge (label1
));
/* Small-argument path: op0 = fyl2xp1 (op1, ln2).  */
21076 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
21077 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
21078 emit_jump (label2
);
21080 emit_label (label1
);
/* Large-argument path: op0 = fyl2x (1 + op1, ln2).  */
21081 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
21082 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
21083 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
21084 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
21086 emit_label (label2
);
/* NOTE(review): lossy extraction — parameter tail (tree decl), the HAVE_GAS
   condition head and a return were elided; verify against upstream.  */
/* TARGET_ASM_NAMED_SECTION for Solaris: emit ".eh_frame" sections with the
   "@unwind" marker Binutils 2.15 requires on every occurrence; defer all
   other sections to the generic ELF hook.  */
21089 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
21092 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
21095 /* With Binutils 2.15, the "@unwind" marker must be specified on
21096 every occurrence of the ".eh_frame" section, not just the first
21099 && strcmp (name
, ".eh_frame") == 0)
21101 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
21102 flags
& SECTION_WRITE
? "aw" : "a");
21105 default_elf_asm_named_section (name
, flags
, decl
);
/* NOTE(review): lossy extraction — the case labels, `return "g"` /
   `return "e"` / default-NULL lines were elided; verify against upstream.  */
/* C++ ABI mangling hook for the x86 extended FP types (__float128 -> "g",
   long double/__float80 -> "e"); NULL for everything else.  */
21108 /* Return the mangling of TYPE if it is an extended fundamental type. */
21110 static const char *
21111 ix86_mangle_fundamental_type (tree type
)
21113 switch (TYPE_MODE (type
))
21116 /* __float128 is "g". */
21119 /* "long double" or __float80 is "e". */
/* TARGET_STACK_PROTECT_FAIL: 64-bit calls __stack_chk_fail directly (no PIC
   setup needed); 32-bit uses the hidden __stack_chk_fail_local to avoid
   setting up the PIC register.  (Return-type line elided.)  */
21126 /* For 32-bit code we can save PIC register setup by using
21127 __stack_chk_fail_local hidden function instead of calling
21128 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
21129 register, so it is better to call __stack_chk_fail directly. */
21132 ix86_stack_protect_fail (void)
21134 return TARGET_64BIT
21135 ? default_external_stack_protect_fail ()
21136 : default_hidden_stack_protect_fail ();
/* NOTE(review): lossy extraction — the `if (flag_pic)` / 64-bit guards and
   braces were elided; verify against upstream.  */
/* Choose the DWARF EH pointer encoding: PIC gets pc-relative sdata4/sdata8
   (indirect for global symbols); non-PIC small/medium models get udata4;
   otherwise an absolute pointer.  */
21139 /* Select a format to encode pointers in exception handling data. CODE
21140 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
21141 true if the symbol may be affected by dynamic relocations.
21143 ??? All x86 object file formats are capable of representing this.
21144 After all, the relocation needed is the same as for the call insn.
21145 Whether or not a particular assembler allows us to enter such, I
21146 guess we'll have to see. */
21148 asm_preferred_eh_data_format (int code
, int global
)
/* PIC path (condition head elided): default to sdata8, narrow to sdata4
   for small-PIC or medium-PIC code references.  */
21152 int type
= DW_EH_PE_sdata8
;
21154 || ix86_cmodel
== CM_SMALL_PIC
21155 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
21156 type
= DW_EH_PE_sdata4
;
21157 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
/* Non-PIC path.  */
21159 if (ix86_cmodel
== CM_SMALL
21160 || (ix86_cmodel
== CM_MEDIUM
&& code
))
21161 return DW_EH_PE_udata4
;
21162 return DW_EH_PE_absptr
;
/* NOTE(review): lossy extraction — braces elided; verify against upstream.  */
/* RESULT = copysign (ABS_VALUE, SIGN) where ABS_VALUE is known positive:
   sgn = SIGN & ~signbit-mask complement trick, then RESULT = ABS_VALUE |
   sgn.  MASK, if given, is the sign-bit mask to reuse; otherwise one is
   built here.  */
21165 /* Expand copysign from SIGN to the positive value ABS_VALUE
21166 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
21169 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
21171 enum machine_mode mode
= GET_MODE (sign
);
21172 rtx sgn
= gen_reg_rtx (mode
);
21173 if (mask
== NULL_RTX
)
/* Build an inverted sign-bit mask (bits clear in the sign position).  */
21175 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), false);
21176 if (!VECTOR_MODE_P (mode
))
21178 /* We need to generate a scalar mode mask in this case. */
21179 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
21180 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
21181 mask
= gen_reg_rtx (mode
);
21182 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
/* A caller-supplied mask has the sign bit SET, so complement it
   before isolating the sign (andn-style).  */
21186 mask
= gen_rtx_NOT (mode
, mask
);
21187 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
21188 gen_rtx_AND (mode
, mask
, sign
)));
/* result = abs_value | extracted sign bit.  */
21189 emit_insn (gen_rtx_SET (VOIDmode
, result
,
21190 gen_rtx_IOR (mode
, abs_value
, sgn
)));
/* NOTE(review): lossy extraction — the *smask store and `return xa` were
   elided; verify against upstream.  */
/* Compute fabs (OP0) into a fresh register by ANDing with an inverted
   sign-bit mask; the mask is (presumably) handed back through *SMASK for
   reuse by the copysign helpers.  */
21193 /* Expand fabs (OP0) and return a new rtx that holds the result. The
21194 mask for masking out the sign-bit is stored in *SMASK, if that is
21197 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
21199 enum machine_mode mode
= GET_MODE (op0
);
21202 xa
= gen_reg_rtx (mode
);
/* Mask has all bits set EXCEPT the sign bit (third arg true).  */
21203 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), true);
21204 if (!VECTOR_MODE_P (mode
))
21206 /* We need to generate a scalar mode mask in this case. */
21207 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
21208 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
21209 mask
= gen_reg_rtx (mode
);
21210 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
/* xa = op0 & mask clears the sign bit -> |op0|.  */
21212 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
21213 gen_rtx_AND (mode
, op0
, mask
)));
/* NOTE(review): lossy extraction — locals, the operand swap, and `return
   label` were elided; verify against upstream.  */
/* Emit OP0 <CODE> OP1 as a CCFPUmode compare plus a forward conditional
   jump to a fresh label (returned), optionally swapping the operands
   first.  */
21221 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
21222 swapping the operands if SWAP_OPERANDS is true. The expanded
21223 code is a forward jump to a newly created label in case the
21224 comparison is true. The generated label rtx is returned. */
21226 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
21227 bool swap_operands
)
21238 label
= gen_label_rtx ();
/* Flags = compare (op0, op1) in the unordered FP compare mode.  */
21239 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
21240 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21241 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
/* if (flags CODE 0) goto label.  */
21242 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
21243 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
21244 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
21245 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
21246 JUMP_LABEL (tmp
) = label
;
/* NOTE(review): lossy extraction — the operand swap and `return mask` were
   elided; verify against upstream.  */
/* Emit a cmpsd/cmpss-style mask compare of OP0 vs OP1 (all-ones where the
   comparison holds) and return the mask register; DFmode uses the SSE2
   pattern, SFmode the SSE1 pattern.  */
21251 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
21252 using comparison code CODE. Operands are swapped for the comparison if
21253 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
21255 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
21256 bool swap_operands
)
21258 enum machine_mode mode
= GET_MODE (op0
);
21259 rtx mask
= gen_reg_rtx (mode
);
21268 if (mode
== DFmode
)
21269 emit_insn (gen_sse2_maskcmpdf3 (mask
, op0
, op1
,
21270 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
21272 emit_insn (gen_sse_maskcmpsf3 (mask
, op0
, op1
,
21273 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
/* Build, in a fresh register of MODE, the constant 2^p where p is the
   mantissa width (52 for DFmode, 23 for SFmode) — adding/subtracting it
   rounds away the fractional bits in the SSE round/trunc expanders below.
   (Return-type line and `return TWO52` elided in this extraction.)  */
21278 /* Generate and return a rtx of mode MODE for 2**n where n is the number
21279 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
21281 ix86_gen_TWO52 (enum machine_mode mode
)
21283 REAL_VALUE_TYPE TWO52r
;
21286 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
21287 TWO52
= const_double_from_real_value (TWO52r
, mode
);
21288 TWO52
= force_reg (mode
, TWO52
);
/* Expand lround (op1) -> op0: add copysign (nextafter (0.5, 0.0), op1) and
   truncate.  Using the value just below 0.5 avoids rounding ties the wrong
   way when the addition itself rounds up.  */
21293 /* Expand SSE sequence for computing lround from OP1 storing
21296 ix86_expand_lround (rtx op0
, rtx op1
)
21298 /* C code for the stuff we're doing below:
21299 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
21302 enum machine_mode mode
= GET_MODE (op1
);
21303 const struct real_format
*fmt
;
21304 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
21307 /* load nextafter (0.5, 0.0) */
/* nextafter (0.5, 0) = 0.5 - 2^(-p-1) for precision p.  */
21308 fmt
= REAL_MODE_FORMAT (mode
);
21309 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1);
21310 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
21312 /* adj = copysign (0.5, op1) */
21313 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
21314 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
21316 /* adj = op1 + adj */
21317 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
21319 /* op0 = (imode)adj */
21320 expand_fix (op0
, adj
, 0);
/* NOTE(review): header comment says "lround" but the body implements
   lfloor/lceil (DO_FLOOR selects which); upstream's comment reads
   "lfloor/lceil" — confirm.  */
/* Expand lfloor/lceil (op1) -> op0: convert-to-int, convert back, and
   compensate by +/-1 when the round-to-nearest conversion overshot.  */
21323 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
21326 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
21328 /* C code for the stuff we're doing below (for do_floor):
21330 xi -= (double)xi > op1 ? 1 : 0;
21333 enum machine_mode fmode
= GET_MODE (op1
);
21334 enum machine_mode imode
= GET_MODE (op0
);
21335 rtx ireg
, freg
, label
, tmp
;
21337 /* reg = (long)op1 */
21338 ireg
= gen_reg_rtx (imode
);
21339 expand_fix (ireg
, op1
, 0);
21341 /* freg = (double)reg */
21342 freg
= gen_reg_rtx (fmode
);
21343 expand_float (freg
, ireg
, 0);
21345 /* ireg = (freg > op1) ? ireg - 1 : ireg */
/* Jump over the adjustment when no compensation is needed; the compare
   sense/swap is controlled by !do_floor.  */
21346 label
= ix86_expand_sse_compare_and_jump (UNLE
,
21347 freg
, op1
, !do_floor
);
21348 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
21349 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
21350 emit_move_insn (ireg
, tmp
);
21352 emit_label (label
);
21353 LABEL_NUSES (label
) = 1;
21355 emit_move_insn (op0
, ireg
);
/* Expand rint (operand1) -> operand0 using the classic x + 2^52 - 2^52
   trick on |x| (values >= 2^52 are already integral and skip the work),
   restoring the sign with copysign at the end.  */
21358 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
21359 result in OPERAND0. */
21361 ix86_expand_rint (rtx operand0
, rtx operand1
)
21363 /* C code for the stuff we're doing below:
21364 xa = fabs (operand1);
21365 if (!isless (xa, 2**52))
21367 xa = xa + 2**52 - 2**52;
21368 return copysign (xa, operand1);
21370 enum machine_mode mode
= GET_MODE (operand0
);
21371 rtx res
, xa
, label
, TWO52
, mask
;
21373 res
= gen_reg_rtx (mode
);
21374 emit_move_insn (res
, operand1
);
21376 /* xa = abs (operand1) */
21377 xa
= ix86_expand_sse_fabs (res
, &mask
);
21379 /* if (!isless (xa, TWO52)) goto label; */
21380 TWO52
= ix86_gen_TWO52 (mode
);
21381 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
/* The add/sub pair rounds xa to an integer in round-to-nearest mode.  */
21383 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21384 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
21386 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
21388 emit_label (label
);
21389 LABEL_NUSES (label
) = 1;
21391 emit_move_insn (operand0
, res
);
/* Expand floor/ceil (operand1) -> operand0 without relying on DImode
   conversions (32-bit-safe variant): round via the TWO52 trick, restore
   the sign, then subtract a +/-1.0 compensation selected by a compare
   mask.  DO_FLOOR chooses floor vs ceil.  */
21394 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21397 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
21399 /* C code for the stuff we expand below.
21400 double xa = fabs (x), x2;
21401 if (!isless (xa, TWO52))
21403 xa = xa + TWO52 - TWO52;
21404 x2 = copysign (xa, x);
21413 enum machine_mode mode
= GET_MODE (operand0
);
21414 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
21416 TWO52
= ix86_gen_TWO52 (mode
);
21418 /* Temporary for holding the result, initialized to the input
21419 operand to ease control flow. */
21420 res
= gen_reg_rtx (mode
);
21421 emit_move_insn (res
, operand1
);
21423 /* xa = abs (operand1) */
21424 xa
= ix86_expand_sse_fabs (res
, &mask
);
21426 /* if (!isless (xa, TWO52)) goto label; */
21427 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21429 /* xa = xa + TWO52 - TWO52; */
21430 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21431 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
21433 /* xa = copysign (xa, operand1) */
21434 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
21436 /* generate 1.0 or -1.0 */
21437 one
= force_reg (mode
,
21438 const_double_from_real_value (do_floor
21439 ? dconst1
: dconstm1
, mode
));
21441 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
/* Mask is all-ones where compensation is needed; AND with +/-1.0 makes
   it exactly the value to subtract.  */
21442 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
21443 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21444 gen_rtx_AND (mode
, one
, tmp
)));
21445 /* We always need to subtract here to preserve signed zero. */
21446 tmp
= expand_simple_binop (mode
, MINUS
,
21447 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21448 emit_move_insn (res
, tmp
);
21450 emit_label (label
);
21451 LABEL_NUSES (label
) = 1;
21453 emit_move_insn (operand0
, res
);
/* Expand floor/ceil (operand1) -> operand0 via an integer round trip
   (cvttsd2si / cvtsi2sd) plus a +/-1 compensation; DImode is used for
   DFmode, so this variant needs 64-bit conversions.  DO_FLOOR selects
   floor vs ceil.  */
21456 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21459 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
21461 /* C code for the stuff we expand below.
21462 double xa = fabs (x), x2;
21463 if (!isless (xa, TWO52))
21465 x2 = (double)(long)x;
21472 if (HONOR_SIGNED_ZEROS (mode))
21473 return copysign (x2, x);
21476 enum machine_mode mode
= GET_MODE (operand0
);
21477 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
21479 TWO52
= ix86_gen_TWO52 (mode
);
21481 /* Temporary for holding the result, initialized to the input
21482 operand to ease control flow. */
21483 res
= gen_reg_rtx (mode
);
21484 emit_move_insn (res
, operand1
);
21486 /* xa = abs (operand1) */
21487 xa
= ix86_expand_sse_fabs (res
, &mask
);
21489 /* if (!isless (xa, TWO52)) goto label; */
/* |x| >= 2^52 is already integral; skip everything.  */
21490 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21492 /* xa = (double)(long)x */
21493 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
21494 expand_fix (xi
, res
, 0);
21495 expand_float (xa
, xi
, 0);
21498 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
21500 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21501 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
21502 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21503 gen_rtx_AND (mode
, one
, tmp
)));
21504 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
21505 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21506 emit_move_insn (res
, tmp
);
/* Truncation lost the sign of -0.0; restore it when it matters.  */
21508 if (HONOR_SIGNED_ZEROS (mode
))
21509 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
21511 emit_label (label
);
21512 LABEL_NUSES (label
) = 1;
21514 emit_move_insn (operand0
, res
);
/* Expand round (operand1) -> operand0 without DImode truncation (works on
   32-bit): TWO52-round |x|, then fix up the two half-way directions by
   comparing dxa = rounded - |x| against +/-0.5, finally copysign back.  */
21517 /* Expand SSE sequence for computing round from OPERAND1 storing
21518 into OPERAND0. Sequence that works without relying on DImode truncation
21519 via cvttsd2siq that is only available on 64bit targets. */
21521 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
21523 /* C code for the stuff we expand below.
21524 double xa = fabs (x), xa2, x2;
21525 if (!isless (xa, TWO52))
21527 Using the absolute value and copying back sign makes
21528 -0.0 -> -0.0 correct.
21529 xa2 = xa + TWO52 - TWO52;
21534 else if (dxa > 0.5)
21536 x2 = copysign (xa2, x);
21539 enum machine_mode mode
= GET_MODE (operand0
);
21540 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
21542 TWO52
= ix86_gen_TWO52 (mode
);
21544 /* Temporary for holding the result, initialized to the input
21545 operand to ease control flow. */
21546 res
= gen_reg_rtx (mode
);
21547 emit_move_insn (res
, operand1
);
21549 /* xa = abs (operand1) */
21550 xa
= ix86_expand_sse_fabs (res
, &mask
);
21552 /* if (!isless (xa, TWO52)) goto label; */
21553 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21555 /* xa2 = xa + TWO52 - TWO52; */
21556 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21557 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
21559 /* dxa = xa2 - xa; */
/* dxa in (-0.5, 0.5] tells how the nearest-rounding moved the value.  */
21560 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
21562 /* generate 0.5, 1.0 and -0.5 */
21563 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
21564 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
21565 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
21569 tmp
= gen_reg_rtx (mode
);
21570 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
21571 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
21572 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21573 gen_rtx_AND (mode
, one
, tmp
)));
21574 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21575 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
21576 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
21577 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21578 gen_rtx_AND (mode
, one
, tmp
)));
21579 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21581 /* res = copysign (xa2, operand1) */
21582 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
21584 emit_label (label
);
21585 LABEL_NUSES (label
) = 1;
21587 emit_move_insn (operand0
, res
);
21590 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21593 ix86_expand_trunc (rtx operand0
, rtx operand1
)
21595 /* C code for SSE variant we expand below.
21596 double xa = fabs (x), x2;
21597 if (!isless (xa, TWO52))
21599 x2 = (double)(long)x;
21600 if (HONOR_SIGNED_ZEROS (mode))
21601 return copysign (x2, x);
21604 enum machine_mode mode
= GET_MODE (operand0
);
21605 rtx xa
, xi
, TWO52
, label
, res
, mask
;
21607 TWO52
= ix86_gen_TWO52 (mode
);
21609 /* Temporary for holding the result, initialized to the input
21610 operand to ease control flow. */
21611 res
= gen_reg_rtx (mode
);
21612 emit_move_insn (res
, operand1
);
21614 /* xa = abs (operand1) */
21615 xa
= ix86_expand_sse_fabs (res
, &mask
);
21617 /* if (!isless (xa, TWO52)) goto label; */
21618 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21620 /* x = (double)(long)x */
21621 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
21622 expand_fix (xi
, res
, 0);
21623 expand_float (res
, xi
, 0);
21625 if (HONOR_SIGNED_ZEROS (mode
))
21626 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
21628 emit_label (label
);
21629 LABEL_NUSES (label
) = 1;
21631 emit_move_insn (operand0
, res
);
21634 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21637 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
21639 enum machine_mode mode
= GET_MODE (operand0
);
21640 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
21642 /* C code for SSE variant we expand below.
21643 double xa = fabs (x), x2;
21644 if (!isless (xa, TWO52))
21646 xa2 = xa + TWO52 - TWO52;
21650 x2 = copysign (xa2, x);
21654 TWO52
= ix86_gen_TWO52 (mode
);
21656 /* Temporary for holding the result, initialized to the input
21657 operand to ease control flow. */
21658 res
= gen_reg_rtx (mode
);
21659 emit_move_insn (res
, operand1
);
21661 /* xa = abs (operand1) */
21662 xa
= ix86_expand_sse_fabs (res
, &smask
);
21664 /* if (!isless (xa, TWO52)) goto label; */
21665 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21667 /* res = xa + TWO52 - TWO52; */
21668 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21669 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
21670 emit_move_insn (res
, tmp
);
21673 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
21675 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
21676 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
21677 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
21678 gen_rtx_AND (mode
, mask
, one
)));
21679 tmp
= expand_simple_binop (mode
, MINUS
,
21680 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
21681 emit_move_insn (res
, tmp
);
21683 /* res = copysign (res, operand1) */
21684 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
21686 emit_label (label
);
21687 LABEL_NUSES (label
) = 1;
21689 emit_move_insn (operand0
, res
);
21692 /* Expand SSE sequence for computing round from OPERAND1 storing
21695 ix86_expand_round (rtx operand0
, rtx operand1
)
21697 /* C code for the stuff we're doing below:
21698 double xa = fabs (x);
21699 if (!isless (xa, TWO52))
21701 xa = (double)(long)(xa + nextafter (0.5, 0.0));
21702 return copysign (xa, x);
21704 enum machine_mode mode
= GET_MODE (operand0
);
21705 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
21706 const struct real_format
*fmt
;
21707 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
21709 /* Temporary for holding the result, initialized to the input
21710 operand to ease control flow. */
21711 res
= gen_reg_rtx (mode
);
21712 emit_move_insn (res
, operand1
);
21714 TWO52
= ix86_gen_TWO52 (mode
);
21715 xa
= ix86_expand_sse_fabs (res
, &mask
);
21716 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21718 /* load nextafter (0.5, 0.0) */
21719 fmt
= REAL_MODE_FORMAT (mode
);
21720 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1);
21721 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
21723 /* xa = xa + 0.5 */
21724 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
21725 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
21727 /* xa = (double)(int64_t)xa */
21728 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
21729 expand_fix (xi
, xa
, 0);
21730 expand_float (xa
, xi
, 0);
21732 /* res = copysign (xa, operand1) */
21733 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
21735 emit_label (label
);
21736 LABEL_NUSES (label
) = 1;
21738 emit_move_insn (operand0
, res
);
21741 #include "gt-i386.h"