/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "basic-block.h"
#include "target-def.h"
#include "langhooks.h"
#include "tree-gimple.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
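/* Editorial sketch, not part of the original file: the multiply and divide
   cost tables below are indexed with MODE_INDEX.  The field names mult_init
   and mult_bit are assumed from the processor_costs definition in i386.h.  */
#if 0
static int
example_mult_cost (enum machine_mode mode, int bits_set)
{
  /* Base cost of starting the multiply in MODE, plus a per-set-bit
     component used for multiplies by a constant.  */
  return ix86_cost->mult_init[MODE_INDEX (mode)]
	 + bits_set * ix86_cost->mult_bit;
}
#endif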
/* Processor costs (relative to an add) */
struct processor_costs size_cost = {	/* costs for tuning for size */
  2,			/* cost of an add instruction */
  3,			/* cost of a lea instruction */
  2,			/* variable shift costs */
  3,			/* constant shift costs */
  {3, 3, 3, 3, 5},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {3, 3, 3, 3, 5},	/* cost of a divide/mod */
  3,			/* cost of movsx */
  3,			/* cost of movzx */
  2,			/* cost for loading QImode using movzbl */
  {2, 2, 2},		/* cost of loading integer registers in QImode,
			   HImode and SImode.  Relative to reg-reg move (2).  */
  {2, 2, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 2},		/* cost of loading fp registers in SFmode, DFmode and XFmode */
  {2, 2, 2},		/* cost of storing fp registers in SFmode, DFmode and XFmode */
  3,			/* cost of moving MMX register */
  {3, 3},		/* cost of loading MMX registers in SImode and DImode */
  {3, 3},		/* cost of storing MMX registers in SImode and DImode */
  3,			/* cost of moving SSE register */
  {3, 3, 3},		/* cost of loading SSE registers in SImode, DImode and TImode */
  {3, 3, 3},		/* cost of storing SSE registers in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
  2,			/* cost of FADD and FSUB insns.  */
  2,			/* cost of FMUL instruction.  */
  2,			/* cost of FDIV instruction.  */
  2,			/* cost of FABS instruction.  */
  2,			/* cost of FCHS instruction.  */
  2,			/* cost of FSQRT instruction.  */
};

/* Processor costs (relative to an add) */
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  3,			/* variable shift costs */
  2,			/* constant shift costs */
  {6, 6, 6, 6, 6},	/* cost of starting a multiply */
  1,			/* cost of multiply per each bit set */
  {23, 23, 23, 23, 23},	/* cost of a divide/mod */
  3,			/* cost of movsx */
  2,			/* cost of movzx */
  15,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers in QImode,
			   HImode and SImode.  Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {8, 8, 8},		/* cost of loading fp registers in SFmode, DFmode and XFmode */
  {8, 8, 8},		/* cost of storing fp registers in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {4, 8},		/* cost of loading MMX registers in SImode and DImode */
  {4, 8},		/* cost of storing MMX registers in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 8, 16},		/* cost of loading SSE registers in SImode, DImode and TImode */
  {4, 8, 16},		/* cost of storing SSE registers in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
  23,			/* cost of FADD and FSUB insns.  */
  27,			/* cost of FMUL instruction.  */
  88,			/* cost of FDIV instruction.  */
  22,			/* cost of FABS instruction.  */
  24,			/* cost of FCHS instruction.  */
  122,			/* cost of FSQRT instruction.  */
};

struct processor_costs i486_cost = {	/* 486 specific costs */
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  3,			/* variable shift costs */
  2,			/* constant shift costs */
  {12, 12, 12, 12, 12},	/* cost of starting a multiply */
  1,			/* cost of multiply per each bit set */
  {40, 40, 40, 40, 40},	/* cost of a divide/mod */
  3,			/* cost of movsx */
  2,			/* cost of movzx */
  15,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers in QImode,
			   HImode and SImode.  Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {8, 8, 8},		/* cost of loading fp registers in SFmode, DFmode and XFmode */
  {8, 8, 8},		/* cost of storing fp registers in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {4, 8},		/* cost of loading MMX registers in SImode and DImode */
  {4, 8},		/* cost of storing MMX registers in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 8, 16},		/* cost of loading SSE registers in SImode, DImode and TImode */
  {4, 8, 16},		/* cost of storing SSE registers in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
  8,			/* cost of FADD and FSUB insns.  */
  16,			/* cost of FMUL instruction.  */
  73,			/* cost of FDIV instruction.  */
  3,			/* cost of FABS instruction.  */
  3,			/* cost of FCHS instruction.  */
  83,			/* cost of FSQRT instruction.  */
};

struct processor_costs pentium_cost = {
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  4,			/* variable shift costs */
  1,			/* constant shift costs */
  {11, 11, 11, 11, 11},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {25, 25, 25, 25, 25},	/* cost of a divide/mod */
  3,			/* cost of movsx */
  2,			/* cost of movzx */
  8,			/* "large" insn */
  6,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers in QImode,
			   HImode and SImode.  Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers in SFmode, DFmode and XFmode */
  {4, 4, 6},		/* cost of storing fp registers in SFmode, DFmode and XFmode */
  8,			/* cost of moving MMX register */
  {8, 8},		/* cost of loading MMX registers in SImode and DImode */
  {8, 8},		/* cost of storing MMX registers in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 8, 16},		/* cost of loading SSE registers in SImode, DImode and TImode */
  {4, 8, 16},		/* cost of storing SSE registers in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
  3,			/* cost of FADD and FSUB insns.  */
  3,			/* cost of FMUL instruction.  */
  39,			/* cost of FDIV instruction.  */
  1,			/* cost of FABS instruction.  */
  1,			/* cost of FCHS instruction.  */
  70,			/* cost of FSQRT instruction.  */
};

struct processor_costs pentiumpro_cost = {
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  {4, 4, 4, 4, 4},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {17, 17, 17, 17, 17},	/* cost of a divide/mod */
  1,			/* cost of movsx */
  1,			/* cost of movzx */
  8,			/* "large" insn */
  2,			/* cost for loading QImode using movzbl */
  {4, 4, 4},		/* cost of loading integer registers in QImode,
			   HImode and SImode.  Relative to reg-reg move (2).  */
  {2, 2, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers in SFmode, DFmode and XFmode */
  {4, 4, 6},		/* cost of storing fp registers in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {2, 2},		/* cost of loading MMX registers in SImode and DImode */
  {2, 2},		/* cost of storing MMX registers in SImode and DImode */
  2,			/* cost of moving SSE register */
  {2, 2, 8},		/* cost of loading SSE registers in SImode, DImode and TImode */
  {2, 2, 8},		/* cost of storing SSE registers in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  32,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  3,			/* cost of FADD and FSUB insns.  */
  5,			/* cost of FMUL instruction.  */
  56,			/* cost of FDIV instruction.  */
  2,			/* cost of FABS instruction.  */
  2,			/* cost of FCHS instruction.  */
  56,			/* cost of FSQRT instruction.  */
};

struct processor_costs k6_cost = {
  1,			/* cost of an add instruction */
  2,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  {3, 3, 3, 3, 3},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {18, 18, 18, 18, 18},	/* cost of a divide/mod */
  2,			/* cost of movsx */
  2,			/* cost of movzx */
  8,			/* "large" insn */
  3,			/* cost for loading QImode using movzbl */
  {4, 5, 4},		/* cost of loading integer registers in QImode,
			   HImode and SImode.  Relative to reg-reg move (2).  */
  {2, 3, 2},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {6, 6, 6},		/* cost of loading fp registers in SFmode, DFmode and XFmode */
  {4, 4, 4},		/* cost of storing fp registers in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {2, 2},		/* cost of loading MMX registers in SImode and DImode */
  {2, 2},		/* cost of storing MMX registers in SImode and DImode */
  2,			/* cost of moving SSE register */
  {2, 2, 8},		/* cost of loading SSE registers in SImode, DImode and TImode */
  {2, 2, 8},		/* cost of storing SSE registers in SImode, DImode and TImode */
  6,			/* MMX or SSE register to integer */
  32,			/* size of prefetch block */
  1,			/* number of parallel prefetches */
  2,			/* cost of FADD and FSUB insns.  */
  2,			/* cost of FMUL instruction.  */
  56,			/* cost of FDIV instruction.  */
  2,			/* cost of FABS instruction.  */
  2,			/* cost of FCHS instruction.  */
  56,			/* cost of FSQRT instruction.  */
};

struct processor_costs athlon_cost = {
  1,			/* cost of an add instruction */
  2,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  {5, 5, 5, 5, 5},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},	/* cost of a divide/mod */
  1,			/* cost of movsx */
  1,			/* cost of movzx */
  8,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {3, 4, 3},		/* cost of loading integer registers in QImode,
			   HImode and SImode.  Relative to reg-reg move (2).  */
  {3, 4, 3},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {4, 4, 12},		/* cost of loading fp registers in SFmode, DFmode and XFmode */
  {6, 6, 8},		/* cost of storing fp registers in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {4, 4},		/* cost of loading MMX registers in SImode and DImode */
  {4, 4},		/* cost of storing MMX registers in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 4, 6},		/* cost of loading SSE registers in SImode, DImode and TImode */
  {4, 4, 5},		/* cost of storing SSE registers in SImode, DImode and TImode */
  5,			/* MMX or SSE register to integer */
  64,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  4,			/* cost of FADD and FSUB insns.  */
  4,			/* cost of FMUL instruction.  */
  24,			/* cost of FDIV instruction.  */
  2,			/* cost of FABS instruction.  */
  2,			/* cost of FCHS instruction.  */
  35,			/* cost of FSQRT instruction.  */
};

struct processor_costs k8_cost = {
  1,			/* cost of an add instruction */
  2,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  {3, 4, 3, 4, 5},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},	/* cost of a divide/mod */
  1,			/* cost of movsx */
  1,			/* cost of movzx */
  8,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {3, 4, 3},		/* cost of loading integer registers in QImode,
			   HImode and SImode.  Relative to reg-reg move (2).  */
  {3, 4, 3},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {4, 4, 12},		/* cost of loading fp registers in SFmode, DFmode and XFmode */
  {6, 6, 8},		/* cost of storing fp registers in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {3, 3},		/* cost of loading MMX registers in SImode and DImode */
  {4, 4},		/* cost of storing MMX registers in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 3, 6},		/* cost of loading SSE registers in SImode, DImode and TImode */
  {4, 4, 5},		/* cost of storing SSE registers in SImode, DImode and TImode */
  5,			/* MMX or SSE register to integer */
  64,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  4,			/* cost of FADD and FSUB insns.  */
  4,			/* cost of FMUL instruction.  */
  19,			/* cost of FDIV instruction.  */
  2,			/* cost of FABS instruction.  */
  2,			/* cost of FCHS instruction.  */
  35,			/* cost of FSQRT instruction.  */
};

struct processor_costs pentium4_cost = {
  1,			/* cost of an add instruction */
  3,			/* cost of a lea instruction */
  4,			/* variable shift costs */
  4,			/* constant shift costs */
  {15, 15, 15, 15, 15},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {56, 56, 56, 56, 56},	/* cost of a divide/mod */
  1,			/* cost of movsx */
  1,			/* cost of movzx */
  16,			/* "large" insn */
  2,			/* cost for loading QImode using movzbl */
  {4, 5, 4},		/* cost of loading integer registers in QImode,
			   HImode and SImode.  Relative to reg-reg move (2).  */
  {2, 3, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers in SFmode, DFmode and XFmode */
  {4, 4, 6},		/* cost of storing fp registers in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {2, 2},		/* cost of loading MMX registers in SImode and DImode */
  {2, 2},		/* cost of storing MMX registers in SImode and DImode */
  12,			/* cost of moving SSE register */
  {12, 12, 12},		/* cost of loading SSE registers in SImode, DImode and TImode */
  {2, 2, 8},		/* cost of storing SSE registers in SImode, DImode and TImode */
  10,			/* MMX or SSE register to integer */
  64,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  5,			/* cost of FADD and FSUB insns.  */
  7,			/* cost of FMUL instruction.  */
  43,			/* cost of FDIV instruction.  */
  2,			/* cost of FABS instruction.  */
  2,			/* cost of FCHS instruction.  */
  43,			/* cost of FSQRT instruction.  */
};

struct processor_costs nocona_cost = {
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  {10, 10, 10, 10, 10},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {66, 66, 66, 66, 66},	/* cost of a divide/mod */
  1,			/* cost of movsx */
  1,			/* cost of movzx */
  16,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {4, 4, 4},		/* cost of loading integer registers in QImode,
			   HImode and SImode.  Relative to reg-reg move (2).  */
  {4, 4, 4},		/* cost of storing integer registers */
  3,			/* cost of reg,reg fld/fst */
  {12, 12, 12},		/* cost of loading fp registers in SFmode, DFmode and XFmode */
  {4, 4, 4},		/* cost of storing fp registers in SFmode, DFmode and XFmode */
  6,			/* cost of moving MMX register */
  {12, 12},		/* cost of loading MMX registers in SImode and DImode */
  {12, 12},		/* cost of storing MMX registers in SImode and DImode */
  6,			/* cost of moving SSE register */
  {12, 12, 12},		/* cost of loading SSE registers in SImode, DImode and TImode */
  {12, 12, 12},		/* cost of storing SSE registers in SImode, DImode and TImode */
  8,			/* MMX or SSE register to integer */
  128,			/* size of prefetch block */
  8,			/* number of parallel prefetches */
  6,			/* cost of FADD and FSUB insns.  */
  8,			/* cost of FMUL instruction.  */
  40,			/* cost of FDIV instruction.  */
  3,			/* cost of FABS instruction.  */
  3,			/* cost of FCHS instruction.  */
  44,			/* cost of FSQRT instruction.  */
};

const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
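
/* Editorial sketch, not part of the original file: each x86_* constant below
   is a bitmask over the processor enumeration above; i386.h tests the bit
   selected by -mtune, along the lines of the TUNEMASK idiom used later in
   this file.  The macro names here are illustrative only.  */
#if 0
#define EXAMPLE_TUNEMASK (1 << ix86_tune)
/* Nonzero when the active tuning target prefers the "leave" instruction
   in function epilogues.  */
#define EXAMPLE_USE_LEAVE (x86_use_leave & EXAMPLE_TUNEMASK)
#endif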
const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_fisttp = m_NOCONA;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
/* Branch hints were put in P4 based on simulation results.  But after P4 was
   made, no performance benefit was observed with branch hints.  It also
   increases code size.  As a result, icc never generates branch hints.  */
const int x86_branch_hints = 0;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
const int x86_decompose_lea = m_PENT4 | m_NOCONA;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
/* Set for machines where the type and dependencies are resolved on SSE
   register parts instead of whole registers, so we may maintain just the
   lower part of scalar values in proper format, leaving the upper part
   undefined.  */
const int x86_sse_split_regs = m_ATHLON_K8;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;

/* ??? Allowing interunit moves makes it all too easy for the compiler to put
   integer data in xmm registers, which results in pretty abysmal code.  */
const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;

/* Some CPU cores are not able to predict more than 4 branch instructions in
   the 16 byte window.  */
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT;
const int x86_use_bt = m_ATHLON_K8;
/* Compare and exchange was added for 80486.  */
const int x86_cmpxchg = ~m_386;
/* Exchange and add was added for 80486.  */
const int x86_xadd = ~m_386;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */
enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  AREG, DREG, CREG, BREG,
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
};
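
/* Editorial sketch, not part of the original file: i386.h maps a hard
   register number to its smallest containing class through this table,
   roughly as follows (the macro name here is illustrative).  */
#if 0
#define EXAMPLE_REGNO_REG_CLASS(REGNO) (regclass_map[(REGNO)])
/* e.g. regclass_map[0] is AREG (%eax) and regclass_map[1] is DREG (%edx).  */
#endif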
/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
/* The "default" register map used in 64bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8 for %eip (no gcc equivalent)
	9 for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)  */
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
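
/* Editorial sketch, not part of the original file: the target headers pick
   one of the three maps above when emitting debug register numbers, roughly
   as below (the exact macro varies per subtarget; name is illustrative).  */
#if 0
#define EXAMPLE_DBX_REGISTER_NUMBER(N) \
  (TARGET_64BIT ? dbx64_register_map[(N)] : dbx_register_map[(N)])
/* SVR4/DWARF targets substitute svr4_dbx_register_map for the 32-bit map.  */
#endif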
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;
rtx ix86_compare_emitted = NULL_RTX;

#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
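/* Editorial worked example, not in the original sources: under the 64-bit
   ABI REGPARM_MAX is 6 and SSE_REGPARM_MAX is 8, so with UNITS_PER_WORD == 8
   the register save area is 6 * 8 + 8 * 16 == 176 bytes.  */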
/* Define the structure for the machine field in struct function.  */

struct stack_local_entry GTY(())
  struct stack_local_entry *next;

/* Structure describing stack frame layout.
   Stack grows downward:
   saved frame pointer if frame_pointer_needed	<- HARD_FRAME_POINTER
   > to_allocate				<- FRAME_POINTER  */

  int outgoing_arguments_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
/* Code model option as passed by user.  */
static const char *ix86_cmodel_string;
enum cmodel ix86_cmodel;
static const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;
static const char *ix86_tls_dialect_string;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which CPU are we scheduling for.  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_tune_string;		/* for -mtune=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */
static const char *ix86_fpmath_string;	/* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments.  */
static const char *ix86_regparm_string;

/* True if the SSE prefetch instruction is not a NOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
static int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
static const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
static const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
static const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
static const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
static const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
static bool ix86_handle_option (size_t, const char *, int);
static void output_pic_addr_const (FILE *, rtx, int);
static void put_condition_code (enum rtx_code, enum machine_mode,
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
static rtx get_thread_pointer (int);
static rtx legitimize_tls_address (rtx, enum tls_model, int);
static void get_pc_thunk_name (char [32], unsigned int);
static rtx gen_push (rtx);
static int ix86_flags_dependant (rtx, rtx, enum attr_type);
static int ix86_agi_dependant (rtx, rtx, enum attr_type);
static struct machine_function * ix86_init_machine_status (void);
static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
static int ix86_nsaved_regs (void);
static void ix86_emit_save_regs (void);
static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
static HOST_WIDE_INT ix86_GOT_alias_set (void);
static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
static rtx ix86_expand_aligntest (rtx, int);
static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
static int ix86_issue_rate (void);
static int ix86_adjust_cost (rtx, rtx, rtx, int);
static int ia32_multipass_dfa_lookahead (void);
static void ix86_init_mmx_sse_builtins (void);
static rtx x86_this_parameter (tree);
static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				 HOST_WIDE_INT, tree);
static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void x86_file_start (void);
static void ix86_reorg (void);
static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx *);
static tree ix86_build_builtin_va_list (void);
static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
static bool ix86_vector_mode_supported_p (enum machine_mode);

static int ix86_address_cost (rtx);
static bool ix86_cannot_force_const_mem (rtx);
static rtx ix86_delegitimize_address (rtx);

struct builtin_description;
static rtx ix86_expand_sse_comi (const struct builtin_description *,
static rtx ix86_expand_sse_compare (const struct builtin_description *,
static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_store_builtin (enum insn_code, tree);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
static int ix86_fp_comparison_cost (enum rtx_code code);
static unsigned int ix86_select_alt_pic_regnum (void);
static int ix86_save_reg (unsigned int, int);
static void ix86_compute_frame_layout (struct ix86_frame *);
static int ix86_comp_type_attributes (tree, tree);
static int ix86_function_regparm (tree, tree);
const struct attribute_spec ix86_attribute_table[];
static bool ix86_function_ok_for_sibcall (tree, tree);
static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
static int ix86_value_regno (enum machine_mode, tree);
static bool contains_128bit_aligned_vector_p (tree);
static rtx ix86_struct_value_rtx (tree, int);
static bool ix86_ms_bitfield_layout_p (tree);
static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
static int extended_reg_mentioned_1 (rtx *, void *);
static bool ix86_rtx_costs (rtx, int, int, int *);
static int min_insn_size (rtx);
static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
static void ix86_init_builtins (void);
static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);

/* This function is only used on Solaris.  */
static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, which are basically SSE class; gcc just uses an
   SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (the upper half contains padding).  */
enum x86_64_reg_class
  X86_64_INTEGER_CLASS,
  X86_64_INTEGERSI_CLASS,
  X86_64_COMPLEX_X87_CLASS,
static const char * const x86_64_reg_class_name[] = {
  "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
  "sseup", "x87", "x87up", "cplx87", "no"
};

#define MAX_CLASSES 4

/* Table of constants used by fldpi, fldln2, etc.  */
static REAL_VALUE_TYPE ext_80387_constants_table[5];
static bool ext_80387_constants_init = 0;
static void init_ext_80387_constants (void);
975 /* Initialize the GCC target structure. */
976 #undef TARGET_ATTRIBUTE_TABLE
977 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
978 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
979 # undef TARGET_MERGE_DECL_ATTRIBUTES
980 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
983 #undef TARGET_COMP_TYPE_ATTRIBUTES
984 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
986 #undef TARGET_INIT_BUILTINS
987 #define TARGET_INIT_BUILTINS ix86_init_builtins
988 #undef TARGET_EXPAND_BUILTIN
989 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
991 #undef TARGET_ASM_FUNCTION_EPILOGUE
992 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
994 #undef TARGET_ASM_OPEN_PAREN
995 #define TARGET_ASM_OPEN_PAREN ""
996 #undef TARGET_ASM_CLOSE_PAREN
997 #define TARGET_ASM_CLOSE_PAREN ""
999 #undef TARGET_ASM_ALIGNED_HI_OP
1000 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1001 #undef TARGET_ASM_ALIGNED_SI_OP
1002 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1004 #undef TARGET_ASM_ALIGNED_DI_OP
1005 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1008 #undef TARGET_ASM_UNALIGNED_HI_OP
1009 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1010 #undef TARGET_ASM_UNALIGNED_SI_OP
1011 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1012 #undef TARGET_ASM_UNALIGNED_DI_OP
1013 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1015 #undef TARGET_SCHED_ADJUST_COST
1016 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1017 #undef TARGET_SCHED_ISSUE_RATE
1018 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1019 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead
1023 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1024 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1027 #undef TARGET_HAVE_TLS
1028 #define TARGET_HAVE_TLS true
1030 #undef TARGET_CANNOT_FORCE_CONST_MEM
1031 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1033 #undef TARGET_DELEGITIMIZE_ADDRESS
1034 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1036 #undef TARGET_MS_BITFIELD_LAYOUT_P
1037 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1039 #undef TARGET_ASM_OUTPUT_MI_THUNK
1040 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1041 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1042 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1044 #undef TARGET_ASM_FILE_START
1045 #define TARGET_ASM_FILE_START x86_file_start
1047 #undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS	\
  (TARGET_DEFAULT			\
   | TARGET_64BIT_DEFAULT		\
   | TARGET_SUBTARGET_DEFAULT		\
   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1054 #undef TARGET_HANDLE_OPTION
1055 #define TARGET_HANDLE_OPTION ix86_handle_option
1057 #undef TARGET_RTX_COSTS
1058 #define TARGET_RTX_COSTS ix86_rtx_costs
1059 #undef TARGET_ADDRESS_COST
1060 #define TARGET_ADDRESS_COST ix86_address_cost
1062 #undef TARGET_FIXED_CONDITION_CODE_REGS
1063 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1064 #undef TARGET_CC_MODES_COMPATIBLE
1065 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1067 #undef TARGET_MACHINE_DEPENDENT_REORG
1068 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1070 #undef TARGET_BUILD_BUILTIN_VA_LIST
1071 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1073 #undef TARGET_MD_ASM_CLOBBERS
1074 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1076 #undef TARGET_PROMOTE_PROTOTYPES
1077 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1078 #undef TARGET_STRUCT_VALUE_RTX
1079 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1080 #undef TARGET_SETUP_INCOMING_VARARGS
1081 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1082 #undef TARGET_MUST_PASS_IN_STACK
1083 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1084 #undef TARGET_PASS_BY_REFERENCE
1085 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1087 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1088 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1090 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1091 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1093 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1094 #undef TARGET_INSERT_ATTRIBUTES
1095 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
struct gcc_target targetm = TARGET_INITIALIZER;
/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif

/* Implement TARGET_HANDLE_OPTION.  */
static bool
ix86_handle_option (size_t code, const char *arg, int value)
{
	  target_flags &= ~MASK_3DNOW_A;
	  target_flags_explicit |= MASK_3DNOW_A;

    case OPT_malign_functions_:
      ix86_align_funcs_string = arg;

    case OPT_malign_jumps_:
      ix86_align_jumps_string = arg;

    case OPT_malign_loops_:
      ix86_align_loops_string = arg;

      ix86_arch_string = arg;

      ix86_asm_string = arg;

    case OPT_mbranch_cost_:
      ix86_branch_cost_string = arg;

      ix86_cmodel_string = arg;

      ix86_fpmath_string = arg;

	  target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
	  target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;

    case OPT_mpreferred_stack_boundary_:
      ix86_preferred_stack_boundary_string = arg;

      ix86_regparm_string = arg;

	  target_flags &= ~(MASK_SSE2 | MASK_SSE3);
	  target_flags_explicit |= MASK_SSE2 | MASK_SSE3;

	  target_flags &= ~MASK_SSE3;
	  target_flags_explicit |= MASK_SSE3;

    case OPT_mtls_dialect_:
      ix86_tls_dialect_string = arg;

      ix86_tune_string = arg;
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options (void)
{
  int ix86_tune_defaulted = 0;

  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16
  static const struct ptt
    {
      const struct processor_costs *cost;	/* Processor costs */
      const int target_enable;			/* Target flags to enable.  */
      const int target_disable;			/* Target flags to disable.  */
      const int align_loop;			/* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
      {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
      {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
      {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
    };
  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;

      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
	  PTA_PREFETCH_SSE = 16,
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, 0},
      {"i486", PROCESSOR_I486, 0},
      {"i586", PROCESSOR_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
      {"winchip-c6", PROCESSOR_I486, PTA_MMX},
      {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
      {"i686", PROCESSOR_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
				       | PTA_MMX | PTA_PREFETCH_SSE},
      {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
					| PTA_MMX | PTA_PREFETCH_SSE},
      {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
				     | PTA_MMX | PTA_PREFETCH_SSE},
      {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
				   | PTA_MMX | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				   | PTA_3DNOW_A},
      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
					 | PTA_3DNOW | PTA_3DNOW_A},
      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				     | PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
			       | PTA_SSE | PTA_SSE2},
      {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
			   | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
				| PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
				 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
				  | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);
#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif
1314 /* Set the default values for switches whose default depends on TARGET_64BIT
1315 in case they weren't overwritten by command line options. */
1318 if (flag_omit_frame_pointer
== 2)
1319 flag_omit_frame_pointer
= 1;
1320 if (flag_asynchronous_unwind_tables
== 2)
1321 flag_asynchronous_unwind_tables
= 1;
1322 if (flag_pcc_struct_return
== 2)
1323 flag_pcc_struct_return
= 0;
1327 if (flag_omit_frame_pointer
== 2)
1328 flag_omit_frame_pointer
= 0;
1329 if (flag_asynchronous_unwind_tables
== 2)
1330 flag_asynchronous_unwind_tables
= 0;
1331 if (flag_pcc_struct_return
== 2)
1332 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
1335 if (!ix86_tune_string
&& ix86_arch_string
)
1336 ix86_tune_string
= ix86_arch_string
;
1337 if (!ix86_tune_string
)
1339 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
1340 ix86_tune_defaulted
= 1;
1342 if (!ix86_arch_string
)
1343 ix86_arch_string
= TARGET_64BIT
? "x86-64" : "i386";
1345 if (ix86_cmodel_string
!= 0)
1347 if (!strcmp (ix86_cmodel_string
, "small"))
1348 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1350 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string
);
1351 else if (!strcmp (ix86_cmodel_string
, "32"))
1352 ix86_cmodel
= CM_32
;
1353 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
1354 ix86_cmodel
= CM_KERNEL
;
1355 else if (!strcmp (ix86_cmodel_string
, "medium") && !flag_pic
)
1356 ix86_cmodel
= CM_MEDIUM
;
1357 else if (!strcmp (ix86_cmodel_string
, "large") && !flag_pic
)
1358 ix86_cmodel
= CM_LARGE
;
1360 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
1364 ix86_cmodel
= CM_32
;
1366 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1368 if (ix86_asm_string
!= 0)
1370 if (!strcmp (ix86_asm_string
, "intel"))
1371 ix86_asm_dialect
= ASM_INTEL
;
1372 else if (!strcmp (ix86_asm_string
, "att"))
1373 ix86_asm_dialect
= ASM_ATT
;
1375 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
1377 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
1378 error ("code model %qs not supported in the %s bit mode",
1379 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
1380 if (ix86_cmodel
== CM_LARGE
)
1381 sorry ("code model %<large%> not supported yet");
1382 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
1383 sorry ("%i-bit mode not compiled in",
1384 (target_flags
& MASK_64BIT
) ? 64 : 32);
1386 for (i
= 0; i
< pta_size
; i
++)
1387 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
1389 ix86_arch
= processor_alias_table
[i
].processor
;
1390 /* Default cpu tuning to the architecture. */
1391 ix86_tune
= ix86_arch
;
1392 if (processor_alias_table
[i
].flags
& PTA_MMX
1393 && !(target_flags_explicit
& MASK_MMX
))
1394 target_flags
|= MASK_MMX
;
1395 if (processor_alias_table
[i
].flags
& PTA_3DNOW
1396 && !(target_flags_explicit
& MASK_3DNOW
))
1397 target_flags
|= MASK_3DNOW
;
1398 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
1399 && !(target_flags_explicit
& MASK_3DNOW_A
))
1400 target_flags
|= MASK_3DNOW_A
;
1401 if (processor_alias_table
[i
].flags
& PTA_SSE
1402 && !(target_flags_explicit
& MASK_SSE
))
1403 target_flags
|= MASK_SSE
;
1404 if (processor_alias_table
[i
].flags
& PTA_SSE2
1405 && !(target_flags_explicit
& MASK_SSE2
))
1406 target_flags
|= MASK_SSE2
;
1407 if (processor_alias_table
[i
].flags
& PTA_SSE3
1408 && !(target_flags_explicit
& MASK_SSE3
))
1409 target_flags
|= MASK_SSE3
;
1410 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1411 x86_prefetch_sse
= true;
1412 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1413 error ("CPU you selected does not support x86-64 "
1419 error ("bad value (%s) for -march= switch", ix86_arch_string
);
1421 for (i
= 0; i
< pta_size
; i
++)
1422 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
1424 ix86_tune
= processor_alias_table
[i
].processor
;
1425 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1427 if (ix86_tune_defaulted
)
1429 ix86_tune_string
= "x86-64";
1430 for (i
= 0; i
< pta_size
; i
++)
1431 if (! strcmp (ix86_tune_string
,
1432 processor_alias_table
[i
].name
))
1434 ix86_tune
= processor_alias_table
[i
].processor
;
1437 error ("CPU you selected does not support x86-64 "
1440 /* Intel CPUs have always interpreted SSE prefetch instructions as
1441 NOPs; so, we can enable SSE prefetch instructions even when
1442 -mtune (rather than -march) points us to a processor that has them.
1443 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1444 higher processors. */
1445 if (TARGET_CMOVE
&& (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
))
1446 x86_prefetch_sse
= true;
1450 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
1453 ix86_cost
= &size_cost
;
1455 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
1456 target_flags
|= processor_target_table
[ix86_tune
].target_enable
;
1457 target_flags
&= ~processor_target_table
[ix86_tune
].target_disable
;
1459 /* Arrange to set up i386_stack_locals for all functions. */
1460 init_machine_status
= ix86_init_machine_status
;
1462 /* Validate -mregparm= value. */
1463 if (ix86_regparm_string
)
1465 i
= atoi (ix86_regparm_string
);
1466 if (i
< 0 || i
> REGPARM_MAX
)
1467 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
1473 ix86_regparm
= REGPARM_MAX
;
1475 /* If the user has provided any of the -malign-* options,
1476 warn and use that value only if -falign-* is not set.
1477 Remove this code in GCC 3.2 or later. */
1478 if (ix86_align_loops_string
)
1480 warning (0, "-malign-loops is obsolete, use -falign-loops");
1481 if (align_loops
== 0)
1483 i
= atoi (ix86_align_loops_string
);
1484 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1485 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1487 align_loops
= 1 << i
;
1491 if (ix86_align_jumps_string
)
1493 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1494 if (align_jumps
== 0)
1496 i
= atoi (ix86_align_jumps_string
);
1497 if (i
< 0 || i
> MAX_CODE_ALIGN
)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1500 align_jumps
= 1 << i
;
1504 if (ix86_align_funcs_string
)
1506 warning (0, "-malign-functions is obsolete, use -falign-functions");
1507 if (align_functions
== 0)
1509 i
= atoi (ix86_align_funcs_string
);
1510 if (i
< 0 || i
> MAX_CODE_ALIGN
)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1513 align_functions
= 1 << i
;
1517 /* Default align_* from the processor table. */
1518 if (align_loops
== 0)
1520 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
1521 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
1523 if (align_jumps
== 0)
1525 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
1526 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
1528 if (align_functions
== 0)
1530 align_functions
= processor_target_table
[ix86_tune
].align_func
;
1533 /* Validate -mpreferred-stack-boundary= value, or provide default.
1534 The default of 128 bits is for Pentium III's SSE __m128, but we
1535 don't want additional code to keep the stack aligned when
1536 optimizing for code size. */
1537 ix86_preferred_stack_boundary
= (optimize_size
1538 ? TARGET_64BIT
? 128 : 32
1540 if (ix86_preferred_stack_boundary_string
)
1542 i
= atoi (ix86_preferred_stack_boundary_string
);
1543 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
1544 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
1545 TARGET_64BIT
? 4 : 2);
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
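      /* Editorial worked example, not in the original sources: with
	 -mpreferred-stack-boundary=4 this yields (1 << 4) * 8 == 128 bits,
	 i.e. the 16-byte alignment wanted for SSE __m128 values.  */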
1550 /* Validate -mbranch-cost= value, or provide default. */
1551 ix86_branch_cost
= processor_target_table
[ix86_tune
].cost
->branch_cost
;
1552 if (ix86_branch_cost_string
)
1554 i
= atoi (ix86_branch_cost_string
);
1556 error ("-mbranch-cost=%d is not between 0 and 5", i
);
1558 ix86_branch_cost
= i
;
1561 if (ix86_tls_dialect_string
)
1563 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
1564 ix86_tls_dialect
= TLS_DIALECT_GNU
;
1565 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
1566 ix86_tls_dialect
= TLS_DIALECT_SUN
;
1568 error ("bad value (%s) for -mtls-dialect= switch",
1569 ix86_tls_dialect_string
);
1572 /* Keep nonleaf frame pointers. */
1573 if (flag_omit_frame_pointer
)
1574 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
1575 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
1576 flag_omit_frame_pointer
= 1;
1578 /* If we're doing fast math, we don't care about comparison order
1579 wrt NaNs. This lets us use a shorter comparison sequence. */
1580 if (flag_unsafe_math_optimizations
)
1581 target_flags
&= ~MASK_IEEE_FP
;
1583 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1584 since the insns won't need emulation. */
1585 if (x86_arch_always_fancy_math_387
& (1 << ix86_arch
))
1586 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
  /* Likewise, if the target doesn't have a 387, or we've specified
     software floating point, don't use 387 inline intrinsics.  */
1591 target_flags
|= MASK_NO_FANCY_MATH_387
;
1593 /* Turn on SSE2 builtins for -msse3. */
1595 target_flags
|= MASK_SSE2
;
1597 /* Turn on SSE builtins for -msse2. */
1599 target_flags
|= MASK_SSE
;
1601 /* Turn on MMX builtins for -msse. */
1604 target_flags
|= MASK_MMX
& ~target_flags_explicit
;
1605 x86_prefetch_sse
= true;
1608 /* Turn on MMX builtins for 3Dnow. */
1610 target_flags
|= MASK_MMX
;
1614 if (TARGET_ALIGN_DOUBLE
)
1615 error ("-malign-double makes no sense in the 64bit mode");
1617 error ("-mrtd calling convention not supported in the 64bit mode");
1619 /* Enable by default the SSE and MMX builtins. Do allow the user to
1620 explicitly disable any of these. In particular, disabling SSE and
1621 MMX for kernel code is extremely useful. */
1623 |= ((MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
)
1624 & ~target_flags_explicit
);
  /* The i386 ABI does not specify a red zone.  It still makes sense to use it
     when the programmer takes care to keep the stack from being destroyed.  */
1630 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
1631 target_flags
|= MASK_NO_RED_ZONE
;
  ix86_fpmath = TARGET_FPMATH_DEFAULT;

  if (ix86_fpmath_string != 0)
    {
      if (! strcmp (ix86_fpmath_string, "387"))
        ix86_fpmath = FPMATH_387;
      else if (! strcmp (ix86_fpmath_string, "sse"))
        {
          if (!TARGET_SSE)
            {
              warning (0, "SSE instruction set disabled, using 387 arithmetics");
              ix86_fpmath = FPMATH_387;
            }
          else
            ix86_fpmath = FPMATH_SSE;
        }
      else if (! strcmp (ix86_fpmath_string, "387,sse")
               || ! strcmp (ix86_fpmath_string, "sse,387"))
        {
          if (!TARGET_SSE)
            {
              warning (0, "SSE instruction set disabled, using 387 arithmetics");
              ix86_fpmath = FPMATH_387;
            }
          else if (!TARGET_80387)
            {
              warning (0, "387 instruction set disabled, using SSE arithmetics");
              ix86_fpmath = FPMATH_SSE;
            }
          else
            ix86_fpmath = FPMATH_SSE | FPMATH_387;
        }
      else
        error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
    }
  /* If the i387 is disabled, then do not return values in it.  */
  if (!TARGET_80387)
    target_flags &= ~MASK_FLOAT_RETURNS;

  if ((x86_accumulate_outgoing_args & TUNEMASK)
      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }

  /* When scheduling description is not available, disable scheduler pass
     so it won't slow down the compilation and make x87 code slower.  */
  if (!TARGET_SCHEDULE)
    flag_schedule_insns_after_reload = flag_schedule_insns = 0;
}
void
optimization_options (int level, int size ATTRIBUTE_UNUSED)
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif

  /* The default values of these switches depend on TARGET_64BIT, which is
     not known at this moment.  Mark these values with 2 and let the user
     override them.  In case there is no command line option specifying
     them, we will set the defaults in override_options.  */
  flag_omit_frame_pointer = 2;
  flag_pcc_struct_return = 2;
  flag_asynchronous_unwind_tables = 2;
#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
  SUBTARGET_OPTIMIZATION_OPTIONS;
#endif
}
/* Table of valid machine attributes.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Cdecl attribute says the callee is a normal C declaration.  */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_regparm_attribute },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL, 0, 0, false, false, false, NULL }
};
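
/* Illustrative example (not part of the original sources): how the
   attributes registered above appear in user code.  The declarations are
   hypothetical and kept under '#if 0' so they have no effect here.  */
#if 0
int __attribute__ ((stdcall)) add2 (int a, int b);        /* callee pops its arguments */
int __attribute__ ((fastcall)) add2_fast (int a, int b);  /* first two args in ECX/EDX */
int __attribute__ ((regparm (3))) add3 (int, int, int);   /* up to three args in registers */
#endif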
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree func;

  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live.  */
  if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
    return false;

  if (decl)
    func = decl;
  else
    func = NULL;

  /* If we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  */
  if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp), func))
      != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
                                           cfun->decl)))
    return false;

  /* If this call is indirect, we'll need to be able to use a call-clobbered
     register for the address of the target function.  Make sure that all
     such registers are not used for passing parameters.  */
  if (!decl && !TARGET_64BIT)
    {
      tree type;

      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = TREE_OPERAND (exp, 0);     /* pointer expression */
      type = TREE_TYPE (type);          /* pointer type */
      type = TREE_TYPE (type);          /* function type */

      if (ix86_function_regparm (type, NULL) >= 3)
        {
          /* ??? Need to count the actual number of registers to be used,
             not the possible number of registers.  Fix later.  */
          return false;
        }
    }

#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  /* Dllimport'd functions are also called indirectly.  */
  if (decl && lookup_attribute ("dllimport", DECL_ATTRIBUTES (decl))
      && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
    return false;
#endif

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
/* Handle a "cdecl", "stdcall", or "fastcall" attribute;
   arguments as in struct attribute_spec.handler.  */

static tree
ix86_handle_cdecl_attribute (tree *node, tree name,
                             tree args ATTRIBUTE_UNUSED,
                             int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (0, "%qs attribute only applies to functions",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }
  else
    {
      if (is_attribute_p ("fastcall", name))
        {
          if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
            error ("fastcall and stdcall attributes are not compatible");
          else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
            error ("fastcall and regparm attributes are not compatible");
        }
      else if (is_attribute_p ("stdcall", name))
        {
          if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
            error ("fastcall and stdcall attributes are not compatible");
        }
    }

  if (TARGET_64BIT)
    {
      warning (0, "%qs attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Handle a "regparm" attribute;
   arguments as in struct attribute_spec.handler.  */

static tree
ix86_handle_regparm_attribute (tree *node, tree name, tree args,
                               int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (0, "%qs attribute only applies to functions",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }
  else
    {
      tree cst;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
        {
          warning (0, "%qs attribute requires an integer constant argument",
                   IDENTIFIER_POINTER (name));
          *no_add_attrs = true;
        }
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
        {
          warning (0, "argument to %qs attribute larger than %d",
                   IDENTIFIER_POINTER (name), REGPARM_MAX);
          *no_add_attrs = true;
        }

      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
        error ("fastcall and regparm attributes are not compatible");
    }

  return NULL_TREE;
}
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (tree type1, tree type2)
{
  /* Check for mismatch of non-default calling convention.  */
  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched fastcall types.  */
  if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
      != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
    return 0;

  /* Check for mismatched return types (cdecl vs stdcall).  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;

  if (ix86_function_regparm (type1, NULL)
      != ix86_function_regparm (type2, NULL))
    return 0;

  return 1;
}
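
/* Illustrative example (not part of the original sources): two hypothetical
   function-pointer types that the hook above should report as incompatible
   (return value 0), because only one of them carries the callee-pops
   convention selected by 'rtdstr'.  */
#if 0
typedef int (__attribute__ ((stdcall)) *stdcall_fn) (int);
typedef int (*plain_fn) (int);
#endif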
/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling function indirectly
   or considering a libcall.  */

static int
ix86_function_regparm (tree type, tree decl)
{
  tree attr;
  int regparm = ix86_regparm;
  bool user_convention = false;

  if (!TARGET_64BIT)
    {
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
        {
          regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
          user_convention = true;
        }

      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
        {
          regparm = 2;
          user_convention = true;
        }

      /* Use register calling convention for local functions when possible.  */
      if (!TARGET_64BIT && !user_convention && decl
          && flag_unit_at_a_time && !profile_flag)
        {
          struct cgraph_local_info *i = cgraph_local_info (decl);
          if (i && i->local)
            {
              /* We can't use regparm(3) for nested functions as these use
                 static chain pointer in third argument.  */
              if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
                regparm = 2;
              else
                regparm = 3;
            }
        }
    }

  return regparm;
}
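
/* Illustrative example (not part of the original sources): for the
   hypothetical declaration below, ix86_function_regparm should return 2
   for its function type, since the explicit attribute overrides the
   -mregparm default recorded in ix86_regparm.  */
#if 0
int __attribute__ ((regparm (2))) mul2 (int a, int b);
#endif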
/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
}
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

int
ix86_return_pops_args (tree fundecl, tree funtype, int size)
{
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
    {
      /* Stdcall and fastcall functions will pop the stack if not
         variable args.  */
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
          || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
        rtd = 1;

      if (rtd
          && (TYPE_ARG_TYPES (funtype) == NULL_TREE
              || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
                  == void_type_node)))
        return size;
    }

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !TARGET_64BIT
      && !KEEP_AGGREGATE_RETURN_POINTER)
    {
      int nregs = ix86_function_regparm (funtype, fundecl);

      if (!nregs)
        return GET_MODE_SIZE (Pmode);
    }

  return 0;
}
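
/* Illustrative example (not part of the original sources): for a
   hypothetical 'int __attribute__ ((stdcall)) f (int, int)', SIZE is 8 and
   the function above should return 8 (callee pops), while adding a trailing
   ellipsis makes the argument list variable, so the result drops to 0 and
   the caller pops instead.  */
#if 0
int __attribute__ ((stdcall)) f (int a, int b);   /* callee pops 8 bytes */
int __attribute__ ((stdcall)) g (int a, ...);     /* caller pops */
#endif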
/* Argument support functions.  */

/* Return true when register may be used to pass function parameters.  */

int
ix86_function_arg_regno_p (int regno)
{
  int i;

  if (!TARGET_64BIT)
    return (regno < REGPARM_MAX
            || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
  if (SSE_REGNO_P (regno) && TARGET_SSE)
    return true;
  /* RAX is used as hidden argument to va_arg functions.  */
  if (!regno)
    return true;
  for (i = 0; i < REGPARM_MAX; i++)
    if (regno == x86_64_int_parameter_registers[i])
      return true;
  return false;
}

/* Return true if we do not know how to pass TYPE solely in registers.  */

static bool
ix86_must_pass_in_stack (enum machine_mode mode, tree type)
{
  if (must_pass_in_stack_var_size_or_pad (mode, type))
    return true;

  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
     The layout_type routine is crafty and tries to trick us into passing
     currently unsupported vector types on the stack by using TImode.  */
  return (!TARGET_64BIT && mode == TImode
          && type && TREE_CODE (type) != VECTOR_TYPE);
}
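
/* Illustrative example (not part of the original sources): on 32-bit
   targets a hypothetical 128-bit integer type given TImode is caught by the
   check above and passed on the stack, whereas a 16-byte generic vector
   type (a true VECTOR_TYPE) is not forced to the stack by this check.  */
#if 0
typedef int ti_int __attribute__ ((mode (TI)));          /* forced onto the stack */
typedef float v4sf __attribute__ ((vector_size (16)));   /* not caught by this check */
#endif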
2072 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2073 for a call to a function whose data type is FNTYPE.
2074 For a library call, FNTYPE is 0. */
2077 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
2078 tree fntype
, /* tree ptr for function decl */
2079 rtx libname
, /* SYMBOL_REF of library name or 0 */
2082 static CUMULATIVE_ARGS zero_cum
;
2083 tree param
, next_param
;
2085 if (TARGET_DEBUG_ARG
)
2087 fprintf (stderr
, "\ninit_cumulative_args (");
2089 fprintf (stderr
, "fntype code = %s, ret code = %s",
2090 tree_code_name
[(int) TREE_CODE (fntype
)],
2091 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
2093 fprintf (stderr
, "no fntype");
2096 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
2101 /* Set up the number of registers to use for passing arguments. */
2103 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
2105 cum
->nregs
= ix86_regparm
;
2107 cum
->sse_nregs
= SSE_REGPARM_MAX
;
2109 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
2110 cum
->warn_sse
= true;
2111 cum
->warn_mmx
= true;
2112 cum
->maybe_vaarg
= false;
2114 /* Use ecx and edx registers if function has fastcall attribute */
2115 if (fntype
&& !TARGET_64BIT
)
2117 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
2124 /* Determine if this function has variable arguments. This is
2125 indicated by the last argument being 'void_type_mode' if there
2126 are no variable arguments. If there are variable arguments, then
2127 we won't pass anything in registers in 32-bit mode. */
2129 if (cum
->nregs
|| cum
->mmx_nregs
|| cum
->sse_nregs
)
2131 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
2132 param
!= 0; param
= next_param
)
2134 next_param
= TREE_CHAIN (param
);
2135 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
2146 cum
->maybe_vaarg
= true;
2150 if ((!fntype
&& !libname
)
2151 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
2152 cum
->maybe_vaarg
= true;
2154 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
2155 in SSE registers even for 32-bit mode and not just 3, but up to
2156 8 SSE arguments in registers. */
2157 if (!TARGET_64BIT
&& !cum
->maybe_vaarg
&& !cum
->fastcall
2158 && cum
->sse_nregs
== SSE_REGPARM_MAX
&& fndecl
2159 && TARGET_SSE_MATH
&& flag_unit_at_a_time
&& !profile_flag
)
2161 struct cgraph_local_info
*i
= cgraph_local_info (fndecl
);
2165 cum
->float_in_sse
= true;
2169 if (TARGET_DEBUG_ARG
)
2170 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
   But in the case of vector types, it is some vector mode.

   When we have only some of our vector isa extensions enabled, then there
   are some modes for which vector_mode_supported_p is false.  For these
   modes, the generic vector support in gcc will choose some non-vector mode
   in order to implement the type.  By computing the natural mode, we'll
   select the proper ABI location for the operand and not depend on whatever
   the middle-end decides to do with these vector types.  */

static enum machine_mode
type_natural_mode (tree type)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16)
          /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
          && TYPE_VECTOR_SUBPARTS (type) > 1)
        {
          enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));

          if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
            mode = MIN_MODE_VECTOR_FLOAT;
          else
            mode = MIN_MODE_VECTOR_INT;

          /* Get the mode which has this inner mode and number of units.  */
          for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
            if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
                && GET_MODE_INNER (mode) == innermode)
              return mode;

          gcc_unreachable ();
        }
    }

  return mode;
}
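
/* Illustrative example (not part of the original sources): for a
   hypothetical 'typedef float v4sf __attribute__ ((vector_size (16)));'
   whose TYPE_MODE fell back to a non-vector mode, the loop above should
   recover V4SFmode (four SFmode units), so the ABI slot is chosen as if
   the vector ISA were enabled.  */
#if 0
typedef float v4sf __attribute__ ((vector_size (16)));
#endif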
/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
   this may not agree with the mode that the type system has chosen for the
   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */

static rtx
gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
                     unsigned int regno)
{
  rtx tmp;

  if (orig_mode != BLKmode)
    tmp = gen_rtx_REG (orig_mode, regno);
  else
    {
      tmp = gen_rtx_REG (mode, regno);
      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
    }

  return tmp;
}
/* x86-64 register passing implementation.  See the x86-64 ABI for details.
   The goal of this code is to classify each 8-byte chunk of an incoming
   argument by register class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
     MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS
      || class1 == X86_64_X87UP_CLASS
      || class1 == X86_64_COMPLEX_X87_CLASS
      || class2 == X86_64_X87_CLASS
      || class2 == X86_64_X87UP_CLASS
      || class2 == X86_64_COMPLEX_X87_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
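
/* Illustrative examples (not part of the original sources): applying the
   rules above, merge_classes (X86_64_INTEGERSI_CLASS, X86_64_SSESF_CLASS)
   yields X86_64_INTEGERSI_CLASS (rule #4), while merging X86_64_X87_CLASS
   with any other non-memory class yields X86_64_MEMORY_CLASS (rule #5),
   so the containing argument ends up being passed in memory.  */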
2287 /* Classify the argument of type TYPE and mode MODE.
2288 CLASSES will be filled by the register class used to pass each word
2289 of the operand. The number of words is returned. In case the parameter
2290 should be passed in memory, 0 is returned. As a special case for zero
2291 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2293 BIT_OFFSET is used internally for handling records and specifies offset
2294 of the offset in bits modulo 256 to avoid overflow cases.
2296 See the x86-64 PS ABI for details.
2300 classify_argument (enum machine_mode mode
, tree type
,
2301 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
2303 HOST_WIDE_INT bytes
=
2304 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2305 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2307 /* Variable sized entities are always passed/returned in memory. */
2311 if (mode
!= VOIDmode
2312 && targetm
.calls
.must_pass_in_stack (mode
, type
))
2315 if (type
&& AGGREGATE_TYPE_P (type
))
2319 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
2321 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2325 for (i
= 0; i
< words
; i
++)
2326 classes
[i
] = X86_64_NO_CLASS
;
2328 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2329 signalize memory class, so handle it as special case. */
2332 classes
[0] = X86_64_NO_CLASS
;
2336 /* Classify each field of record and merge classes. */
2337 switch (TREE_CODE (type
))
2340 /* For classes first merge in the field of the subclasses. */
2341 if (TYPE_BINFO (type
))
2343 tree binfo
, base_binfo
;
2346 for (binfo
= TYPE_BINFO (type
), basenum
= 0;
2347 BINFO_BASE_ITERATE (binfo
, basenum
, base_binfo
); basenum
++)
2350 int offset
= tree_low_cst (BINFO_OFFSET (base_binfo
), 0) * 8;
2351 tree type
= BINFO_TYPE (base_binfo
);
2353 num
= classify_argument (TYPE_MODE (type
),
2355 (offset
+ bit_offset
) % 256);
2358 for (i
= 0; i
< num
; i
++)
2360 int pos
= (offset
+ (bit_offset
% 64)) / 8 / 8;
2362 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
2366 /* And now merge the fields of structure. */
2367 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
2369 if (TREE_CODE (field
) == FIELD_DECL
)
2373 /* Bitfields are always classified as integer. Handle them
2374 early, since later code would consider them to be
2375 misaligned integers. */
2376 if (DECL_BIT_FIELD (field
))
2378 for (i
= int_bit_position (field
) / 8 / 8;
2379 i
< (int_bit_position (field
)
2380 + tree_low_cst (DECL_SIZE (field
), 0)
2383 merge_classes (X86_64_INTEGER_CLASS
,
2388 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
2389 TREE_TYPE (field
), subclasses
,
2390 (int_bit_position (field
)
2391 + bit_offset
) % 256);
2394 for (i
= 0; i
< num
; i
++)
2397 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
2399 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
2407 /* Arrays are handled as small records. */
2410 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
2411 TREE_TYPE (type
), subclasses
, bit_offset
);
2415 /* The partial classes are now full classes. */
2416 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
2417 subclasses
[0] = X86_64_SSE_CLASS
;
2418 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
2419 subclasses
[0] = X86_64_INTEGER_CLASS
;
2421 for (i
= 0; i
< words
; i
++)
2422 classes
[i
] = subclasses
[i
% num
];
2427 case QUAL_UNION_TYPE
:
2428 /* Unions are similar to RECORD_TYPE but offset is always 0.
2431 /* Unions are not derived. */
2432 gcc_assert (!TYPE_BINFO (type
)
2433 || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type
)));
2434 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
2436 if (TREE_CODE (field
) == FIELD_DECL
)
2439 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
2440 TREE_TYPE (field
), subclasses
,
2444 for (i
= 0; i
< num
; i
++)
2445 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
2454 /* Final merger cleanup. */
2455 for (i
= 0; i
< words
; i
++)
2457 /* If one class is MEMORY, everything should be passed in
2459 if (classes
[i
] == X86_64_MEMORY_CLASS
)
2462 /* The X86_64_SSEUP_CLASS should be always preceded by
2463 X86_64_SSE_CLASS. */
2464 if (classes
[i
] == X86_64_SSEUP_CLASS
2465 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
2466 classes
[i
] = X86_64_SSE_CLASS
;
2468 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2469 if (classes
[i
] == X86_64_X87UP_CLASS
2470 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
2471 classes
[i
] = X86_64_SSE_CLASS
;
2476 /* Compute alignment needed. We align all types to natural boundaries with
2477 exception of XFmode that is aligned to 64bits. */
2478 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
2480 int mode_alignment
= GET_MODE_BITSIZE (mode
);
2483 mode_alignment
= 128;
2484 else if (mode
== XCmode
)
2485 mode_alignment
= 256;
2486 if (COMPLEX_MODE_P (mode
))
2487 mode_alignment
/= 2;
2488 /* Misaligned fields are always returned in memory. */
2489 if (bit_offset
% mode_alignment
)
2493 /* for V1xx modes, just use the base mode */
2494 if (VECTOR_MODE_P (mode
)
2495 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
2496 mode
= GET_MODE_INNER (mode
);
2498 /* Classification of atomic types. */
2508 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
2509 classes
[0] = X86_64_INTEGERSI_CLASS
;
2511 classes
[0] = X86_64_INTEGER_CLASS
;
2515 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
2520 if (!(bit_offset
% 64))
2521 classes
[0] = X86_64_SSESF_CLASS
;
2523 classes
[0] = X86_64_SSE_CLASS
;
2526 classes
[0] = X86_64_SSEDF_CLASS
;
2529 classes
[0] = X86_64_X87_CLASS
;
2530 classes
[1] = X86_64_X87UP_CLASS
;
2533 classes
[0] = X86_64_SSE_CLASS
;
2534 classes
[1] = X86_64_SSEUP_CLASS
;
2537 classes
[0] = X86_64_SSE_CLASS
;
2540 classes
[0] = X86_64_SSEDF_CLASS
;
2541 classes
[1] = X86_64_SSEDF_CLASS
;
2544 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
2547 /* This modes is larger than 16 bytes. */
2555 classes
[0] = X86_64_SSE_CLASS
;
2556 classes
[1] = X86_64_SSEUP_CLASS
;
2562 classes
[0] = X86_64_SSE_CLASS
;
2568 gcc_assert (VECTOR_MODE_P (mode
));
2573 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
2575 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
2576 classes
[0] = X86_64_INTEGERSI_CLASS
;
2578 classes
[0] = X86_64_INTEGER_CLASS
;
2579 classes
[1] = X86_64_INTEGER_CLASS
;
2580 return 1 + (bytes
> 8);
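
/* Illustrative example (not part of the original sources): under the
   classification above, a hypothetical 'struct s { double d; int i; }'
   spans two eightbytes; the first should classify as X86_64_SSEDF_CLASS
   (the double) and the second as X86_64_INTEGERSI_CLASS (the int), so the
   struct is passed in one SSE register and one integer register.  */
#if 0
struct s { double d; int i; };   /* hypothetical example type */
#endif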
2584 /* Examine the argument and return set number of register required in each
2585 class. Return 0 iff parameter should be passed in memory. */
2587 examine_argument (enum machine_mode mode
, tree type
, int in_return
,
2588 int *int_nregs
, int *sse_nregs
)
2590 enum x86_64_reg_class
class[MAX_CLASSES
];
2591 int n
= classify_argument (mode
, type
, class, 0);
2597 for (n
--; n
>= 0; n
--)
2600 case X86_64_INTEGER_CLASS
:
2601 case X86_64_INTEGERSI_CLASS
:
2604 case X86_64_SSE_CLASS
:
2605 case X86_64_SSESF_CLASS
:
2606 case X86_64_SSEDF_CLASS
:
2609 case X86_64_NO_CLASS
:
2610 case X86_64_SSEUP_CLASS
:
2612 case X86_64_X87_CLASS
:
2613 case X86_64_X87UP_CLASS
:
2617 case X86_64_COMPLEX_X87_CLASS
:
2618 return in_return
? 2 : 0;
2619 case X86_64_MEMORY_CLASS
:
2625 /* Construct container for the argument used by GCC interface. See
2626 FUNCTION_ARG for the detailed description. */
2629 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
2630 tree type
, int in_return
, int nintregs
, int nsseregs
,
2631 const int *intreg
, int sse_regno
)
2633 enum machine_mode tmpmode
;
2635 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2636 enum x86_64_reg_class
class[MAX_CLASSES
];
2640 int needed_sseregs
, needed_intregs
;
2641 rtx exp
[MAX_CLASSES
];
2644 n
= classify_argument (mode
, type
, class, 0);
2645 if (TARGET_DEBUG_ARG
)
2648 fprintf (stderr
, "Memory class\n");
2651 fprintf (stderr
, "Classes:");
2652 for (i
= 0; i
< n
; i
++)
2654 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
2656 fprintf (stderr
, "\n");
2661 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
2664 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
2667 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2668 some less clueful developer tries to use floating-point anyway. */
2669 if (needed_sseregs
&& !TARGET_SSE
)
2671 static bool issued_error
;
2674 issued_error
= true;
2676 error ("SSE register return with SSE disabled");
2678 error ("SSE register argument with SSE disabled");
2683 /* First construct simple cases. Avoid SCmode, since we want to use
2684 single register to pass this type. */
2685 if (n
== 1 && mode
!= SCmode
)
2688 case X86_64_INTEGER_CLASS
:
2689 case X86_64_INTEGERSI_CLASS
:
2690 return gen_rtx_REG (mode
, intreg
[0]);
2691 case X86_64_SSE_CLASS
:
2692 case X86_64_SSESF_CLASS
:
2693 case X86_64_SSEDF_CLASS
:
2694 return gen_reg_or_parallel (mode
, orig_mode
, SSE_REGNO (sse_regno
));
2695 case X86_64_X87_CLASS
:
2696 case X86_64_COMPLEX_X87_CLASS
:
2697 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
2698 case X86_64_NO_CLASS
:
2699 /* Zero sized array, struct or class. */
2704 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
2706 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
2708 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
2709 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
2710 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
2711 && class[1] == X86_64_INTEGER_CLASS
2712 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
2713 && intreg
[0] + 1 == intreg
[1])
2714 return gen_rtx_REG (mode
, intreg
[0]);
2716 /* Otherwise figure out the entries of the PARALLEL. */
2717 for (i
= 0; i
< n
; i
++)
2721 case X86_64_NO_CLASS
:
2723 case X86_64_INTEGER_CLASS
:
2724 case X86_64_INTEGERSI_CLASS
:
2725 /* Merge TImodes on aligned occasions here too. */
2726 if (i
* 8 + 8 > bytes
)
2727 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
2728 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
2732 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2733 if (tmpmode
== BLKmode
)
2735 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2736 gen_rtx_REG (tmpmode
, *intreg
),
2740 case X86_64_SSESF_CLASS
:
2741 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2742 gen_rtx_REG (SFmode
,
2743 SSE_REGNO (sse_regno
)),
2747 case X86_64_SSEDF_CLASS
:
2748 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2749 gen_rtx_REG (DFmode
,
2750 SSE_REGNO (sse_regno
)),
2754 case X86_64_SSE_CLASS
:
2755 if (i
< n
- 1 && class[i
+ 1] == X86_64_SSEUP_CLASS
)
2759 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2760 gen_rtx_REG (tmpmode
,
2761 SSE_REGNO (sse_regno
)),
2763 if (tmpmode
== TImode
)
2772 /* Empty aligned struct, union or class. */
2776 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
2777 for (i
= 0; i
< nexps
; i
++)
2778 XVECEXP (ret
, 0, i
) = exp
[i
];
2782 /* Update the data in CUM to advance over an argument
2783 of mode MODE and data type TYPE.
2784 (TYPE is null for libcalls where that information may not be available.) */
2787 function_arg_advance (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
2788 tree type
, int named
)
2791 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2792 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2795 mode
= type_natural_mode (type
);
2797 if (TARGET_DEBUG_ARG
)
2798 fprintf (stderr
, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
2799 "mode=%s, named=%d)\n\n",
2800 words
, cum
->words
, cum
->nregs
, cum
->sse_nregs
,
2801 GET_MODE_NAME (mode
), named
);
2805 int int_nregs
, sse_nregs
;
2806 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
2807 cum
->words
+= words
;
2808 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
2810 cum
->nregs
-= int_nregs
;
2811 cum
->sse_nregs
-= sse_nregs
;
2812 cum
->regno
+= int_nregs
;
2813 cum
->sse_regno
+= sse_nregs
;
2816 cum
->words
+= words
;
2834 cum
->words
+= words
;
2835 cum
->nregs
-= words
;
2836 cum
->regno
+= words
;
2838 if (cum
->nregs
<= 0)
2849 if (!cum
->float_in_sse
)
2860 if (!type
|| !AGGREGATE_TYPE_P (type
))
2862 cum
->sse_words
+= words
;
2863 cum
->sse_nregs
-= 1;
2864 cum
->sse_regno
+= 1;
2865 if (cum
->sse_nregs
<= 0)
2877 if (!type
|| !AGGREGATE_TYPE_P (type
))
2879 cum
->mmx_words
+= words
;
2880 cum
->mmx_nregs
-= 1;
2881 cum
->mmx_regno
+= 1;
2882 if (cum
->mmx_nregs
<= 0)
2893 /* Define where to put the arguments to a function.
2894 Value is zero to push the argument on the stack,
2895 or a hard register in which to store the argument.
2897 MODE is the argument's machine mode.
2898 TYPE is the data type of the argument (as a tree).
2899 This is null for libcalls where that information may
2901 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2902 the preceding args and about the function being called.
2903 NAMED is nonzero if this argument is a named parameter
2904 (otherwise it is an extra parameter matching an ellipsis). */
2907 function_arg (CUMULATIVE_ARGS
*cum
, enum machine_mode orig_mode
,
2908 tree type
, int named
)
2910 enum machine_mode mode
= orig_mode
;
2913 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2914 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2915 static bool warnedsse
, warnedmmx
;
2917 /* To simplify the code below, represent vector types with a vector mode
2918 even if MMX/SSE are not active. */
2919 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
2920 mode
= type_natural_mode (type
);
2922 /* Handle a hidden AL argument containing number of registers for varargs
2923 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2925 if (mode
== VOIDmode
)
2928 return GEN_INT (cum
->maybe_vaarg
2929 ? (cum
->sse_nregs
< 0
2937 ret
= construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
2939 &x86_64_int_parameter_registers
[cum
->regno
],
2944 /* For now, pass fp/complex values on the stack. */
2956 if (words
<= cum
->nregs
)
2958 int regno
= cum
->regno
;
2960 /* Fastcall allocates the first two DWORD (SImode) or
2961 smaller arguments to ECX and EDX. */
2964 if (mode
== BLKmode
|| mode
== DImode
)
2967 /* ECX not EAX is the first allocated register. */
2971 ret
= gen_rtx_REG (mode
, regno
);
2978 if (!cum
->float_in_sse
)
2988 if (!type
|| !AGGREGATE_TYPE_P (type
))
2990 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
2993 warning (0, "SSE vector argument without SSE enabled "
2997 ret
= gen_reg_or_parallel (mode
, orig_mode
,
2998 cum
->sse_regno
+ FIRST_SSE_REG
);
3005 if (!type
|| !AGGREGATE_TYPE_P (type
))
3007 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
3010 warning (0, "MMX vector argument without MMX enabled "
3014 ret
= gen_reg_or_parallel (mode
, orig_mode
,
3015 cum
->mmx_regno
+ FIRST_MMX_REG
);
3020 if (TARGET_DEBUG_ARG
)
3023 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3024 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
3027 print_simple_rtl (stderr
, ret
);
3029 fprintf (stderr
, ", stack");
3031 fprintf (stderr
, " )\n");
3037 /* A C expression that indicates when an argument must be passed by
3038 reference. If nonzero for an argument, a copy of that argument is
3039 made in memory and a pointer to the argument is passed instead of
3040 the argument itself. The pointer is passed in whatever way is
3041 appropriate for passing a pointer to that type. */
3044 ix86_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
3045 enum machine_mode mode ATTRIBUTE_UNUSED
,
3046 tree type
, bool named ATTRIBUTE_UNUSED
)
3051 if (type
&& int_size_in_bytes (type
) == -1)
3053 if (TARGET_DEBUG_ARG
)
3054 fprintf (stderr
, "function_arg_pass_by_reference\n");
3061 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
3062 ABI. Only called if TARGET_SSE. */
3064 contains_128bit_aligned_vector_p (tree type
)
3066 enum machine_mode mode
= TYPE_MODE (type
);
3067 if (SSE_REG_MODE_P (mode
)
3068 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
3070 if (TYPE_ALIGN (type
) < 128)
3073 if (AGGREGATE_TYPE_P (type
))
3075 /* Walk the aggregates recursively. */
3076 switch (TREE_CODE (type
))
3080 case QUAL_UNION_TYPE
:
3084 if (TYPE_BINFO (type
))
3086 tree binfo
, base_binfo
;
3089 for (binfo
= TYPE_BINFO (type
), i
= 0;
3090 BINFO_BASE_ITERATE (binfo
, i
, base_binfo
); i
++)
3091 if (contains_128bit_aligned_vector_p
3092 (BINFO_TYPE (base_binfo
)))
3095 /* And now merge the fields of structure. */
3096 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3098 if (TREE_CODE (field
) == FIELD_DECL
3099 && contains_128bit_aligned_vector_p (TREE_TYPE (field
)))
3106 /* Just for use if some languages passes arrays by value. */
3107 if (contains_128bit_aligned_vector_p (TREE_TYPE (type
)))
3117 /* Gives the alignment boundary, in bits, of an argument with the
3118 specified mode and type. */
3121 ix86_function_arg_boundary (enum machine_mode mode
, tree type
)
3125 align
= TYPE_ALIGN (type
);
3127 align
= GET_MODE_ALIGNMENT (mode
);
3128 if (align
< PARM_BOUNDARY
)
3129 align
= PARM_BOUNDARY
;
3132 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3133 make an exception for SSE modes since these require 128bit
3136 The handling here differs from field_alignment. ICC aligns MMX
3137 arguments to 4 byte boundaries, while structure fields are aligned
3138 to 8 byte boundaries. */
3140 align
= PARM_BOUNDARY
;
3143 if (!SSE_REG_MODE_P (mode
))
3144 align
= PARM_BOUNDARY
;
3148 if (!contains_128bit_aligned_vector_p (type
))
3149 align
= PARM_BOUNDARY
;
3157 /* Return true if N is a possible register number of function value. */
3159 ix86_function_value_regno_p (int regno
)
3163 return ((regno
) == 0
3164 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
3165 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
));
3167 return ((regno
) == 0 || (regno
) == FIRST_FLOAT_REG
3168 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
)
3169 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
));
3172 /* Define how to find the value returned by a function.
3173 VALTYPE is the data type of the value (as a tree).
3174 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3175 otherwise, FUNC is 0. */
3177 ix86_function_value (tree valtype
, tree func
)
3179 enum machine_mode natmode
= type_natural_mode (valtype
);
3183 rtx ret
= construct_container (natmode
, TYPE_MODE (valtype
), valtype
,
3184 1, REGPARM_MAX
, SSE_REGPARM_MAX
,
3185 x86_64_int_return_registers
, 0);
3186 /* For zero sized structures, construct_container return NULL, but we
3187 need to keep rest of compiler happy by returning meaningful value. */
3189 ret
= gen_rtx_REG (TYPE_MODE (valtype
), 0);
3193 return gen_rtx_REG (TYPE_MODE (valtype
), ix86_value_regno (natmode
, func
));
3196 /* Return false iff type is returned in memory. */
3198 ix86_return_in_memory (tree type
)
3200 int needed_intregs
, needed_sseregs
, size
;
3201 enum machine_mode mode
= type_natural_mode (type
);
3204 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
3206 if (mode
== BLKmode
)
3209 size
= int_size_in_bytes (type
);
3211 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
3214 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
3216 /* User-created vectors small enough to fit in EAX. */
3220 /* MMX/3dNow values are returned on the stack, since we've
3221 got to EMMS/FEMMS before returning. */
3225 /* SSE values are returned in XMM0, except when it doesn't exist. */
3227 return (TARGET_SSE
? 0 : 1);
3238 /* When returning SSE vector types, we have a choice of either
3239 (1) being abi incompatible with a -march switch, or
3240 (2) generating an error.
3241 Given no good solution, I think the safest thing is one warning.
3242 The user won't be able to use -Werror, but....
3244 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3245 called in response to actually generating a caller or callee that
3246 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3247 via aggregate_value_p for general type probing from tree-ssa. */
3250 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
3254 if (!TARGET_SSE
&& type
&& !warned
)
3256 /* Look at the return type of the function, not the function type. */
3257 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
3260 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
3263 warning (0, "SSE vector return without SSE enabled changes the ABI");
3270 /* Define how to find the value returned by a library function
3271 assuming the value has mode MODE. */
3273 ix86_libcall_value (enum machine_mode mode
)
3284 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
3287 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
3291 return gen_rtx_REG (mode
, 0);
3295 return gen_rtx_REG (mode
, ix86_value_regno (mode
, NULL
));
3298 /* Given a mode, return the register to use for a return value. */
3301 ix86_value_regno (enum machine_mode mode
, tree func
)
3303 gcc_assert (!TARGET_64BIT
);
3305 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3306 we prevent this case when sse is not available. */
3307 if (mode
== TImode
|| (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
3308 return FIRST_SSE_REG
;
3310 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
3311 if (GET_MODE_CLASS (mode
) != MODE_FLOAT
|| !TARGET_FLOAT_RETURNS_IN_80387
)
3314 /* Floating point return values in %st(0), except for local functions when
3315 SSE math is enabled. */
3316 if (func
&& SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
3317 && flag_unit_at_a_time
)
3319 struct cgraph_local_info
*i
= cgraph_local_info (func
);
3321 return FIRST_SSE_REG
;
3324 return FIRST_FLOAT_REG
;
3327 /* Create the va_list data type. */
3330 ix86_build_builtin_va_list (void)
3332 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
3334 /* For i386 we use plain pointer to argument area. */
3336 return build_pointer_type (char_type_node
);
3338 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
3339 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
3341 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
3342 unsigned_type_node
);
3343 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
3344 unsigned_type_node
);
3345 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
3347 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
3350 va_list_gpr_counter_field
= f_gpr
;
3351 va_list_fpr_counter_field
= f_fpr
;
3353 DECL_FIELD_CONTEXT (f_gpr
) = record
;
3354 DECL_FIELD_CONTEXT (f_fpr
) = record
;
3355 DECL_FIELD_CONTEXT (f_ovf
) = record
;
3356 DECL_FIELD_CONTEXT (f_sav
) = record
;
3358 TREE_CHAIN (record
) = type_decl
;
3359 TYPE_NAME (record
) = type_decl
;
3360 TYPE_FIELDS (record
) = f_gpr
;
3361 TREE_CHAIN (f_gpr
) = f_fpr
;
3362 TREE_CHAIN (f_fpr
) = f_ovf
;
3363 TREE_CHAIN (f_ovf
) = f_sav
;
3365 layout_type (record
);
3367 /* The correct type is an array type of one element. */
3368 return build_array_type (record
, build_index_type (size_zero_node
));
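
/* Illustrative sketch (not part of the original sources): the record built
   above corresponds to the layout below.  The typedef is hypothetical and
   shown only for reference.  */
#if 0
typedef struct example_va_list_tag
{
  unsigned int gp_offset;       /* byte offset of the next GP register slot */
  unsigned int fp_offset;       /* byte offset of the next SSE register slot */
  void *overflow_arg_area;      /* stack-passed arguments */
  void *reg_save_area;          /* register save block set up by the prologue */
} example_va_list[1];
#endif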
3371 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3374 ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3375 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
3378 CUMULATIVE_ARGS next_cum
;
3379 rtx save_area
= NULL_RTX
, mem
;
3392 if (! cfun
->va_list_gpr_size
&& ! cfun
->va_list_fpr_size
)
3395 /* Indicate to allocate space on the stack for varargs save area. */
3396 ix86_save_varrargs_registers
= 1;
3398 cfun
->stack_alignment_needed
= 128;
3400 fntype
= TREE_TYPE (current_function_decl
);
3401 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
3402 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
3403 != void_type_node
));
3405 /* For varargs, we do not want to skip the dummy va_dcl argument.
3406 For stdargs, we do want to skip the last named argument. */
3409 function_arg_advance (&next_cum
, mode
, type
, 1);
3412 save_area
= frame_pointer_rtx
;
3414 set
= get_varargs_alias_set ();
3416 for (i
= next_cum
.regno
;
3418 && i
< next_cum
.regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
3421 mem
= gen_rtx_MEM (Pmode
,
3422 plus_constant (save_area
, i
* UNITS_PER_WORD
));
3423 set_mem_alias_set (mem
, set
);
3424 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
3425 x86_64_int_parameter_registers
[i
]));
3428 if (next_cum
.sse_nregs
&& cfun
->va_list_fpr_size
)
3430 /* Now emit code to save SSE registers. The AX parameter contains number
3431 of SSE parameter registers used to call this function. We use
3432 sse_prologue_save insn template that produces computed jump across
3433 SSE saves. We need some preparation work to get this working. */
3435 label
= gen_label_rtx ();
3436 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
3438 /* Compute address to jump to :
3439 label - 5*eax + nnamed_sse_arguments*5 */
3440 tmp_reg
= gen_reg_rtx (Pmode
);
3441 nsse_reg
= gen_reg_rtx (Pmode
);
3442 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
3443 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
3444 gen_rtx_MULT (Pmode
, nsse_reg
,
3446 if (next_cum
.sse_regno
)
3449 gen_rtx_CONST (DImode
,
3450 gen_rtx_PLUS (DImode
,
3452 GEN_INT (next_cum
.sse_regno
* 4))));
3454 emit_move_insn (nsse_reg
, label_ref
);
3455 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
3457 /* Compute address of memory block we save into. We always use pointer
3458 pointing 127 bytes after first byte to store - this is needed to keep
3459 instruction size limited by 4 bytes. */
3460 tmp_reg
= gen_reg_rtx (Pmode
);
3461 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
3462 plus_constant (save_area
,
3463 8 * REGPARM_MAX
+ 127)));
3464 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
3465 set_mem_alias_set (mem
, set
);
3466 set_mem_align (mem
, BITS_PER_WORD
);
3468 /* And finally do the dirty job! */
3469 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
3470 GEN_INT (next_cum
.sse_regno
), label
));
3475 /* Implement va_start. */
3478 ix86_va_start (tree valist
, rtx nextarg
)
3480 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
3481 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
3482 tree gpr
, fpr
, ovf
, sav
, t
;
3484 /* Only 64bit target needs something special. */
3487 std_expand_builtin_va_start (valist
, nextarg
);
3491 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
3492 f_fpr
= TREE_CHAIN (f_gpr
);
3493 f_ovf
= TREE_CHAIN (f_fpr
);
3494 f_sav
= TREE_CHAIN (f_ovf
);
3496 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
3497 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
3498 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
3499 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
3500 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
3502 /* Count number of gp and fp argument registers used. */
3503 words
= current_function_args_info
.words
;
3504 n_gpr
= current_function_args_info
.regno
;
3505 n_fpr
= current_function_args_info
.sse_regno
;
3507 if (TARGET_DEBUG_ARG
)
3508 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3509 (int) words
, (int) n_gpr
, (int) n_fpr
);
3511 if (cfun
->va_list_gpr_size
)
3513 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
,
3514 build_int_cst (NULL_TREE
, n_gpr
* 8));
3515 TREE_SIDE_EFFECTS (t
) = 1;
3516 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3519 if (cfun
->va_list_fpr_size
)
3521 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
,
3522 build_int_cst (NULL_TREE
, n_fpr
* 16 + 8*REGPARM_MAX
));
3523 TREE_SIDE_EFFECTS (t
) = 1;
3524 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3527 /* Find the overflow area. */
3528 t
= make_tree (TREE_TYPE (ovf
), virtual_incoming_args_rtx
);
3530 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), t
,
3531 build_int_cst (NULL_TREE
, words
* UNITS_PER_WORD
));
3532 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
3533 TREE_SIDE_EFFECTS (t
) = 1;
3534 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3536 if (cfun
->va_list_gpr_size
|| cfun
->va_list_fpr_size
)
3538 /* Find the register save area.
3539 Prologue of the function save it right above stack frame. */
3540 t
= make_tree (TREE_TYPE (sav
), frame_pointer_rtx
);
3541 t
= build (MODIFY_EXPR
, TREE_TYPE (sav
), sav
, t
);
3542 TREE_SIDE_EFFECTS (t
) = 1;
3543 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3547 /* Implement va_arg. */
3550 ix86_gimplify_va_arg (tree valist
, tree type
, tree
*pre_p
, tree
*post_p
)
3552 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
3553 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
3554 tree gpr
, fpr
, ovf
, sav
, t
;
3556 tree lab_false
, lab_over
= NULL_TREE
;
3561 enum machine_mode nat_mode
;
3563 /* Only 64bit target needs something special. */
3565 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
3567 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
3568 f_fpr
= TREE_CHAIN (f_gpr
);
3569 f_ovf
= TREE_CHAIN (f_fpr
);
3570 f_sav
= TREE_CHAIN (f_ovf
);
3572 valist
= build_va_arg_indirect_ref (valist
);
3573 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
3574 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
3575 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
3576 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
3578 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
3580 type
= build_pointer_type (type
);
3581 size
= int_size_in_bytes (type
);
3582 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3584 nat_mode
= type_natural_mode (type
);
3585 container
= construct_container (nat_mode
, TYPE_MODE (type
), type
, 0,
3586 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
3588 /* Pull the value out of the saved registers. */
3590 addr
= create_tmp_var (ptr_type_node
, "addr");
3591 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
3595 int needed_intregs
, needed_sseregs
;
3597 tree int_addr
, sse_addr
;
3599 lab_false
= create_artificial_label ();
3600 lab_over
= create_artificial_label ();
3602 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
3604 need_temp
= (!REG_P (container
)
3605 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
3606 || TYPE_ALIGN (type
) > 128));
3608 /* In case we are passing structure, verify that it is consecutive block
3609 on the register save area. If not we need to do moves. */
3610 if (!need_temp
&& !REG_P (container
))
3612 /* Verify that all registers are strictly consecutive */
3613 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
3617 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
3619 rtx slot
= XVECEXP (container
, 0, i
);
3620 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
3621 || INTVAL (XEXP (slot
, 1)) != i
* 16)
3629 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
3631 rtx slot
= XVECEXP (container
, 0, i
);
3632 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
3633 || INTVAL (XEXP (slot
, 1)) != i
* 8)
3645 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
3646 DECL_POINTER_ALIAS_SET (int_addr
) = get_varargs_alias_set ();
3647 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
3648 DECL_POINTER_ALIAS_SET (sse_addr
) = get_varargs_alias_set ();
3651 /* First ensure that we fit completely in registers. */
3654 t
= build_int_cst (TREE_TYPE (gpr
),
3655 (REGPARM_MAX
- needed_intregs
+ 1) * 8);
3656 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
3657 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
3658 t
= build (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
3659 gimplify_and_add (t
, pre_p
);
3663 t
= build_int_cst (TREE_TYPE (fpr
),
3664 (SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
3666 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
3667 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
3668 t
= build (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
3669 gimplify_and_add (t
, pre_p
);
3672 /* Compute index to start of area used for integer regs. */
3675 /* int_addr = gpr + sav; */
3676 t
= fold_convert (ptr_type_node
, gpr
);
3677 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
3678 t
= build2 (MODIFY_EXPR
, void_type_node
, int_addr
, t
);
3679 gimplify_and_add (t
, pre_p
);
3683 /* sse_addr = fpr + sav; */
3684 t
= fold_convert (ptr_type_node
, fpr
);
3685 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
3686 t
= build2 (MODIFY_EXPR
, void_type_node
, sse_addr
, t
);
3687 gimplify_and_add (t
, pre_p
);
3692 tree temp
= create_tmp_var (type
, "va_arg_tmp");
3695 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
3696 t
= build2 (MODIFY_EXPR
, void_type_node
, addr
, t
);
3697 gimplify_and_add (t
, pre_p
);
3699 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
3701 rtx slot
= XVECEXP (container
, 0, i
);
3702 rtx reg
= XEXP (slot
, 0);
3703 enum machine_mode mode
= GET_MODE (reg
);
3704 tree piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
3705 tree addr_type
= build_pointer_type (piece_type
);
3708 tree dest_addr
, dest
;
3710 if (SSE_REGNO_P (REGNO (reg
)))
3712 src_addr
= sse_addr
;
3713 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
3717 src_addr
= int_addr
;
3718 src_offset
= REGNO (reg
) * 8;
3720 src_addr
= fold_convert (addr_type
, src_addr
);
3721 src_addr
= fold (build2 (PLUS_EXPR
, addr_type
, src_addr
,
3722 size_int (src_offset
)));
3723 src
= build_va_arg_indirect_ref (src_addr
);
3725 dest_addr
= fold_convert (addr_type
, addr
);
3726 dest_addr
= fold (build2 (PLUS_EXPR
, addr_type
, dest_addr
,
3727 size_int (INTVAL (XEXP (slot
, 1)))));
3728 dest
= build_va_arg_indirect_ref (dest_addr
);
3730 t
= build2 (MODIFY_EXPR
, void_type_node
, dest
, src
);
3731 gimplify_and_add (t
, pre_p
);
3737 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
3738 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
3739 t
= build2 (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
, t
);
3740 gimplify_and_add (t
, pre_p
);
3744 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
3745 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
3746 t
= build2 (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
, t
);
3747 gimplify_and_add (t
, pre_p
);
3750 t
= build1 (GOTO_EXPR
, void_type_node
, lab_over
);
3751 gimplify_and_add (t
, pre_p
);
3753 t
= build1 (LABEL_EXPR
, void_type_node
, lab_false
);
3754 append_to_statement_list (t
, pre_p
);
3757 /* ... otherwise out of the overflow area. */
3759 /* Care for on-stack alignment if needed. */
3760 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64)
3764 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
3765 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
,
3766 build_int_cst (TREE_TYPE (ovf
), align
- 1));
3767 t
= build (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
3768 build_int_cst (TREE_TYPE (t
), -align
));
3770 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
3772 t2
= build2 (MODIFY_EXPR
, void_type_node
, addr
, t
);
3773 gimplify_and_add (t2
, pre_p
);
3775 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
3776 build_int_cst (TREE_TYPE (t
), rsize
* UNITS_PER_WORD
));
3777 t
= build2 (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
3778 gimplify_and_add (t
, pre_p
);
3782 t
= build1 (LABEL_EXPR
, void_type_node
, lab_over
);
3783 append_to_statement_list (t
, pre_p
);
3786 ptrtype
= build_pointer_type (type
);
3787 addr
= fold_convert (ptrtype
, addr
);
3790 addr
= build_va_arg_indirect_ref (addr
);
3791 return build_va_arg_indirect_ref (addr
);
/* Return nonzero if OPNUM's MEM should be matched
   in movabs* patterns.  */

int
ix86_check_movabs (rtx insn, int opnum)
{
  rtx set, mem;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  mem = XEXP (set, opnum);
  while (GET_CODE (mem) == SUBREG)
    mem = SUBREG_REG (mem);
  gcc_assert (GET_CODE (mem) == MEM);
  return (volatile_ok || !MEM_VOLATILE_P (mem));
}
/* Initialize the table of extra 80387 mathematical constants.  */

static void
init_ext_80387_constants (void)
{
  static const char * cst[5] =
  {
    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
  };
  int i;

  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
                    XFmode, &ext_80387_constants_table[i]);
    }

  ext_80387_constants_init = 1;
}
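
/* Illustrative note (not part of the original sources): the five strings
   above are log10(2), ln(2), log2(e), log2(10) and pi, the values pushed by
   fldlg2, fldln2, fldl2e, fldl2t and fldpi respectively.  A hypothetical
   'long double pi = 3.1415926535897932385L;' may therefore be materialized
   with a single fldpi, rather than a memory load, when the constant matches
   the table entry exactly and the tuning allows it.  */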
/* Return true if the constant is something that can be loaded with
   a special instruction.  */

int
standard_80387_constant_p (rtx x)
{
  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
    return -1;

  if (x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (x == CONST1_RTX (GET_MODE (x)))
    return 2;

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (GET_MODE (x) == XFmode
      && (optimize_size || x86_ext_80387_constants & TUNEMASK))
    {
      REAL_VALUE_TYPE r;
      int i;

      if (! ext_80387_constants_init)
        init_ext_80387_constants ();

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      for (i = 0; i < 5; i++)
        if (real_identical (&r, &ext_80387_constants_table[i]))
          return i + 3;
    }

  return 0;
}
3873 /* Return the opcode of the special instruction to be used to load
3877 standard_80387_constant_opcode (rtx x
)
3879 switch (standard_80387_constant_p (x
))
3900 /* Return the CONST_DOUBLE representing the 80387 constant that is
3901 loaded by the specified special instruction. The argument IDX
3902 matches the return value from standard_80387_constant_p. */
3905 standard_80387_constant_rtx (int idx
)
3909 if (! ext_80387_constants_init
)
3910 init_ext_80387_constants ();
3926 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
3930 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
3933 standard_sse_constant_p (rtx x
)
3935 if (x
== const0_rtx
)
3937 return (x
== CONST0_RTX (GET_MODE (x
)));
/* Returns 1 if OP contains a symbol reference */

symbolic_reference_mentioned_p (rtx op)
  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
      for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.  */

ix86_can_use_return_insn_p (void)
  struct ix86_frame frame;

  if (! reload_completed || frame_pointer_needed)

  /* Don't allow more than 32k bytes of popped arguments, since that's all
     we can handle with one `ret' instruction.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)

  ix86_compute_frame_layout (&frame);
  return frame.to_allocate == 0 && frame.nregs == 0;
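/* Sketch of the 32k limit above: `ret $N' encodes the popped byte count as
   an immediate, so when the declared argument area reaches 32768 bytes or
   more this function returns 0 and the full epilogue (explicit stack
   adjustment) is expanded instead of a single return insn.  */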
/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

ix86_frame_pointer_required (void)
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)

  /* Several x86 OSes need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)

  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
     the frame pointer by default.  Turn it back on now if we've not
     got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!current_function_is_leaf))

  if (current_function_profile)

/* Record that the current function accesses previous call frames.  */

ix86_setup_frame_addresses (void)
  cfun->machine->accesses_prev_frame = 1;
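/* For example (illustrative): evaluating __builtin_return_address (1) or
   __builtin_frame_address (1) walks the caller's frame, which calls
   ix86_setup_frame_addresses; the accesses_prev_frame flag then forces a
   frame pointer in ix86_frame_pointer_required above so the saved %ebp
   chain can be followed.  */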
#if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

static int pic_labels_used;

/* Fills in the label name that should be used for a pc thunk for
   the given register.  */

get_pc_thunk_name (char name[32], unsigned int regno)
  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
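/* Example (illustrative): for the PIC register %ebx this produces the
   well-known hidden linkonce symbol "__i686.get_pc_thunk.bx"; without
   USE_HIDDEN_LINKONCE an internal label derived from "LPR" and the
   register number is used instead.  */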
4053 /* This function generates code for -fpic that loads %ebx with
4054 the return address of the caller and then returns. */
4057 ix86_file_end (void)
4062 for (regno
= 0; regno
< 8; ++regno
)
4066 if (! ((pic_labels_used
>> regno
) & 1))
4069 get_pc_thunk_name (name
, regno
);
4071 if (USE_HIDDEN_LINKONCE
)
4075 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
4077 TREE_PUBLIC (decl
) = 1;
4078 TREE_STATIC (decl
) = 1;
4079 DECL_ONE_ONLY (decl
) = 1;
4081 (*targetm
.asm_out
.unique_section
) (decl
, 0);
4082 named_section (decl
, NULL
, 0);
4084 (*targetm
.asm_out
.globalize_label
) (asm_out_file
, name
);
4085 fputs ("\t.hidden\t", asm_out_file
);
4086 assemble_name (asm_out_file
, name
);
4087 fputc ('\n', asm_out_file
);
4088 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
4093 ASM_OUTPUT_LABEL (asm_out_file
, name
);
4096 xops
[0] = gen_rtx_REG (SImode
, regno
);
4097 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
4098 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
4099 output_asm_insn ("ret", xops
);
4102 if (NEED_INDICATE_EXEC_STACK
)
4103 file_end_indicate_exec_stack ();
4106 /* Emit code for the SET_GOT patterns. */
4109 output_set_got (rtx dest
)
4114 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
4116 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
4118 xops
[2] = gen_rtx_LABEL_REF (Pmode
, gen_label_rtx ());
4121 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
4123 output_asm_insn ("call\t%a2", xops
);
4126 /* Output the "canonical" label name ("Lxx$pb") here too. This
4127 is what will be referred to by the Mach-O PIC subsystem. */
4128 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
4130 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
4131 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
4134 output_asm_insn ("pop{l}\t%0", xops
);
4139 get_pc_thunk_name (name
, REGNO (dest
));
4140 pic_labels_used
|= 1 << REGNO (dest
);
4142 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
4143 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
4144 output_asm_insn ("call\t%X2", xops
);
4147 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
4148 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
4149 else if (!TARGET_MACHO
)
4150 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops
);
/* Generate a "push" pattern for input ARG.  */

  return gen_rtx_SET (VOIDmode,
		      gen_rtx_PRE_DEC (Pmode,
				       stack_pointer_rtx)),

/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  */

ix86_select_alt_pic_regnum (void)
  if (current_function_is_leaf && !current_function_profile)
      for (i = 2; i >= 0; --i)
	if (!regs_ever_live[i])

  return INVALID_REGNUM;
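/* Sketch: in a leaf function compiled without profiling, the loop above
   scans hard registers 2, 1 and 0 (%ecx, %edx, %eax) for one that is never
   live; picking such a call-clobbered register for the PIC pointer lets
   ix86_save_reg below skip saving and restoring %ebx.  */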
/* Return 1 if we need to save REGNO.  */

ix86_save_reg (unsigned int regno, int maybe_eh_return)
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
	  || current_function_profile
	  || current_function_calls_eh_return
	  || current_function_uses_const_pool))
      if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)

  if (current_function_calls_eh_return && maybe_eh_return)
      unsigned test = EH_RETURN_DATA_REGNO (i);
      if (test == INVALID_REGNUM)

  return (regs_ever_live[regno]
	  && !call_used_regs[regno]
	  && !fixed_regs[regno]
	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
/* Return number of registers to be saved on the stack.  */

ix86_nsaved_regs (void)
  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))

/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

ix86_initial_elimination_offset (int from, int to)
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;

  gcc_assert (to == STACK_POINTER_REGNUM);

  if (from == ARG_POINTER_REGNUM)
    return frame.stack_pointer_offset;

  gcc_assert (from == FRAME_POINTER_REGNUM);
  return frame.stack_pointer_offset - frame.frame_pointer_offset;
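/* Rough sketch of the layout computed below (stack grows downward, offsets
   taken from the entry value of the stack pointer); this diagram is only an
   orientation aid, the code below is authoritative:

	[ return address     ]
	[ saved %ebp         ]  (only if frame_pointer_needed)
	[ saved registers    ]  nregs * UNITS_PER_WORD
	[ va_arg save area   ]  (x86-64 varargs only)
	[ padding1           ]  aligns the locals
	[ local variables    ]  <- frame_pointer_offset
	[ outgoing arguments ]
	[ padding2           ]  <- stack_pointer_offset  */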
/* Fill structure ix86_frame about frame of currently computed function.  */

ix86_compute_frame_layout (struct ix86_frame *frame)
  HOST_WIDE_INT total_size;
  unsigned int stack_alignment_needed;
  HOST_WIDE_INT offset;
  unsigned int preferred_alignment;
  HOST_WIDE_INT size = get_frame_size ();

  frame->nregs = ix86_nsaved_regs ();

  stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
  preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;

  /* During reload iteration the amount of registers saved can change.
     Recompute the value as needed.  Do not recompute when amount of registers
     didn't change as reload does multiple calls to the function and does not
     expect the decision to change within single iteration.  */
      && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
      int count = frame->nregs;

      cfun->machine->use_fast_prologue_epilogue_nregs = count;
      /* The fast prologue uses move instead of push to save registers.  This
	 is significantly longer, but also executes faster as modern hardware
	 can execute the moves in parallel, but can't do that for push/pop.

	 Be careful about choosing what prologue to emit:  When function takes
	 many instructions to execute we may use slow version as well as in
	 case function is known to be outside hot spot (this is known with
	 feedback only).  Weight the size of function by number of registers
	 to save as it is cheap to use one or two push instructions but very
	 slow to use many of them.  */
      count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
	  || (flag_branch_probabilities
	      && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
	cfun->machine->use_fast_prologue_epilogue = false;
	cfun->machine->use_fast_prologue_epilogue
	  = !expensive_function_p (count);
      if (TARGET_PROLOGUE_USING_MOVE
	  && cfun->machine->use_fast_prologue_epilogue)
	frame->save_regs_using_mov = true;
	frame->save_regs_using_mov = false;
  /* Skip return address and saved base pointer.  */
  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;

  frame->hard_frame_pointer_offset = offset;

  /* Do some sanity checking of stack_alignment_needed and
     preferred_alignment, since the i386 port is the only one using these
     features, and they may break easily.  */
  gcc_assert (!size || stack_alignment_needed);
  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (stack_alignment_needed
	      <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);

  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;

  if (ix86_save_varrargs_registers)
      offset += X86_64_VARARGS_SIZE;
      frame->va_arg_size = X86_64_VARARGS_SIZE;
    frame->va_arg_size = 0;

  /* Align start of frame for local function.  */
  frame->padding1 = ((offset + stack_alignment_needed - 1)
		     & -stack_alignment_needed) - offset;

  offset += frame->padding1;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when function calls alloca.  Alloca
     expander assumes that last current_function_outgoing_args_size
     of stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!current_function_is_leaf || current_function_calls_alloca))
      offset += current_function_outgoing_args_size;
      frame->outgoing_arguments_size = current_function_outgoing_args_size;
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!current_function_is_leaf || current_function_calls_alloca)
    frame->padding2 = ((offset + preferred_alignment - 1)
		       & -preferred_alignment) - offset;
    frame->padding2 = 0;

  offset += frame->padding2;

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  frame->to_allocate =
    (size + frame->padding1 + frame->padding2
     + frame->outgoing_arguments_size + frame->va_arg_size);

  if ((!frame->to_allocate && frame->nregs <= 1)
      || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
    frame->save_regs_using_mov = false;

  if (TARGET_RED_ZONE && current_function_sp_is_unchanging
      && current_function_is_leaf)
      frame->red_zone_size = frame->to_allocate;
      if (frame->save_regs_using_mov)
	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    frame->red_zone_size = 0;
  frame->to_allocate -= frame->red_zone_size;
  frame->stack_pointer_offset -= frame->red_zone_size;
  fprintf (stderr, "nregs: %i\n", frame->nregs);
  fprintf (stderr, "size: %i\n", size);
  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
  fprintf (stderr, "padding1: %i\n", frame->padding1);
  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
  fprintf (stderr, "padding2: %i\n", frame->padding2);
  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
	   frame->hard_frame_pointer_offset);
  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
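/* Worked example with illustrative numbers (32-bit, frame pointer needed,
   one saved register, 40 bytes of locals, 16-byte stack_alignment_needed,
   leaf function, no outgoing args):
     offset = 8                      (return address + saved %ebp)
     offset += 1 * 4 = 12            (register save area)
     padding1 = ((12 + 15) & -16) - 12 = 4
     frame_pointer_offset = 16
     padding2 = 0
     to_allocate = 40 + 4 + 0 + 0 + 0 = 44
   so the prologue pushes one register and then allocates 44 bytes.  */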
/* Emit code to save registers in the prologue.  */

ix86_emit_save_regs (void)
  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;

/* Emit code to save registers using MOV insns.  First register
   is saved at POINTER + OFFSET.  */

ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, true))
	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
			       gen_rtx_REG (Pmode, regno));
	RTX_FRAME_RELATED_P (insn) = 1;
	offset += UNITS_PER_WORD;
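/* Illustrative comparison of the two save styles (sketch only, offsets are
   made up):

	push-based prologue:		mov-based prologue:
	  push  %esi			  sub   $24, %esp
	  push  %edi			  mov   %esi, 16(%esp)
	  sub   $16, %esp		  mov   %edi, 20(%esp)

   The mov form is longer but its stores are independent and can issue in
   parallel, which is why ix86_compute_frame_layout prefers it for hot,
   cheap-enough functions via use_fast_prologue_epilogue.  */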
/* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if %r11 register is live and cannot be freely used and positive
   otherwise.  */

pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
    insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
  else if (x86_64_immediate_operand (offset, DImode))
    insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));

      /* r11 is used by indirect sibcall return as well, set before the
	 epilogue and used after the epilogue.  ATM indirect sibcall
	 shouldn't be used together with huge frame sizes in one
	 function because of the frame_size check in sibcall.c.  */
      r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
      insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
	RTX_FRAME_RELATED_P (insn) = 1;
      insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
	RTX_FRAME_RELATED_P (insn) = 1;
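/* Example (sketch): on x86-64 an adjustment such as 0x100000000 bytes does
   not fit the signed 32-bit immediate tested by x86_64_immediate_operand,
   so the code above first loads the offset into %r11 and then emits the
   *_rex64_2 pattern that applies %r11 to the stack pointer, marking the
   insns frame related when STYLE requests it.  */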
4488 /* Expand the prologue into a bunch of separate insns. */
4491 ix86_expand_prologue (void)
4495 struct ix86_frame frame
;
4496 HOST_WIDE_INT allocate
;
4498 ix86_compute_frame_layout (&frame
);
4500 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4501 slower on all targets. Also sdb doesn't like it. */
4503 if (frame_pointer_needed
)
4505 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
4506 RTX_FRAME_RELATED_P (insn
) = 1;
4508 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
4509 RTX_FRAME_RELATED_P (insn
) = 1;
4512 allocate
= frame
.to_allocate
;
4514 if (!frame
.save_regs_using_mov
)
4515 ix86_emit_save_regs ();
4517 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
4519 /* When using red zone we may start register saving before allocating
4520 the stack frame saving one cycle of the prologue. */
4521 if (TARGET_RED_ZONE
&& frame
.save_regs_using_mov
)
4522 ix86_emit_save_regs_using_mov (frame_pointer_needed
? hard_frame_pointer_rtx
4523 : stack_pointer_rtx
,
4524 -frame
.nregs
* UNITS_PER_WORD
);
4528 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
4529 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
4530 GEN_INT (-allocate
), -1);
4533 /* Only valid for Win32. */
4534 rtx eax
= gen_rtx_REG (SImode
, 0);
4535 bool eax_live
= ix86_eax_live_at_start_p ();
4538 gcc_assert (!TARGET_64BIT
);
4542 emit_insn (gen_push (eax
));
4546 emit_move_insn (eax
, GEN_INT (allocate
));
4548 insn
= emit_insn (gen_allocate_stack_worker (eax
));
4549 RTX_FRAME_RELATED_P (insn
) = 1;
4550 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (-allocate
));
4551 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
4552 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
4553 t
, REG_NOTES (insn
));
4557 if (frame_pointer_needed
)
4558 t
= plus_constant (hard_frame_pointer_rtx
,
4561 - frame
.nregs
* UNITS_PER_WORD
);
4563 t
= plus_constant (stack_pointer_rtx
, allocate
);
4564 emit_move_insn (eax
, gen_rtx_MEM (SImode
, t
));
4568 if (frame
.save_regs_using_mov
&& !TARGET_RED_ZONE
)
4570 if (!frame_pointer_needed
|| !frame
.to_allocate
)
4571 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
4573 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
4574 -frame
.nregs
* UNITS_PER_WORD
);
4577 pic_reg_used
= false;
4578 if (pic_offset_table_rtx
4579 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
4580 || current_function_profile
))
4582 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
4584 if (alt_pic_reg_used
!= INVALID_REGNUM
)
4585 REGNO (pic_offset_table_rtx
) = alt_pic_reg_used
;
4587 pic_reg_used
= true;
4592 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
4594 /* Even with accurate pre-reload life analysis, we can wind up
4595 deleting all references to the pic register after reload.
4596 Consider if cross-jumping unifies two sides of a branch
4597 controlled by a comparison vs the only read from a global.
4598 In which case, allow the set_got to be deleted, though we're
4599 too late to do anything about the ebx save in the prologue. */
4600 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
4603 /* Prevent function calls from be scheduled before the call to mcount.
4604 In the pic_reg_used case, make sure that the got load isn't deleted. */
4605 if (current_function_profile
)
4606 emit_insn (gen_blockage (pic_reg_used
? pic_offset_table_rtx
: const0_rtx
));
/* Emit code to restore saved registers using MOV insns.  First register
   is restored from POINTER + OFFSET.  */

ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
				  int maybe_eh_return)
  rtx base_address = gen_rtx_MEM (Pmode, pointer);

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, maybe_eh_return))
	/* Ensure that adjust_address won't be forced to produce pointer
	   out of range allowed by x86-64 instruction set.  */
	if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
	    r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
	    emit_move_insn (r11, GEN_INT (offset));
	    emit_insn (gen_adddi3 (r11, r11, pointer));
	    base_address = gen_rtx_MEM (Pmode, r11);
	emit_move_insn (gen_rtx_REG (Pmode, regno),
			adjust_address (base_address, Pmode, offset));
	offset += UNITS_PER_WORD;
4639 /* Restore function stack, frame, and registers. */
4642 ix86_expand_epilogue (int style
)
4645 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
4646 struct ix86_frame frame
;
4647 HOST_WIDE_INT offset
;
4649 ix86_compute_frame_layout (&frame
);
4651 /* Calculate start of saved registers relative to ebp. Special care
4652 must be taken for the normal return case of a function using
4653 eh_return: the eax and edx registers are marked as saved, but not
4654 restored along this path. */
4655 offset
= frame
.nregs
;
4656 if (current_function_calls_eh_return
&& style
!= 2)
4658 offset
*= -UNITS_PER_WORD
;
4660 /* If we're only restoring one register and sp is not valid then
4661 using a move instruction to restore the register since it's
4662 less work than reloading sp and popping the register.
4664 The default code result in stack adjustment using add/lea instruction,
4665 while this code results in LEAVE instruction (or discrete equivalent),
4666 so it is profitable in some other cases as well. Especially when there
4667 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4668 and there is exactly one register to pop. This heuristic may need some
4669 tuning in future. */
4670 if ((!sp_valid
&& frame
.nregs
<= 1)
4671 || (TARGET_EPILOGUE_USING_MOVE
4672 && cfun
->machine
->use_fast_prologue_epilogue
4673 && (frame
.nregs
> 1 || frame
.to_allocate
))
4674 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
4675 || (frame_pointer_needed
&& TARGET_USE_LEAVE
4676 && cfun
->machine
->use_fast_prologue_epilogue
4677 && frame
.nregs
== 1)
4678 || current_function_calls_eh_return
)
4680 /* Restore registers. We can use ebp or esp to address the memory
4681 locations. If both are available, default to ebp, since offsets
4682 are known to be small. Only exception is esp pointing directly to the
4683 end of block of saved registers, where we may simplify addressing
4686 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
4687 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
4688 frame
.to_allocate
, style
== 2);
4690 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
4691 offset
, style
== 2);
4693 /* eh_return epilogues need %ecx added to the stack pointer. */
4696 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
4698 if (frame_pointer_needed
)
4700 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
4701 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
4702 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
4704 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
4705 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
4707 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
4712 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
4713 tmp
= plus_constant (tmp
, (frame
.to_allocate
4714 + frame
.nregs
* UNITS_PER_WORD
));
4715 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
4718 else if (!frame_pointer_needed
)
4719 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
4720 GEN_INT (frame
.to_allocate
4721 + frame
.nregs
* UNITS_PER_WORD
),
4723 /* If not an i386, mov & pop is faster than "leave". */
4724 else if (TARGET_USE_LEAVE
|| optimize_size
4725 || !cfun
->machine
->use_fast_prologue_epilogue
)
4726 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
4729 pro_epilogue_adjust_stack (stack_pointer_rtx
,
4730 hard_frame_pointer_rtx
,
4733 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
4735 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
4740 /* First step is to deallocate the stack frame so that we can
4741 pop the registers. */
4744 gcc_assert (frame_pointer_needed
);
4745 pro_epilogue_adjust_stack (stack_pointer_rtx
,
4746 hard_frame_pointer_rtx
,
4747 GEN_INT (offset
), style
);
4749 else if (frame
.to_allocate
)
4750 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
4751 GEN_INT (frame
.to_allocate
), style
);
4753 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4754 if (ix86_save_reg (regno
, false))
4757 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
4759 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
4761 if (frame_pointer_needed
)
4763 /* Leave results in shorter dependency chains on CPUs that are
4764 able to grok it fast. */
4765 if (TARGET_USE_LEAVE
)
4766 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
4767 else if (TARGET_64BIT
)
4768 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
4770 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
4774 /* Sibcall epilogues don't want a return instruction. */
4778 if (current_function_pops_args
&& current_function_args_size
)
4780 rtx popc
= GEN_INT (current_function_pops_args
);
4782 /* i386 can only pop 64K bytes. If asked to pop more, pop
4783 return address, do explicit add, and jump indirectly to the
4786 if (current_function_pops_args
>= 65536)
4788 rtx ecx
= gen_rtx_REG (SImode
, 2);
4790 /* There is no "pascal" calling convention in 64bit ABI. */
4791 gcc_assert (!TARGET_64BIT
);
4793 emit_insn (gen_popsi1 (ecx
));
4794 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
4795 emit_jump_insn (gen_return_indirect_internal (ecx
));
4798 emit_jump_insn (gen_return_pop_internal (popc
));
4801 emit_jump_insn (gen_return_internal ());
4804 /* Reset from the function's potential modifications. */
4807 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
4808 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
4810 if (pic_offset_table_rtx
)
4811 REGNO (pic_offset_table_rtx
) = REAL_PIC_OFFSET_TABLE_REGNUM
;
4814 /* Extract the parts of an RTL expression that is a valid memory address
4815 for an instruction. Return 0 if the structure of the address is
4816 grossly off. Return -1 if the address contains ASHIFT, so it is not
4817 strictly valid, but still used for computing length of lea instruction. */
4820 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
4822 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
4823 rtx base_reg
, index_reg
;
4824 HOST_WIDE_INT scale
= 1;
4825 rtx scale_rtx
= NULL_RTX
;
4827 enum ix86_address_seg seg
= SEG_DEFAULT
;
4829 if (GET_CODE (addr
) == REG
|| GET_CODE (addr
) == SUBREG
)
4831 else if (GET_CODE (addr
) == PLUS
)
4841 addends
[n
++] = XEXP (op
, 1);
4844 while (GET_CODE (op
) == PLUS
);
4849 for (i
= n
; i
>= 0; --i
)
4852 switch (GET_CODE (op
))
4857 index
= XEXP (op
, 0);
4858 scale_rtx
= XEXP (op
, 1);
4862 if (XINT (op
, 1) == UNSPEC_TP
4863 && TARGET_TLS_DIRECT_SEG_REFS
4864 && seg
== SEG_DEFAULT
)
4865 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
4894 else if (GET_CODE (addr
) == MULT
)
4896 index
= XEXP (addr
, 0); /* index*scale */
4897 scale_rtx
= XEXP (addr
, 1);
4899 else if (GET_CODE (addr
) == ASHIFT
)
4903 /* We're called for lea too, which implements ashift on occasion. */
4904 index
= XEXP (addr
, 0);
4905 tmp
= XEXP (addr
, 1);
4906 if (GET_CODE (tmp
) != CONST_INT
)
4908 scale
= INTVAL (tmp
);
4909 if ((unsigned HOST_WIDE_INT
) scale
> 3)
4915 disp
= addr
; /* displacement */
4917 /* Extract the integral value of scale. */
4920 if (GET_CODE (scale_rtx
) != CONST_INT
)
4922 scale
= INTVAL (scale_rtx
);
4925 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
4926 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
4928 /* Allow arg pointer and stack pointer as index if there is not scaling. */
4929 if (base_reg
&& index_reg
&& scale
== 1
4930 && (index_reg
== arg_pointer_rtx
4931 || index_reg
== frame_pointer_rtx
4932 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
4935 tmp
= base
, base
= index
, index
= tmp
;
4936 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
4939 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4940 if ((base_reg
== hard_frame_pointer_rtx
4941 || base_reg
== frame_pointer_rtx
4942 || base_reg
== arg_pointer_rtx
) && !disp
)
4945 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4946 Avoid this by transforming to [%esi+0]. */
4947 if (ix86_tune
== PROCESSOR_K6
&& !optimize_size
4948 && base_reg
&& !index_reg
&& !disp
4950 && REGNO_REG_CLASS (REGNO (base_reg
)) == SIREG
)
4953 /* Special case: encode reg+reg instead of reg*2. */
4954 if (!base
&& index
&& scale
&& scale
== 2)
4955 base
= index
, base_reg
= index_reg
, scale
= 1;
4957 /* Special case: scaling cannot be encoded without base or displacement. */
4958 if (!base
&& !disp
&& index
&& scale
!= 1)
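/* Illustrative decomposition (sketch): for the memory operand of
   "movl 4(%ebx,%esi,2), %eax" the parts are base = %ebx, index = %esi,
   scale = 2, disp = 4, seg = SEG_DEFAULT.  The reg+reg special case above
   means an address consisting only of %esi*2 is re-expressed as
   base = index = %esi with scale = 1, which encodes more compactly.  */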
4970 /* Return cost of the memory address x.
4971 For i386, it is better to use a complex address than let gcc copy
4972 the address into a reg and make a new pseudo. But not if the address
4973 requires to two regs - that would mean more pseudos with longer
4976 ix86_address_cost (rtx x
)
4978 struct ix86_address parts
;
4980 int ok
= ix86_decompose_address (x
, &parts
);
4984 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
4985 parts
.base
= SUBREG_REG (parts
.base
);
4986 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
4987 parts
.index
= SUBREG_REG (parts
.index
);
4989 /* More complex memory references are better. */
4990 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
4992 if (parts
.seg
!= SEG_DEFAULT
)
4995 /* Attempt to minimize number of registers in the address. */
4997 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
4999 && (!REG_P (parts
.index
)
5000 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
5004 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
5006 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
5007 && parts
.base
!= parts
.index
)
5010 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5011 since it's predecode logic can't detect the length of instructions
5012 and it degenerates to vector decoded. Increase cost of such
5013 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5014 to split such addresses or even refuse such addresses at all.
5016 Following addressing modes are affected:
5021 The first and last case may be avoidable by explicitly coding the zero in
5022 memory address, but I don't have AMD-K6 machine handy to check this
5026 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
5027 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
5028 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
5034 /* If X is a machine specific address (i.e. a symbol or label being
5035 referenced as a displacement from the GOT implemented using an
5036 UNSPEC), then return the base term. Otherwise return X. */
5039 ix86_find_base_term (rtx x
)
5045 if (GET_CODE (x
) != CONST
)
5048 if (GET_CODE (term
) == PLUS
5049 && (GET_CODE (XEXP (term
, 1)) == CONST_INT
5050 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
5051 term
= XEXP (term
, 0);
5052 if (GET_CODE (term
) != UNSPEC
5053 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
5056 term
= XVECEXP (term
, 0, 0);
5058 if (GET_CODE (term
) != SYMBOL_REF
5059 && GET_CODE (term
) != LABEL_REF
)
5065 term
= ix86_delegitimize_address (x
);
5067 if (GET_CODE (term
) != SYMBOL_REF
5068 && GET_CODE (term
) != LABEL_REF
)
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   effect.  */

darwin_local_data_pic (rtx disp)
  if (GET_CODE (disp) == MINUS)
      if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
	  || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
	if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
	    const char *sym_name = XSTR (XEXP (disp, 1), 0);
	    if (! strcmp (sym_name, "<pic base>"))
5096 /* Determine if a given RTX is a valid constant. We already know this
5097 satisfies CONSTANT_P. */
5100 legitimate_constant_p (rtx x
)
5102 switch (GET_CODE (x
))
5107 if (GET_CODE (x
) == PLUS
)
5109 if (GET_CODE (XEXP (x
, 1)) != CONST_INT
)
5114 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
5117 /* Only some unspecs are valid as "constants". */
5118 if (GET_CODE (x
) == UNSPEC
)
5119 switch (XINT (x
, 1))
5123 return local_exec_symbolic_operand (XVECEXP (x
, 0, 0), Pmode
);
5125 return local_dynamic_symbolic_operand (XVECEXP (x
, 0, 0), Pmode
);
5130 /* We must have drilled down to a symbol. */
5131 if (!symbolic_operand (x
, Pmode
))
5136 /* TLS symbols are never valid. */
5137 if (tls_symbolic_operand (x
, Pmode
))
5145 /* Otherwise we handle everything else in the move patterns. */
5149 /* Determine if it's legal to put X into the constant pool. This
5150 is not possible for the address of thread-local symbols, which
5151 is checked above. */
5154 ix86_cannot_force_const_mem (rtx x
)
5156 return !legitimate_constant_p (x
);
5159 /* Determine if a given RTX is a valid constant address. */
5162 constant_address_p (rtx x
)
5164 return CONSTANT_P (x
) && legitimate_address_p (Pmode
, x
, 1);
5167 /* Nonzero if the constant value X is a legitimate general operand
5168 when generating PIC code. It is given that flag_pic is on and
5169 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5172 legitimate_pic_operand_p (rtx x
)
5176 switch (GET_CODE (x
))
5179 inner
= XEXP (x
, 0);
5181 /* Only some unspecs are valid as "constants". */
5182 if (GET_CODE (inner
) == UNSPEC
)
5183 switch (XINT (inner
, 1))
5186 return local_exec_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
5194 return legitimate_pic_address_disp_p (x
);
5201 /* Determine if a given CONST RTX is a valid memory displacement
5205 legitimate_pic_address_disp_p (rtx disp
)
5209 /* In 64bit mode we can allow direct addresses of symbols and labels
5210 when they are not dynamic symbols. */
5213 /* TLS references should always be enclosed in UNSPEC. */
5214 if (tls_symbolic_operand (disp
, GET_MODE (disp
)))
5216 if (GET_CODE (disp
) == SYMBOL_REF
5217 && ix86_cmodel
== CM_SMALL_PIC
5218 && SYMBOL_REF_LOCAL_P (disp
))
5220 if (GET_CODE (disp
) == LABEL_REF
)
5222 if (GET_CODE (disp
) == CONST
5223 && GET_CODE (XEXP (disp
, 0)) == PLUS
)
5225 rtx op0
= XEXP (XEXP (disp
, 0), 0);
5226 rtx op1
= XEXP (XEXP (disp
, 0), 1);
5228 /* TLS references should always be enclosed in UNSPEC. */
5229 if (tls_symbolic_operand (op0
, GET_MODE (op0
)))
5231 if (((GET_CODE (op0
) == SYMBOL_REF
5232 && ix86_cmodel
== CM_SMALL_PIC
5233 && SYMBOL_REF_LOCAL_P (op0
))
5234 || GET_CODE (op0
) == LABEL_REF
)
5235 && GET_CODE (op1
) == CONST_INT
5236 && INTVAL (op1
) < 16*1024*1024
5237 && INTVAL (op1
) >= -16*1024*1024)
5241 if (GET_CODE (disp
) != CONST
)
5243 disp
= XEXP (disp
, 0);
5247 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5248 of GOT tables. We should not need these anyway. */
5249 if (GET_CODE (disp
) != UNSPEC
5250 || XINT (disp
, 1) != UNSPEC_GOTPCREL
)
5253 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
5254 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
5260 if (GET_CODE (disp
) == PLUS
)
5262 if (GET_CODE (XEXP (disp
, 1)) != CONST_INT
)
5264 disp
= XEXP (disp
, 0);
5268 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
5271 if (GET_CODE (disp
) != UNSPEC
)
5274 switch (XINT (disp
, 1))
5279 return GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
;
5281 if (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
5282 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
5283 return local_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5285 case UNSPEC_GOTTPOFF
:
5286 case UNSPEC_GOTNTPOFF
:
5287 case UNSPEC_INDNTPOFF
:
5290 return initial_exec_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5292 return local_exec_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5294 return local_dynamic_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5300 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5301 memory address for an instruction. The MODE argument is the machine mode
5302 for the MEM expression that wants to use this address.
5304 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5305 convert common non-canonical forms to canonical form so that they will
5309 legitimate_address_p (enum machine_mode mode
, rtx addr
, int strict
)
5311 struct ix86_address parts
;
5312 rtx base
, index
, disp
;
5313 HOST_WIDE_INT scale
;
5314 const char *reason
= NULL
;
5315 rtx reason_rtx
= NULL_RTX
;
5317 if (TARGET_DEBUG_ADDR
)
5320 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5321 GET_MODE_NAME (mode
), strict
);
5325 if (ix86_decompose_address (addr
, &parts
) <= 0)
5327 reason
= "decomposition failed";
5332 index
= parts
.index
;
5334 scale
= parts
.scale
;
5336 /* Validate base register.
5338 Don't allow SUBREG's that span more than a word here. It can lead to spill
5339 failures when the base is one word out of a two word structure, which is
5340 represented internally as a DImode int. */
5349 else if (GET_CODE (base
) == SUBREG
5350 && REG_P (SUBREG_REG (base
))
5351 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base
)))
5353 reg
= SUBREG_REG (base
);
5356 reason
= "base is not a register";
5360 if (GET_MODE (base
) != Pmode
)
5362 reason
= "base is not in Pmode";
5366 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
5367 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
5369 reason
= "base is not valid";
5374 /* Validate index register.
5376 Don't allow SUBREG's that span more than a word here -- same as above. */
5385 else if (GET_CODE (index
) == SUBREG
5386 && REG_P (SUBREG_REG (index
))
5387 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index
)))
5389 reg
= SUBREG_REG (index
);
5392 reason
= "index is not a register";
5396 if (GET_MODE (index
) != Pmode
)
5398 reason
= "index is not in Pmode";
5402 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
5403 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
5405 reason
= "index is not valid";
5410 /* Validate scale factor. */
5413 reason_rtx
= GEN_INT (scale
);
5416 reason
= "scale without index";
5420 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
5422 reason
= "scale is not a valid multiplier";
5427 /* Validate displacement. */
5432 if (GET_CODE (disp
) == CONST
5433 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
5434 switch (XINT (XEXP (disp
, 0), 1))
5438 case UNSPEC_GOTPCREL
:
5439 gcc_assert (flag_pic
);
5440 goto is_legitimate_pic
;
5442 case UNSPEC_GOTTPOFF
:
5443 case UNSPEC_GOTNTPOFF
:
5444 case UNSPEC_INDNTPOFF
:
5450 reason
= "invalid address unspec";
5454 else if (flag_pic
&& (SYMBOLIC_CONST (disp
)
5456 && !machopic_operand_p (disp
)
5461 if (TARGET_64BIT
&& (index
|| base
))
5463 /* foo@dtpoff(%rX) is ok. */
5464 if (GET_CODE (disp
) != CONST
5465 || GET_CODE (XEXP (disp
, 0)) != PLUS
5466 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
5467 || GET_CODE (XEXP (XEXP (disp
, 0), 1)) != CONST_INT
5468 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
5469 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
5471 reason
= "non-constant pic memory reference";
5475 else if (! legitimate_pic_address_disp_p (disp
))
5477 reason
= "displacement is an invalid pic construct";
5481 /* This code used to verify that a symbolic pic displacement
5482 includes the pic_offset_table_rtx register.
5484 While this is good idea, unfortunately these constructs may
5485 be created by "adds using lea" optimization for incorrect
5494 This code is nonsensical, but results in addressing
5495 GOT table with pic_offset_table_rtx base. We can't
5496 just refuse it easily, since it gets matched by
5497 "addsi3" pattern, that later gets split to lea in the
5498 case output register differs from input. While this
5499 can be handled by separate addsi pattern for this case
5500 that never results in lea, this seems to be easier and
5501 correct fix for crash to disable this test. */
5503 else if (GET_CODE (disp
) != LABEL_REF
5504 && GET_CODE (disp
) != CONST_INT
5505 && (GET_CODE (disp
) != CONST
5506 || !legitimate_constant_p (disp
))
5507 && (GET_CODE (disp
) != SYMBOL_REF
5508 || !legitimate_constant_p (disp
)))
5510 reason
= "displacement is not constant";
5513 else if (TARGET_64BIT
5514 && !x86_64_immediate_operand (disp
, VOIDmode
))
5516 reason
= "displacement is out of range";
5521 /* Everything looks valid. */
5522 if (TARGET_DEBUG_ADDR
)
5523 fprintf (stderr
, "Success.\n");
5527 if (TARGET_DEBUG_ADDR
)
5529 fprintf (stderr
, "Error: %s\n", reason
);
5530 debug_rtx (reason_rtx
);
/* Return a unique alias set for the GOT.  */

static HOST_WIDE_INT
ix86_GOT_alias_set (void)
  static HOST_WIDE_INT set = -1;
    set = new_alias_set ();
5546 /* Return a legitimate reference for ORIG (an address) using the
5547 register REG. If REG is 0, a new pseudo is generated.
5549 There are two types of references that must be handled:
5551 1. Global data references must load the address from the GOT, via
5552 the PIC reg. An insn is emitted to do this load, and the reg is
5555 2. Static data references, constant pool addresses, and code labels
5556 compute the address as an offset from the GOT, whose base is in
5557 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
5558 differentiate them from global data objects. The returned
5559 address is the PIC reg + an unspec constant.
5561 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5562 reg also appears in the address. */
5565 legitimize_pic_address (rtx orig
, rtx reg
)
5573 reg
= gen_reg_rtx (Pmode
);
5574 /* Use the generic Mach-O PIC machinery. */
5575 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
5578 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
5580 else if (!TARGET_64BIT
&& local_symbolic_operand (addr
, Pmode
))
5582 /* This symbol may be referenced via a displacement from the PIC
5583 base address (@GOTOFF). */
5585 if (reload_in_progress
)
5586 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5587 if (GET_CODE (addr
) == CONST
)
5588 addr
= XEXP (addr
, 0);
5589 if (GET_CODE (addr
) == PLUS
)
5591 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
5592 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
5595 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
5596 new = gen_rtx_CONST (Pmode
, new);
5597 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5601 emit_move_insn (reg
, new);
5605 else if (GET_CODE (addr
) == SYMBOL_REF
)
5609 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
5610 new = gen_rtx_CONST (Pmode
, new);
5611 new = gen_const_mem (Pmode
, new);
5612 set_mem_alias_set (new, ix86_GOT_alias_set ());
5615 reg
= gen_reg_rtx (Pmode
);
5616 /* Use directly gen_movsi, otherwise the address is loaded
5617 into register for CSE. We don't want to CSE this addresses,
5618 instead we CSE addresses from the GOT table, so skip this. */
5619 emit_insn (gen_movsi (reg
, new));
5624 /* This symbol must be referenced via a load from the
5625 Global Offset Table (@GOT). */
5627 if (reload_in_progress
)
5628 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5629 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
5630 new = gen_rtx_CONST (Pmode
, new);
5631 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5632 new = gen_const_mem (Pmode
, new);
5633 set_mem_alias_set (new, ix86_GOT_alias_set ());
5636 reg
= gen_reg_rtx (Pmode
);
5637 emit_move_insn (reg
, new);
5643 if (GET_CODE (addr
) == CONST
)
5645 addr
= XEXP (addr
, 0);
5647 /* We must match stuff we generate before. Assume the only
5648 unspecs that can get here are ours. Not that we could do
5649 anything with them anyway.... */
5650 if (GET_CODE (addr
) == UNSPEC
5651 || (GET_CODE (addr
) == PLUS
5652 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
5654 gcc_assert (GET_CODE (addr
) == PLUS
);
5656 if (GET_CODE (addr
) == PLUS
)
5658 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
5660 /* Check first to see if this is a constant offset from a @GOTOFF
5661 symbol reference. */
5662 if (local_symbolic_operand (op0
, Pmode
)
5663 && GET_CODE (op1
) == CONST_INT
)
5667 if (reload_in_progress
)
5668 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5669 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
5671 new = gen_rtx_PLUS (Pmode
, new, op1
);
5672 new = gen_rtx_CONST (Pmode
, new);
5673 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5677 emit_move_insn (reg
, new);
5683 if (INTVAL (op1
) < -16*1024*1024
5684 || INTVAL (op1
) >= 16*1024*1024)
5685 new = gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
5690 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
5691 new = legitimize_pic_address (XEXP (addr
, 1),
5692 base
== reg
? NULL_RTX
: reg
);
5694 if (GET_CODE (new) == CONST_INT
)
5695 new = plus_constant (base
, INTVAL (new));
5698 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
5700 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
5701 new = XEXP (new, 1);
5703 new = gen_rtx_PLUS (Pmode
, base
, new);
/* Load the thread pointer.  If TO_REG is true, force it into a register.  */

get_thread_pointer (int to_reg)
  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);

      reg = gen_reg_rtx (Pmode);
      insn = gen_rtx_SET (VOIDmode, reg, tp);
      insn = emit_insn (insn);
5729 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
5730 false if we expect this to be used for a memory address and true if
5731 we expect to load the address into a register. */
5734 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
5736 rtx dest
, base
, off
, pic
;
5741 case TLS_MODEL_GLOBAL_DYNAMIC
:
5742 dest
= gen_reg_rtx (Pmode
);
5745 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
5748 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
5749 insns
= get_insns ();
5752 emit_libcall_block (insns
, dest
, rax
, x
);
5755 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
5758 case TLS_MODEL_LOCAL_DYNAMIC
:
5759 base
= gen_reg_rtx (Pmode
);
5762 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
5765 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
5766 insns
= get_insns ();
5769 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
5770 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
5771 emit_libcall_block (insns
, base
, rax
, note
);
5774 emit_insn (gen_tls_local_dynamic_base_32 (base
));
5776 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
5777 off
= gen_rtx_CONST (Pmode
, off
);
5779 return gen_rtx_PLUS (Pmode
, base
, off
);
5781 case TLS_MODEL_INITIAL_EXEC
:
5785 type
= UNSPEC_GOTNTPOFF
;
5789 if (reload_in_progress
)
5790 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5791 pic
= pic_offset_table_rtx
;
5792 type
= TARGET_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
5794 else if (!TARGET_GNU_TLS
)
5796 pic
= gen_reg_rtx (Pmode
);
5797 emit_insn (gen_set_got (pic
));
5798 type
= UNSPEC_GOTTPOFF
;
5803 type
= UNSPEC_INDNTPOFF
;
5806 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
5807 off
= gen_rtx_CONST (Pmode
, off
);
5809 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
5810 off
= gen_const_mem (Pmode
, off
);
5811 set_mem_alias_set (off
, ix86_GOT_alias_set ());
5813 if (TARGET_64BIT
|| TARGET_GNU_TLS
)
5815 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
5816 off
= force_reg (Pmode
, off
);
5817 return gen_rtx_PLUS (Pmode
, base
, off
);
5821 base
= get_thread_pointer (true);
5822 dest
= gen_reg_rtx (Pmode
);
5823 emit_insn (gen_subsi3 (dest
, base
, off
));
5827 case TLS_MODEL_LOCAL_EXEC
:
5828 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
5829 (TARGET_64BIT
|| TARGET_GNU_TLS
)
5830 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
5831 off
= gen_rtx_CONST (Pmode
, off
);
5833 if (TARGET_64BIT
|| TARGET_GNU_TLS
)
5835 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
5836 return gen_rtx_PLUS (Pmode
, base
, off
);
5840 base
= get_thread_pointer (true);
5841 dest
= gen_reg_rtx (Pmode
);
5842 emit_insn (gen_subsi3 (dest
, base
, off
));
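/* Orientation sketch for the TLS forms produced above (illustrative only,
   the machine description patterns are authoritative): with TARGET_GNU_TLS
   on a 32-bit target a local-exec access to `x' combines the thread pointer
   (UNSPEC_TP, i.e. the %gs segment base) with x@ntpoff, roughly

	movl	%gs:0, %eax
	movl	x@ntpoff(%eax), %eax

   initial-exec instead goes through a GOT slot (x@gottpoff / x@gotntpoff),
   and the global/local-dynamic models call __tls_get_addr.  */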
5853 /* Try machine-dependent ways of modifying an illegitimate address
5854 to be legitimate. If we find one, return the new, valid address.
5855 This macro is used in only one place: `memory_address' in explow.c.
5857 OLDX is the address as it was before break_out_memory_refs was called.
5858 In some cases it is useful to look at this to decide what needs to be done.
5860 MODE and WIN are passed so that this macro can use
5861 GO_IF_LEGITIMATE_ADDRESS.
5863 It is always safe for this macro to do nothing. It exists to recognize
5864 opportunities to optimize the output.
5866 For the 80386, we handle X+REG by loading X into a register R and
5867 using R+REG. R will go in a general reg and indexing will be used.
5868 However, if REG is a broken-out memory address or multiplication,
5869 nothing needs to be done because REG can certainly go in a general reg.
5871 When -fpic is used, special handling is needed for symbolic references.
5872 See comments by legitimize_pic_address in i386.c for details. */
5875 legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
, enum machine_mode mode
)
5880 if (TARGET_DEBUG_ADDR
)
5882 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5883 GET_MODE_NAME (mode
));
5887 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
5889 return legitimize_tls_address (x
, log
, false);
5890 if (GET_CODE (x
) == CONST
5891 && GET_CODE (XEXP (x
, 0)) == PLUS
5892 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
5893 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
5895 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0), log
, false);
5896 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
5899 if (flag_pic
&& SYMBOLIC_CONST (x
))
5900 return legitimize_pic_address (x
, 0);
5902 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5903 if (GET_CODE (x
) == ASHIFT
5904 && GET_CODE (XEXP (x
, 1)) == CONST_INT
5905 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
5908 log
= INTVAL (XEXP (x
, 1));
5909 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
5910 GEN_INT (1 << log
));
5913 if (GET_CODE (x
) == PLUS
)
5915 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5917 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
5918 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
5919 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
5922 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
5923 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
5924 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
5925 GEN_INT (1 << log
));
5928 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
5929 && GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
5930 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
5933 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
5934 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
5935 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
5936 GEN_INT (1 << log
));
5939 /* Put multiply first if it isn't already. */
5940 if (GET_CODE (XEXP (x
, 1)) == MULT
)
5942 rtx tmp
= XEXP (x
, 0);
5943 XEXP (x
, 0) = XEXP (x
, 1);
5948 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5949 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5950 created by virtual register instantiation, register elimination, and
5951 similar optimizations. */
5952 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
5955 x
= gen_rtx_PLUS (Pmode
,
5956 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
5957 XEXP (XEXP (x
, 1), 0)),
5958 XEXP (XEXP (x
, 1), 1));
5962 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5963 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5964 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
5965 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
5966 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
5967 && CONSTANT_P (XEXP (x
, 1)))
5970 rtx other
= NULL_RTX
;
5972 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
5974 constant
= XEXP (x
, 1);
5975 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
5977 else if (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 1), 1)) == CONST_INT
)
5979 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
5980 other
= XEXP (x
, 1);
5988 x
= gen_rtx_PLUS (Pmode
,
5989 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
5990 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
5991 plus_constant (other
, INTVAL (constant
)));
5995 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
5998 if (GET_CODE (XEXP (x
, 0)) == MULT
)
6001 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
6004 if (GET_CODE (XEXP (x
, 1)) == MULT
)
6007 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
6011 && GET_CODE (XEXP (x
, 1)) == REG
6012 && GET_CODE (XEXP (x
, 0)) == REG
)
6015 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
6018 x
= legitimize_pic_address (x
, 0);
6021 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
6024 if (GET_CODE (XEXP (x
, 0)) == REG
)
6026 rtx temp
= gen_reg_rtx (Pmode
);
6027 rtx val
= force_operand (XEXP (x
, 1), temp
);
6029 emit_move_insn (temp
, val
);
6035 else if (GET_CODE (XEXP (x
, 1)) == REG
)
6037 rtx temp
= gen_reg_rtx (Pmode
);
6038 rtx val
= force_operand (XEXP (x
, 0), temp
);
6040 emit_move_insn (temp
, val
);
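/* Example of the canonicalization above (sketch): an address expressed as
   (plus (ashift (reg A) (const_int 2)) (reg B)) is rewritten into
   (plus (mult (reg A) (const_int 4)) (reg B)), the base + index*scale shape
   that ix86_decompose_address and the lea patterns expect.  */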
6050 /* Print an integer constant expression in assembler syntax. Addition
6051 and subtraction are the only arithmetic that may appear in these
6052 expressions. FILE is the stdio stream to write to, X is the rtx, and
6053 CODE is the operand print code from the output string. */
6056 output_pic_addr_const (FILE *file
, rtx x
, int code
)
6060 switch (GET_CODE (x
))
6063 gcc_assert (flag_pic
);
6068 assemble_name (file
, XSTR (x
, 0));
6069 if (!TARGET_MACHO
&& code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
6070 fputs ("@PLT", file
);
6077 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
6078 assemble_name (asm_out_file
, buf
);
6082 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
6086 /* This used to output parentheses around the expression,
6087 but that does not work on the 386 (either ATT or BSD assembler). */
6088 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6092 if (GET_MODE (x
) == VOIDmode
)
6094 /* We can use %d if the number is <32 bits and positive. */
6095 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
6096 fprintf (file
, "0x%lx%08lx",
6097 (unsigned long) CONST_DOUBLE_HIGH (x
),
6098 (unsigned long) CONST_DOUBLE_LOW (x
));
6100 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
6103 /* We can't handle floating point constants;
6104 PRINT_OPERAND must handle them. */
6105 output_operand_lossage ("floating constant misused");
6109 /* Some assemblers need integer constants to appear first. */
6110 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
)
6112 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6114 output_pic_addr_const (file
, XEXP (x
, 1), code
);
6118 gcc_assert (GET_CODE (XEXP (x
, 1)) == CONST_INT
);
6119 output_pic_addr_const (file
, XEXP (x
, 1), code
);
6121 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6127 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
6128 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6130 output_pic_addr_const (file
, XEXP (x
, 1), code
);
6132 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
6136 gcc_assert (XVECLEN (x
, 0) == 1);
6137 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
6138 switch (XINT (x
, 1))
6141 fputs ("@GOT", file
);
6144 fputs ("@GOTOFF", file
);
6146 case UNSPEC_GOTPCREL
:
6147 fputs ("@GOTPCREL(%rip)", file
);
6149 case UNSPEC_GOTTPOFF
:
6150 /* FIXME: This might be @TPOFF in Sun ld too. */
6151 fputs ("@GOTTPOFF", file
);
6154 fputs ("@TPOFF", file
);
6158 fputs ("@TPOFF", file
);
6160 fputs ("@NTPOFF", file
);
6163 fputs ("@DTPOFF", file
);
6165 case UNSPEC_GOTNTPOFF
:
6167 fputs ("@GOTTPOFF(%rip)", file
);
6169 fputs ("@GOTNTPOFF", file
);
6171 case UNSPEC_INDNTPOFF
:
6172 fputs ("@INDNTPOFF", file
);
6175 output_operand_lossage ("invalid UNSPEC as operand");
6181 output_operand_lossage ("invalid expression as operand");
/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
  fputs (ASM_LONG, file);
  output_addr_const (file, x);
  fputs ("@DTPOFF", file);
      fputs (", 0", file);
6206 /* In the name of slightly smaller debug output, and to cater to
6207 general assembler lossage, recognize PIC+GOTOFF and turn it back
6208 into a direct symbol reference. */
6211 ix86_delegitimize_address (rtx orig_x
)
6215 if (GET_CODE (x
) == MEM
)
6220 if (GET_CODE (x
) != CONST
6221 || GET_CODE (XEXP (x
, 0)) != UNSPEC
6222 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
6223 || GET_CODE (orig_x
) != MEM
)
6225 return XVECEXP (XEXP (x
, 0), 0, 0);
6228 if (GET_CODE (x
) != PLUS
6229 || GET_CODE (XEXP (x
, 1)) != CONST
)
6232 if (GET_CODE (XEXP (x
, 0)) == REG
6233 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
6234 /* %ebx + GOT/GOTOFF */
6236 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
6238 /* %ebx + %reg * scale + GOT/GOTOFF */
6240 if (GET_CODE (XEXP (y
, 0)) == REG
6241 && REGNO (XEXP (y
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
6243 else if (GET_CODE (XEXP (y
, 1)) == REG
6244 && REGNO (XEXP (y
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
6248 if (GET_CODE (y
) != REG
6249 && GET_CODE (y
) != MULT
6250 && GET_CODE (y
) != ASHIFT
)
6256 x
= XEXP (XEXP (x
, 1), 0);
6257 if (GET_CODE (x
) == UNSPEC
6258 && ((XINT (x
, 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
6259 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& GET_CODE (orig_x
) != MEM
)))
6262 return gen_rtx_PLUS (Pmode
, y
, XVECEXP (x
, 0, 0));
6263 return XVECEXP (x
, 0, 0);
6266 if (GET_CODE (x
) == PLUS
6267 && GET_CODE (XEXP (x
, 0)) == UNSPEC
6268 && GET_CODE (XEXP (x
, 1)) == CONST_INT
6269 && ((XINT (XEXP (x
, 0), 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
6270 || (XINT (XEXP (x
, 0), 1) == UNSPEC_GOTOFF
6271 && GET_CODE (orig_x
) != MEM
)))
6273 x
= gen_rtx_PLUS (VOIDmode
, XVECEXP (XEXP (x
, 0), 0, 0), XEXP (x
, 1));
6275 return gen_rtx_PLUS (Pmode
, y
, x
);
6283 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
6288 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
6290 enum rtx_code second_code
, bypass_code
;
6291 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
6292 gcc_assert (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
);
6293 code
= ix86_fp_compare_code_to_integer (code
);
6297 code
= reverse_condition (code
);
6308 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
6312 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6313 Those same assemblers have the same but opposite lossage on cmov. */
6314 gcc_assert (mode
== CCmode
);
6315 suffix
= fp
? "nbe" : "a";
6335 gcc_assert (mode
== CCmode
);
6357 gcc_assert (mode
== CCmode
);
6358 suffix
= fp
? "nb" : "ae";
6361 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
6365 gcc_assert (mode
== CCmode
);
6369 suffix
= fp
? "u" : "p";
6372 suffix
= fp
? "nu" : "np";
6377 fputs (suffix
, file
);
6380 /* Print the name of register X to FILE based on its machine mode and number.
6381 If CODE is 'w', pretend the mode is HImode.
6382 If CODE is 'b', pretend the mode is QImode.
6383 If CODE is 'k', pretend the mode is SImode.
6384 If CODE is 'q', pretend the mode is DImode.
6385 If CODE is 'h', pretend the reg is the 'high' byte register.
6386 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
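/* Example (added annotation, not part of the original source): for the AX
   register under AT&T syntax the code letters select
     'b' -> "al"    'w' -> "ax"    'k' -> "eax"    'q' -> "rax" (64-bit only)
     'h' -> "ah"    'y' on a stack register prints "st(0)" rather than "st"
   and with no code letter the operand's own mode size chooses the name.  */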
6389 print_reg (rtx x, int code, FILE *file)
6391 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
6392 && REGNO (x) != FRAME_POINTER_REGNUM
6393 && REGNO (x) != FLAGS_REG
6394 && REGNO (x) != FPSR_REG);
6396 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6399 if (code == 'w' || MMX_REG_P (x))
6401 else if (code == 'b')
6403 else if (code == 'k')
6405 else if (code == 'q')
6407 else if (code == 'y')
6409 else if (code == 'h')
6412 code = GET_MODE_SIZE (GET_MODE (x));
6414 /* Irritatingly, AMD extended registers use different naming convention
6415 from the normal registers. */
6416 if (REX_INT_REG_P (x
))
6418 gcc_assert (TARGET_64BIT
);
6422 error ("extended registers have no high halves");
6425 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6428 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6431 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6434 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6437 error ("unsupported operand size for extended register");
6445 if (STACK_TOP_P (x
))
6447 fputs ("st(0)", file
);
6454 if (! ANY_FP_REG_P (x
))
6455 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
6460 fputs (hi_reg_name
[REGNO (x
)], file
);
6463 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
6465 fputs (qi_reg_name
[REGNO (x
)], file
);
6468 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
6470 fputs (qi_high_reg_name
[REGNO (x
)], file
);
6477 /* Locate some local-dynamic symbol still in use by this function
6478 so that we can print its name in some tls_local_dynamic_base
6482 get_some_local_dynamic_name (void)
6486 if (cfun->machine->some_ld_name)
6487 return cfun->machine->some_ld_name;
6489 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6491 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6492 return cfun->machine->some_ld_name;
6498 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
6502 if (GET_CODE (x) == SYMBOL_REF
6503 && local_dynamic_symbolic_operand (x, Pmode))
6505 cfun->machine->some_ld_name = XSTR (x, 0);
6513 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6514 C -- print opcode suffix for set/cmov insn.
6515 c -- like C, but print reversed condition
6516 F,f -- likewise, but for floating-point.
6517 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
6519 R -- print the prefix for register names.
6520 z -- print the opcode suffix for the size of the current operand.
6521 * -- print a star (in certain assembler syntax)
6522 A -- print an absolute memory reference.
6523 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6524 s -- print a shift double count, followed by the assemblers argument
6526 b -- print the QImode name of the register for the indicated operand.
6527 %b0 would print %al if operands[0] is reg 0.
6528 w -- likewise, print the HImode name of the register.
6529 k -- likewise, print the SImode name of the register.
6530 q -- likewise, print the DImode name of the register.
6531 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6532 y -- print "st(0)" instead of "st" as a register.
6533 D -- print condition for SSE cmp instruction.
6534 P -- if PIC, print an @PLT suffix.
6535 X -- don't print any sort of PIC '@' suffix for a symbol.
6536 & -- print some in-use local-dynamic symbol name.
6537 H -- print a memory address offset by 8; used for sse high-parts
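/* Illustrative sketch (added annotation; the template below is hypothetical,
   not taken from i386.md): in an output template such as
     "mov%z0\t{%1, %0|%0, %1}"
   the "%z0" expands to the size suffix of operand 0 (e.g. 'l' for an SImode
   operand), "%b0"/"%w0"/"%k0" would instead force the QImode/HImode/SImode
   register name, and the "{att|intel}" alternatives are chosen according to
   ASSEMBLER_DIALECT.  */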
6541 print_operand (FILE *file, rtx x, int code)
6548 if (ASSEMBLER_DIALECT == ASM_ATT)
6553 assemble_name (file, get_some_local_dynamic_name ());
6557 switch (ASSEMBLER_DIALECT)
6564 /* Intel syntax. For absolute addresses, registers should not
6565 be surrounded by braces. */
6566 if (GET_CODE (x) != REG)
6569 PRINT_OPERAND (file, x, 0);
6579 PRINT_OPERAND (file, x, 0);
6584 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6589 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6594 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6599 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6604 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6609 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6614 /* 387 opcodes don't get size suffixes if the operands are
6616 if (STACK_REG_P (x
))
6619 /* Likewise if using Intel opcodes. */
6620 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6623 /* This is the size of op from size of operand. */
6624 switch (GET_MODE_SIZE (GET_MODE (x
)))
6627 #ifdef HAVE_GAS_FILDS_FISTS
6633 if (GET_MODE (x
) == SFmode
)
6648 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
6650 #ifdef GAS_MNEMONICS
6676 if (GET_CODE (x
) == CONST_INT
|| ! SHIFT_DOUBLE_OMITS_COUNT
)
6678 PRINT_OPERAND (file
, x
, 0);
6684 /* Little bit of braindamage here. The SSE compare instructions
6685 use completely different names for the comparisons than the
6686 fp conditional moves do. */
6687 switch (GET_CODE (x
))
6702 fputs ("unord", file
);
6706 fputs ("neq", file
);
6710 fputs ("nlt", file
);
6714 fputs ("nle", file
);
6717 fputs ("ord", file
);
6724 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6725 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6727 switch (GET_MODE (x
))
6729 case HImode
: putc ('w', file
); break;
6731 case SFmode
: putc ('l', file
); break;
6733 case DFmode
: putc ('q', file
); break;
6734 default: gcc_unreachable ();
6741 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
6744 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6745 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6748 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
6751 /* Like above, but reverse condition */
6753 /* Check to see if argument to %c is really a constant
6754 and not a condition code which needs to be reversed. */
6755 if (!COMPARISON_P (x
))
6757 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6760 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
6763 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6764 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6767 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
6771 /* It doesn't actually matter what mode we use here, as we're
6772 only going to use this for printing. */
6773 x
= adjust_address_nv (x
, DImode
, 8);
6780 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
6783 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
6786 int pred_val
= INTVAL (XEXP (x
, 0));
6788 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
6789 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
6791 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
6792 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
6794 /* Emit hints only in the case default branch prediction
6795 heuristics would fail. */
6796 if (taken
!= cputaken
)
6798 /* We use 3e (DS) prefix for taken branches and
6799 2e (CS) prefix for not taken branches. */
6801 fputs ("ds ; ", file
);
6803 fputs ("cs ; ", file
);
6810 output_operand_lossage ("invalid operand code '%c'", code
);
6814 if (GET_CODE (x
) == REG
)
6815 print_reg (x
, code
, file
);
6817 else if (GET_CODE (x
) == MEM
)
6819 /* No `byte ptr' prefix for call instructions. */
6820 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
6823 switch (GET_MODE_SIZE (GET_MODE (x
)))
6825 case 1: size
= "BYTE"; break;
6826 case 2: size
= "WORD"; break;
6827 case 4: size
= "DWORD"; break;
6828 case 8: size
= "QWORD"; break;
6829 case 12: size
= "XWORD"; break;
6830 case 16: size
= "XMMWORD"; break;
6835 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6838 else if (code
== 'w')
6840 else if (code
== 'k')
6844 fputs (" PTR ", file
);
6848 /* Avoid (%rip) for call operands. */
6849 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
6850 && GET_CODE (x
) != CONST_INT
)
6851 output_addr_const (file
, x
);
6852 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
6853 output_operand_lossage ("invalid constraints for operand");
6858 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
6863 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
6864 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
6866 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6868 fprintf (file
, "0x%08lx", l
);
6871 /* These float cases don't actually occur as immediate operands. */
6872 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
6876 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
6877 fprintf (file
, "%s", dstr
);
6880 else if (GET_CODE (x
) == CONST_DOUBLE
6881 && GET_MODE (x
) == XFmode
)
6885 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
6886 fprintf (file
, "%s", dstr
);
6891 /* We have patterns that allow zero sets of memory, for instance.
6892 In 64-bit mode, we should probably support all 8-byte vectors,
6893 since we can in fact encode that into an immediate. */
6894 if (GET_CODE (x
) == CONST_VECTOR
)
6896 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
6902 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
6904 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6907 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
6908 || GET_CODE (x
) == LABEL_REF
)
6910 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6913 fputs ("OFFSET FLAT:", file
);
6916 if (GET_CODE (x
) == CONST_INT
)
6917 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
6919 output_pic_addr_const (file
, x
, code
);
6921 output_addr_const (file
, x
);
6925 /* Print a memory operand whose address is ADDR. */
6928 print_operand_address (FILE *file
, rtx addr
)
6930 struct ix86_address parts
;
6931 rtx base
, index
, disp
;
6933 int ok
= ix86_decompose_address (addr
, &parts
);
6938 index
= parts
.index
;
6940 scale
= parts
.scale
;
6948 if (USER_LABEL_PREFIX
[0] == 0)
6950 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
6956 if (!base
&& !index
)
6958 /* Displacement only requires special attention. */
6960 if (GET_CODE (disp
) == CONST_INT
)
6962 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
6964 if (USER_LABEL_PREFIX
[0] == 0)
6966 fputs ("ds:", file
);
6968 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
6971 output_pic_addr_const (file
, disp
, 0);
6973 output_addr_const (file
, disp
);
6975 /* Use one byte shorter RIP relative addressing for 64bit mode. */
6977 && ((GET_CODE (disp
) == SYMBOL_REF
6978 && ! tls_symbolic_operand (disp
, GET_MODE (disp
)))
6979 || GET_CODE (disp
) == LABEL_REF
6980 || (GET_CODE (disp
) == CONST
6981 && GET_CODE (XEXP (disp
, 0)) == PLUS
6982 && (GET_CODE (XEXP (XEXP (disp
, 0), 0)) == SYMBOL_REF
6983 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) == LABEL_REF
)
6984 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)))
6985 fputs ("(%rip)", file
);
6989 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6994 output_pic_addr_const (file
, disp
, 0);
6995 else if (GET_CODE (disp
) == LABEL_REF
)
6996 output_asm_label (disp
);
6998 output_addr_const (file
, disp
);
7003 print_reg (base
, 0, file
);
7007 print_reg (index
, 0, file
);
7009 fprintf (file
, ",%d", scale
);
7015 rtx offset
= NULL_RTX
;
7019 /* Pull out the offset of a symbol; print any symbol itself. */
7020 if (GET_CODE (disp
) == CONST
7021 && GET_CODE (XEXP (disp
, 0)) == PLUS
7022 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
7024 offset
= XEXP (XEXP (disp
, 0), 1);
7025 disp
= gen_rtx_CONST (VOIDmode
,
7026 XEXP (XEXP (disp
, 0), 0));
7030 output_pic_addr_const (file
, disp
, 0);
7031 else if (GET_CODE (disp
) == LABEL_REF
)
7032 output_asm_label (disp
);
7033 else if (GET_CODE (disp
) == CONST_INT
)
7036 output_addr_const (file
, disp
);
7042 print_reg (base
, 0, file
);
7045 if (INTVAL (offset
) >= 0)
7047 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
7051 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
7058 print_reg (index
, 0, file
);
7060 fprintf (file
, "*%d", scale
);
7068 output_addr_const_extra (FILE *file
, rtx x
)
7072 if (GET_CODE (x
) != UNSPEC
)
7075 op
= XVECEXP (x
, 0, 0);
7076 switch (XINT (x
, 1))
7078 case UNSPEC_GOTTPOFF
:
7079 output_addr_const (file
, op
);
7080 /* FIXME: This might be @TPOFF in Sun ld. */
7081 fputs ("@GOTTPOFF", file
);
7084 output_addr_const (file
, op
);
7085 fputs ("@TPOFF", file
);
7088 output_addr_const (file
, op
);
7090 fputs ("@TPOFF", file
);
7092 fputs ("@NTPOFF", file
);
7095 output_addr_const (file
, op
);
7096 fputs ("@DTPOFF", file
);
7098 case UNSPEC_GOTNTPOFF
:
7099 output_addr_const (file
, op
);
7101 fputs ("@GOTTPOFF(%rip)", file
);
7103 fputs ("@GOTNTPOFF", file
);
7105 case UNSPEC_INDNTPOFF
:
7106 output_addr_const (file
, op
);
7107 fputs ("@INDNTPOFF", file
);
7117 /* Split one or more DImode RTL references into pairs of SImode
7118 references. The RTL can be REG, offsettable MEM, integer constant, or
7119 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7120 split and "num" is its length. lo_half and hi_half are output arrays
7121 that parallel "operands". */
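/* Minimal usage sketch (added for illustration; the variable names are
   hypothetical, not from the original source):

     rtx ops[1] = { some_dimode_operand };
     rtx lo[1], hi[1];
     split_di (ops, 1, lo, hi);

   Afterwards lo[0] holds bits 0-31 and hi[0] bits 32-63 of ops[0], either as
   MEMs adjusted to offsets 0 and 4 or as SImode subregs, as the code below
   shows.  */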
7124 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7128 rtx op = operands[num];
7130 /* simplify_subreg refuses to split volatile memory addresses,
7131 but we still have to handle it. */
7132 if (GET_CODE (op) == MEM)
7134 lo_half[num] = adjust_address (op, SImode, 0);
7135 hi_half[num] = adjust_address (op, SImode, 4);
7139 lo_half[num] = simplify_gen_subreg (SImode, op,
7140 GET_MODE (op) == VOIDmode
7141 ? DImode : GET_MODE (op), 0);
7142 hi_half[num] = simplify_gen_subreg (SImode, op,
7143 GET_MODE (op) == VOIDmode
7144 ? DImode : GET_MODE (op), 4);
7148 /* Split one or more TImode RTL references into pairs of DImode
7149 references. The RTL can be REG, offsettable MEM, integer constant, or
7150 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
7151 split and "num" is its length. lo_half and hi_half are output arrays
7152 that parallel "operands". */
7155 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7159 rtx op = operands[num];
7161 /* simplify_subreg refuses to split volatile memory addresses, but we
7162 still have to handle it. */
7163 if (GET_CODE (op) == MEM)
7165 lo_half[num] = adjust_address (op, DImode, 0);
7166 hi_half[num] = adjust_address (op, DImode, 8);
7170 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7171 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7176 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7177 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7178 is the expression of the binary operation. The output may either be
7179 emitted here, or returned to the caller, like all output_* functions.
7181 There is no guarantee that the operands are the same mode, as they
7182 might be within FLOAT or FLOAT_EXTEND expressions. */
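/* Example (added annotation, not part of the original source): for
   operands[3] == (mult:DF (reg) (reg)) with SSE registers the returned
   template presumably ends up as "mulsd\t{%2, %0|%0, %2}" via the strcat
   below, while for x87 stack registers one of the fmul/fmulp forms is
   chosen depending on which operand is st(0) and whether an operand dies.  */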
7184 #ifndef SYSV386_COMPAT
7185 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7186 wants to fix the assemblers because that causes incompatibility
7187 with gcc. No-one wants to fix gcc because that causes
7188 incompatibility with assemblers... You can use the option of
7189 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7190 #define SYSV386_COMPAT 1
7194 output_387_binary_op (rtx insn, rtx *operands)
7196 static char buf[30];
7199 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
7201 #ifdef ENABLE_CHECKING
7202 /* Even if we do not want to check the inputs, this documents input
7203 constraints. Which helps in understanding the following code. */
7204 if (STACK_REG_P (operands
[0])
7205 && ((REG_P (operands
[1])
7206 && REGNO (operands
[0]) == REGNO (operands
[1])
7207 && (STACK_REG_P (operands
[2]) || GET_CODE (operands
[2]) == MEM
))
7208 || (REG_P (operands
[2])
7209 && REGNO (operands
[0]) == REGNO (operands
[2])
7210 && (STACK_REG_P (operands
[1]) || GET_CODE (operands
[1]) == MEM
)))
7211 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
7214 gcc_assert (is_sse
);
7217 switch (GET_CODE (operands
[3]))
7220 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7221 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7229 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7230 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7238 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7239 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7247 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7248 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7262 if (GET_MODE (operands
[0]) == SFmode
)
7263 strcat (buf
, "ss\t{%2, %0|%0, %2}");
7265 strcat (buf
, "sd\t{%2, %0|%0, %2}");
7270 switch (GET_CODE (operands
[3]))
7274 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
7276 rtx temp
= operands
[2];
7277 operands
[2] = operands
[1];
7281 /* know operands[0] == operands[1]. */
7283 if (GET_CODE (operands
[2]) == MEM
)
7289 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
7291 if (STACK_TOP_P (operands
[0]))
7292 /* How is it that we are storing to a dead operand[2]?
7293 Well, presumably operands[1] is dead too. We can't
7294 store the result to st(0) as st(0) gets popped on this
7295 instruction. Instead store to operands[2] (which I
7296 think has to be st(1)). st(1) will be popped later.
7297 gcc <= 2.8.1 didn't have this check and generated
7298 assembly code that the Unixware assembler rejected. */
7299 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7301 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7305 if (STACK_TOP_P (operands
[0]))
7306 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7308 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7313 if (GET_CODE (operands
[1]) == MEM
)
7319 if (GET_CODE (operands
[2]) == MEM
)
7325 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
7328 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7329 derived assemblers, confusingly reverse the direction of
7330 the operation for fsub{r} and fdiv{r} when the
7331 destination register is not st(0). The Intel assembler
7332 doesn't have this brain damage. Read !SYSV386_COMPAT to
7333 figure out what the hardware really does. */
7334 if (STACK_TOP_P (operands
[0]))
7335 p
= "{p\t%0, %2|rp\t%2, %0}";
7337 p
= "{rp\t%2, %0|p\t%0, %2}";
7339 if (STACK_TOP_P (operands
[0]))
7340 /* As above for fmul/fadd, we can't store to st(0). */
7341 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7343 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7348 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
7351 if (STACK_TOP_P (operands
[0]))
7352 p
= "{rp\t%0, %1|p\t%1, %0}";
7354 p
= "{p\t%1, %0|rp\t%0, %1}";
7356 if (STACK_TOP_P (operands
[0]))
7357 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7359 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7364 if (STACK_TOP_P (operands
[0]))
7366 if (STACK_TOP_P (operands
[1]))
7367 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7369 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7372 else if (STACK_TOP_P (operands
[1]))
7375 p
= "{\t%1, %0|r\t%0, %1}";
7377 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7383 p
= "{r\t%2, %0|\t%0, %2}";
7385 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7398 /* Output code to initialize control word copies used by trunc?f?i and
7399 rounding patterns. CURRENT_MODE is set to current control word,
7400 while NEW_MODE is set to new control word. */
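/* Worked example (added annotation, not part of the original source): the
   x87 control word keeps the rounding-control field in bits 10-11 and the
   precision-exception mask in bit 5, which is what the constants below
   encode:
     (cw & ~0x0c00) | 0x0400   round down toward -oo
     (cw & ~0x0c00) | 0x0800   round up toward +oo
     cw | 0x0c00               round toward zero (truncate)
     cw | 0x0020               mask the precision exception (for nearbyint)  */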
7403 emit_i387_cw_initialization (rtx current_mode, rtx new_mode, int mode)
7405 rtx reg = gen_reg_rtx (HImode);
7407 emit_insn (gen_x86_fnstcw_1 (current_mode));
7408 emit_move_insn (reg, current_mode);
7410 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7416 /* round down toward -oo */
7417 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
7421 /* round up toward +oo */
7422 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
7426 /* round toward zero (truncate) */
7427 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7430 case I387_CW_MASK_PM:
7431 /* mask precision exception for nearbyint() */
7432 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7444 /* round down toward -oo */
7445 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7446 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
7450 /* round up toward +oo */
7451 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7452 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
7456 /* round toward zero (truncate) */
7457 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
7460 case I387_CW_MASK_PM:
7461 /* mask precision exception for nearbyint() */
7462 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7470 emit_move_insn (new_mode, reg);
7473 /* Output code for INSN to convert a float to a signed int. OPERANDS
7474 are the insn operands. The output may be [HSD]Imode and the input
7475 operand may be [SDX]Fmode. */
7478 output_fix_trunc (rtx insn
, rtx
*operands
, int fisttp
)
7480 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
7481 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
7482 int round_mode
= get_attr_i387_cw (insn
);
7484 /* Jump through a hoop or two for DImode, since the hardware has no
7485 non-popping instruction. We used to do this a different way, but
7486 that was somewhat fragile and broke with post-reload splitters. */
7487 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
7488 output_asm_insn ("fld\t%y1", operands
);
7490 gcc_assert (STACK_TOP_P (operands
[1]));
7491 gcc_assert (GET_CODE (operands
[0]) == MEM
);
7494 output_asm_insn ("fisttp%z0\t%0", operands
);
7497 if (round_mode
!= I387_CW_ANY
)
7498 output_asm_insn ("fldcw\t%3", operands
);
7499 if (stack_top_dies
|| dimode_p
)
7500 output_asm_insn ("fistp%z0\t%0", operands
);
7502 output_asm_insn ("fist%z0\t%0", operands
);
7503 if (round_mode
!= I387_CW_ANY
)
7504 output_asm_insn ("fldcw\t%2", operands
);
7510 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7511 should be used. UNORDERED_P is true when fucom should be used. */
7514 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
7517 rtx cmp_op0
, cmp_op1
;
7518 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
7522 cmp_op0
= operands
[0];
7523 cmp_op1
= operands
[1];
7527 cmp_op0
= operands
[1];
7528 cmp_op1
= operands
[2];
7533 if (GET_MODE (operands
[0]) == SFmode
)
7535 return "ucomiss\t{%1, %0|%0, %1}";
7537 return "comiss\t{%1, %0|%0, %1}";
7540 return "ucomisd\t{%1, %0|%0, %1}";
7542 return "comisd\t{%1, %0|%0, %1}";
7545 gcc_assert (STACK_TOP_P (cmp_op0
));
7547 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
7549 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
7553 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
7554 return TARGET_USE_FFREEP
? "ffreep\t%y1" : "fstp\t%y1";
7557 return "ftst\n\tfnstsw\t%0";
7560 if (STACK_REG_P (cmp_op1
)
7562 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
7563 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
7565 /* If both the top of the 387 stack dies, and the other operand
7566 is also a stack register that dies, then this must be a
7567 `fcompp' float compare */
7571 /* There is no double popping fcomi variant. Fortunately,
7572 eflags is immune from the fstp's cc clobbering. */
7574 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
7576 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
7577 return TARGET_USE_FFREEP
? "ffreep\t%y0" : "fstp\t%y0";
7582 return "fucompp\n\tfnstsw\t%0";
7584 return "fcompp\n\tfnstsw\t%0";
7589 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7591 static const char * const alt
[16] =
7593 "fcom%z2\t%y2\n\tfnstsw\t%0",
7594 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7595 "fucom%z2\t%y2\n\tfnstsw\t%0",
7596 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7598 "ficom%z2\t%y2\n\tfnstsw\t%0",
7599 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7603 "fcomi\t{%y1, %0|%0, %y1}",
7604 "fcomip\t{%y1, %0|%0, %y1}",
7605 "fucomi\t{%y1, %0|%0, %y1}",
7606 "fucomip\t{%y1, %0|%0, %y1}",
7617 mask = eflags_p << 3;
7618 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
7619 mask |= unordered_p << 1;
7620 mask |= stack_top_dies;
7622 gcc_assert (mask < 16);
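/* Worked example (added annotation, not part of the original source): an
   fnstsw-based unordered compare against a stack register in which the top
   of the stack dies gives eflags_p == 0, integer operand == 0,
   unordered_p == 1 and stack_top_dies == 1, so mask == 3 and alt[3] selects
   "fucomp%z2\t%y2\n\tfnstsw\t%0".  */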
7631 ix86_output_addr_vec_elt (FILE *file
, int value
)
7633 const char *directive
= ASM_LONG
;
7637 directive
= ASM_QUAD
;
7639 gcc_assert (!TARGET_64BIT
);
7642 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
7646 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
7649 fprintf (file
, "%s%s%d-%s%d\n",
7650 ASM_LONG
, LPREFIX
, value
, LPREFIX
, rel
);
7651 else if (HAVE_AS_GOTOFF_IN_DATA
)
7652 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
7654 else if (TARGET_MACHO
)
7656 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
7657 machopic_output_function_base_name (file
);
7658 fprintf(file
, "\n");
7662 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
7663 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
7666 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7670 ix86_expand_clear (rtx dest)
7674 /* We play register width games, which are only valid after reload. */
7675 gcc_assert (reload_completed);
7677 /* Avoid HImode and its attendant prefix byte. */
7678 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7679 dest = gen_rtx_REG (SImode, REGNO (dest));
7681 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7683 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7684 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7686 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7687 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
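/* Added note (not part of the original source): "xor %eax, %eax" is two
   bytes against five for "mov $0, %eax", but it clobbers the flags, which
   is why the xor form above is wrapped in a PARALLEL with a CLOBBER of the
   flags register (hard register 17).  */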
7693 /* X is an unchanging MEM. If it is a constant pool reference, return
7694 the constant pool rtx, else NULL. */
7697 maybe_get_pool_constant (rtx x
)
7699 x
= ix86_delegitimize_address (XEXP (x
, 0));
7701 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
7702 return get_pool_constant (x
);
7708 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
7710 int strict
= (reload_in_progress
|| reload_completed
);
7712 enum tls_model model
;
7717 if (GET_CODE (op1
) == SYMBOL_REF
)
7719 model
= SYMBOL_REF_TLS_MODEL (op1
);
7722 op1
= legitimize_tls_address (op1
, model
, true);
7723 op1
= force_operand (op1
, op0
);
7728 else if (GET_CODE (op1
) == CONST
7729 && GET_CODE (XEXP (op1
, 0)) == PLUS
7730 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
7732 model
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1
, 0), 0));
7735 rtx addend
= XEXP (XEXP (op1
, 0), 1);
7736 op1
= legitimize_tls_address (XEXP (XEXP (op1
, 0), 0), model
, true);
7737 op1
= force_operand (op1
, NULL
);
7738 op1
= expand_simple_binop (Pmode
, PLUS
, op1
, addend
,
7739 op0
, 1, OPTAB_DIRECT
);
7745 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
7750 rtx temp
= ((reload_in_progress
7751 || ((op0
&& GET_CODE (op0
) == REG
)
7753 ? op0
: gen_reg_rtx (Pmode
));
7754 op1
= machopic_indirect_data_reference (op1
, temp
);
7755 op1
= machopic_legitimize_pic_address (op1
, mode
,
7756 temp
== op1
? 0 : temp
);
7758 else if (MACHOPIC_INDIRECT
)
7759 op1
= machopic_indirect_data_reference (op1
, 0);
7763 if (GET_CODE (op0
) == MEM
)
7764 op1
= force_reg (Pmode
, op1
);
7766 op1
= legitimize_address (op1
, op1
, Pmode
);
7767 #endif /* TARGET_MACHO */
7771 if (GET_CODE (op0
) == MEM
7772 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
7773 || !push_operand (op0
, mode
))
7774 && GET_CODE (op1
) == MEM
)
7775 op1
= force_reg (mode
, op1
);
7777 if (push_operand (op0
, mode
)
7778 && ! general_no_elim_operand (op1
, mode
))
7779 op1
= copy_to_mode_reg (mode
, op1
);
7781 /* Force large constants in 64bit compilation into register
7782 to get them CSEed. */
7783 if (TARGET_64BIT
&& mode
== DImode
7784 && immediate_operand (op1
, mode
)
7785 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
7786 && !register_operand (op0
, mode
)
7787 && optimize
&& !reload_completed
&& !reload_in_progress
)
7788 op1
= copy_to_mode_reg (mode
, op1
);
7790 if (FLOAT_MODE_P (mode
))
7792 /* If we are loading a floating point constant to a register,
7793 force the value to memory now, since we'll get better code
7794 out the back end. */
7798 else if (GET_CODE (op1
) == CONST_DOUBLE
)
7800 op1
= validize_mem (force_const_mem (mode
, op1
));
7801 if (!register_operand (op0
, mode
))
7803 rtx temp
= gen_reg_rtx (mode
);
7804 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
7805 emit_move_insn (op0
, temp
);
7812 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
7816 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
7818 rtx op0
= operands
[0], op1
= operands
[1];
7820 /* Force constants other than zero into memory. We do not know how
7821 the instructions used to build constants modify the upper 64 bits
7822 of the register, once we have that information we may be able
7823 to handle some of them more efficiently. */
7824 if ((reload_in_progress
| reload_completed
) == 0
7825 && register_operand (op0
, mode
)
7826 && CONSTANT_P (op1
) && op1
!= CONST0_RTX (mode
))
7827 op1
= validize_mem (force_const_mem (mode
, op1
));
7829 /* Make operand1 a register if it isn't already. */
7831 && !register_operand (op0
, mode
)
7832 && !register_operand (op1
, mode
))
7834 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
7838 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
7841 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
7842 straight to ix86_expand_vector_move. */
7845 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
7854 /* If we're optimizing for size, movups is the smallest. */
7857 op0
= gen_lowpart (V4SFmode
, op0
);
7858 op1
= gen_lowpart (V4SFmode
, op1
);
7859 emit_insn (gen_sse_movups (op0
, op1
));
7863 /* ??? If we have typed data, then it would appear that using
7864 movdqu is the only way to get unaligned data loaded with
7866 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
7868 op0
= gen_lowpart (V16QImode
, op0
);
7869 op1
= gen_lowpart (V16QImode
, op1
);
7870 emit_insn (gen_sse2_movdqu (op0
, op1
));
7874 if (TARGET_SSE2
&& mode
== V2DFmode
)
7878 /* When SSE registers are split into halves, we can avoid
7879 writing to the top half twice. */
7880 if (TARGET_SSE_SPLIT_REGS
)
7882 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
7887 /* ??? Not sure about the best option for the Intel chips.
7888 The following would seem to satisfy; the register is
7889 entirely cleared, breaking the dependency chain. We
7890 then store to the upper half, with a dependency depth
7891 of one. A rumor has it that Intel recommends two movsd
7892 followed by an unpacklpd, but this is unconfirmed. And
7893 given that the dependency depth of the unpacklpd would
7894 still be one, I'm not sure why this would be better. */
7895 zero
= CONST0_RTX (V2DFmode
);
7898 m
= adjust_address (op1
, DFmode
, 0);
7899 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
7900 m
= adjust_address (op1
, DFmode
, 8);
7901 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
7905 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
7906 emit_move_insn (op0
, CONST0_RTX (mode
));
7908 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
7910 if (mode
!= V4SFmode
)
7911 op0
= gen_lowpart (V4SFmode
, op0
);
7912 m
= adjust_address (op1
, V2SFmode
, 0);
7913 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
7914 m
= adjust_address (op1
, V2SFmode
, 8);
7915 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
7918 else if (MEM_P (op0
))
7920 /* If we're optimizing for size, movups is the smallest. */
7923 op0
= gen_lowpart (V4SFmode
, op0
);
7924 op1
= gen_lowpart (V4SFmode
, op1
);
7925 emit_insn (gen_sse_movups (op0
, op1
));
7929 /* ??? Similar to above, only less clear because of quote
7930 typeless stores unquote. */
7931 if (TARGET_SSE2
&& !TARGET_SSE_TYPELESS_STORES
7932 && GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
7934 op0
= gen_lowpart (V16QImode
, op0
);
7935 op1
= gen_lowpart (V16QImode
, op1
);
7936 emit_insn (gen_sse2_movdqu (op0
, op1
));
7940 if (TARGET_SSE2
&& mode
== V2DFmode
)
7942 m
= adjust_address (op0
, DFmode
, 0);
7943 emit_insn (gen_sse2_storelpd (m
, op1
));
7944 m
= adjust_address (op0
, DFmode
, 8);
7945 emit_insn (gen_sse2_storehpd (m
, op1
));
7949 if (mode
!= V4SFmode
)
7950 op1
= gen_lowpart (V4SFmode
, op1
);
7951 m
= adjust_address (op0
, V2SFmode
, 0);
7952 emit_insn (gen_sse_storelps (m
, op1
));
7953 m
= adjust_address (op0
, V2SFmode
, 8);
7954 emit_insn (gen_sse_storehps (m
, op1
));
7961 /* Expand a push in MODE. This is some mode for which we do not support
7962 proper push instructions, at least from the registers that we expect
7963 the value to live in. */
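/* Example (added annotation, not part of the original source): pushing a
   DFmode value this way expands to roughly

     sub $8, %esp
     <move the value to (%esp)>

   i.e. an explicit stack-pointer adjustment followed by an ordinary store,
   since no real push instruction exists for the registers involved.  */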
7966 ix86_expand_push (enum machine_mode mode, rtx x)
7970 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
7971 GEN_INT (-GET_MODE_SIZE (mode)),
7972 stack_pointer_rtx, 1, OPTAB_DIRECT);
7973 if (tmp != stack_pointer_rtx)
7974 emit_move_insn (stack_pointer_rtx, tmp);
7976 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
7977 emit_move_insn (tmp, x);
7980 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
7981 destination to use for the operation. If different from the true
7982 destination in operands[0], a copy operation will be required. */
7985 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
7988 int matching_memory
;
7989 rtx src1
, src2
, dst
;
7995 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7996 if (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
7997 && (rtx_equal_p (dst
, src2
)
7998 || immediate_operand (src1
, mode
)))
8005 /* If the destination is memory, and we do not have matching source
8006 operands, do things in registers. */
8007 matching_memory
= 0;
8008 if (GET_CODE (dst
) == MEM
)
8010 if (rtx_equal_p (dst
, src1
))
8011 matching_memory
= 1;
8012 else if (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
8013 && rtx_equal_p (dst
, src2
))
8014 matching_memory
= 2;
8016 dst
= gen_reg_rtx (mode
);
8019 /* Both source operands cannot be in memory. */
8020 if (GET_CODE (src1
) == MEM
&& GET_CODE (src2
) == MEM
)
8022 if (matching_memory
!= 2)
8023 src2
= force_reg (mode
, src2
);
8025 src1
= force_reg (mode
, src1
);
8028 /* If the operation is not commutable, source 1 cannot be a constant
8029 or non-matching memory. */
8030 if ((CONSTANT_P (src1
)
8031 || (!matching_memory
&& GET_CODE (src1
) == MEM
))
8032 && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
8033 src1
= force_reg (mode
, src1
);
8035 /* If optimizing, copy to regs to improve CSE */
8036 if (optimize
&& ! no_new_pseudos
)
8038 if (GET_CODE (dst
) == MEM
)
8039 dst
= gen_reg_rtx (mode
);
8040 if (GET_CODE (src1
) == MEM
)
8041 src1
= force_reg (mode
, src1
);
8042 if (GET_CODE (src2
) == MEM
)
8043 src2
= force_reg (mode
, src2
);
8046 src1
= operands
[1] = src1
;
8047 src2
= operands
[2] = src2
;
8051 /* Similarly, but assume that the destination has already been
8055 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
8056 enum machine_mode mode
, rtx operands
[])
8058 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
8059 gcc_assert (dst
== operands
[0]);
8062 /* Attempt to expand a binary operator. Make the expansion closer to the
8063 actual machine, then just general_operand, which will allow 3 separate
8064 memory references (one output, two input) in a single insn. */
8067 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
8070 rtx src1
, src2
, dst
, op
, clob
;
8072 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
8076 /* Emit the instruction. */
8078 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
8079 if (reload_in_progress
)
8081 /* Reload doesn't know about the flags register, and doesn't know that
8082 it doesn't want to clobber it. We can only do this with PLUS. */
8083 gcc_assert (code
== PLUS
);
8088 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
8089 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
8092 /* Fix up the destination if needed. */
8093 if (dst
!= operands
[0])
8094 emit_move_insn (operands
[0], dst
);
8097 /* Return TRUE or FALSE depending on whether the binary operator meets the
8098 appropriate constraints. */
8101 ix86_binary_operator_ok (enum rtx_code code
,
8102 enum machine_mode mode ATTRIBUTE_UNUSED
,
8105 /* Both source operands cannot be in memory. */
8106 if (GET_CODE (operands
[1]) == MEM
&& GET_CODE (operands
[2]) == MEM
)
8108 /* If the operation is not commutable, source 1 cannot be a constant. */
8109 if (CONSTANT_P (operands
[1]) && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
8111 /* If the destination is memory, we must have a matching source operand. */
8112 if (GET_CODE (operands
[0]) == MEM
8113 && ! (rtx_equal_p (operands
[0], operands
[1])
8114 || (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
8115 && rtx_equal_p (operands
[0], operands
[2]))))
8117 /* If the operation is not commutable and the source 1 is memory, we must
8118 have a matching destination. */
8119 if (GET_CODE (operands
[1]) == MEM
8120 && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
8121 && ! rtx_equal_p (operands
[0], operands
[1]))
8126 /* Attempt to expand a unary operator. Make the expansion closer to the
8127 actual machine, then just general_operand, which will allow 2 separate
8128 memory references (one output, one input) in a single insn. */
8131 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
8134 int matching_memory
;
8135 rtx src
, dst
, op
, clob
;
8140 /* If the destination is memory, and we do not have matching source
8141 operands, do things in registers. */
8142 matching_memory
= 0;
8145 if (rtx_equal_p (dst
, src
))
8146 matching_memory
= 1;
8148 dst
= gen_reg_rtx (mode
);
8151 /* When source operand is memory, destination must match. */
8152 if (MEM_P (src
) && !matching_memory
)
8153 src
= force_reg (mode
, src
);
8155 /* If optimizing, copy to regs to improve CSE. */
8156 if (optimize
&& ! no_new_pseudos
)
8158 if (GET_CODE (dst
) == MEM
)
8159 dst
= gen_reg_rtx (mode
);
8160 if (GET_CODE (src
) == MEM
)
8161 src
= force_reg (mode
, src
);
8164 /* Emit the instruction. */
8166 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
8167 if (reload_in_progress
|| code
== NOT
)
8169 /* Reload doesn't know about the flags register, and doesn't know that
8170 it doesn't want to clobber it. */
8171 gcc_assert (code
== NOT
);
8176 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
8177 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
8180 /* Fix up the destination if needed. */
8181 if (dst
!= operands
[0])
8182 emit_move_insn (operands
[0], dst
);
8185 /* Return TRUE or FALSE depending on whether the unary operator meets the
8186 appropriate constraints. */
8189 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
8190 enum machine_mode mode ATTRIBUTE_UNUSED
,
8191 rtx operands
[2] ATTRIBUTE_UNUSED
)
8193 /* If one of operands is memory, source and destination must match. */
8194 if ((GET_CODE (operands
[0]) == MEM
8195 || GET_CODE (operands
[1]) == MEM
)
8196 && ! rtx_equal_p (operands
[0], operands
[1]))
8201 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
8202 Create a mask for the sign bit in MODE for an SSE register. If VECT is
8203 true, then replicate the mask for all elements of the vector register.
8204 If INVERT is true, then create a mask excluding the sign bit. */
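/* Example (added annotation, not part of the original source): for SFmode
   the sign bit is 0x80000000, so ix86_build_signbit_mask (SFmode, true,
   false) yields the V4SFmode constant { -0.0, -0.0, -0.0, -0.0 } used for
   NEG, and with INVERT the 0x7fffffff pattern used for ABS.  */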
8207 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
8209 enum machine_mode vec_mode;
8210 HOST_WIDE_INT hi, lo;
8215 /* Find the sign bit, sign extended to 2*HWI. */
8217 lo = 0x80000000, hi = lo < 0;
8218 else if (HOST_BITS_PER_WIDE_INT >= 64)
8219 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
8221 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
8226 /* Force this value into the low part of a fp vector constant. */
8227 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
8228 mask = gen_lowpart (mode, mask);
8233 v = gen_rtvec (4, mask, mask, mask, mask);
8235 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
8236 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
8237 vec_mode = V4SFmode;
8242 v = gen_rtvec (2, mask, mask);
8244 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
8245 vec_mode = V2DFmode;
8248 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
8251 /* Generate code for floating point ABS or NEG. */
8254 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
8257 rtx mask
, set
, use
, clob
, dst
, src
;
8258 bool matching_memory
;
8259 bool use_sse
= false;
8260 bool vector_mode
= VECTOR_MODE_P (mode
);
8261 enum machine_mode elt_mode
= mode
;
8265 elt_mode
= GET_MODE_INNER (mode
);
8268 else if (TARGET_SSE_MATH
)
8269 use_sse
= SSE_FLOAT_MODE_P (mode
);
8271 /* NEG and ABS performed with SSE use bitwise mask operations.
8272 Create the appropriate mask now. */
8274 mask
= ix86_build_signbit_mask (elt_mode
, vector_mode
, code
== ABS
);
8277 /* When not using SSE, we don't use the mask, but prefer to keep the
8278 same general form of the insn pattern to reduce duplication when
8279 it comes time to split. */
8286 /* If the destination is memory, and we don't have matching source
8287 operands, do things in registers. */
8288 matching_memory
= false;
8291 if (rtx_equal_p (dst
, src
) && (!optimize
|| no_new_pseudos
))
8292 matching_memory
= true;
8294 dst
= gen_reg_rtx (mode
);
8296 if (MEM_P (src
) && !matching_memory
)
8297 src
= force_reg (mode
, src
);
8301 set
= gen_rtx_fmt_ee (code
== NEG
? XOR
: AND
, mode
, src
, mask
);
8302 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
8307 set
= gen_rtx_fmt_e (code
, mode
, src
);
8308 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
8309 use
= gen_rtx_USE (VOIDmode
, mask
);
8310 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
8311 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (3, set
, use
, clob
)));
8314 if (dst
!= operands
[0])
8315 emit_move_insn (operands
[0], dst
);
8318 /* Expand a copysign operation. Special case operand 0 being a constant. */
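/* Scalar sketch of the bit trick the expanders below implement (added for
   illustration, not part of the original source; plain C rather than RTL):

     float copysignf_sketch (float x, float s)
     {
       unsigned int ix, is;
       memcpy (&ix, &x, sizeof ix);
       memcpy (&is, &s, sizeof is);
       ix = (ix & 0x7fffffff) | (is & 0x80000000);
       memcpy (&x, &ix, sizeof ix);
       return x;
     }

   The SSE expansion performs the same AND/ANDN/IOR dance using the sign-bit
   masks built above.  */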
8321 ix86_expand_copysign (rtx operands[])
8323 enum machine_mode mode, vmode;
8324 rtx dest, op0, op1, mask, nmask;
8330 mode = GET_MODE (dest);
8331 vmode = mode == SFmode ? V4SFmode : V2DFmode;
8333 if (GET_CODE (op0
) == CONST_DOUBLE
)
8337 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
8338 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
8340 if (op0
== CONST0_RTX (mode
))
8341 op0
= CONST0_RTX (vmode
);
8345 v
= gen_rtvec (4, op0
, CONST0_RTX (SFmode
),
8346 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
8348 v
= gen_rtvec (2, op0
, CONST0_RTX (DFmode
));
8349 op0
= force_reg (vmode
, gen_rtx_CONST_VECTOR (vmode
, v
));
8352 mask
= ix86_build_signbit_mask (mode
, 0, 0);
8355 emit_insn (gen_copysignsf3_const (dest
, op0
, op1
, mask
));
8357 emit_insn (gen_copysigndf3_const (dest
, op0
, op1
, mask
));
8361 nmask
= ix86_build_signbit_mask (mode
, 0, 1);
8362 mask
= ix86_build_signbit_mask (mode
, 0, 0);
8365 emit_insn (gen_copysignsf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
8367 emit_insn (gen_copysigndf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
8371 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
8372 be a constant, and so has already been expanded into a vector constant. */
8375 ix86_split_copysign_const (rtx operands
[])
8377 enum machine_mode mode
, vmode
;
8378 rtx dest
, op0
, op1
, mask
, x
;
8385 mode
= GET_MODE (dest
);
8386 vmode
= GET_MODE (mask
);
8388 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
8389 x
= gen_rtx_AND (vmode
, dest
, mask
);
8390 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
8392 if (op0
!= CONST0_RTX (vmode
))
8394 x
= gen_rtx_IOR (vmode
, dest
, op0
);
8395 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
8399 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
8400 so we have to do two masks. */
8403 ix86_split_copysign_var (rtx operands
[])
8405 enum machine_mode mode
, vmode
;
8406 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
8409 scratch
= operands
[1];
8412 nmask
= operands
[4];
8415 mode
= GET_MODE (dest
);
8416 vmode
= GET_MODE (mask
);
8418 if (rtx_equal_p (op0
, op1
))
8420 /* Shouldn't happen often (it's useless, obviously), but when it does
8421 we'd generate incorrect code if we continue below. */
8422 emit_move_insn (dest
, op0
);
8426 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
8428 gcc_assert (REGNO (op1
) == REGNO (scratch
));
8430 x
= gen_rtx_AND (vmode
, scratch
, mask
);
8431 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
8434 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
8435 x
= gen_rtx_NOT (vmode
, dest
);
8436 x
= gen_rtx_AND (vmode
, x
, op0
);
8437 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
8441 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
8443 x
= gen_rtx_AND (vmode
, scratch
, mask
);
8445 else /* alternative 2,4 */
8447 gcc_assert (REGNO (mask
) == REGNO (scratch
));
8448 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
8449 x
= gen_rtx_AND (vmode
, scratch
, op1
);
8451 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
8453 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
8455 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
8456 x
= gen_rtx_AND (vmode
, dest
, nmask
);
8458 else /* alternative 3,4 */
8460 gcc_assert (REGNO (nmask
) == REGNO (dest
));
8462 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
8463 x
= gen_rtx_AND (vmode
, dest
, op0
);
8465 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
8468 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
8469 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
8472 /* Return TRUE or FALSE depending on whether the first SET in INSN
8473 has source and destination with matching CC modes, and that the
8474 CC mode is at least as constrained as REQ_MODE. */
8477 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
8480 enum machine_mode set_mode
;
8482 set
= PATTERN (insn
);
8483 if (GET_CODE (set
) == PARALLEL
)
8484 set
= XVECEXP (set
, 0, 0);
8485 gcc_assert (GET_CODE (set
) == SET
);
8486 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
8488 set_mode
= GET_MODE (SET_DEST (set
));
8492 if (req_mode
!= CCNOmode
8493 && (req_mode
!= CCmode
8494 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
8498 if (req_mode
== CCGCmode
)
8502 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
8506 if (req_mode
== CCZmode
)
8516 return (GET_MODE (SET_SRC (set
)) == set_mode
);
8519 /* Generate insn patterns to do an integer compare of OPERANDS. */
8522 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
8524 enum machine_mode cmpmode
;
8527 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
8528 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
8530 /* This is very simple, but making the interface the same as in the
8531 FP case makes the rest of the code easier. */
8532 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
8533 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
8535 /* Return the test that should be put into the flags user, i.e.
8536 the bcc, scc, or cmov instruction. */
8537 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
8540 /* Figure out whether to use ordered or unordered fp comparisons.
8541 Return the appropriate mode to use. */
8544 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
8546 /* ??? In order to make all comparisons reversible, we do all comparisons
8547 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8548 all forms trapping and nontrapping comparisons, we can make inequality
8549 comparisons trapping again, since it results in better code when using
8550 FCOM based compares. */
8551 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
8555 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
8557 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
8558 return ix86_fp_compare_mode (code
);
8561 /* Only zero flag is needed. */
8563 case NE
: /* ZF!=0 */
8565 /* Codes needing carry flag. */
8566 case GEU
: /* CF=0 */
8567 case GTU
: /* CF=0 & ZF=0 */
8568 case LTU
: /* CF=1 */
8569 case LEU
: /* CF=1 | ZF=1 */
8571 /* Codes possibly doable only with sign flag when
8572 comparing against zero. */
8573 case GE
: /* SF=OF or SF=0 */
8574 case LT
: /* SF<>OF or SF=1 */
8575 if (op1
== const0_rtx
)
8578 /* For other cases Carry flag is not required. */
8580 /* Codes doable only with sign flag when comparing
8581 against zero, but we miss jump instruction for it
8582 so we need to use relational tests against overflow
8583 that thus needs to be zero. */
8584 case GT
: /* ZF=0 & SF=OF */
8585 case LE
: /* ZF=1 | SF<>OF */
8586 if (op1
== const0_rtx
)
8590 /* strcmp pattern do (use flags) and combine may ask us for proper
8599 /* Return the fixed registers used for condition codes. */
8602 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
8609 /* If two condition code modes are compatible, return a condition code
8610 mode which is compatible with both. Otherwise, return
8613 static enum machine_mode
8614 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
8619 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
8622 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
8623 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
8651 /* These are only compatible with themselves, which we already
8657 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8660 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED
)
8662 enum rtx_code swapped_code
= swap_condition (code
);
8663 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
8664 || (ix86_fp_comparison_cost (swapped_code
)
8665 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
8668 /* Swap, force into registers, or otherwise massage the two operands
8669 to a fp comparison. The operands are updated in place; the new
8670 comparison code is returned. */
8672 static enum rtx_code
8673 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
8675 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
8676 rtx op0
= *pop0
, op1
= *pop1
;
8677 enum machine_mode op_mode
= GET_MODE (op0
);
8678 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
8680 /* All of the unordered compare instructions only work on registers.
8681 The same is true of the fcomi compare instructions. The same is
8682 true of the XFmode compare instructions if not comparing with
8683 zero (ftst insn is used in this case). */
8686 && (fpcmp_mode
== CCFPUmode
8687 || (op_mode
== XFmode
8688 && ! (standard_80387_constant_p (op0
) == 1
8689 || standard_80387_constant_p (op1
) == 1))
8690 || ix86_use_fcomi_compare (code
)))
8692 op0
= force_reg (op_mode
, op0
);
8693 op1
= force_reg (op_mode
, op1
);
8697 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8698 things around if they appear profitable, otherwise force op0
8701 if (standard_80387_constant_p (op0
) == 0
8702 || (GET_CODE (op0
) == MEM
8703 && ! (standard_80387_constant_p (op1
) == 0
8704 || GET_CODE (op1
) == MEM
)))
8707 tmp
= op0
, op0
= op1
, op1
= tmp
;
8708 code
= swap_condition (code
);
8711 if (GET_CODE (op0
) != REG
)
8712 op0
= force_reg (op_mode
, op0
);
8714 if (CONSTANT_P (op1
))
8716 int tmp
= standard_80387_constant_p (op1
);
8718 op1
= validize_mem (force_const_mem (op_mode
, op1
));
8722 op1
= force_reg (op_mode
, op1
);
8725 op1
= force_reg (op_mode
, op1
);
8729 /* Try to rearrange the comparison to make it cheaper. */
8730 if (ix86_fp_comparison_cost (code
)
8731 > ix86_fp_comparison_cost (swap_condition (code
))
8732 && (GET_CODE (op1
) == REG
|| !no_new_pseudos
))
8735 tmp
= op0
, op0
= op1
, op1
= tmp
;
8736 code
= swap_condition (code
);
8737 if (GET_CODE (op0
) != REG
)
8738 op0
= force_reg (op_mode
, op0
);
8746 /* Convert comparison codes we use to represent FP comparison to integer
8747 code that will result in proper branch. Return UNKNOWN if no such code
8751 ix86_fp_compare_code_to_integer (enum rtx_code code
)
8780 /* Split comparison code CODE into comparisons we can do using branch
8781 instructions. BYPASS_CODE is comparison code for branch that will
8782 branch around FIRST_CODE and SECOND_CODE. If some of branches
8783 is not required, set value to UNKNOWN.
8784 We never require more than two branches. */
8787 ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*bypass_code
,
8788 enum rtx_code
*first_code
,
8789 enum rtx_code
*second_code
)
8792 *bypass_code
= UNKNOWN
;
8793 *second_code
= UNKNOWN
;
8795 /* The fcomi comparison sets flags as follows:
8805 case GT
: /* GTU - CF=0 & ZF=0 */
8806 case GE
: /* GEU - CF=0 */
8807 case ORDERED
: /* PF=0 */
8808 case UNORDERED
: /* PF=1 */
8809 case UNEQ
: /* EQ - ZF=1 */
8810 case UNLT
: /* LTU - CF=1 */
8811 case UNLE
: /* LEU - CF=1 | ZF=1 */
8812 case LTGT
: /* EQ - ZF=0 */
8814 case LT
: /* LTU - CF=1 - fails on unordered */
8816 *bypass_code
= UNORDERED
;
8818 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
8820 *bypass_code
= UNORDERED
;
8822 case EQ
: /* EQ - ZF=1 - fails on unordered */
8824 *bypass_code
= UNORDERED
;
8826 case NE
: /* NE - ZF=0 - fails on unordered */
8828 *second_code
= UNORDERED
;
8830 case UNGE
: /* GEU - CF=0 - fails on unordered */
8832 *second_code
= UNORDERED
;
8834 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
8836 *second_code
= UNORDERED
;
8841 if (!TARGET_IEEE_FP
)
8843 *second_code
= UNKNOWN
;
8844 *bypass_code
= UNKNOWN
;
/* Return the cost of a comparison done with fcom + arithmetic operations on AX.
   All following functions use the number of instructions as the cost metric.
   In the future this should be tweaked to compute bytes for optimize_size and
   take into account performance of various instructions on various CPUs.  */

ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
{
  if (!TARGET_IEEE_FP)

  /* The cost of code output by ix86_expand_fp_compare.  */

/* Return cost of comparison done using fcomi operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */

ix86_fp_comparison_fcomi_cost (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return arbitrarily high cost when instruction is not supported - this
     prevents gcc from using it.  */
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
/* Return cost of comparison done using sahf operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */

ix86_fp_comparison_sahf_cost (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return arbitrarily high cost when the instruction is not preferred - this
     keeps gcc from using it.  */
  if (!TARGET_USE_SAHF && !optimize_size)
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;

/* Compute cost of the comparison done using any method.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */

ix86_fp_comparison_cost (enum rtx_code code)
{
  int fcomi_cost, sahf_cost, arithmetics_cost = 1024;

  fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
  sahf_cost = ix86_fp_comparison_sahf_cost (code);
  min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
  if (min > sahf_cost)
  if (min > fcomi_cost)
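
/* Illustrative note (added; not in the original sources): the three cost
   routines above correspond to the three ways a flags-consuming FP compare
   can be produced:

     fcomi - fcomi(p) sets EFLAGS directly       (fcomi_cost)
     sahf  - fcom(p); fnstsw %ax; sahf           (sahf_cost)
     arith - fcom(p); fnstsw %ax; test/and/cmp   (arithmetics_cost)

   ix86_fp_comparison_cost returns the smallest of the three, and
   ix86_expand_fp_compare below picks the matching expansion.  */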
/* Generate insn patterns to do a floating point compare of OPERANDS.  */

ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
			rtx *second_test, rtx *bypass_test)
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  int cost = ix86_fp_comparison_cost (code);
  enum rtx_code bypass_code, first_code, second_code;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  *second_test = NULL_RTX;
  *bypass_test = NULL_RTX;

  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

  /* Do fcomi/sahf based test when profitable.  */
  if ((bypass_code == UNKNOWN || bypass_test)
      && (second_code == UNKNOWN || second_test)
      && ix86_fp_comparison_arithmetics_cost (code) > cost)
    {
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),

      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
      emit_insn (gen_x86_sahf_1 (scratch));
      /* The FP codes work out to act like unsigned.  */
      intcmp_mode = fpcmp_mode;

      if (bypass_code != UNKNOWN)
	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
      if (second_code != UNKNOWN)
	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
	 doesn't happen to work out to anything nice combination-wise.
	 So do some bit twiddling on the value we've got in AH to come
	 up with an appropriate set of condition codes.  */

      intcmp_mode = CCNOmode;

	  if (code == GT || !TARGET_IEEE_FP)
	    emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));

	  emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	  emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	  emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
	  intcmp_mode = CCmode;

	  if (code == LT && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
	      intcmp_mode = CCmode;
	    }

	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));

	  if (code == GE || !TARGET_IEEE_FP)
	    emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));

	  emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	  emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,

	  if (code == LE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	    }

	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));

	  if (code == EQ && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	    }

	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));

	  if (code == NE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
	    }

	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
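
      /* Illustrative note (added; not in the original sources): after
	 "fnstsw %ax" the x87 condition bits land in AH as C0 = 0x01,
	 C2 = 0x04 and C3 = 0x40, so the masks used above decode as
	 0x45 = C0|C2|C3, 0x44 = C2|C3, 0x05 = C0|C2 and 0x40 = C3;
	 C2 is the bit that flags an unordered (NaN) result.  */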
  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
			 gen_rtx_REG (intcmp_mode, FLAGS_REG),

ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
{
  op0 = ix86_compare_op0;
  op1 = ix86_compare_op1;

  *second_test = NULL_RTX;
  *bypass_test = NULL_RTX;

  if (ix86_compare_emitted)
    {
      ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
      ix86_compare_emitted = NULL_RTX;
    }
  else if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
				  second_test, bypass_test);

    ret = ix86_expand_int_compare (code, op0, op1);
/* Return true if the CODE will result in nontrivial jump sequence.  */

ix86_fp_jump_nontrivial_p (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;

  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return bypass_code != UNKNOWN || second_code != UNKNOWN;
ix86_expand_branch (enum rtx_code code, rtx label)
{
  switch (GET_MODE (ix86_compare_op0))

      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

      enum rtx_code bypass_code, first_code, second_code;

      code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
      ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

      /* Check whether we will use the natural sequence with one jump.  If
	 so, we can expand jump early.  Otherwise delay expansion by
	 creating compound insn to not confuse optimizers.  */
      if (bypass_code == UNKNOWN && second_code == UNKNOWN

	ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
			      gen_rtx_LABEL_REF (VOIDmode, label),
			      pc_rtx, NULL_RTX, NULL_RTX);

	  tmp = gen_rtx_fmt_ee (code, VOIDmode,
				ix86_compare_op0, ix86_compare_op1);
	  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				      gen_rtx_LABEL_REF (VOIDmode, label),
	  tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

	  use_fcomi = ix86_use_fcomi_compare (code);
	  vec = rtvec_alloc (3 + !use_fcomi);
	  RTVEC_ELT (vec, 0) = tmp;
	    = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
	    = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
	    = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

	  emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
      /* Expand DImode branch into multiple compare+branch.  */

	rtx lo[2], hi[2], label2;
	enum rtx_code code1, code2, code3;

	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
	  {
	    tmp = ix86_compare_op0;
	    ix86_compare_op0 = ix86_compare_op1;
	    ix86_compare_op1 = tmp;
	    code = swap_condition (code);
	  }
	split_di (&ix86_compare_op0, 1, lo+0, hi+0);
	split_di (&ix86_compare_op1, 1, lo+1, hi+1);

	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
	   avoid two branches.  This costs one extra insn, so disable when
	   optimizing for size.  */

	if ((code == EQ || code == NE)
	     || hi[1] == const0_rtx || lo[1] == const0_rtx))

	    if (hi[1] != const0_rtx)
	      xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    if (lo[1] != const0_rtx)
	      xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    tmp = expand_binop (SImode, ior_optab, xor1, xor0,
				NULL_RTX, 0, OPTAB_WIDEN);

	    ix86_compare_op0 = tmp;
	    ix86_compare_op1 = const0_rtx;
	    ix86_expand_branch (code, label);

	/* Otherwise, if we are doing less-than or greater-or-equal-than,
	   op1 is a constant and the low word is zero, then we can just
	   examine the high word.  */

	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)

	    case LT: case LTU: case GE: case GEU:
	      ix86_compare_op0 = hi[0];
	      ix86_compare_op1 = hi[1];
	      ix86_expand_branch (code, label);

	/* Otherwise, we need two or three jumps.  */

	label2 = gen_label_rtx ();

	code2 = swap_condition (code);
	code3 = unsigned_condition (code);

	  case LT: case GT: case LTU: case GTU:
	  case LE:  code1 = LT;  code2 = GT;  break;
	  case GE:  code1 = GT;  code2 = LT;  break;
	  case LEU: code1 = LTU; code2 = GTU; break;
	  case GEU: code1 = GTU; code2 = LTU; break;
	  case EQ:  code1 = UNKNOWN; code2 = NE; break;
	  case NE:  code2 = UNKNOWN; break;

	 * if (hi(a) < hi(b)) goto true;
	 * if (hi(a) > hi(b)) goto false;
	 * if (lo(a) < lo(b)) goto true;

	ix86_compare_op0 = hi[0];
	ix86_compare_op1 = hi[1];

	if (code1 != UNKNOWN)
	  ix86_expand_branch (code1, label);
	if (code2 != UNKNOWN)
	  ix86_expand_branch (code2, label2);

	ix86_compare_op0 = lo[0];
	ix86_compare_op1 = lo[1];
	ix86_expand_branch (code3, label);

	if (code2 != UNKNOWN)
	  emit_label (label2);
/* Split branch based on floating point condition.  */

ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
		      rtx target1, rtx target2, rtx tmp, rtx pushed)
{
  rtx label = NULL_RTX;
  int bypass_probability = -1, second_probability = -1, probability = -1;

  if (target2 != pc_rtx)
      code = reverse_condition_maybe_unordered (code);

  condition = ix86_expand_fp_compare (code, op1, op2,
				      tmp, &second, &bypass);

  /* Remove pushed operand from stack.  */
    ix86_free_from_memory (GET_MODE (pushed));

  if (split_branch_probability >= 0)
    {
      /* Distribute the probabilities across the jumps.
	 Assume the BYPASS and SECOND to be always test ...  */
      probability = split_branch_probability;

      /* Value of 1 is low enough to make no need for probability
	 to be updated.  Later we may run some experiments and see
	 if unordered values are more frequent in practice.  */
      bypass_probability = 1;
      second_probability = 1;
    }

  if (bypass != NULL_RTX)
    {
      label = gen_label_rtx ();
      i = emit_jump_insn (gen_rtx_SET
			    gen_rtx_IF_THEN_ELSE (VOIDmode,
			      gen_rtx_LABEL_REF (VOIDmode,
      if (bypass_probability >= 0)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (bypass_probability),
    }

  i = emit_jump_insn (gen_rtx_SET
			gen_rtx_IF_THEN_ELSE (VOIDmode,
					      condition, target1, target2)));
  if (probability >= 0)
      = gen_rtx_EXPR_LIST (REG_BR_PROB,
			   GEN_INT (probability),

  if (second != NULL_RTX)
    {
      i = emit_jump_insn (gen_rtx_SET
			    gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
      if (second_probability >= 0)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (second_probability),
    }

  if (label != NULL_RTX)
)
9425 rtx ret
, tmp
, tmpreg
, equiv
;
9426 rtx second_test
, bypass_test
;
9428 if (GET_MODE (ix86_compare_op0
) == DImode
9430 return 0; /* FAIL */
9432 gcc_assert (GET_MODE (dest
) == QImode
);
9434 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9435 PUT_MODE (ret
, QImode
);
9440 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
9441 if (bypass_test
|| second_test
)
9443 rtx test
= second_test
;
9445 rtx tmp2
= gen_reg_rtx (QImode
);
9448 gcc_assert (!second_test
);
9451 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
9453 PUT_MODE (test
, QImode
);
9454 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
9457 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
9459 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
9462 /* Attach a REG_EQUAL note describing the comparison result. */
9463 if (ix86_compare_op0
&& ix86_compare_op1
)
9465 equiv
= simplify_gen_relational (code
, QImode
,
9466 GET_MODE (ix86_compare_op0
),
9467 ix86_compare_op0
, ix86_compare_op1
);
9468 set_unique_reg_note (get_last_insn (), REG_EQUAL
, equiv
);
9471 return 1; /* DONE */
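
/* Illustrative note (added; not in the original sources): when the FP
   compare needs a second flag test or a guarding bypass test, the code
   above materializes both tests into QImode registers and combines them
   with andqi3/iorqi3; e.g. "x != y" under IEEE math ends up roughly as a
   setne combined via "or" with a parity-based setcc.  */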
/* Expand comparison setting or clearing carry flag.  Return true when
   successful and set pop for the operation.  */

ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
{
  enum machine_mode mode =
    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);

  /* Do not handle DImode compares that go through the special path.  Also we
     can't deal with FP compares yet.  This is possible to add.  */
  if ((mode == DImode && !TARGET_64BIT))

  if (FLOAT_MODE_P (mode))
    {
      rtx second_test = NULL, bypass_test = NULL;
      rtx compare_op, compare_seq;

      /* Shortcut: following common codes never translate
	 into carry flag compares.  */
      if (code == EQ || code == NE || code == UNEQ || code == LTGT
	  || code == ORDERED || code == UNORDERED)

      /* These comparisons require zero flag; swap operands so they won't.  */
      if ((code == GT || code == UNLE || code == LE || code == UNGT)
	  code = swap_condition (code);

      /* Try to expand the comparison and verify that we end up with a carry
	 flag based comparison.  This fails to be true only when we decide
	 to expand the comparison using arithmetic, which is not a common
	 scenario.  */
      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
					   &second_test, &bypass_test);
      compare_seq = get_insns ();

      if (second_test || bypass_test)

      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
	  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
	code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));

	code = GET_CODE (compare_op);
      if (code != LTU && code != GEU)

      emit_insn (compare_seq);
    }

  if (!INTEGRAL_MODE_P (mode))

      /* Convert a==0 into (unsigned)a<1.  */
      if (op1 != const0_rtx)
      code = (code == EQ ? LTU : GEU);

      /* Convert a>b into b<a or a>=b+1.  */
      if (GET_CODE (op1) == CONST_INT)
	{
	  op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
	  /* Bail out on overflow.  We still can swap operands but that
	     would force loading of the constant into register.  */
	  if (op1 == const0_rtx
	      || !x86_64_immediate_operand (op1, GET_MODE (op1)))
	  code = (code == GTU ? GEU : LTU);
	}

      code = (code == GTU ? LTU : GEU);

      /* Convert a>=0 into (unsigned)a<0x80000000.  */
      if (mode == DImode || op1 != const0_rtx)
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LT ? GEU : LTU);

      if (mode == DImode || op1 != constm1_rtx)
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LE ? GEU : LTU);

  /* Swapping operands may cause constant to appear as first operand.  */
  if (!nonimmediate_operand (op0, VOIDmode))
      op0 = force_reg (mode, op0);

  ix86_compare_op0 = op0;
  ix86_compare_op1 = op1;
  *pop = ix86_expand_compare (code, NULL, NULL);
  gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
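
/* Illustrative note (added; not in the original sources): the point of the
   canonicalizations above is to end up with LTU or GEU, the two codes that
   read only the carry flag, e.g.

	a == 0    becomes   (unsigned) a < 1      (LTU)
	a >u 7    becomes   a >=u 8               (GEU, constant bumped by one)

   so that the caller can fold the result straight into sbb/adc.  */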
ix86_expand_int_movcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  rtx second_test, bypass_test;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool sign_bit_compare_p = false;

  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
  compare_seq = get_insns ();

  compare_code = GET_CODE (compare_op);

  if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
      || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
    sign_bit_compare_p = true;

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if ((mode != HImode || TARGET_FAST_PREFIX)
      && (mode != DImode || TARGET_64BIT)
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);

      /* Sign bit compares are better done using shifts than we do by using
	 ...  */
      if (sign_bit_compare_p
	  || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
					     ix86_compare_op1, &compare_op))
	{
	  /* Detect overlap between destination and compare sources.  */

	  if (!sign_bit_compare_p)
	    {
	      compare_code = GET_CODE (compare_op);

	      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
		  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
		compare_code = ix86_fp_compare_code_to_integer (compare_code);

	      /* To simplify rest of code, restrict to the GEU case.  */
	      if (compare_code == LTU)
		{
		  HOST_WIDE_INT tmp = ct;
		  compare_code = reverse_condition (compare_code);
		  code = reverse_condition (code);

		  PUT_CODE (compare_op,
			    reverse_condition_maybe_unordered
			    (GET_CODE (compare_op)));
		  PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
		}

	      if (reg_overlap_mentioned_p (out, ix86_compare_op0)
		  || reg_overlap_mentioned_p (out, ix86_compare_op1))
		tmp = gen_reg_rtx (mode);

	      emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
	      emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
	    }

	      if (code == GT || code == GE)
		code = reverse_condition (code);

		  HOST_WIDE_INT tmp = ct;

	      tmp = emit_store_flag (tmp, code, ix86_compare_op0,
				     ix86_compare_op1, VOIDmode, 0, -1);

	      tmp = expand_simple_binop (mode, PLUS,
					 copy_rtx (tmp), 1, OPTAB_DIRECT);

	      tmp = expand_simple_binop (mode, IOR,
					 copy_rtx (tmp), 1, OPTAB_DIRECT);

	  else if (diff == -1 && ct)

	      tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
	      tmp = expand_simple_binop (mode, PLUS,
					 copy_rtx (tmp), GEN_INT (cf),
					 copy_rtx (tmp), 1, OPTAB_DIRECT);

	       * andl cf - ct, dest

	      tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
	      tmp = expand_simple_binop (mode, AND,
					 gen_int_mode (cf - ct, mode),
					 copy_rtx (tmp), 1, OPTAB_DIRECT);
	      tmp = expand_simple_binop (mode, PLUS,
					 copy_rtx (tmp), GEN_INT (ct),
					 copy_rtx (tmp), 1, OPTAB_DIRECT);

	  if (!rtx_equal_p (tmp, out))
	    emit_move_insn (copy_rtx (out), copy_rtx (tmp));

	  return 1; /* DONE */
	}
	  tmp = ct, ct = cf, cf = tmp;

	  if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
	    {
	      /* We may be reversing unordered compare to normal compare, that
		 is not valid in general (we may convert non-trapping condition
		 to trapping one), however on i386 we currently emit all
		 comparisons unordered.  */
	      compare_code = reverse_condition_maybe_unordered (compare_code);
	      code = reverse_condition_maybe_unordered (code);
	    }

	      compare_code = reverse_condition (compare_code);
	      code = reverse_condition (code);

	  compare_code = UNKNOWN;
	  if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
	      && GET_CODE (ix86_compare_op1) == CONST_INT)
	    {
	      if (ix86_compare_op1 == const0_rtx
		  && (code == LT || code == GE))
		compare_code = code;
	      else if (ix86_compare_op1 == constm1_rtx)
	      else if (code == GT)
	    }

	  /* Optimize dest = (op0 < 0) ? -1 : cf.  */
	  if (compare_code != UNKNOWN
	      && GET_MODE (ix86_compare_op0) == GET_MODE (out)
	      && (cf == -1 || ct == -1))
	    {
	      /* If lea code below could be used, only optimize
		 if it results in a 2 insn sequence.  */

	      if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
		     || diff == 3 || diff == 5 || diff == 9)
		  || (compare_code == LT && ct == -1)
		  || (compare_code == GE && cf == -1))
		{
		  /* notl op1 (if necessary)

		  code = reverse_condition (code);

		  out = emit_store_flag (out, code, ix86_compare_op0,
					 ix86_compare_op1, VOIDmode, 0, -1);

		  out = expand_simple_binop (mode, IOR,
					     out, 1, OPTAB_DIRECT);
		  if (out != operands[0])
		    emit_move_insn (operands[0], out);

		  return 1; /* DONE */
		}
	    }
	  if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
	       || diff == 3 || diff == 5 || diff == 9)
	      && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
		  || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
	    {
	       * lea cf(dest*(ct-cf)),dest
	       *
	       * This also catches the degenerate setcc-only case.

	      out = emit_store_flag (out, code, ix86_compare_op0,
				     ix86_compare_op1, VOIDmode, 0, 1);

	      /* On x86_64 the lea instruction operates on Pmode, so we need
		 to get arithmetic done in the proper mode to match.  */
		tmp = copy_rtx (out);

		  out1 = copy_rtx (out);
		  tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
		    tmp = gen_rtx_PLUS (mode, tmp, out1);

		tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));

	      if (!rtx_equal_p (tmp, out))
		  out = force_operand (tmp, copy_rtx (out));
		  emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));

	      if (!rtx_equal_p (out, operands[0]))
		emit_move_insn (operands[0], copy_rtx (out));

	      return 1; /* DONE */
	    }
	   * General case:			Jumpful:
	   *   xorl dest,dest			cmpl op1, op2
	   *   cmpl op1, op2			movl ct, dest
	   *   decl dest			movl cf, dest
	   *   andl (cf-ct),dest		1:
	   *
	   * This is reasonably steep, but branch mispredict costs are
	   * high on modern cpus, so consider failing only if optimizing

	  if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
	      && BRANCH_COST >= 2)
	    {
	      if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
		  /* We may be reversing unordered compare to normal compare,
		     that is not valid in general (we may convert non-trapping
		     condition to trapping one), however on i386 we currently
		     emit all comparisons unordered.  */
		  code = reverse_condition_maybe_unordered (code);

		  code = reverse_condition (code);
		  if (compare_code != UNKNOWN)
		    compare_code = reverse_condition (compare_code);

	      if (compare_code != UNKNOWN)
		{
		  /* notl op1	(if needed)
		     ...
		     For x < 0 (resp. x <= -1) there will be no notl,
		     so if possible swap the constants to get rid of the
		     ...
		     True/false will be -1/0 while code below (store flag
		     followed by decrement) is 0/-1, so the constants need
		     to be exchanged once more.  */

		  if (compare_code == GE || !cf)
		      code = reverse_condition (code);

		      HOST_WIDE_INT tmp = cf;

		  out = emit_store_flag (out, code, ix86_compare_op0,
					 ix86_compare_op1, VOIDmode, 0, -1);
		}

		  out = emit_store_flag (out, code, ix86_compare_op0,
					 ix86_compare_op1, VOIDmode, 0, 1);

		  out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
					     copy_rtx (out), 1, OPTAB_DIRECT);

	      out = expand_simple_binop (mode, AND, copy_rtx (out),
					 gen_int_mode (cf - ct, mode),
					 copy_rtx (out), 1, OPTAB_DIRECT);
	      out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
					 copy_rtx (out), 1, OPTAB_DIRECT);
	      if (!rtx_equal_p (out, operands[0]))
		emit_move_insn (operands[0], copy_rtx (out));

	      return 1; /* DONE */
	    }
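
	  /* Illustrative note (added; not in the original sources): the
	     branchless sequence built just above computes
	     dest = cond ? ct : cf roughly as

		xorl	dest, dest
		cmpl	op2, op1
		setcc	dest		; dest = cond ? 1 : 0
		decl	dest		; dest = cond ? 0 : -1
		andl	$(cf - ct), dest
		addl	$ct, dest	; dest = cond ? ct : cf

	     matching the "General case" table sketched earlier.  */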
  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
    {
      /* Try a few things more with specific constants and a variable.  */

      rtx var, orig_out, out, tmp;

      if (BRANCH_COST <= 2)
	return 0; /* FAIL */

      /* If one of the two operands is an interesting constant, load a
	 constant with the above and mask it in with a logical operation.  */

      if (GET_CODE (operands[2]) == CONST_INT)
	{
	  if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
	    operands[3] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
	    operands[3] = const0_rtx, op = ior_optab;
	  else
	    return 0; /* FAIL */
	}
      else if (GET_CODE (operands[3]) == CONST_INT)
	{
	  if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
	    operands[2] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
	    operands[2] = const0_rtx, op = ior_optab;
	  else
	    return 0; /* FAIL */
	}
      else
	return 0; /* FAIL */

      orig_out = operands[0];
      tmp = gen_reg_rtx (mode);

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
	return 0; /* FAIL */

      /* Mask in the interesting variable.  */
      out = expand_binop (mode, op, var, tmp, orig_out, 0,
      if (!rtx_equal_p (out, orig_out))
	emit_move_insn (copy_rtx (orig_out), copy_rtx (out));

      return 1; /* DONE */
    }
   * For comparison with above,

  if (! nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  if (! nonimmediate_operand (operands[3], mode))
    operands[3] = force_reg (mode, operands[3]);

  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[3]);
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[2]);
    }

  if (! register_operand (operands[2], VOIDmode)
       || ! register_operand (operands[3], VOIDmode)))
    operands[2] = force_reg (mode, operands[2]);

      && ! register_operand (operands[3], VOIDmode))
    operands[3] = force_reg (mode, operands[3]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (mode,
						compare_op, operands[2],
    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
			    gen_rtx_IF_THEN_ELSE (mode,
						  copy_rtx (operands[3]),
						  copy_rtx (operands[0]))));
    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
			    gen_rtx_IF_THEN_ELSE (mode,
						  copy_rtx (operands[2]),
						  copy_rtx (operands[0]))));

  return 1; /* DONE */
/* Swap, force into registers, or otherwise massage the two operands
   to an sse comparison with a mask result.  Thus we differ a bit from
   ix86_prepare_fp_compare_args which expects to produce a flags result.

   The DEST operand exists to help determine whether to commute commutative
   operators.  The POP0/POP1 operands are updated in place.  The new
   comparison code is returned, or UNKNOWN if not implementable.  */

static enum rtx_code
ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
				  rtx *pop0, rtx *pop1)
{
      /* We have no LTGT as an operator.  We could implement it with
	 NE & ORDERED, but this requires an extra temporary.  It's
	 not clear that it's worth it.  */

      /* These are supported directly.  */

      /* For commutative operators, try to canonicalize the destination
	 operand to be first in the comparison - this helps reload to
	 avoid extra moves.  */
      if (!dest || !rtx_equal_p (dest, *pop1))

      /* These are not supported directly.  Swap the comparison operands
	 to transform into something that is supported.  */
      code = swap_condition (code);

      gcc_unreachable ();
/* Detect conditional moves that exactly match min/max operational
   semantics.  Note that this is IEEE safe, as long as we don't
   interchange the operands.

   Returns FALSE if this conditional move doesn't match a MIN/MAX,
   and TRUE if the operation is successful and instructions are emitted.  */

ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
			   rtx cmp_op1, rtx if_true, rtx if_false)
{
  enum machine_mode mode;

  else if (code == UNGE)
      if_true = if_false;

  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))

  mode = GET_MODE (dest);

  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
     but MODE may be a vector mode and thus not appropriate.  */
  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
    {
      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;

      if_true = force_reg (mode, if_true);
      v = gen_rtvec (2, if_true, if_false);
      tmp = gen_rtx_UNSPEC (mode, v, u);
    }

      code = is_min ? SMIN : SMAX;
      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);

  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
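
/* Illustrative note (added; not in the original sources): the IEEE concern
   above comes from the SSE min/max semantics - minss/maxss return their
   second operand when the inputs are unordered or are zeros of opposite
   sign - which is why the operands must not be interchanged and why the
   UNSPEC_IEEE_MIN/MAX path is used unless -ffinite-math-only and
   -funsafe-math-optimizations are both in effect.  */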
/* Expand an sse vector comparison.  Return the register with the result.  */

ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
		     rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);

    cmp_op0 = force_reg (mode, cmp_op0);
  if (!nonimmediate_operand (cmp_op1, mode))
    cmp_op1 = force_reg (mode, cmp_op1);

      || reg_overlap_mentioned_p (dest, op_true)
      || reg_overlap_mentioned_p (dest, op_false))
    dest = gen_reg_rtx (mode);

  x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
   operations.  This is used for both scalar and vector conditional moves.  */

ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);

  if (op_false == CONST0_RTX (mode))
    {
      op_true = force_reg (mode, op_true);
      x = gen_rtx_AND (mode, cmp, op_true);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (op_true == CONST0_RTX (mode))
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      op_true = force_reg (mode, op_true);
      op_false = force_reg (mode, op_false);

      t2 = gen_reg_rtx (mode);
      t3 = gen_reg_rtx (mode);

      x = gen_rtx_AND (mode, op_true, cmp);
      emit_insn (gen_rtx_SET (VOIDmode, t2, x));

      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, t3, x));

      x = gen_rtx_IOR (mode, t3, t2);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
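
/* Illustrative note (added; not in the original sources): because the SSE
   compare above produces an all-ones / all-zeros mask per element, the
   general case here is the classic mask blend

	dest = (cmp & op_true) | (~cmp & op_false)

   i.e. an andps/andnps/orps sequence on pre-SSE4 hardware.  */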
/* Expand a floating-point conditional move.  Return true if successful.  */

ix86_expand_fp_movcc (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp, compare_op, second_test, bypass_test;

  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    {
      enum machine_mode cmode;

      /* Since we've no cmove for sse registers, don't force bad register
	 allocation just to gain access to it.  Deny movcc when the
	 comparison mode doesn't match the move mode.  */
      cmode = GET_MODE (ix86_compare_op0);
      if (cmode == VOIDmode)
	cmode = GET_MODE (ix86_compare_op1);

      code = ix86_prepare_sse_fp_compare_args (operands[0], code,
					       &ix86_compare_op1);
      if (code == UNKNOWN)

      if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
				     ix86_compare_op1, operands[2],

      tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
				 ix86_compare_op1, operands[2], operands[3]);
      ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
    }

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);

  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      gcc_assert (!second_test && !bypass_test);
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);

      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
    }

  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[3]);
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[2]);
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (mode, compare_op,
						operands[2], operands[3])));
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (mode, bypass_test,
						  operands[3], operands[0])));
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (mode, second_test,
						  operands[2], operands[0])));
/* Expand a floating-point vector conditional move; a vcond operation
   rather than a movcc operation.  */

ix86_expand_fp_vcond (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[3]);

  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
					   &operands[4], &operands[5]);
  if (code == UNKNOWN)

  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
				 operands[5], operands[1], operands[2]))

  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
			     operands[1], operands[2]);
  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
/* Expand a signed integral vector conditional move.  */

ix86_expand_int_vcond (rtx operands[], bool unsignedp)
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[3]);

    code = signed_condition (code);
  if (code == NE || code == LE || code == GE)
    {
      /* Inverse of a supported code.  */
      operands[1] = operands[2];
      code = reverse_condition (code);
    }

      /* Swap of a supported code.  */
      operands[4] = operands[5];
      code = swap_condition (code);

  gcc_assert (code == EQ || code == GT);

  /* Unlike floating-point, we can rely on the optimizers to have already
     converted to MIN/MAX expressions, so we don't have to handle that.  */

  /* Unsigned GT is not directly supported.  We can zero-extend QI and
     HImode elements to the next wider element size, use a signed compare,
     then repack.  For three extra instructions, this is definitely a win.  */
  if (code == GT && unsignedp)
    {
      rtx o0l, o0h, o1l, o1h, cl, ch, zero;
      enum machine_mode wider;
      rtx (*unpackl) (rtx, rtx, rtx);
      rtx (*unpackh) (rtx, rtx, rtx);
      rtx (*pack) (rtx, rtx, rtx);

	  unpackl = gen_sse2_punpcklbw;
	  unpackh = gen_sse2_punpckhbw;
	  pack = gen_sse2_packsswb;

	  unpackl = gen_sse2_punpcklwd;
	  unpackh = gen_sse2_punpckhwd;
	  pack = gen_sse2_packssdw;

	  gcc_unreachable ();

      operands[4] = force_reg (mode, operands[4]);
      operands[5] = force_reg (mode, operands[5]);

      o0l = gen_reg_rtx (wider);
      o0h = gen_reg_rtx (wider);
      o1l = gen_reg_rtx (wider);
      o1h = gen_reg_rtx (wider);
      cl = gen_reg_rtx (wider);
      ch = gen_reg_rtx (wider);
      cmp = gen_reg_rtx (mode);
      zero = force_reg (mode, CONST0_RTX (mode));

      emit_insn (unpackl (gen_lowpart (mode, o0l), operands[4], zero));
      emit_insn (unpackh (gen_lowpart (mode, o0h), operands[4], zero));
      emit_insn (unpackl (gen_lowpart (mode, o1l), operands[5], zero));
      emit_insn (unpackh (gen_lowpart (mode, o1h), operands[5], zero));

      x = gen_rtx_GT (wider, o0l, o1l);
      emit_insn (gen_rtx_SET (VOIDmode, cl, x));

      x = gen_rtx_GT (wider, o0h, o1h);
      emit_insn (gen_rtx_SET (VOIDmode, ch, x));

      emit_insn (pack (cmp, cl, ch));
    }

    cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
			       operands[1], operands[2]);

  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */

ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode = GET_MODE (operands[0]);

  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)

  if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
				       ix86_compare_op1, &compare_op))

  code = GET_CODE (compare_op);

  if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
      || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
      code = ix86_fp_compare_code_to_integer (code);

      PUT_CODE (compare_op,
		reverse_condition_maybe_unordered
		(GET_CODE (compare_op)));

      PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));

  PUT_MODE (compare_op, mode);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (GET_MODE (operands[0]))
	  emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
	  emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
	  emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
	  emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
	  gcc_unreachable ();
    }
  else
    {
      switch (GET_MODE (operands[0]))
	  emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
	  emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
	  emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
	  emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
	  gcc_unreachable ();
    }
  return 1; /* DONE */
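
/* Illustrative note (added; not in the original sources): with the
   comparison reduced to a carry-flag test by
   ix86_expand_carry_flag_compare, a conditional increment such as
   "x = y + (a <u b)" expands to roughly

	cmpl	b, a		; CF = (a <u b)
	adcl	$0, x		; x = y + CF

   and the *sub*3_carry patterns handle the conditional decrement.  */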
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and nonoffsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */

ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
{
    size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
  gcc_assert (size >= 2 && size <= 3);

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
    }

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      int ok = push_operand (operand, VOIDmode);

      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = operand;
    }

  if (GET_CODE (operand) == CONST_VECTOR)
    {
      enum machine_mode imode = int_mode_for_mode (mode);
      /* Caution: if we looked through a constant pool memory above,
	 the operand may actually have a different mode now.  That's
	 ok, since we want to pun this all the way back to an integer.  */
      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
      gcc_assert (operand != NULL);
    }

      if (mode == DImode)
	split_di (&operand, 1, &parts[0], &parts[1]);

	  if (REG_P (operand))
	    {
	      gcc_assert (reload_completed);
	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, SImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, SImode, 4);
		parts[2] = adjust_address (operand, SImode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);

		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = gen_int_mode (l[2], SImode);

		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);

		  gcc_unreachable ();

	      parts[1] = gen_int_mode (l[1], SImode);
	      parts[0] = gen_int_mode (l[0], SImode);
	    }
	  else
	    gcc_unreachable ();

      if (mode == TImode)
	split_ti (&operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
	{
	  enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
	  if (REG_P (operand))
	    {
	      gcc_assert (reload_completed);
	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, DImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, upper_mode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      real_to_target (l, &r, mode);

	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
	      if (HOST_BITS_PER_WIDE_INT >= 64)
		  ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
		   + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
		parts[0] = immed_double_const (l[0], l[1], DImode);

	      if (upper_mode == SImode)
		parts[1] = gen_int_mode (l[2], SImode);
	      else if (HOST_BITS_PER_WIDE_INT >= 64)
		  ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
		   + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
		parts[1] = immed_double_const (l[2], l[3], DImode);
	    }
	  else
	    gcc_unreachable ();
	}
/* Emit insns to perform a move or push of DI, DF, and XF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */

ix86_split_long_move (rtx operands[])
{
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
    {
      /* Optimize constant pool reference to immediates.  This is used by
	 fp moves, that force all constants to memory to allow combining.  */

      if (GET_CODE (operands[1]) == MEM
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], Pmode);
	}
	  operands[0] = gen_lowpart (DImode, operands[0]);
	  operands[1] = gen_lowpart (DImode, operands[1]);
	  emit_move_insn (operands[0], operands[1]);
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))

  gcc_assert (GET_CODE (operands[0]) != MEM
	      || offsettable_memref_p (operands[0]));

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
				   XEXP (part[1][2], 0));
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
				   XEXP (part[1][1], 0));
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	{
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  base = part[0][nparts - 1];

	  /* Handle the case when the last part isn't valid for lea.
	     Happens in 64-bit mode storing the 12-byte XFmode.  */
	  if (GET_MODE (base) != Pmode)
	    base = gen_rtx_REG (Pmode, REGNO (base));

	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
	  part[1][0] = replace_equiv_address (part[1][0], base);
	  part[1][1] = replace_equiv_address (part[1][1],
					      plus_constant (base, UNITS_PER_WORD));
	    part[1][2] = replace_equiv_address (part[1][2],
						plus_constant (base, 8));
	}
    }

	  if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
	    emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
	  emit_move_insn (part[0][2], part[1][2]);

	  /* In 64bit mode we don't have 32bit push available.  In case this is
	     register, it is OK - we will just use larger counterpart.  We also
	     retype memory - these come from an attempt to avoid REX prefix on
	     moving of second half of TFmode value.  */
	  if (GET_MODE (part[1][1]) == SImode)
	    {
	      switch (GET_CODE (part[1][1]))
		  part[1][1] = adjust_address (part[1][1], DImode, 0);
		  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
		  gcc_unreachable ();

	      if (GET_MODE (part[1][0]) == SImode)
		part[1][0] = part[1][1];
	    }

      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
	  operands[2] = part[0][2];
	  operands[3] = part[0][1];
	  operands[4] = part[0][0];
	  operands[5] = part[1][2];
	  operands[6] = part[1][1];
	  operands[7] = part[1][0];

	  operands[2] = part[0][1];
	  operands[3] = part[0][0];
	  operands[5] = part[1][1];
	  operands[6] = part[1][0];
    }
  else
    {
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[4] = part[0][2];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	  operands[7] = part[1][2];

	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
    }

  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */

      if (GET_CODE (operands[5]) == CONST_INT
	  && operands[5] != const0_rtx
	  && REG_P (operands[2]))
	{
	  if (GET_CODE (operands[6]) == CONST_INT
	      && INTVAL (operands[6]) == INTVAL (operands[5]))
	    operands[6] = operands[2];

	      && GET_CODE (operands[7]) == CONST_INT
	      && INTVAL (operands[7]) == INTVAL (operands[5]))
	    operands[7] = operands[2];
	}

	  && GET_CODE (operands[6]) == CONST_INT
	  && operands[6] != const0_rtx
	  && REG_P (operands[3])
	  && GET_CODE (operands[7]) == CONST_INT
	  && INTVAL (operands[7]) == INTVAL (operands[6]))
	operands[7] = operands[3];

  emit_move_insn (operands[2], operands[5]);
  emit_move_insn (operands[3], operands[6]);
    emit_move_insn (operands[4], operands[7]);
/* Helper function of ix86_split_ashldi used to generate an SImode
   left shift by a constant, either using a single shift or
   a sequence of add instructions.  */

ix86_expand_ashlsi3_const (rtx operand, int count)
{
    emit_insn (gen_addsi3 (operand, operand, operand));
  else if (!optimize_size
	   && count * ix86_cost->add <= ix86_cost->shift_const)
    {
      for (i = 0; i < count; i++)
	emit_insn (gen_addsi3 (operand, operand, operand));
    }
  else
    emit_insn (gen_ashlsi3 (operand, operand, GEN_INT (count)));

ix86_split_ashldi (rtx *operands, rtx scratch)
{
  rtx low[2], high[2];

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

	  emit_move_insn (high[0], low[1]);
	  emit_move_insn (low[0], const0_rtx);

	    ix86_expand_ashlsi3_const (high[0], count - 32);

	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
	  ix86_expand_ashlsi3_const (low[0], count);
    }

      split_di (operands, 1, low, high);

      if (operands[1] == const1_rtx)
	{
	  /* Assuming we've chosen QImode-capable registers, then 1LL << N
	     can be done with two 32-bit shifts, no branches, no cmoves.  */
	  if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
	    {
	      rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);

	      ix86_expand_clear (low[0]);
	      ix86_expand_clear (high[0]);
	      emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));

	      d = gen_lowpart (QImode, low[0]);
	      d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
	      s = gen_rtx_EQ (QImode, flags, const0_rtx);
	      emit_insn (gen_rtx_SET (VOIDmode, d, s));

	      d = gen_lowpart (QImode, high[0]);
	      d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
	      s = gen_rtx_NE (QImode, flags, const0_rtx);
	      emit_insn (gen_rtx_SET (VOIDmode, d, s));
	    }

	  /* Otherwise, we can get the same results by manually performing
	     a bit extract operation on bit 5, and then performing the two
	     shifts.  The two methods of getting 0/1 into low/high are exactly
	     the same size.  Avoiding the shift in the bit extract case helps
	     pentium4 a bit; no one else seems to care much either way.  */

	      if (TARGET_PARTIAL_REG_STALL && !optimize_size)
		x = gen_rtx_ZERO_EXTEND (SImode, operands[2]);
	      else
		x = gen_lowpart (SImode, operands[2]);
	      emit_insn (gen_rtx_SET (VOIDmode, high[0], x));

	      emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (5)));
	      emit_insn (gen_andsi3 (high[0], high[0], GEN_INT (1)));
	      emit_move_insn (low[0], high[0]);
	      emit_insn (gen_xorsi3 (low[0], low[0], GEN_INT (1)));

	  emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
	  emit_insn (gen_ashlsi3 (high[0], high[0], operands[2]));
	}

      if (operands[1] == constm1_rtx)
	{
	  /* For -1LL << N, we can avoid the shld instruction, because we
	     know that we're shifting 0...31 ones into a -1.  */
	  emit_move_insn (low[0], constm1_rtx);
	    emit_move_insn (high[0], low[0]);
	    emit_move_insn (high[0], constm1_rtx);
	}

	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);

	  split_di (operands, 1, low, high);
	  emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));

      emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));

      if (TARGET_CMOVE && scratch)
	{
	  ix86_expand_clear (scratch);
	  emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
	}
	emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
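
/* Illustrative note (added; not in the original sources): for a variable
   DImode shift on ia32 the generic path above boils down to

	shldl	%cl, low, high
	sall	%cl, low
	; x86_shift_adj_1/2 then fix things up when %cl >= 32

   since the shld/sal pair only handles counts 0..31.  */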
ix86_split_ashrdi (rtx *operands, rtx scratch)
{
  rtx low[2], high[2];

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

	  emit_move_insn (high[0], high[1]);
	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
	  emit_move_insn (low[0], high[0]);

      else if (count >= 32)
	{
	  emit_move_insn (low[0], high[1]);
	  emit_move_insn (high[0], low[0]);
	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
	    emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}

	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
    }

      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
	{
	  emit_move_insn (scratch, high[0]);
	  emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
	}
	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
ix86_split_lshrdi (rtx *operands, rtx scratch)
{
  rtx low[2], high[2];

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

	  emit_move_insn (low[0], high[1]);
	  ix86_expand_clear (high[0]);

	    emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));

	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
    }

      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
      if (TARGET_CMOVE && scratch)
	{
	  ix86_expand_clear (scratch);
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
	}
	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
/* Helper function for the string operations below.  Tests whether VARIABLE
   is aligned to VALUE bytes.  If true, jump to the label.  */

ix86_expand_aligntest (rtx variable, int value)
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
/* Adjust COUNTER by the VALUE.  */
static void
ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
{
  if (GET_MODE (countreg) == DImode)
    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
  else
    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
}

/* Zero extend possibly SImode EXP to Pmode register.  */
rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  rtx r;
  if (GET_MODE (exp) == VOIDmode)
    return force_reg (Pmode, exp);
  if (GET_MODE (exp) == Pmode)
    return copy_to_mode_reg (Pmode, exp);
  r = gen_reg_rtx (Pmode);
  emit_insn (gen_zero_extendsidi2 (r, exp));
  return r;
}
/* Expand string move (memcpy) operation.  Use i386 string operations when
   profitable.  expand_clrmem contains similar code.  */
int
ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
{
  rtx srcreg, destreg, countreg, srcexp, destexp;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* Can't use any of this if the user has appropriated esi or edi.  */
  if (global_regs[4] || global_regs[5])
    return 0;

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 64;

  if (GET_CODE (count_exp) == CONST_INT)
    {
      count = INTVAL (count_exp);
      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
        return 0;
    }

  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT
      || GET_MODE (count_exp) == SImode
      || x86_64_zext_immediate_operand (count_exp, VOIDmode))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  gcc_assert (counter_mode == SImode || counter_mode == DImode);

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  if (destreg != XEXP (dst, 0))
    dst = replace_equiv_address_nv (dst, destreg);
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
  if (srcreg != XEXP (src, 0))
    src = replace_equiv_address_nv (src, srcreg);

  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)?
     sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
     Size of (movsl;)*(movsw;)?(movsb;)? sequence is
     count / 4 + (count & 3), the other sequence is either 4 or 7 bytes,
     but we don't know whether upper 24 (resp. 56) bits of %ecx will be
     known to be zero or not.  The rep; movsb sequence causes higher
     register pressure though, so take that into account.  */
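
  /* For instance, for count == 11 the unrolled form is two movsl, one
     movsw and one movsb, i.e. 11 / 4 + (11 & 3) = 5 bytes of code, which
     beats the 7-byte movl $11, %ecx; rep; movsb sequence; the 4-byte
     movb $11, %cl form is only usable when the upper bits of %ecx are
     already known to be zero.  */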
  if ((!optimize || optimize_size)
      && (count == 0
          || ((count & 0x03)
              && (!optimize_size
                  || (count & 3) + count / 4 > 6))))
    {
      emit_insn (gen_cld ());
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
      srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
      emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
                              destexp, srcexp));
    }

  /* For constant aligned (or small unaligned) copies use rep movsl
     followed by code copying the rest.  For PentiumPro ensure 8 byte
     alignment to allow rep movsl acceleration.  */
  else if (count != 0
           && (align >= 8
               || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
               || optimize_size || count < (unsigned int) 64))
    {
      unsigned HOST_WIDE_INT offset = 0;
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      rtx srcmem, dstmem;

      emit_insn (gen_cld ());
      if (count & ~(size - 1))
        {
          if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
            {
              enum machine_mode movs_mode = size == 4 ? SImode : DImode;

              while (offset < (count & ~(size - 1)))
                {
                  srcmem = adjust_automodify_address_nv (src, movs_mode,
                                                         srcreg, offset);
                  dstmem = adjust_automodify_address_nv (dst, movs_mode,
                                                         destreg, offset);
                  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
                  offset += size;
                }
            }
          else
            {
              countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
                                  & (TARGET_64BIT ? -1 : 0x3fffffff));
              countreg = copy_to_mode_reg (counter_mode, countreg);
              countreg = ix86_zero_extend_to_Pmode (countreg);

              destexp = gen_rtx_ASHIFT (Pmode, countreg,
                                        GEN_INT (size == 4 ? 2 : 3));
              srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
              destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
              emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
                                      countreg, destexp, srcexp));
              offset = count & ~(size - 1);
            }
        }
      if (size == 8 && (count & 0x04))
        {
          srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
                                                 offset);
          dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
                                                 offset);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          offset += 4;
        }
      if (count & 0x02)
        {
          srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
                                                 offset);
          dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
                                                 offset);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          offset += 2;
        }
      if (count & 0x01)
        {
          srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
                                                 offset);
          dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
                                                 offset);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
        }
    }
  /* The generic code based on the glibc implementation:
     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
       allowing accelerated copying there)
     - copy the data using rep movsl
     - copy the rest.  */
  else
    {
      rtx countreg2;
      rtx label = NULL;
      rtx srcmem, dstmem;
      int desired_alignment = (TARGET_PENTIUMPRO
                               && (count == 0 || count >= (unsigned int) 260)
                               ? 8 : UNITS_PER_WORD);
      /* Get rid of MEM_OFFSETs, they won't be accurate.  */
      dst = change_address (dst, BLKmode, destreg);
      src = change_address (src, BLKmode, srcreg);

      /* In case we don't know anything about the alignment, default to
         library version, since it is usually equally fast and results in
         shorter code.

         Also emit call when we know that the count is large and call overhead
         will not be important.  */
      if (!TARGET_INLINE_ALL_STRINGOPS
          && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
        return 0;

      if (TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);

      /* We don't use loops to align destination and to copy parts smaller
         than 4 bytes, because gcc is able to optimize such code better (in
         the case the destination or the count really is aligned, gcc is often
         able to predict the branches) and also it is friendlier to the
         hardware branch prediction.

         Using loops is beneficial for generic case, because we can
         handle small counts using the loops.  Many CPUs (such as Athlon)
         have large REP prefix setup costs.

         This is quite costly.  Maybe we can revisit this decision later or
         add some customizability to this code.  */

      if (count == 0 && align < desired_alignment)
        {
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
                                   LEU, 0, counter_mode, 1, label);
        }
      if (align <= 1)
        {
          rtx label = ix86_expand_aligntest (destreg, 1);
          srcmem = change_address (src, QImode, srcreg);
          dstmem = change_address (dst, QImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          ix86_adjust_counter (countreg, 1);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align <= 2)
        {
          rtx label = ix86_expand_aligntest (destreg, 2);
          srcmem = change_address (src, HImode, srcreg);
          dstmem = change_address (dst, HImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          ix86_adjust_counter (countreg, 2);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align <= 4 && desired_alignment > 4)
        {
          rtx label = ix86_expand_aligntest (destreg, 4);
          srcmem = change_address (src, SImode, srcreg);
          dstmem = change_address (dst, SImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          ix86_adjust_counter (countreg, 4);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }

      if (label && desired_alignment > 4 && !TARGET_64BIT)
        {
          emit_label (label);
          LABEL_NUSES (label) = 1;
          label = NULL_RTX;
        }
      if (!TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());
      if (TARGET_64BIT)
        {
          emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
                                  GEN_INT (3)));
          destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
        }
      else
        {
          emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
          destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
        }
      srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
      emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
                              countreg2, destexp, srcexp));

      if (label)
        {
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
        {
          srcmem = change_address (src, SImode, srcreg);
          dstmem = change_address (dst, SImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
        }
      if ((align <= 4 || count == 0) && TARGET_64BIT)
        {
          rtx label = ix86_expand_aligntest (countreg, 4);
          srcmem = change_address (src, SImode, srcreg);
          dstmem = change_address (dst, SImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align > 2 && count != 0 && (count & 2))
        {
          srcmem = change_address (src, HImode, srcreg);
          dstmem = change_address (dst, HImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
        }
      if (align <= 2 || count == 0)
        {
          rtx label = ix86_expand_aligntest (countreg, 2);
          srcmem = change_address (src, HImode, srcreg);
          dstmem = change_address (dst, HImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align > 1 && count != 0 && (count & 1))
        {
          srcmem = change_address (src, QImode, srcreg);
          dstmem = change_address (dst, QImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
        }
      if (align <= 1 || count == 0)
        {
          rtx label = ix86_expand_aligntest (countreg, 1);
          srcmem = change_address (src, QImode, srcreg);
          dstmem = change_address (dst, QImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }

  return 1;
}
/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  expand_movmem contains similar code.  */
int
ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
{
  rtx destreg, zeroreg, countreg, destexp;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* Can't use any of this if the user has appropriated esi.  */
  if (global_regs[4])
    return 0;

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 32;

  if (GET_CODE (count_exp) == CONST_INT)
    {
      count = INTVAL (count_exp);
      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
        return 0;
    }

  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT
      || GET_MODE (count_exp) == SImode
      || x86_64_zext_immediate_operand (count_exp, VOIDmode))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  if (destreg != XEXP (dst, 0))
    dst = replace_equiv_address_nv (dst, destreg);

  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4.  The movl $N, %ecx; rep; stosb
     sequence is 7 bytes long, so if optimizing for size and count is
     small enough that some stosl, stosw and stosb instructions without
     rep are shorter, fall back into the next if.  */
  if ((!optimize || optimize_size)
      && (count == 0
          || ((count & 0x03)
              && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
    {
      emit_insn (gen_cld ());

      countreg = ix86_zero_extend_to_Pmode (count_exp);
      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
      emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
    }
  else if (count != 0
           && (align >= 8
               || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
               || optimize_size || count < (unsigned int) 64))
    {
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      unsigned HOST_WIDE_INT offset = 0;

      emit_insn (gen_cld ());

      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
      if (count & ~(size - 1))
        {
          unsigned HOST_WIDE_INT repcount;
          unsigned int max_nonrep;

          repcount = count >> (size == 4 ? 2 : 3);
          if (!TARGET_64BIT)
            repcount &= 0x3fffffff;

          /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
             movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
             bytes.  In both cases the latter seems to be faster for small
             values of N.  */
          max_nonrep = size == 4 ? 7 : 4;
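
          /* For instance, clearing 20 bytes with 32-bit stores gives
             repcount == 5; five one-byte stosl instructions (5 bytes) beat
             the 7-byte movl $5, %ecx; rep; stosl sequence, while at
             repcount == 8 the rep form is already the smaller one.  */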
          if (!optimize_size)
            switch (ix86_tune)
              {
              case PROCESSOR_PENTIUM4:
              case PROCESSOR_NOCONA:

          if (repcount <= max_nonrep)
            while (repcount-- > 0)
              {
                rtx mem = adjust_automodify_address_nv (dst,
                                                        GET_MODE (zeroreg),
                                                        destreg, offset);
                emit_insn (gen_strset (destreg, mem, zeroreg));
                offset += size;
              }
          else
            {
              countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
              countreg = ix86_zero_extend_to_Pmode (countreg);
              destexp = gen_rtx_ASHIFT (Pmode, countreg,
                                        GEN_INT (size == 4 ? 2 : 3));
              destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
              emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
                                       destexp));
              offset = count & ~(size - 1);
            }
        }
      if (size == 8 && (count & 0x04))
        {
          rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
                                                  offset);
          emit_insn (gen_strset (destreg, mem,
                                 gen_rtx_SUBREG (SImode, zeroreg, 0)));
          offset += 4;
        }
      if (count & 0x02)
        {
          rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
                                                  offset);
          emit_insn (gen_strset (destreg, mem,
                                 gen_rtx_SUBREG (HImode, zeroreg, 0)));
          offset += 2;
        }
      if (count & 0x01)
        {
          rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
                                                  offset);
          emit_insn (gen_strset (destreg, mem,
                                 gen_rtx_SUBREG (QImode, zeroreg, 0)));
        }
    }
  else
    {
      rtx countreg2;
      rtx label = NULL;
      /* Compute desired alignment of the string operation.  */
      int desired_alignment = (TARGET_PENTIUMPRO
                               && (count == 0 || count >= (unsigned int) 260)
                               ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
         library version, since it is usually equally fast and results in
         shorter code.

         Also emit call when we know that the count is large and call overhead
         will not be important.  */
      if (!TARGET_INLINE_ALL_STRINGOPS
          && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
        return 0;

      if (TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);
      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
      /* Get rid of MEM_OFFSET, it won't be accurate.  */
      dst = change_address (dst, BLKmode, destreg);

      if (count == 0 && align < desired_alignment)
        {
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
                                   LEU, 0, counter_mode, 1, label);
        }
      if (align <= 1)
        {
          rtx label = ix86_expand_aligntest (destreg, 1);
          emit_insn (gen_strset (destreg, dst,
                                 gen_rtx_SUBREG (QImode, zeroreg, 0)));
          ix86_adjust_counter (countreg, 1);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align <= 2)
        {
          rtx label = ix86_expand_aligntest (destreg, 2);
          emit_insn (gen_strset (destreg, dst,
                                 gen_rtx_SUBREG (HImode, zeroreg, 0)));
          ix86_adjust_counter (countreg, 2);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align <= 4 && desired_alignment > 4)
        {
          rtx label = ix86_expand_aligntest (destreg, 4);
          emit_insn (gen_strset (destreg, dst,
                                 (TARGET_64BIT
                                  ? gen_rtx_SUBREG (SImode, zeroreg, 0)
                                  : zeroreg)));
          ix86_adjust_counter (countreg, 4);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }

      if (label && desired_alignment > 4 && !TARGET_64BIT)
        {
          emit_label (label);
          LABEL_NUSES (label) = 1;
          label = NULL_RTX;
        }

      if (!TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());
      if (TARGET_64BIT)
        {
          emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
                                  GEN_INT (3)));
          destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
        }
      else
        {
          emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
          destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
        }
      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
      emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));

      if (label)
        {
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }

      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
        emit_insn (gen_strset (destreg, dst,
                               gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (TARGET_64BIT && (align <= 4 || count == 0))
        {
          rtx label = ix86_expand_aligntest (countreg, 4);
          emit_insn (gen_strset (destreg, dst,
                                 gen_rtx_SUBREG (SImode, zeroreg, 0)));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align > 2 && count != 0 && (count & 2))
        emit_insn (gen_strset (destreg, dst,
                               gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (align <= 2 || count == 0)
        {
          rtx label = ix86_expand_aligntest (countreg, 2);
          emit_insn (gen_strset (destreg, dst,
                                 gen_rtx_SUBREG (HImode, zeroreg, 0)));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align > 1 && count != 0 && (count & 1))
        emit_insn (gen_strset (destreg, dst,
                               gen_rtx_SUBREG (QImode, zeroreg, 0)));
      if (align <= 1 || count == 0)
        {
          rtx label = ix86_expand_aligntest (countreg, 1);
          emit_insn (gen_strset (destreg, dst,
                                 gen_rtx_SUBREG (QImode, zeroreg, 0)));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }

  return 1;
}
/* Expand strlen.  */
int
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of strlen expander is long.  Avoid its
     expansion unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_size
      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
    return 0;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_size)
    {
      /* Well it seems that some optimizer does not combine a call like
         foo(strlen(bar), strlen(bar));
         when the move and the subtraction is done here.  It does calculate
         the length just once when these instructions are done inside of
         output_strlen_unroll().  But I think since &bar[strlen(bar)] is
         often used and I use one fewer register for the lifetime of
         output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, src, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
         the string, like memchr(), so compute the length by subtracting
         the start address.  */
      if (TARGET_64BIT)
        emit_insn (gen_subdi3 (out, out, addr));
      else
        emit_insn (gen_subsi3 (out, out, addr));
    }
  else
    {
      rtx unspec;
      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      emit_insn (gen_cld ());
      src = replace_equiv_address_nv (src, scratch3);

      /* If .md starts supporting :P, this can be done in .md.  */
      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
                                                 scratch4), UNSPEC_SCAS);
      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
      if (TARGET_64BIT)
        {
          emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
          emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
        }
      else
        {
          emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
          emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
        }
    }
  return 1;
}
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the startaddress when
        not aligned, otherwise undefined

   This is just the body. It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */

static void
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
{
  int align = 0;
  rtx tmp;
  rtx mem;
  rtx cmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);

  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
        {
          align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
          align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

          /* Leave just the 3 lower bits.  */
          align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
          emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
                                   Pmode, 1, align_2_label);
          emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
                                   Pmode, 1, align_3_label);
        }
      else
        {
          /* Since the alignment is 2, we have to check 2 or 0 bytes;
             check if is aligned to 4 - byte.  */

          align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
        }

      mem = change_address (src, QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned byte on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
                               QImode, 1, end_0_label);

      /* Increment the address.  */
      if (TARGET_64BIT)
        emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
        emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
        {
          emit_label (align_2_label);

          emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                                   end_0_label);

          if (TARGET_64BIT)
            emit_insn (gen_adddi3 (out, out, const1_rtx));
          else
            emit_insn (gen_addsi3 (out, out, const1_rtx));

          emit_label (align_3_label);
        }

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                               end_0_label);

      if (TARGET_64BIT)
        emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
        emit_insn (gen_addsi3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It gives only huge programs, but does not help to
     speed up.  */
  emit_label (align_4_label);

  mem = change_address (src, SImode, out);
  emit_move_insn (scratch, mem);
  if (TARGET_64BIT)
    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
  else
    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
                         gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
                           align_4_label);
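
  /* For instance, if the word just loaded is 0x12005678, the sequence above
     computes (0x12005678 - 0x01010101) & ~0x12005678 & 0x80808080
       = 0x10ff5577 & 0xedffa987 & 0x80808080 = 0x00800000,
     which is nonzero precisely because one byte (the 0x00) is zero; a word
     with no zero byte yields 0 and the loop continues.  */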
  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
                              gen_rtx_IF_THEN_ELSE (SImode, tmp,
                                                    reg,
                                                    tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
                              gen_rtx_PLUS (Pmode, out, const2_rtx)));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
                              gen_rtx_IF_THEN_ELSE (Pmode, tmp,
                                                    reg2,
                                                    out)));
    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes? */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
                                  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      if (TARGET_64BIT)
        emit_insn (gen_adddi3 (out, out, const2_rtx));
      else
        emit_insn (gen_addsi3 (out, out, const2_rtx));

      emit_label (end_2_label);
    }

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
  if (TARGET_64BIT)
    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
  else
    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));

  emit_label (end_0_label);
}
void
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
                  rtx callarg2 ATTRIBUTE_UNUSED,
                  rtx pop, int sibcall)
{
  rtx use = NULL, call;

  if (pop == const0_rtx)
    pop = NULL;
  gcc_assert (!TARGET_64BIT || !pop);

#if TARGET_MACHO
  if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
    fnaddr = machopic_indirect_call_target (fnaddr);
#else
  /* Static functions and indirect calls don't need the pic register.  */
  if (! TARGET_64BIT && flag_pic
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
    use_reg (&use, pic_offset_table_rtx);

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      rtx al = gen_rtx_REG (QImode, 0);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }
#endif /* TARGET_MACHO */

  if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
    {
      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }
  if (sibcall && TARGET_64BIT
      && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
    {
      rtx addr;
      addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
      emit_move_insn (fnaddr, addr);
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }

  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  if (pop)
    {
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
    }

  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;
}
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static struct machine_function *
ix86_init_machine_status (void)
{
  struct machine_function *f;

  f = ggc_alloc_cleared (sizeof (struct machine_function));
  f->use_fast_prologue_epilogue_nregs = -1;

  return f;
}

/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (enum machine_mode mode, int n)
{
  struct stack_local_entry *s;

  gcc_assert (n >= 0 && n < MAX_386_STACK_LOCALS);

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return s->rtl;

  s = (struct stack_local_entry *)
    ggc_alloc (sizeof (struct stack_local_entry));
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return s->rtl;
}

/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;

static rtx
ix86_tls_get_addr (void)
{
  if (!ix86_tls_symbol)
    {
      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
                                            (TARGET_GNU_TLS && !TARGET_64BIT)
                                            ? "___tls_get_addr"
                                            : "__tls_get_addr");
    }

  return ix86_tls_symbol;
}
/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */

int
memory_address_length (rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
  int ok;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  ok = ix86_decompose_address (addr, &parts);
  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement.  */
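
  /* For example, plain (%esp) cannot be encoded with the one-byte modrm
     alone and needs a SIB byte, while (%ebp) has no displacement-free
     encoding and must be emitted as 0(%ebp) with an 8-bit displacement
     of zero.  */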
  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
         the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
          || addr == arg_pointer_rtx
          || addr == frame_pointer_rtx
          || addr == hard_frame_pointer_rtx)
        len = 1;
    }

  /* Direct Addressing.  */
  else if (disp && !base && !index)
    len = 4;

  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
        {
          if (GET_CODE (disp) == CONST_INT
              && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
              && base)
            len = 1;
          else
            len = 4;
        }
      /* ebp always wants a displacement.  */
      else if (base == hard_frame_pointer_rtx)
        len = 1;

      /* An index requires the two-byte modrm form....  */
      if (index
          /* ...like esp, which always wants an index.  */
          || base == stack_pointer_rtx
          || base == arg_pointer_rtx
          || base == frame_pointer_rtx)
        len += 1;
    }

  return len;
}
/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that insn have 8bit immediate alternative.  */
int
ix86_attr_length_immediate_default (rtx insn, int shortform)
{
  int len = 0;
  int i;

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
        if (shortform
            && GET_CODE (recog_data.operand[i]) == CONST_INT
            && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
          len = 1;
        else
          switch (get_attr_mode (insn))
            {
            case MODE_QI:
              len = 1;
              break;
            case MODE_HI:
              len = 2;
              break;
            case MODE_SI:
              len = 4;
              break;
            /* Immediates for DImode instructions are encoded as 32bit sign extended values.  */
            case MODE_DI:
              len = 4;
              break;
            default:
              fatal_insn ("unknown insn mode", insn);
            }
      }
  return len;
}

/* Compute default value for "length_address" attribute.  */
int
ix86_attr_length_address_default (rtx insn)
{
  int i;

  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn);

      if (GET_CODE (set) == PARALLEL)
        set = XVECEXP (set, 0, 0);

      gcc_assert (GET_CODE (set) == SET);

      return memory_address_length (SET_SRC (set));
    }

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (GET_CODE (recog_data.operand[i]) == MEM)
      return memory_address_length (XEXP (recog_data.operand[i], 0));

  return 0;
}
/* Return the maximum number of instructions a cpu can issue.  */

static int
ix86_issue_rate (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_PENTIUM4:
    case PROCESSOR_ATHLON:
    case PROCESSOR_NOCONA:
      return 3;

    default:
      return 1;
    }
}
/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
   by DEP_INSN and nothing set by DEP_INSN.  */

static int
ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx set, set2 = NULL_RTX;

  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return 0;

  if ((set = single_set (dep_insn)) != 0)
    set = SET_DEST (set);
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
           && XVECLEN (PATTERN (dep_insn), 0) == 2
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
    {
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
    }
  else
    return 0;

  if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
    return 0;

  /* This test is true if the dependent insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return 0;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return 0;

  return 1;
}

/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
   address with operands set by DEP_INSN.  */

static int
ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx addr;

  if (insn_type == TYPE_LEA)
    {
      addr = PATTERN (insn);

      if (GET_CODE (addr) == PARALLEL)
        addr = XVECEXP (addr, 0, 0);

      gcc_assert (GET_CODE (addr) == SET);

      addr = SET_SRC (addr);
    }
  else
    {
      int i;
      extract_insn_cached (insn);
      for (i = recog_data.n_operands - 1; i >= 0; --i)
        if (GET_CODE (recog_data.operand[i]) == MEM)
          {
            addr = XEXP (recog_data.operand[i], 0);
            goto found;
          }
      return 0;
    found:;
    }

  return modified_in_p (addr, dep_insn);
}
static int
ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (ix86_agi_dependant (insn, dep_insn, insn_type))
        cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependant (insn, dep_insn, insn_type))
        cost = 0;

      /* Floating point stores require value to be ready one cycle earlier.  */
      if (insn_type == TYPE_FMOV
          && get_attr_memory (insn) == MEMORY_STORE
          && !ix86_agi_dependant (insn, dep_insn, insn_type))
        cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      memory = get_attr_memory (insn);

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
        cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
          && (set = single_set (dep_insn)) != NULL_RTX
          && (set2 = single_set (insn)) != NULL_RTX
          && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
          && GET_CODE (SET_DEST (set2)) == MEM)
        cost += 1;

      /* Show ability of reorder buffer to hide latency of load by executing
         in parallel with previous instruction in case
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependant (insn, dep_insn, insn_type))
        {
          /* Claim moves to take one cycle, as core can issue one load
             at time and the next load can start cycle later.  */
          if (dep_insn_type == TYPE_IMOV
              || dep_insn_type == TYPE_FMOV)
            cost = 1;
        }
      break;

    case PROCESSOR_K6:
      memory = get_attr_memory (insn);

      /* The esp dependency is resolved before the instruction is really
         finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
          && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
        return 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
        cost += 5;

      /* Show ability of reorder buffer to hide latency of load by executing
         in parallel with previous instruction in case
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependant (insn, dep_insn, insn_type))
        {
          /* Claim moves to take one cycle, as core can issue one load
             at time and the next load can start cycle later.  */
          if (dep_insn_type == TYPE_IMOV
              || dep_insn_type == TYPE_FMOV)
            cost = 1;
        }
      break;

    case PROCESSOR_ATHLON:
      memory = get_attr_memory (insn);

      /* Show ability of reorder buffer to hide latency of load by executing
         in parallel with previous instruction in case
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependant (insn, dep_insn, insn_type))
        {
          enum attr_unit unit = get_attr_unit (insn);
          int loadcost;

          /* Because of the difference between the length of integer and
             floating unit pipeline preparation stages, the memory operands
             for floating point are cheaper.

             ??? For Athlon the difference is most probably 2.  */
          if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
            loadcost = 3;
          else
            loadcost = TARGET_ATHLON ? 2 : 0;

          if (cost >= loadcost)
            cost -= loadcost;
          else
            cost = 0;
        }
      break;

    default:
      break;
    }

  return cost;
}
/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */

static int
ia32_multipass_dfa_lookahead (void)
{
  if (ix86_tune == PROCESSOR_PENTIUM)
    return 2;

  if (ix86_tune == PROCESSOR_PENTIUMPRO
      || ix86_tune == PROCESSOR_K6)
    return 1;

  return 0;
}
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (tree exp, int align)
{
  if (TREE_CODE (exp) == REAL_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
        return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
        return 128;
    }
  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
           && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
    return BITS_PER_WORD;

  return align;
}
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (tree type, int align)
{
  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
          || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
    return 256;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
          && TYPE_SIZE (type)
          && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
          && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
              || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
        return 128;
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
        return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
        return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
            || TREE_CODE (type) == UNION_TYPE
            || TREE_CODE (type) == QUAL_UNION_TYPE)
           && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
           || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
        return 128;
    }

  return align;
}
/* Compute the alignment for a local variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this macro is used
   instead of that alignment to align the object.  */

int
ix86_local_alignment (tree type, int align)
{
  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
          && TYPE_SIZE (type)
          && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
          && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
              || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
        return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
        return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
        return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
            || TREE_CODE (type) == UNION_TYPE
            || TREE_CODE (type) == QUAL_UNION_TYPE)
           && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
           || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
        return 128;
    }

  return align;
}
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */
void
x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
  if (!TARGET_64BIT)
    {
      /* Compute offset from the end of the jmp to the target function.  */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
                               plus_constant (tramp, 10),
                               NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
                      gen_int_mode (0xb9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
                      gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
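
      /* For illustration, the ten bytes stored above are:
           offset 0:  b9 <cxt32>      movl $cxt, %ecx
           offset 5:  e9 <disp32>     jmp  fnaddr
         where <cxt32> and <disp32> are the little-endian 32-bit immediates
         written at offsets 1 and 6, and the jump displacement is relative
         to the end of the trampoline (tramp + 10).  */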
    }
  else
    {
      int offset = 0;

      /* Try to load address using shorter movl instead of movabs.
         We may want to support movq for kernel mode, but kernel does not use
         trampolines at the moment.  */
      if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
        {
          fnaddr = copy_to_mode_reg (DImode, fnaddr);
          emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                          gen_int_mode (0xbb41, HImode));
          emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
                          gen_lowpart (SImode, fnaddr));
          offset += 6;
        }
      else
        {
          emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                          gen_int_mode (0xbb49, HImode));
          emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
                          fnaddr);
          offset += 10;
        }
      /* Load static chain using movabs to r10.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                      gen_int_mode (0xba49, HImode));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
                      cxt);
      offset += 10;
      /* Jump to the r11 */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                      gen_int_mode (0xff49, HImode));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
                      gen_int_mode (0xe3, QImode));
      offset += 3;
      gcc_assert (offset <= TRAMPOLINE_SIZE);
    }

#ifdef ENABLE_EXECUTE_STACK
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
                     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
#endif
}

/* Codes for all the SSE/MMX builtins.  */
12817 IX86_BUILTIN_ADDPS
,
12818 IX86_BUILTIN_ADDSS
,
12819 IX86_BUILTIN_DIVPS
,
12820 IX86_BUILTIN_DIVSS
,
12821 IX86_BUILTIN_MULPS
,
12822 IX86_BUILTIN_MULSS
,
12823 IX86_BUILTIN_SUBPS
,
12824 IX86_BUILTIN_SUBSS
,
12826 IX86_BUILTIN_CMPEQPS
,
12827 IX86_BUILTIN_CMPLTPS
,
12828 IX86_BUILTIN_CMPLEPS
,
12829 IX86_BUILTIN_CMPGTPS
,
12830 IX86_BUILTIN_CMPGEPS
,
12831 IX86_BUILTIN_CMPNEQPS
,
12832 IX86_BUILTIN_CMPNLTPS
,
12833 IX86_BUILTIN_CMPNLEPS
,
12834 IX86_BUILTIN_CMPNGTPS
,
12835 IX86_BUILTIN_CMPNGEPS
,
12836 IX86_BUILTIN_CMPORDPS
,
12837 IX86_BUILTIN_CMPUNORDPS
,
12838 IX86_BUILTIN_CMPNEPS
,
12839 IX86_BUILTIN_CMPEQSS
,
12840 IX86_BUILTIN_CMPLTSS
,
12841 IX86_BUILTIN_CMPLESS
,
12842 IX86_BUILTIN_CMPNEQSS
,
12843 IX86_BUILTIN_CMPNLTSS
,
12844 IX86_BUILTIN_CMPNLESS
,
12845 IX86_BUILTIN_CMPNGTSS
,
12846 IX86_BUILTIN_CMPNGESS
,
12847 IX86_BUILTIN_CMPORDSS
,
12848 IX86_BUILTIN_CMPUNORDSS
,
12849 IX86_BUILTIN_CMPNESS
,
12851 IX86_BUILTIN_COMIEQSS
,
12852 IX86_BUILTIN_COMILTSS
,
12853 IX86_BUILTIN_COMILESS
,
12854 IX86_BUILTIN_COMIGTSS
,
12855 IX86_BUILTIN_COMIGESS
,
12856 IX86_BUILTIN_COMINEQSS
,
12857 IX86_BUILTIN_UCOMIEQSS
,
12858 IX86_BUILTIN_UCOMILTSS
,
12859 IX86_BUILTIN_UCOMILESS
,
12860 IX86_BUILTIN_UCOMIGTSS
,
12861 IX86_BUILTIN_UCOMIGESS
,
12862 IX86_BUILTIN_UCOMINEQSS
,
12864 IX86_BUILTIN_CVTPI2PS
,
12865 IX86_BUILTIN_CVTPS2PI
,
12866 IX86_BUILTIN_CVTSI2SS
,
12867 IX86_BUILTIN_CVTSI642SS
,
12868 IX86_BUILTIN_CVTSS2SI
,
12869 IX86_BUILTIN_CVTSS2SI64
,
12870 IX86_BUILTIN_CVTTPS2PI
,
12871 IX86_BUILTIN_CVTTSS2SI
,
12872 IX86_BUILTIN_CVTTSS2SI64
,
12874 IX86_BUILTIN_MAXPS
,
12875 IX86_BUILTIN_MAXSS
,
12876 IX86_BUILTIN_MINPS
,
12877 IX86_BUILTIN_MINSS
,
12879 IX86_BUILTIN_LOADUPS
,
12880 IX86_BUILTIN_STOREUPS
,
12881 IX86_BUILTIN_MOVSS
,
12883 IX86_BUILTIN_MOVHLPS
,
12884 IX86_BUILTIN_MOVLHPS
,
12885 IX86_BUILTIN_LOADHPS
,
12886 IX86_BUILTIN_LOADLPS
,
12887 IX86_BUILTIN_STOREHPS
,
12888 IX86_BUILTIN_STORELPS
,
12890 IX86_BUILTIN_MASKMOVQ
,
12891 IX86_BUILTIN_MOVMSKPS
,
12892 IX86_BUILTIN_PMOVMSKB
,
12894 IX86_BUILTIN_MOVNTPS
,
12895 IX86_BUILTIN_MOVNTQ
,
12897 IX86_BUILTIN_LOADDQU
,
12898 IX86_BUILTIN_STOREDQU
,
12900 IX86_BUILTIN_PACKSSWB
,
12901 IX86_BUILTIN_PACKSSDW
,
12902 IX86_BUILTIN_PACKUSWB
,
12904 IX86_BUILTIN_PADDB
,
12905 IX86_BUILTIN_PADDW
,
12906 IX86_BUILTIN_PADDD
,
12907 IX86_BUILTIN_PADDQ
,
12908 IX86_BUILTIN_PADDSB
,
12909 IX86_BUILTIN_PADDSW
,
12910 IX86_BUILTIN_PADDUSB
,
12911 IX86_BUILTIN_PADDUSW
,
12912 IX86_BUILTIN_PSUBB
,
12913 IX86_BUILTIN_PSUBW
,
12914 IX86_BUILTIN_PSUBD
,
12915 IX86_BUILTIN_PSUBQ
,
12916 IX86_BUILTIN_PSUBSB
,
12917 IX86_BUILTIN_PSUBSW
,
12918 IX86_BUILTIN_PSUBUSB
,
12919 IX86_BUILTIN_PSUBUSW
,
12922 IX86_BUILTIN_PANDN
,
12926 IX86_BUILTIN_PAVGB
,
12927 IX86_BUILTIN_PAVGW
,
12929 IX86_BUILTIN_PCMPEQB
,
12930 IX86_BUILTIN_PCMPEQW
,
12931 IX86_BUILTIN_PCMPEQD
,
12932 IX86_BUILTIN_PCMPGTB
,
12933 IX86_BUILTIN_PCMPGTW
,
12934 IX86_BUILTIN_PCMPGTD
,
12936 IX86_BUILTIN_PMADDWD
,
12938 IX86_BUILTIN_PMAXSW
,
12939 IX86_BUILTIN_PMAXUB
,
12940 IX86_BUILTIN_PMINSW
,
12941 IX86_BUILTIN_PMINUB
,
12943 IX86_BUILTIN_PMULHUW
,
12944 IX86_BUILTIN_PMULHW
,
12945 IX86_BUILTIN_PMULLW
,
12947 IX86_BUILTIN_PSADBW
,
12948 IX86_BUILTIN_PSHUFW
,
12950 IX86_BUILTIN_PSLLW
,
12951 IX86_BUILTIN_PSLLD
,
12952 IX86_BUILTIN_PSLLQ
,
12953 IX86_BUILTIN_PSRAW
,
12954 IX86_BUILTIN_PSRAD
,
12955 IX86_BUILTIN_PSRLW
,
12956 IX86_BUILTIN_PSRLD
,
12957 IX86_BUILTIN_PSRLQ
,
12958 IX86_BUILTIN_PSLLWI
,
12959 IX86_BUILTIN_PSLLDI
,
12960 IX86_BUILTIN_PSLLQI
,
12961 IX86_BUILTIN_PSRAWI
,
12962 IX86_BUILTIN_PSRADI
,
12963 IX86_BUILTIN_PSRLWI
,
12964 IX86_BUILTIN_PSRLDI
,
12965 IX86_BUILTIN_PSRLQI
,
12967 IX86_BUILTIN_PUNPCKHBW
,
12968 IX86_BUILTIN_PUNPCKHWD
,
12969 IX86_BUILTIN_PUNPCKHDQ
,
12970 IX86_BUILTIN_PUNPCKLBW
,
12971 IX86_BUILTIN_PUNPCKLWD
,
12972 IX86_BUILTIN_PUNPCKLDQ
,
12974 IX86_BUILTIN_SHUFPS
,
12976 IX86_BUILTIN_RCPPS
,
12977 IX86_BUILTIN_RCPSS
,
12978 IX86_BUILTIN_RSQRTPS
,
12979 IX86_BUILTIN_RSQRTSS
,
12980 IX86_BUILTIN_SQRTPS
,
12981 IX86_BUILTIN_SQRTSS
,
12983 IX86_BUILTIN_UNPCKHPS
,
12984 IX86_BUILTIN_UNPCKLPS
,
12986 IX86_BUILTIN_ANDPS
,
12987 IX86_BUILTIN_ANDNPS
,
12989 IX86_BUILTIN_XORPS
,
12992 IX86_BUILTIN_LDMXCSR
,
12993 IX86_BUILTIN_STMXCSR
,
12994 IX86_BUILTIN_SFENCE
,
12996 /* 3DNow! Original */
12997 IX86_BUILTIN_FEMMS
,
12998 IX86_BUILTIN_PAVGUSB
,
12999 IX86_BUILTIN_PF2ID
,
13000 IX86_BUILTIN_PFACC
,
13001 IX86_BUILTIN_PFADD
,
13002 IX86_BUILTIN_PFCMPEQ
,
13003 IX86_BUILTIN_PFCMPGE
,
13004 IX86_BUILTIN_PFCMPGT
,
13005 IX86_BUILTIN_PFMAX
,
13006 IX86_BUILTIN_PFMIN
,
13007 IX86_BUILTIN_PFMUL
,
13008 IX86_BUILTIN_PFRCP
,
13009 IX86_BUILTIN_PFRCPIT1
,
13010 IX86_BUILTIN_PFRCPIT2
,
13011 IX86_BUILTIN_PFRSQIT1
,
13012 IX86_BUILTIN_PFRSQRT
,
13013 IX86_BUILTIN_PFSUB
,
13014 IX86_BUILTIN_PFSUBR
,
13015 IX86_BUILTIN_PI2FD
,
13016 IX86_BUILTIN_PMULHRW
,
13018 /* 3DNow! Athlon Extensions */
13019 IX86_BUILTIN_PF2IW
,
13020 IX86_BUILTIN_PFNACC
,
13021 IX86_BUILTIN_PFPNACC
,
13022 IX86_BUILTIN_PI2FW
,
13023 IX86_BUILTIN_PSWAPDSI
,
13024 IX86_BUILTIN_PSWAPDSF
,
13027 IX86_BUILTIN_ADDPD
,
13028 IX86_BUILTIN_ADDSD
,
13029 IX86_BUILTIN_DIVPD
,
13030 IX86_BUILTIN_DIVSD
,
13031 IX86_BUILTIN_MULPD
,
13032 IX86_BUILTIN_MULSD
,
13033 IX86_BUILTIN_SUBPD
,
13034 IX86_BUILTIN_SUBSD
,
13036 IX86_BUILTIN_CMPEQPD
,
13037 IX86_BUILTIN_CMPLTPD
,
13038 IX86_BUILTIN_CMPLEPD
,
13039 IX86_BUILTIN_CMPGTPD
,
13040 IX86_BUILTIN_CMPGEPD
,
13041 IX86_BUILTIN_CMPNEQPD
,
13042 IX86_BUILTIN_CMPNLTPD
,
13043 IX86_BUILTIN_CMPNLEPD
,
13044 IX86_BUILTIN_CMPNGTPD
,
13045 IX86_BUILTIN_CMPNGEPD
,
13046 IX86_BUILTIN_CMPORDPD
,
13047 IX86_BUILTIN_CMPUNORDPD
,
13048 IX86_BUILTIN_CMPNEPD
,
13049 IX86_BUILTIN_CMPEQSD
,
13050 IX86_BUILTIN_CMPLTSD
,
13051 IX86_BUILTIN_CMPLESD
,
13052 IX86_BUILTIN_CMPNEQSD
,
13053 IX86_BUILTIN_CMPNLTSD
,
13054 IX86_BUILTIN_CMPNLESD
,
13055 IX86_BUILTIN_CMPORDSD
,
13056 IX86_BUILTIN_CMPUNORDSD
,
13057 IX86_BUILTIN_CMPNESD
,
13059 IX86_BUILTIN_COMIEQSD
,
13060 IX86_BUILTIN_COMILTSD
,
13061 IX86_BUILTIN_COMILESD
,
13062 IX86_BUILTIN_COMIGTSD
,
13063 IX86_BUILTIN_COMIGESD
,
13064 IX86_BUILTIN_COMINEQSD
,
13065 IX86_BUILTIN_UCOMIEQSD
,
13066 IX86_BUILTIN_UCOMILTSD
,
13067 IX86_BUILTIN_UCOMILESD
,
13068 IX86_BUILTIN_UCOMIGTSD
,
13069 IX86_BUILTIN_UCOMIGESD
,
13070 IX86_BUILTIN_UCOMINEQSD
,
13072 IX86_BUILTIN_MAXPD
,
13073 IX86_BUILTIN_MAXSD
,
13074 IX86_BUILTIN_MINPD
,
13075 IX86_BUILTIN_MINSD
,
13077 IX86_BUILTIN_ANDPD
,
13078 IX86_BUILTIN_ANDNPD
,
13080 IX86_BUILTIN_XORPD
,
13082 IX86_BUILTIN_SQRTPD
,
13083 IX86_BUILTIN_SQRTSD
,
13085 IX86_BUILTIN_UNPCKHPD
,
13086 IX86_BUILTIN_UNPCKLPD
,
13088 IX86_BUILTIN_SHUFPD
,
13090 IX86_BUILTIN_LOADUPD
,
13091 IX86_BUILTIN_STOREUPD
,
13092 IX86_BUILTIN_MOVSD
,
13094 IX86_BUILTIN_LOADHPD
,
13095 IX86_BUILTIN_LOADLPD
,
13097 IX86_BUILTIN_CVTDQ2PD
,
13098 IX86_BUILTIN_CVTDQ2PS
,
13100 IX86_BUILTIN_CVTPD2DQ
,
13101 IX86_BUILTIN_CVTPD2PI
,
13102 IX86_BUILTIN_CVTPD2PS
,
13103 IX86_BUILTIN_CVTTPD2DQ
,
13104 IX86_BUILTIN_CVTTPD2PI
,
13106 IX86_BUILTIN_CVTPI2PD
,
13107 IX86_BUILTIN_CVTSI2SD
,
13108 IX86_BUILTIN_CVTSI642SD
,
13110 IX86_BUILTIN_CVTSD2SI
,
13111 IX86_BUILTIN_CVTSD2SI64
,
13112 IX86_BUILTIN_CVTSD2SS
,
13113 IX86_BUILTIN_CVTSS2SD
,
13114 IX86_BUILTIN_CVTTSD2SI
,
13115 IX86_BUILTIN_CVTTSD2SI64
,
13117 IX86_BUILTIN_CVTPS2DQ
,
13118 IX86_BUILTIN_CVTPS2PD
,
13119 IX86_BUILTIN_CVTTPS2DQ
,
13121 IX86_BUILTIN_MOVNTI
,
13122 IX86_BUILTIN_MOVNTPD
,
13123 IX86_BUILTIN_MOVNTDQ
,
13126 IX86_BUILTIN_MASKMOVDQU
,
13127 IX86_BUILTIN_MOVMSKPD
,
13128 IX86_BUILTIN_PMOVMSKB128
,
13130 IX86_BUILTIN_PACKSSWB128
,
13131 IX86_BUILTIN_PACKSSDW128
,
13132 IX86_BUILTIN_PACKUSWB128
,
13134 IX86_BUILTIN_PADDB128
,
13135 IX86_BUILTIN_PADDW128
,
13136 IX86_BUILTIN_PADDD128
,
13137 IX86_BUILTIN_PADDQ128
,
13138 IX86_BUILTIN_PADDSB128
,
13139 IX86_BUILTIN_PADDSW128
,
13140 IX86_BUILTIN_PADDUSB128
,
13141 IX86_BUILTIN_PADDUSW128
,
13142 IX86_BUILTIN_PSUBB128
,
13143 IX86_BUILTIN_PSUBW128
,
13144 IX86_BUILTIN_PSUBD128
,
13145 IX86_BUILTIN_PSUBQ128
,
13146 IX86_BUILTIN_PSUBSB128
,
13147 IX86_BUILTIN_PSUBSW128
,
13148 IX86_BUILTIN_PSUBUSB128
,
13149 IX86_BUILTIN_PSUBUSW128
,
13151 IX86_BUILTIN_PAND128
,
13152 IX86_BUILTIN_PANDN128
,
13153 IX86_BUILTIN_POR128
,
13154 IX86_BUILTIN_PXOR128
,
13156 IX86_BUILTIN_PAVGB128
,
13157 IX86_BUILTIN_PAVGW128
,
13159 IX86_BUILTIN_PCMPEQB128
,
13160 IX86_BUILTIN_PCMPEQW128
,
13161 IX86_BUILTIN_PCMPEQD128
,
13162 IX86_BUILTIN_PCMPGTB128
,
13163 IX86_BUILTIN_PCMPGTW128
,
13164 IX86_BUILTIN_PCMPGTD128
,
13166 IX86_BUILTIN_PMADDWD128
,
13168 IX86_BUILTIN_PMAXSW128
,
13169 IX86_BUILTIN_PMAXUB128
,
13170 IX86_BUILTIN_PMINSW128
,
13171 IX86_BUILTIN_PMINUB128
,
13173 IX86_BUILTIN_PMULUDQ
,
13174 IX86_BUILTIN_PMULUDQ128
,
13175 IX86_BUILTIN_PMULHUW128
,
13176 IX86_BUILTIN_PMULHW128
,
13177 IX86_BUILTIN_PMULLW128
,
13179 IX86_BUILTIN_PSADBW128
,
13180 IX86_BUILTIN_PSHUFHW
,
13181 IX86_BUILTIN_PSHUFLW
,
13182 IX86_BUILTIN_PSHUFD
,
13184 IX86_BUILTIN_PSLLW128
,
13185 IX86_BUILTIN_PSLLD128
,
13186 IX86_BUILTIN_PSLLQ128
,
13187 IX86_BUILTIN_PSRAW128
,
13188 IX86_BUILTIN_PSRAD128
,
13189 IX86_BUILTIN_PSRLW128
,
13190 IX86_BUILTIN_PSRLD128
,
13191 IX86_BUILTIN_PSRLQ128
,
13192 IX86_BUILTIN_PSLLDQI128
,
13193 IX86_BUILTIN_PSLLWI128
,
13194 IX86_BUILTIN_PSLLDI128
,
13195 IX86_BUILTIN_PSLLQI128
,
13196 IX86_BUILTIN_PSRAWI128
,
13197 IX86_BUILTIN_PSRADI128
,
13198 IX86_BUILTIN_PSRLDQI128
,
13199 IX86_BUILTIN_PSRLWI128
,
13200 IX86_BUILTIN_PSRLDI128
,
13201 IX86_BUILTIN_PSRLQI128
,
13203 IX86_BUILTIN_PUNPCKHBW128
,
13204 IX86_BUILTIN_PUNPCKHWD128
,
13205 IX86_BUILTIN_PUNPCKHDQ128
,
13206 IX86_BUILTIN_PUNPCKHQDQ128
,
13207 IX86_BUILTIN_PUNPCKLBW128
,
13208 IX86_BUILTIN_PUNPCKLWD128
,
13209 IX86_BUILTIN_PUNPCKLDQ128
,
13210 IX86_BUILTIN_PUNPCKLQDQ128
,
13212 IX86_BUILTIN_CLFLUSH
,
13213 IX86_BUILTIN_MFENCE
,
13214 IX86_BUILTIN_LFENCE
,
13216 /* Prescott New Instructions. */
13217 IX86_BUILTIN_ADDSUBPS
,
13218 IX86_BUILTIN_HADDPS
,
13219 IX86_BUILTIN_HSUBPS
,
13220 IX86_BUILTIN_MOVSHDUP
,
13221 IX86_BUILTIN_MOVSLDUP
,
13222 IX86_BUILTIN_ADDSUBPD
,
13223 IX86_BUILTIN_HADDPD
,
13224 IX86_BUILTIN_HSUBPD
,
13225 IX86_BUILTIN_LDDQU
,
13227 IX86_BUILTIN_MONITOR
,
13228 IX86_BUILTIN_MWAIT
,
13230 IX86_BUILTIN_VEC_INIT_V2SI
,
13231 IX86_BUILTIN_VEC_INIT_V4HI
,
13232 IX86_BUILTIN_VEC_INIT_V8QI
,
13233 IX86_BUILTIN_VEC_EXT_V2DF
,
13234 IX86_BUILTIN_VEC_EXT_V2DI
,
13235 IX86_BUILTIN_VEC_EXT_V4SF
,
13236 IX86_BUILTIN_VEC_EXT_V4SI
,
13237 IX86_BUILTIN_VEC_EXT_V8HI
,
13238 IX86_BUILTIN_VEC_EXT_V2SI
,
13239 IX86_BUILTIN_VEC_EXT_V4HI
,
13240 IX86_BUILTIN_VEC_SET_V8HI
,
13241 IX86_BUILTIN_VEC_SET_V4HI
,
#define def_builtin(MASK, NAME, TYPE, CODE)				\
do {									\
  if ((MASK) & target_flags						\
      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))			\
    lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
				 NULL, NULL_TREE);			\
} while (0)

/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const unsigned int flag;
};
static const struct builtin_description bdesc_comi[] =
{
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
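
/* Every comi/ucomi builtin takes two vectors and returns an int; the loop
   over bdesc_comi in ix86_init_mmx_sse_builtins gives the MASK_SSE2 entries
   the int (v2df, v2df) signature and the remaining entries
   int (v4sf, v4sf).  */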
static const struct builtin_description bdesc_2arg[] =
{
  { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
  { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
  { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
  { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },

  { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
  { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },

  { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
  { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
  { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },

  { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },

  { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },

  { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },

  { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
  { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
  { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },

  { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
};
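
/* bdesc_2arg is walked by ix86_init_mmx_sse_builtins below to register the
   named two-operand builtins; ix86_expand_binop_builtin then maps a call to
   such a builtin onto its insn pattern.  */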
static const struct builtin_description bdesc_1arg[] =
{
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },

  { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
};
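
/* The one-operand descriptors all carry a null NAME: the builtins they back
   (e.g. __builtin_ia32_sqrtps) are registered explicitly in
   ix86_init_mmx_sse_builtins, and the table itself is only consulted when a
   call to one of them is expanded.  */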
static void
ix86_init_builtins (void)
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}
/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  size_t i;

  tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
  tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
  tree V2DI_type_node
    = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
  tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
  tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
  tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
  tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
  tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);

  tree pchar_type_node = build_pointer_type (char_type_node);
  tree pcchar_type_node
    = build_pointer_type (build_type_variant (char_type_node, 1, 0));
  tree pfloat_type_node = build_pointer_type (float_type_node);
  tree pcfloat_type_node
    = build_pointer_type (build_type_variant (float_type_node, 1, 0));
  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
  tree pv2di_type_node = build_pointer_type (V2DI_type_node);
  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);

  tree int_ftype_v4sf_v4sf
    = build_function_type_list (integer_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v4si_ftype_v4sf_v4sf
    = build_function_type_list (V4SI_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  /* MMX/SSE/integer conversions.  */
  tree int_ftype_v4sf
    = build_function_type_list (integer_type_node, V4SF_type_node, NULL_TREE);
  tree int64_ftype_v4sf
    = build_function_type_list (long_long_integer_type_node,
                                V4SF_type_node, NULL_TREE);
  tree int_ftype_v8qi
    = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, integer_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int64
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, long_long_integer_type_node,
                                NULL_TREE);
  tree v4sf_ftype_v4sf_v2si
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V2SI_type_node, NULL_TREE);

  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type_list (V8QI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v2si_v2si
    = build_function_type_list (V4HI_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V4SF_type_node,
                                integer_type_node, NULL_TREE);
  tree v2si_ftype_v4hi_v4hi
    = build_function_type_list (V2SI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_di
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree v2si_ftype_v2si_di
    = build_function_type_list (V2SI_type_node,
                                V2SI_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree void_ftype_void
    = build_function_type (void_type_node, void_list_node);
  tree void_ftype_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
  tree void_ftype_unsigned_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node,
                                unsigned_type_node, NULL_TREE);
  tree void_ftype_pcvoid_unsigned_unsigned
    = build_function_type_list (void_type_node, const_ptr_type_node,
                                unsigned_type_node, unsigned_type_node,
                                NULL_TREE);
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, void_list_node);
  tree v2si_ftype_v4sf
    = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
  /* Loads/stores.  */
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type_list (void_type_node,
                                V8QI_type_node, V8QI_type_node,
                                pchar_type_node, NULL_TREE);
  tree v4sf_ftype_pcfloat
    = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
  /* @@@ the type is bogus */
  tree v4sf_ftype_v4sf_pv2si
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, pv2si_type_node, NULL_TREE);
  tree void_ftype_pv2si_v4sf
    = build_function_type_list (void_type_node,
                                pv2si_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pfloat_v4sf
    = build_function_type_list (void_type_node,
                                pfloat_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pdi_di
    = build_function_type_list (void_type_node,
                                pdi_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree void_ftype_pv2di_v2di
    = build_function_type_list (void_type_node,
                                pv2di_type_node, V2DI_type_node, NULL_TREE);
  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);

  /* Normal vector binops.  */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type_list (V8QI_type_node,
                                V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si_v2si
    = build_function_type_list (V2SI_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree di_ftype_di_di
    = build_function_type_list (long_long_unsigned_type_node,
                                long_long_unsigned_type_node,
                                long_long_unsigned_type_node, NULL_TREE);

  tree v2si_ftype_v2sf
    = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2si
    = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
  tree v2si_ftype_v2si
    = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf
    = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf_v2sf
    = build_function_type_list (V2SF_type_node,
                                V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2si_ftype_v2sf_v2sf
    = build_function_type_list (V2SI_type_node,
                                V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree pint_type_node = build_pointer_type (integer_type_node);
  tree pdouble_type_node = build_pointer_type (double_type_node);
  tree pcdouble_type_node
    = build_pointer_type (build_type_variant (double_type_node, 1, 0));
  tree int_ftype_v2df_v2df
    = build_function_type_list (integer_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);

  tree ti_ftype_ti_ti
    = build_function_type_list (intTI_type_node,
                                intTI_type_node, intTI_type_node, NULL_TREE);
  tree void_ftype_pcvoid
    = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
  tree v4sf_ftype_v4si
    = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v4sf
    = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v4si
    = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v2df
    = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
  tree v2si_ftype_v2df
    = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
  tree v4sf_ftype_v2df
    = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2si
    = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
  tree v2df_ftype_v4sf
    = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree int_ftype_v2df
    = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
  tree int64_ftype_v2df
    = build_function_type_list (long_long_integer_type_node,
                                V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int64
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, long_long_integer_type_node,
                                NULL_TREE);
  tree v4sf_ftype_v4sf_v2df
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v4sf
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df_int
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V2DF_type_node,
                                integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_pcdouble
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree void_ftype_pdouble_v2df
    = build_function_type_list (void_type_node,
                                pdouble_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pint_int
    = build_function_type_list (void_type_node,
                                pint_type_node, integer_type_node, NULL_TREE);
  tree void_ftype_v16qi_v16qi_pchar
    = build_function_type_list (void_type_node,
                                V16QI_type_node, V16QI_type_node,
                                pchar_type_node, NULL_TREE);
  tree v2df_ftype_pcdouble
    = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi
    = build_function_type_list (V16QI_type_node,
                                V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v8hi
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v4si
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v2di_ftype_v2di_v2di
    = build_function_type_list (V2DI_type_node,
                                V2DI_type_node, V2DI_type_node, NULL_TREE);
  tree v2di_ftype_v2df_v2df
    = build_function_type_list (V2DI_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df
    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2di_ftype_v2di_int
    = build_function_type_list (V2DI_type_node,
                                V2DI_type_node, integer_type_node, NULL_TREE);
  tree v4si_ftype_v4si_int
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_int
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v2di
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v2di
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v8hi_v8hi
    = build_function_type_list (V4SI_type_node,
                                V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree di_ftype_v8qi_v8qi
    = build_function_type_list (long_long_unsigned_type_node,
                                V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree di_ftype_v2si_v2si
    = build_function_type_list (long_long_unsigned_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2di_ftype_v16qi_v16qi
    = build_function_type_list (V2DI_type_node,
                                V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v2di_ftype_v4si_v4si
    = build_function_type_list (V2DI_type_node,
                                V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree int_ftype_v16qi
    = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
  tree v16qi_ftype_pcchar
    = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
  tree void_ftype_pchar_v16qi
    = build_function_type_list (void_type_node,
                                pchar_type_node, V16QI_type_node, NULL_TREE);
  tree float80_type;
  tree float128_type;
  tree ftype;

  /* The __float80 type.  */
  if (TYPE_MODE (long_double_type_node) == XFmode)
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
                                               "__float80");
  else
    {
      /* The __float80 type.  */
      float80_type = make_node (REAL_TYPE);
      TYPE_PRECISION (float80_type) = 80;
      layout_type (float80_type);
      (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
    }

  float128_type = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type) = 128;
  layout_type (float128_type);
  (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
         mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
        continue;

      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
        {
        case V16QImode:
          type = v16qi_ftype_v16qi_v16qi;
          break;
        case V8HImode:
          type = v8hi_ftype_v8hi_v8hi;
          break;
        case V4SImode:
          type = v4si_ftype_v4si_v4si;
          break;
        case V2DImode:
          type = v2di_ftype_v2di_v2di;
          break;
        case V2DFmode:
          type = v2df_ftype_v2df_v2df;
          break;
        case TImode:
          type = ti_ftype_ti_ti;
          break;
        case V4SFmode:
          type = v4sf_ftype_v4sf_v4sf;
          break;
        case V8QImode:
          type = v8qi_ftype_v8qi_v8qi;
          break;
        case V4HImode:
          type = v4hi_ftype_v4hi_v4hi;
          break;
        case V2SImode:
          type = v2si_ftype_v2si_v2si;
          break;
        case DImode:
          type = di_ftype_di_di;
          break;

        default:
          gcc_unreachable ();
        }

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_sse_maskcmpv4sf3
          || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
        type = v4si_ftype_v4sf_v4sf;

      if (d->icode == CODE_FOR_sse2_maskcmpv2df3
          || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
        type = v2di_ftype_v2df_v2df;

      def_builtin (d->mask, d->name, type, d->code);
    }
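
  /* For the "__builtin_ia32_addps" entry, for instance, the pattern is
     CODE_FOR_addv4sf3, whose operand 1 has mode V4SFmode, so the builtin is
     registered with the v4sf (v4sf, v4sf) signature under MASK_SSE.  */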
  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->mask == MASK_SSE2)
      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
    else
      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);

  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
14023 def_builtin (MASK_SSE
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
14024 def_builtin (MASK_SSE
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
14025 def_builtin (MASK_SSE
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
14026 def_builtin (MASK_SSE
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
14027 def_builtin (MASK_SSE
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
14028 def_builtin (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64
, IX86_BUILTIN_CVTSI642SS
);
14029 def_builtin (MASK_SSE
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
14030 def_builtin (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI64
);
14031 def_builtin (MASK_SSE
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
14032 def_builtin (MASK_SSE
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
14033 def_builtin (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI64
);
14035 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
14037 def_builtin (MASK_SSE
, "__builtin_ia32_loadups", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADUPS
);
14038 def_builtin (MASK_SSE
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
14040 def_builtin (MASK_SSE
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
14041 def_builtin (MASK_SSE
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
14042 def_builtin (MASK_SSE
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
14043 def_builtin (MASK_SSE
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
14045 def_builtin (MASK_SSE
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
14046 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
14047 def_builtin (MASK_SSE
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
14048 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
14050 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
14052 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
14054 def_builtin (MASK_SSE
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
14055 def_builtin (MASK_SSE
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
14056 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
14057 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
14058 def_builtin (MASK_SSE
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
14059 def_builtin (MASK_SSE
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
14061 def_builtin (MASK_SSE
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
14063 /* Original 3DNow! */
14064 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
14065 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
14066 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
14067 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
14068 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
14069 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
14070 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
14071 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
14072 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
14073 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
14074 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
14075 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
14076 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
14077 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
14078 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
14079 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
14080 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
14081 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
14082 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
14083 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
14085 /* 3DNow! extension as used in the Athlon CPU. */
14086 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
14087 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
14088 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
14089 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
14090 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
14091 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
14094 def_builtin (MASK_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
14096 def_builtin (MASK_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADUPD
);
14097 def_builtin (MASK_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
14099 def_builtin (MASK_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADHPD
);
14100 def_builtin (MASK_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADLPD
);
14102 def_builtin (MASK_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
14103 def_builtin (MASK_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
14104 def_builtin (MASK_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
14105 def_builtin (MASK_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
14106 def_builtin (MASK_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
14108 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
14109 def_builtin (MASK_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
14110 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
14111 def_builtin (MASK_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
14113 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
14114 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
14116 def_builtin (MASK_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
14118 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
14119 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
14121 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
14122 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
14123 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
14124 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
14125 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
14127 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
14129 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
14130 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
14131 def_builtin (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTSD2SI64
);
14132 def_builtin (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI64
);
14134 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
14135 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
14136 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
14138 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
14139 def_builtin (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64
, IX86_BUILTIN_CVTSI642SD
);
14140 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
14141 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
14143 def_builtin (MASK_SSE2
, "__builtin_ia32_clflush", void_ftype_pcvoid
, IX86_BUILTIN_CLFLUSH
);
14144 def_builtin (MASK_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
14145 def_builtin (MASK_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
14147 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQU
);
14148 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQU
);
14150 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si
, IX86_BUILTIN_PMULUDQ
);
14151 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si
, IX86_BUILTIN_PMULUDQ128
);
14153 def_builtin (MASK_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSLLW128
);
14154 def_builtin (MASK_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSLLD128
);
14155 def_builtin (MASK_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
14157 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRLW128
);
14158 def_builtin (MASK_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRLD128
);
14159 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
14161 def_builtin (MASK_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRAW128
);
14162 def_builtin (MASK_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRAD128
);
14164 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLDQI128
);
14165 def_builtin (MASK_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
14166 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
14167 def_builtin (MASK_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
14169 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
14170 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
14171 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
14172 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
14174 def_builtin (MASK_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
14175 def_builtin (MASK_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
14177 def_builtin (MASK_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
14179 /* Prescott New Instructions. */
14180 def_builtin (MASK_SSE3
, "__builtin_ia32_monitor",
14181 void_ftype_pcvoid_unsigned_unsigned
,
14182 IX86_BUILTIN_MONITOR
);
14183 def_builtin (MASK_SSE3
, "__builtin_ia32_mwait",
14184 void_ftype_unsigned_unsigned
,
14185 IX86_BUILTIN_MWAIT
);
14186 def_builtin (MASK_SSE3
, "__builtin_ia32_movshdup",
14188 IX86_BUILTIN_MOVSHDUP
);
14189 def_builtin (MASK_SSE3
, "__builtin_ia32_movsldup",
14191 IX86_BUILTIN_MOVSLDUP
);
14192 def_builtin (MASK_SSE3
, "__builtin_ia32_lddqu",
14193 v16qi_ftype_pcchar
, IX86_BUILTIN_LDDQU
);
  /* Access to the vec_init patterns.  */
  ftype = build_function_type_list (V2SI_type_node, integer_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
	       ftype, IX86_BUILTIN_VEC_INIT_V2SI);

  ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
				    short_integer_type_node,
				    short_integer_type_node,
				    short_integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
	       ftype, IX86_BUILTIN_VEC_INIT_V4HI);

  ftype = build_function_type_list (V8QI_type_node, char_type_node,
				    char_type_node, char_type_node,
				    char_type_node, char_type_node,
				    char_type_node, char_type_node,
				    char_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
	       ftype, IX86_BUILTIN_VEC_INIT_V8QI);

  /* Access to the vec_extract patterns.  */
  ftype = build_function_type_list (double_type_node, V2DF_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
	       ftype, IX86_BUILTIN_VEC_EXT_V2DF);

  ftype = build_function_type_list (long_long_integer_type_node,
				    V2DI_type_node, integer_type_node,
				    NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
	       ftype, IX86_BUILTIN_VEC_EXT_V2DI);

  ftype = build_function_type_list (float_type_node, V4SF_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
	       ftype, IX86_BUILTIN_VEC_EXT_V4SF);

  ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
	       ftype, IX86_BUILTIN_VEC_EXT_V4SI);

  ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
	       ftype, IX86_BUILTIN_VEC_EXT_V8HI);

  ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
	       ftype, IX86_BUILTIN_VEC_EXT_V4HI);

  ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
	       ftype, IX86_BUILTIN_VEC_EXT_V2SI);

  /* Access to the vec_set patterns.  */
  ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
				    intHI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
	       ftype, IX86_BUILTIN_VEC_SET_V8HI);

  ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
				    intHI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
	       ftype, IX86_BUILTIN_VEC_SET_V4HI);
}
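
/* Illustrative sketch (an assumption, not part of the compiler): mmintrin.h
   wraps the vec_init builtin registered above roughly like this, so that
   any use of MMX registers stays explicit in user code.  The typedef and
   helper name are hypothetical.

	typedef int __v2si __attribute__ ((__vector_size__ (8)));

	static __inline __v2si
	my_pair (int a, int b)
	{
	  return __builtin_ia32_vec_init_v2si (a, b);
	}
*/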
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}

/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat, xops[3];
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  /* The insn must want input operands in the same modes as the
     result.  */
  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));

  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* ??? Using ix86_fixup_binary_operands is problematic when
     we've got mismatched modes.  Fake it.  */

  xops[0] = target;
  xops[1] = op0;
  xops[2] = op1;

  if (tmode == mode0 && tmode == mode1)
    {
      target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
      op0 = xops[1];
      op1 = xops[2];
    }
  else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
    {
      op0 = force_reg (mode0, op0);
      op1 = force_reg (mode1, op1);
      target = gen_reg_rtx (tmode);
    }

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (enum insn_code icode, tree arglist)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}

/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
			  rtx target, int do_load)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if ((optimize && !register_operand (op0, mode0))
	  || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
			 rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
		      rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}

/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static unsigned HOST_WIDE_INT
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%i", max);
      return 0;
    }

  return elt;
}
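
/* For example, a __v4sf argument has TYPE_VECTOR_SUBPARTS == 4, so any
   selector outside 0..3 hits the error above.  In user terms (sketch,
   using the vec_ext builtin registered earlier in this file):

	__builtin_ia32_vec_ext_v4sf (x, 2);   valid, element 2
	__builtin_ia32_vec_ext_v4sf (x, 7);   "selector must be an integer
					       constant in the range 0..3"
*/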

/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that gives us license to emit
   these sorts of instructions.  */

static rtx
ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));

  for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
    {
      rtx x = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  gcc_assert (arglist == NULL);

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}
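
/* Sketch of the two spellings the comment above contrasts (user code, not
   part of the compiler): the generic vector syntax and the builtin wrapper
   that mmintrin.h uses so MMX register use stays explicit.  The typedef is
   an assumption for the example.

	typedef short __v4hi __attribute__ ((__vector_size__ (8)));

	__v4hi a = (__v4hi) { 1, 2, 3, 4 };                    language-level
	__v4hi b = __builtin_ia32_vec_init_v4hi (1, 2, 3, 4);  builtin wrapper
*/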

/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_ext_builtin (tree arglist, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  int elt;
  rtx op0;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));

  op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}

/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree arglist)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
  op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  ix86_expand_vector_set (true, op0, op1, elt);

  return op0;
}

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     enum machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_mmx_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sse_sfence ());
      return 0;

    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
	       ? CODE_FOR_mmx_maskmovq
	       : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      op0 = force_reg (Pmode, op0);
      op0 = gen_rtx_MEM (mode1, op0);

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
	       : CODE_FOR_sse2_loadlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = force_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (optimize || target == 0
	  || GET_MODE (target) != tmode
	  || !register_operand (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
	       : CODE_FOR_sse_storelps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      op1 = force_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return const0_rtx;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, 0);
      emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, 0);
      emit_insn (gen_sse_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
	       ? CODE_FOR_sse_shufps
	       : CODE_FOR_sse2_shufpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if ((optimize && !register_operand (op1, mode1))
	  || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (optimize || target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
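
      /* Sketch of what the "mask must be an immediate" check above means
	 for user code (hypothetical wrapper-level example, not part of the
	 compiler):

	     __builtin_ia32_shufps (a, b, 0x1B);   literal mask: accepted
	     __builtin_ia32_shufps (a, b, mask);   run-time mask: rejected
						   by the predicate above  */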

    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
	       : CODE_FOR_mmx_pshufw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSLLDQI128:
    case IX86_BUILTIN_PSRLDQI128:
      icode = (  fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
	       : CODE_FOR_sse2_lshrti3);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	{
	  op0 = copy_to_reg (op0);
	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
	}
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  error ("shift must be an immediate");
	  return const0_rtx;
	}
      target = gen_reg_rtx (V2DImode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
			     op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_mmx_femms ());
      return 0;
    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);

    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);

    case IX86_BUILTIN_MFENCE:
      emit_insn (gen_sse2_mfence ());
      return 0;
    case IX86_BUILTIN_LFENCE:
      emit_insn (gen_sse2_lfence ());
      return 0;

    case IX86_BUILTIN_CLFLUSH:
      arg0 = TREE_VALUE (arglist);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      icode = CODE_FOR_sse2_clflush;
      if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
	op0 = copy_to_mode_reg (Pmode, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);

    case IX86_BUILTIN_LOADDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
    case IX86_BUILTIN_STOREDQU:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
:
15023 arg0
= TREE_VALUE (arglist
);
15024 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
15025 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
15026 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
15027 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
15028 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
15030 op0
= copy_to_mode_reg (SImode
, op0
);
15032 op1
= copy_to_mode_reg (SImode
, op1
);
15034 op2
= copy_to_mode_reg (SImode
, op2
);
15035 emit_insn (gen_sse3_monitor (op0
, op1
, op2
));
15038 case IX86_BUILTIN_MWAIT
:
15039 arg0
= TREE_VALUE (arglist
);
15040 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
15041 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
15042 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
15044 op0
= copy_to_mode_reg (SImode
, op0
);
15046 op1
= copy_to_mode_reg (SImode
, op1
);
15047 emit_insn (gen_sse3_mwait (op0
, op1
));

    case IX86_BUILTIN_LDDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
				       target, 1);

    case IX86_BUILTIN_VEC_INIT_V2SI:
    case IX86_BUILTIN_VEC_INIT_V4HI:
    case IX86_BUILTIN_VEC_INIT_V8QI:
      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);

    case IX86_BUILTIN_VEC_EXT_V2DF:
    case IX86_BUILTIN_VEC_EXT_V2DI:
    case IX86_BUILTIN_VEC_EXT_V4SF:
    case IX86_BUILTIN_VEC_EXT_V4SI:
    case IX86_BUILTIN_VEC_EXT_V8HI:
    case IX86_BUILTIN_VEC_EXT_V2SI:
    case IX86_BUILTIN_VEC_EXT_V4HI:
      return ix86_expand_vec_ext_builtin (arglist, target);

    case IX86_BUILTIN_VEC_SET_V8HI:
    case IX86_BUILTIN_VEC_SET_V4HI:
      return ix86_expand_vec_set_builtin (arglist);

    default:
      break;
    }

  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
	/* Compares are treated specially.  */
	if (d->icode == CODE_FOR_sse_maskcmpv4sf3
	    || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
	    || d->icode == CODE_FOR_sse2_maskcmpv2df3
	    || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
	  return ix86_expand_sse_compare (d, arglist, target);

	return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  gcc_unreachable ();
}

/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;

  gcc_assert (reload_completed);
  if (TARGET_RED_ZONE)
    {
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (!TARGET_RED_ZONE && TARGET_64BIT)
    {
      switch (mode)
	{
	case HImode:
	case SImode:
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (
		      gen_rtx_SET (VOIDmode,
				   gen_rtx_MEM (DImode,
						gen_rtx_PRE_DEC (DImode,
							stack_pointer_rtx)),
				   operand));
	  break;
	default:
	  gcc_unreachable ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
	{
	case DImode:
	  {
	    rtx operands[2];
	    split_di (&operand, 1, operands, operands + 1);
	    emit_insn (
			gen_rtx_SET (VOIDmode,
				     gen_rtx_MEM (SImode,
						  gen_rtx_PRE_DEC (Pmode,
							stack_pointer_rtx)),
				     operands[1]));
	    emit_insn (
			gen_rtx_SET (VOIDmode,
				     gen_rtx_MEM (SImode,
						  gen_rtx_PRE_DEC (Pmode,
							stack_pointer_rtx)),
				     operands[0]));
	  }
	  break;
	case HImode:
	  /* It is better to store HImodes as SImodes.  */
	  if (!TARGET_PARTIAL_REG_STALL)
	    operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (
		      gen_rtx_SET (VOIDmode,
				   gen_rtx_MEM (GET_MODE (operand),
						gen_rtx_PRE_DEC (SImode,
							stack_pointer_rtx)),
				   operand));
	  break;
	default:
	  gcc_unreachable ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}

/* Free operand from the memory.  */
void
ix86_free_from_memory (enum machine_mode mode)
{
  if (!TARGET_RED_ZONE)
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
	size = 8;
      else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
	size = 2;
      else
	size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
	 to pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (size))));
    }
}

/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */

enum reg_class
ix86_preferred_reload_class (rtx x, enum reg_class class)
{
  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (class == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (GET_MODE (x)))
    return class;

  /* Floating-point constants need more complex checks.  */
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
	return class;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
	 zero above.  We only want to wind up preferring 80387 registers if
	 we plan on doing computation with them.  */
      if (TARGET_80387
	  && (TARGET_MIX_SSE_I387
	      || !(TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (x))))
	  && standard_80387_constant_p (x))
	{
	  /* Limit class to non-sse.  */
	  if (class == FLOAT_SSE_REGS)
	    return FLOAT_REGS;
	  if (class == FP_TOP_SSE_REGS)
	    return FP_TOP_REG;
	  if (class == FP_SECOND_SSE_REGS)
	    return FP_SECOND_REG;
	  if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
	    return FLOAT_REGS;
	}

      return NO_REGS;
    }
  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;
  if (MAYBE_SSE_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (reg_class_subset_p (class, Q_REGS))
	return class;
      if (reg_class_subset_p (Q_REGS, class))
	return Q_REGS;
      return NO_REGS;
    }

  return class;
}

/* If we are copying between general and FP registers, we need a memory
   location. The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is class containing
   registers from multiple units (SSE, MMX, integer).  We avoid this by never
   combining those units in single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.  */

int
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
			      enum machine_mode mode, int strict)
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      gcc_assert (!strict);
      return true;
    }

  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
    return true;

  /* ??? This is a lie.  We do have moves between mmx/general, and for
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    return true;

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    {
      /* SSE1 doesn't have any direct moves from other classes.  */
      if (!TARGET_SSE2)
	return true;

      /* If the target says that inter-unit moves are more expensive
	 than moving through memory, then don't generate them.  */
      if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
	return true;

      /* Between SSE and general, we have moves no larger than word size.  */
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	return true;

      /* ??? For the cost of one register reformat penalty, we could use
	 the same instructions to move SFmode and DFmode data, but the
	 relevant move patterns don't support those alternatives.  */
      if (mode == SFmode || mode == DFmode)
	return true;
    }

  return false;
}

/* Return true if the registers in CLASS cannot represent the change from
   modes FROM to TO.  */

bool
ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
			       enum reg_class class)
{
  /* x87 registers can't do subreg at all, as all values are reformated
     to extended precision.  */
  if (MAYBE_FLOAT_CLASS_P (class))
    return true;

  if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
    {
      /* Vector registers do not support QI or HImode loads.  If we don't
	 disallow a change to these modes, reload will assume it's ok to
	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
	 the vec_dupv4hi pattern.  */
      if (GET_MODE_SIZE (from) < 4)
	return true;

      /* Vector registers do not support subreg with nonzero offsets, which
	 are otherwise valid for integer registers.  Since we can't see
	 whether we have a nonzero offset from here, prohibit all
	 nonparadoxical subregs changing size.  */
      if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
	return true;
    }

  return false;
}

/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */

int
ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
			 enum reg_class class2)
{
  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
		   MEMORY_MOVE_COST (mode, class1, 1));
      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
		   MEMORY_MOVE_COST (mode, class2, 1));

      /* In case of copying from general_purpose_register we may emit multiple
	 stores followed by single load causing memory size mismatch stall.
	 Count this as arbitrarily high cost of 20.  */
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
	cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
	 have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
	  || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
	cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
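
/* Worked example (sketch): for a copy between units that needs secondary
   memory, e.g. GENERAL_REGS <-> SSE_REGS in SImode on an SSE1-only target,
   the result above is

     1 + MAX (MEMORY_MOVE_COST (SImode, GENERAL_REGS, 0),
	      MEMORY_MOVE_COST (SImode, GENERAL_REGS, 1))
       + MAX (MEMORY_MOVE_COST (SImode, SSE_REGS, 0),
	      MEMORY_MOVE_COST (SImode, SSE_REGS, 1))

   so it is at least as expensive as a round trip through memory, while a
   same-unit copy falls through to the per-unit table entries
   (mmxsse_to_integer, fp_move, sse_move, mmx_move) or the default 2.  */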

/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */

int
ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
	 out of SSE registers, even when no operation instructions
	 are available.  */
      return (VALID_SSE_REG_MODE (mode)
	      || VALID_SSE2_REG_MODE (mode)
	      || VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
	 so if the register is available at all, then we can move data of
	 the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }

  if (mode == QImode)
    {
      /* Take care for QImode values - they can be in non-QI regs,
	 but then they do cause partial register stalls.  */
      if (regno < 4 || TARGET_64BIT)
	return 1;
      if (!TARGET_PARTIAL_REG_STALL)
	return 1;
      return reload_in_progress || reload_completed;
    }
  /* We handle both integer and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode))
    return 1;
  else if (VALID_FP_MODE_P (mode))
    return 1;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return 1;

  return 0;
}

/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
   tieable integer mode.  */

static bool
ix86_tieable_integer_mode_p (enum machine_mode mode)
{
  switch (mode)
    {
    case HImode:
    case SImode:
      return true;

    case QImode:
      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;

    case DImode:
      return TARGET_64BIT;

    default:
      return false;
    }
}

/* Return true if MODE1 is accessible in a register that can hold MODE2
   without copying.  That is, all register classes that can hold MODE2
   can also hold MODE1.  */

bool
ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  if (mode1 == mode2)
    return true;

  if (ix86_tieable_integer_mode_p (mode1)
      && ix86_tieable_integer_mode_p (mode2))
    return true;

  /* MODE2 being XFmode implies fp stack or general regs, which means we
     can tie any smaller floating point modes to it.  Note that we do not
     tie this with TFmode.  */
  if (mode2 == XFmode)
    return mode1 == SFmode || mode1 == DFmode;

  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
     that we can tie it with SFmode.  */
  if (mode2 == DFmode)
    return mode1 == SFmode;

  /* If MODE2 is only appropriate for an SSE register, then tie with
     any other mode acceptable to SSE registers.  */
  if (GET_MODE_SIZE (mode2) >= 8
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);

  /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
     with any other mode acceptable to MMX registers.  */
  if (GET_MODE_SIZE (mode2) == 8
      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
    return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);

  return false;
}

/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.  */

int
ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
{
  if (FLOAT_CLASS_P (class))
    {
      int index;
      switch (mode)
	{
	case SFmode:
	  index = 0;
	  break;
	case DFmode:
	  index = 1;
	  break;
	case XFmode:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	case 16:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (in)
	return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
		: ix86_cost->movzbl_load);
      else
	return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
		: ix86_cost->int_store[0] + 4);
      break;
    case 2:
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
      if (mode == TFmode)
	mode = XFmode;
      return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
	      * (((int) GET_MODE_SIZE (mode)
		  + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
    }
}
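
/* Worked example (sketch): in the default (integer-class) branch on a
   32-bit target (UNITS_PER_WORD == 4), loading a DImode value costs

     int_load[2] * ((8 + 4 - 1) / 4) = int_load[2] * 2,

   i.e. one word-sized move per 32-bit chunk of the value.  */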

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
{
  enum machine_mode mode = GET_MODE (x);

  switch (code)
    {
    case CONST_INT:
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
	*total = 3;
      else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
	*total = 2;
      else if (flag_pic && SYMBOLIC_CONST (x)
	       && (!TARGET_64BIT
		   || (!GET_CODE (x) != LABEL_REF
		       && (GET_CODE (x) != SYMBOL_REF
			   || !SYMBOL_REF_LOCAL_P (x)))))
	*total = 1;
      else
	*total = 0;
      return true;

    case CONST_DOUBLE:
      if (mode == VOIDmode)
	*total = 0;
      else
	switch (standard_80387_constant_p (x))
	  {
	  case 1: /* 0.0 */
	    *total = 1;
	    break;
	  default: /* Other constants */
	    *total = 2;
	    break;
	  case 0:
	  case -1:
	    /* Start with (MEM (SYMBOL_REF)), since that's where
	       it'll probably end up.  Add a penalty for size.  */
	    *total = (COSTS_N_INSNS (1)
		      + (flag_pic != 0 && !TARGET_64BIT)
		      + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
	    break;
	  }
      return true;

    case ZERO_EXTEND:
      /* The zero extensions is often completely free on x86_64, so make
	 it as cheap as possible.  */
      if (TARGET_64BIT && mode == DImode
	  && GET_MODE (XEXP (x, 0)) == SImode)
	*total = 1;
      else if (TARGET_ZERO_EXTEND_WITH_AND)
	*total = COSTS_N_INSNS (ix86_cost->add);
      else
	*total = COSTS_N_INSNS (ix86_cost->movzx);
      return false;

    case SIGN_EXTEND:
      *total = COSTS_N_INSNS (ix86_cost->movsx);
      return false;

    case ASHIFT:
      if (GET_CODE (XEXP (x, 1)) == CONST_INT
	  && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
	{
	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
	  if (value == 1)
	    {
	      *total = COSTS_N_INSNS (ix86_cost->add);
	      return false;
	    }
	  if ((value == 2 || value == 3)
	      && ix86_cost->lea <= ix86_cost->shift_const)
	    {
	      *total = COSTS_N_INSNS (ix86_cost->lea);
	      return false;
	    }
	}
      /* FALLTHRU */

    case ROTATE:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
	{
	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      if (INTVAL (XEXP (x, 1)) > 32)
		*total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
	      else
		*total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
	    }
	  else
	    {
	      if (GET_CODE (XEXP (x, 1)) == AND)
		*total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
	      else
		*total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
	    }
	}
      else
	{
	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    *total = COSTS_N_INSNS (ix86_cost->shift_const);
	  else
	    *total = COSTS_N_INSNS (ix86_cost->shift_var);
	}
      return false;

    case MULT:
      if (FLOAT_MODE_P (mode))
	{
	  *total = COSTS_N_INSNS (ix86_cost->fmul);
	  return false;
	}
      else
	{
	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);
	  int nbits;
	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
	      for (nbits = 0; value != 0; value &= value - 1)
		nbits++;
	    }
	  else
	    /* This is arbitrary.  */
	    nbits = 7;

	  /* Compute costs correctly for widening multiplication.  */
	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
	      && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
		 == GET_MODE_SIZE (mode))
	    {
	      int is_mulwiden = 0;
	      enum machine_mode inner_mode = GET_MODE (op0);

	      if (GET_CODE (op0) == GET_CODE (op1))
		is_mulwiden = 1, op1 = XEXP (op1, 0);
	      else if (GET_CODE (op1) == CONST_INT)
		{
		  if (GET_CODE (op0) == SIGN_EXTEND)
		    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
				  == INTVAL (op1);
		  else
		    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
		}

	      if (is_mulwiden)
		op0 = XEXP (op0, 0), mode = GET_MODE (op0);
	    }

	  *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
				  + nbits * ix86_cost->mult_bit)
		   + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);

	  return true;
	}

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (FLOAT_MODE_P (mode))
	*total = COSTS_N_INSNS (ix86_cost->fdiv);
      else
	*total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
      return false;

    case PLUS:
      if (FLOAT_MODE_P (mode))
	*total = COSTS_N_INSNS (ix86_cost->fadd);
      else if (GET_MODE_CLASS (mode) == MODE_INT
	       && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
	{
	  if (GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
	      && CONSTANT_P (XEXP (x, 1)))
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
	      if (val == 2 || val == 4 || val == 8)
		{
		  *total = COSTS_N_INSNS (ix86_cost->lea);
		  *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
				      outer_code);
		  *total += rtx_cost (XEXP (x, 1), outer_code);
		  return true;
		}
	    }
	  else if (GET_CODE (XEXP (x, 0)) == MULT
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
	      if (val == 2 || val == 4 || val == 8)
		{
		  *total = COSTS_N_INSNS (ix86_cost->lea);
		  *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
		  *total += rtx_cost (XEXP (x, 1), outer_code);
		  return true;
		}
	    }
	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
	    {
	      *total = COSTS_N_INSNS (ix86_cost->lea);
	      *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
	      *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
	      *total += rtx_cost (XEXP (x, 1), outer_code);
	      return true;
	    }
	}
      /* FALLTHRU */

    case MINUS:
      if (FLOAT_MODE_P (mode))
	{
	  *total = COSTS_N_INSNS (ix86_cost->fadd);
	  return false;
	}
      /* FALLTHRU */

    case AND:
    case IOR:
    case XOR:
      if (!TARGET_64BIT && mode == DImode)
	{
	  *total = (COSTS_N_INSNS (ix86_cost->add) * 2
		    + (rtx_cost (XEXP (x, 0), outer_code)
		       << (GET_MODE (XEXP (x, 0)) != DImode))
		    + (rtx_cost (XEXP (x, 1), outer_code)
		       << (GET_MODE (XEXP (x, 1)) != DImode)));
	  return true;
	}
      /* FALLTHRU */

    case NEG:
      if (FLOAT_MODE_P (mode))
	{
	  *total = COSTS_N_INSNS (ix86_cost->fchs);
	  return false;
	}
      /* FALLTHRU */

    case NOT:
      if (!TARGET_64BIT && mode == DImode)
	*total = COSTS_N_INSNS (ix86_cost->add * 2);
      else
	*total = COSTS_N_INSNS (ix86_cost->add);
      return false;

    case COMPARE:
      if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
	  && XEXP (XEXP (x, 0), 1) == const1_rtx
	  && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
	  && XEXP (x, 1) == const0_rtx)
	{
	  /* This kind of construct is implemented using test[bwl].
	     Treat it as if we had an AND.  */
	  *total = (COSTS_N_INSNS (ix86_cost->add)
		    + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
		    + rtx_cost (const1_rtx, outer_code));
	  return true;
	}
      return false;

    case FLOAT_EXTEND:
      if (!TARGET_SSE_MATH
	  || mode == XFmode
	  || (mode == DFmode && !TARGET_SSE2))
	*total = 0;
      return false;

    case ABS:
      if (FLOAT_MODE_P (mode))
	*total = COSTS_N_INSNS (ix86_cost->fabs);
      return false;

    case SQRT:
      if (FLOAT_MODE_P (mode))
	*total = COSTS_N_INSNS (ix86_cost->fsqrt);
      return false;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TP)
	*total = 0;
      return false;

    default:
      return false;
    }
}

#if TARGET_MACHO

static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (stub);
  binder_name = alloca (length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = alloca (length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_PURE)
    machopic_picsymbol_stub_section ();
  else
    machopic_symbol_stub_section ();

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp %%edx\n");
    }
  else
    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl %%eax\n");
    }
  else
    fprintf (file, "\t pushl $%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp dyld_stub_binding_helper\n");

  machopic_lazy_symbol_ptr_section ();
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}
#endif /* TARGET_MACHO */

/* Order the registers for register allocator.  */

void
x86_order_regs_for_local_alloc (void)
{
   int pos = 0;
   int i;

   /* First allocate the local general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && call_used_regs[i])
	reg_alloc_order [pos++] = i;

   /* Global general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
	reg_alloc_order [pos++] = i;

   /* x87 registers come first in case we are doing FP math
      using them.  */
   if (!TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   /* SSE registers.  */
   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
     reg_alloc_order [pos++] = i;
   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
     reg_alloc_order [pos++] = i;

   /* x87 registers.  */
   if (TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
     reg_alloc_order [pos++] = i;

   /* Initialize the rest of array as we do not allocate some registers
      at all.  */
   while (pos < FIRST_PSEUDO_REGISTER)
     reg_alloc_order [pos++] = 0;
}

/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  */
static tree
ix86_handle_struct_attribute (tree *node, tree name,
			      tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
	type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
		 || TREE_CODE (*type) == UNION_TYPE)))
    {
      warning (0, "%qs attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  else if ((is_attribute_p ("ms_struct", name)
	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
	   || ((is_attribute_p ("gcc_struct", name)
		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning (0, "%qs incompatible attribute ignored",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

static bool
ix86_ms_bitfield_layout_p (tree record_type)
{
  return (TARGET_MS_BITFIELD_LAYOUT &&
	  !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
    || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
}

/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);

  if (TARGET_64BIT)
    {
      int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
      return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
    }

  if (ix86_function_regparm (type, function) > 0)
    {
      tree parm;

      parm = TYPE_ARG_TYPES (type);
      /* Figure out whether or not the function has a variable number of
	 arguments.  */
      for (; parm; parm = TREE_CHAIN (parm))
	if (TREE_VALUE (parm) == void_type_node)
	  break;
      /* If not, the this parameter is in the first argument.  */
      if (parm)
	{
	  int regno = 0;
	  if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
	    regno = 2;
	  return gen_rtx_REG (SImode, regno);
	}
    }

  if (aggregate_value_p (TREE_TYPE (type), type))
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));

  return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
}

/* Determine whether x86_output_mi_thunk can succeed.  */

static bool
x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT vcall_offset, tree function)
{
  /* 64-bit can handle anything.  */
  if (TARGET_64BIT)
    return true;

  /* For 32-bit, everything's fine if we have one free register.  */
  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
    return true;

  /* Need a free register for vcall_offset.  */
  if (vcall_offset)
    return false;

  /* Need a free register for GOT references.  */
  if (flag_pic && !(*targetm.binds_local_p) (function))
    return false;

  /* Otherwise ok.  */
  return true;
}
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
  rtx this = x86_this_parameter (function);

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  else if (vcall_offset)
      /* Put the this parameter into %eax.  */
      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);

    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
      xops[0] = GEN_INT (delta);
      xops[1] = this_reg ? this_reg : this;
	  if (!x86_64_general_operand (xops[0], DImode))
	      tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
	      output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
	  output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
  /* Adjust the this parameter by a value stored in the vtable.  */
      tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
	  int tmp_regno = 2 /* ECX */;
	  if (lookup_attribute ("fastcall",
				TYPE_ATTRIBUTES (TREE_TYPE (function))))
	    tmp_regno = 0 /* EAX */;
	  tmp = gen_rtx_REG (SImode, tmp_regno);

      xops[0] = gen_rtx_MEM (Pmode, this_reg);
	output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
	output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);

      /* Adjust the this parameter.  */
      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
	  rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
	  xops[0] = GEN_INT (vcall_offset);
	  output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
	  xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
      xops[1] = this_reg;
	output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this)
      xops[0] = this_reg;
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
  xops[0] = XEXP (DECL_RTL (function), 0);
      if (!flag_pic || (*targetm.binds_local_p) (function))
	output_asm_insn ("jmp\t%P0", xops);
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  tmp = gen_rtx_MEM (QImode, tmp);
	  output_asm_insn ("jmp\t%A0", xops);
      if (!flag_pic || (*targetm.binds_local_p) (function))
	output_asm_insn ("jmp\t%P0", xops);
	  rtx sym_ref = XEXP (DECL_RTL (function), 0);
	  tmp = (gen_rtx_SYMBOL_REF
		 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
	  tmp = gen_rtx_MEM (QImode, tmp);
	  output_asm_insn ("jmp\t%0", xops);
#endif /* TARGET_MACHO */
	  tmp = gen_rtx_REG (SImode, 2 /* ECX */);
	  output_set_got (tmp);

	  output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
	  output_asm_insn ("jmp\t{*}%1", xops);
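
/* Illustration (not from the GCC sources): a rough C model of the
   adjustment the thunk above performs before tail-calling FUNCTION.
   The helper name and argument types are hypothetical; the real thunk
   folds DELTA into an add instruction and loads the vtable word into a
   scratch register as emitted above.  */

static void *
thunk_adjust_this_sketch (void *this_ptr, long delta, long vcall_offset)
{
  char *p = (char *) this_ptr + delta;	/* constant DELTA is applied first */

  if (vcall_offset != 0)
    {
      /* *p is the vtable pointer; the word at *(vtable + VCALL_OFFSET)
	 is a further adjustment added to THIS.  */
      char *vtable = *(char **) p;
      p += *(long *) (vtable + vcall_offset);
    }
  return p;
}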
x86_file_start (void)
  default_file_start ();
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax\n", asm_out_file);
x86_field_alignment (tree field, int computed)
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)

  mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
		    ? get_inner_array_type (type) : type);
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */

x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
      fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
	       LPREFIX, labelno, PROFILE_COUNT_REGISTER);
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
	       PROFILE_COUNT_REGISTER);
      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in

min_insn_size (rtx insn)
  if (!INSN_P (insn) || !active_insn_p (insn))
  /* Discard alignments we've emitted and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
  if (GET_CODE (insn) == JUMP_INSN
      && (GET_CODE (PATTERN (insn)) == ADDR_VEC
	  || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
  /* Important case - calls are always 5 bytes.
     It is common to have many calls in a row.  */
  if (GET_CODE (insn) == CALL_INSN
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
  if (get_attr_length (insn) <= 1)

  /* For normal instructions we may rely on the sizes of addresses
     and the presence of symbol to require 4 bytes of encoding.
     This is not the case for jumps where references are PC relative.  */
  if (GET_CODE (insn) != JUMP_INSN)
      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte

ix86_avoid_jump_misspredicts (void)
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
      nbytes += min_insn_size (insn);
	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
		 INSN_UID (insn), min_insn_size (insn));
      if ((GET_CODE (insn) == JUMP_INSN
	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
	  || GET_CODE (insn) == CALL_INSN)
	    start = NEXT_INSN (start);
	    if ((GET_CODE (start) == JUMP_INSN
		 && GET_CODE (PATTERN (start)) != ADDR_VEC
		 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
		|| GET_CODE (start) == CALL_INSN)
	      njumps--, isjump = 1;
	    nbytes -= min_insn_size (start);
      gcc_assert (njumps >= 0);
	fprintf (dump_file, "Interval %i to %i has %i bytes\n",
		 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
	  int padsize = 15 - nbytes + min_insn_size (insn);
	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
		     INSN_UID (insn), padsize);
	  emit_insn_before (gen_align (GEN_INT (padsize)), insn);
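
/* Illustration (not from the GCC sources): the same sliding-window idea
   in plain C over a hypothetical insn_info array, where SIZE is the
   conservative estimate min_insn_size would give and IS_JUMP marks jump
   and call instructions.  It returns how many places would need padding
   so that no 16-byte fetch window holds four jumps; the rtl pass above
   instead emits an align directly before the offending insn.  */

struct insn_info_sketch { int size; int is_jump; };

static int
count_dense_jump_windows (const struct insn_info_sketch *insn, int n)
{
  int start = 0, nbytes = 0, njumps = 0, pads = 0;
  int i;

  for (i = 0; i < n; i++)
    {
      nbytes += insn[i].size;
      if (insn[i].is_jump)
	njumps++;

      /* Keep at most four jumps in the window and make the left edge a
	 jump, so the window ending at I is minimal.  */
      while (njumps > 4
	     || (njumps == 4 && !insn[start].is_jump))
	{
	  if (insn[start].is_jump)
	    njumps--;
	  nbytes -= insn[start].size;
	  start++;
	}

      /* Four jumps packed into fewer than 16 bytes can land in a single
	 16-byte window; this is where padding would go.  */
      if (njumps == 4 && insn[i].is_jump && nbytes < 16)
	pads++;
    }
  return pads;
}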
/* AMD Athlon works faster
   when RET is not the destination of a conditional jump or directly preceded
   by another jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.  */
ix86_pad_returns (void)
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
      basic_block bb = e->src;
      rtx ret = BB_END (bb);
      bool replace = false;

      if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
	  || !maybe_hot_bb_p (bb))
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
	if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
      if (prev && GET_CODE (prev) == CODE_LABEL)
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	  prev = prev_active_insn (ret);
	      && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
		  || GET_CODE (prev) == CALL_INSN))
	  /* Empty functions get branch mispredict even when the jump destination
	     is not visible to us.  */
	  if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
	  emit_insn_before (gen_return_internal_long (), ret);
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window.  */

  if (TARGET_ATHLON_K8 && optimize && !optimize_size)
    ix86_pad_returns ();
  if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
    ix86_avoid_jump_misspredicts ();
/* Return nonzero when QImode register that must be represented via REX prefix

x86_extended_QIreg_mentioned_p (rtx insn)
  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (REG_P (recog_data.operand[i])
	&& REGNO (recog_data.operand[i]) >= 4)
/* Return nonzero when P points to register encoded via REX prefix.
   Called via for_each_rtx.  */

extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
  unsigned int regno;

  regno = REGNO (*p);
  return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);

/* Return true when INSN mentions register that must be encoded using REX

x86_extended_reg_mentioned_p (rtx insn)
  return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

x86_emit_floatuns (rtx operands[2])
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  i1 = gen_reg_rtx (Pmode);
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
  emit_jump_insn (gen_jump (donelab));

  emit_label (neglab);

  i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
  expand_float (f0, i0, 0);
  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
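
/* Illustration (not from the GCC sources): the same trick in plain C for
   a 64-bit unsigned value converted to double on hardware that only has
   signed integer conversions.  When the top bit is set, convert half the
   value (with the lost low bit OR-ed back in so the final rounding
   matches) and then double the result, exactly as the rtl above does
   with LSHIFTRT, AND, IOR, FLOAT and PLUS.  */

static double
floatuns_sketch (unsigned long long u)
{
  if ((long long) u >= 0)
    return (double) (long long) u;	/* fits in signed range */

  /* u >> 1 is in signed range; OR-ing in bit 0 keeps the rounding of the
     final result correct.  */
  double half = (double) (long long) ((u >> 1) | (u & 1));
  return half + half;
}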
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   with all elements equal to VAR.  Return true if successful.  */

ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
				   rtx target, rtx val)
  enum machine_mode smode, wsmode, wvmode;

      if (!mmx_ok && !TARGET_SSE)

      val = force_reg (GET_MODE_INNER (mode), val);
      x = gen_rtx_VEC_DUPLICATE (mode, val);
      emit_insn (gen_rtx_SET (VOIDmode, target, x));

      if (TARGET_SSE || TARGET_3DNOW_A)
	  val = gen_lowpart (SImode, val);
	  x = gen_rtx_TRUNCATE (HImode, val);
	  x = gen_rtx_VEC_DUPLICATE (mode, x);
	  emit_insn (gen_rtx_SET (VOIDmode, target, x));

      /* Replicate the value once into the next wider mode and recurse.  */
      val = convert_modes (wsmode, smode, val, true);
      x = expand_simple_binop (wsmode, ASHIFT, val,
			       GEN_INT (GET_MODE_BITSIZE (smode)),
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
      val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wvmode);
      if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
	gcc_unreachable ();
      emit_move_insn (target, gen_lowpart (mode, x));
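
/* Illustration (not from the GCC sources): the widening step above in
   scalar C, broadcasting a byte through a 32-bit word.  Each step ORs
   the value with a copy of itself shifted left by the narrower mode's
   width, doubling the number of lanes that hold the value.  */

static unsigned int
broadcast_byte_sketch (unsigned char b)
{
  unsigned int v = b;
  v |= v << 8;			/* QImode pair -> HImode */
  v |= v << 16;			/* HImode pair -> SImode */
  return v;
}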
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   whose low element is VAR, and other elements are zero.  Return true

ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode,
				     rtx target, rtx var)
  enum machine_mode vsimode;

      if (!mmx_ok && !TARGET_SSE)

      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
      emit_insn (gen_rtx_SET (VOIDmode, target, x));

      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_DUPLICATE (mode, var);
      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, target, x));

      vsimode = V4SImode;
      vsimode = V2SImode;

      /* Zero extend the variable element to SImode and recurse.  */
      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);

      x = gen_reg_rtx (vsimode);
      if (!ix86_expand_vector_init_low_nonzero (mmx_ok, vsimode, x, var))
	gcc_unreachable ();

      emit_move_insn (target, gen_lowpart (mode, x));
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   consisting of the values in VALS.  It is known that all elements
   except ONE_VAR are constants.  Return true if successful.  */

ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals, int one_var)
  rtx var = XVECEXP (vals, 0, one_var);
  enum machine_mode wmode;

  XVECEXP (vals, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));

      /* For the two element vectors, it's just as easy to use
	 the general case.  */

      /* There's no way to set one QImode entry easily.  Combine
	 the variable value with its adjacent constant value, and
	 promote to an HImode set.  */
      x = XVECEXP (vals, 0, one_var ^ 1);
	  var = convert_modes (HImode, QImode, var, true);
	  var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
	  x = GEN_INT (INTVAL (x) & 0xff);
	  var = convert_modes (HImode, QImode, var, true);
	  x = gen_int_mode (INTVAL (x) << 8, HImode);
      if (x != const0_rtx)
	var = expand_simple_binop (HImode, IOR, var, x, var,
				   1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wmode);
      emit_move_insn (x, gen_lowpart (wmode, const_vec));
      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);

      emit_move_insn (target, gen_lowpart (mode, x));

  emit_move_insn (target, const_vec);
  ix86_expand_vector_set (mmx_ok, target, var, one_var);
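
/* Illustration (not from the GCC sources): the QImode promotion used
   above, in scalar C.  A variable byte and its constant neighbour are
   merged into one 16-bit value so the vector update can be done as a
   single HImode element set; names and the boolean flag are
   hypothetical.  */

static unsigned short
combine_byte_pair_sketch (unsigned char variable, unsigned char neighbour,
			  int variable_is_low)
{
  if (variable_is_low)
    return (unsigned short) (((unsigned short) neighbour << 8) | variable);
  return (unsigned short) (((unsigned short) variable << 8) | neighbour);
}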
/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
   all values variable, and none identical.  */

ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals)
  enum machine_mode half_mode = GET_MODE_INNER (mode);
  rtx op0 = NULL, op1 = NULL;
  bool use_vec_concat = false;

      if (!mmx_ok && !TARGET_SSE)

      /* For the two element vectors, we always implement VEC_CONCAT.  */
      op0 = XVECEXP (vals, 0, 0);
      op1 = XVECEXP (vals, 0, 1);
      use_vec_concat = true;

      half_mode = V2SFmode;
      half_mode = V2SImode;

      /* For V4SF and V4SI, we implement a concat of two V2 vectors.
	 Recurse to load the two halves.  */

      op0 = gen_reg_rtx (half_mode);
      v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
      ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));

      op1 = gen_reg_rtx (half_mode);
      v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
      ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));

      use_vec_concat = true;

      gcc_unreachable ();

  if (use_vec_concat)
      if (!register_operand (op0, half_mode))
	op0 = force_reg (half_mode, op0);
      if (!register_operand (op1, half_mode))
	op1 = force_reg (half_mode, op1);

      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_CONCAT (mode, op0, op1)));
      int i, j, n_elts, n_words, n_elt_per_word;
      enum machine_mode inner_mode;
      rtx words[4], shift;

      inner_mode = GET_MODE_INNER (mode);
      n_elts = GET_MODE_NUNITS (mode);
      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      n_elt_per_word = n_elts / n_words;
      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));

      for (i = 0; i < n_words; ++i)
	  rtx word = NULL_RTX;

	  for (j = 0; j < n_elt_per_word; ++j)
	      rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
	      elt = convert_modes (word_mode, inner_mode, elt, true);

		  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
					      word, 1, OPTAB_LIB_WIDEN);
		  word = expand_simple_binop (word_mode, IOR, word, elt,
					      word, 1, OPTAB_LIB_WIDEN);

	emit_move_insn (target, gen_lowpart (mode, words[0]));
      else if (n_words == 2)
	  rtx tmp = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
	  emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
	  emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
	  emit_move_insn (target, tmp);
      else if (n_words == 4)
	  rtx tmp = gen_reg_rtx (V4SImode);
	  vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
	  ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
	  emit_move_insn (target, gen_lowpart (mode, tmp));
	gcc_unreachable ();
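
/* Illustration (not from the GCC sources): the word-building loop above
   in scalar C for two 16-bit elements.  Elements are folded in from the
   highest index down; each step shifts the accumulated word left by one
   element width and ORs the next element in.  */

static unsigned int
pack_two_hi_sketch (unsigned short e0, unsigned short e1)
{
  unsigned int word = e1;		/* highest-indexed element first */
  word = (word << 16) | e0;		/* shift, then OR in the lower one */
  return word;
}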
/* Initialize vector TARGET via VALS.  Suppress the use of MMX
   instructions unless MMX_OK is true.  */

ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;

  for (i = 0; i < n_elts; ++i)
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
	n_var++, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
	all_const_zero = false;
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))

  /* Constants are best loaded from the constant pool.  */
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));

  /* If all values are identical, broadcast the value.  */
      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
					    XVECEXP (vals, 0, 0)))

  /* Values where only one field is non-constant are best loaded from
     the pool and overwritten via move later.  */
      if (all_const_zero && one_var == 0
	  && ix86_expand_vector_init_low_nonzero (mmx_ok, mode, target,
						  XVECEXP (vals, 0, 0)))

      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))

  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_merge = false;

      tmp = gen_reg_rtx (GET_MODE_INNER (mode));
      ix86_expand_vector_extract (true, tmp, target, 1 - elt);
	tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
	tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));

      /* For the two element vectors, we implement a VEC_CONCAT with
	 the extraction of the other element.  */

      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);

	op0 = val, op1 = tmp;
	op0 = tmp, op1 = val;
      tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));

	  use_vec_merge = true;

	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* target = A A B B */
	  emit_insn (gen_sse_unpcklps (target, target, target));
	  /* target = X A B B */
	  ix86_expand_vector_set (false, target, val, 0);
	  /* target = A X C D */
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (1), GEN_INT (0),
				       GEN_INT (2+4), GEN_INT (3+4)));

	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B X D */
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (0), GEN_INT (1),
				       GEN_INT (0+4), GEN_INT (3+4)));

	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B C X */
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (0), GEN_INT (1),
				       GEN_INT (2+4), GEN_INT (0+4)));

	  gcc_unreachable ();

	  /* Element 0 handled by vec_merge below.  */
	  use_vec_merge = true;

	  /* With SSE2, use integer shuffles to swap element 0 and ELT,
	     store into element 0, then shuffle them back.  */

	  order[0] = GEN_INT (elt);
	  order[1] = const1_rtx;
	  order[2] = const2_rtx;
	  order[3] = GEN_INT (3);
	  order[elt] = const0_rtx;

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));

	  ix86_expand_vector_set (false, target, val, 0);

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));

	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
				  gen_lowpart (SFmode, val), elt);

      use_vec_merge = TARGET_SSE2;
      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);

      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));

      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, target);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);

      emit_move_insn (target, mem);
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
  enum machine_mode mode = GET_MODE (vec);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;

      use_vec_extr = true;

	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
				       GEN_INT (elt), GEN_INT (elt),
				       GEN_INT (elt+4), GEN_INT (elt+4)));

	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_unpckhps (tmp, vec, vec));

	  gcc_unreachable ();

      use_vec_extr = true;

	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse2_pshufd_1 (tmp, vec,
					GEN_INT (elt), GEN_INT (elt),
					GEN_INT (elt), GEN_INT (elt)));

	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));

	  gcc_unreachable ();

      use_vec_extr = true;

      /* For SSE1, we have to reuse the V4SF code.  */
      ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
				  gen_lowpart (V4SFmode, vec), elt);

      use_vec_extr = TARGET_SSE2;
      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);

      /* ??? Could extract the appropriate HImode element and shift.  */

      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == HImode)
	  tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
	  target = gen_lowpart (SImode, target);

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));

      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
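
/* Illustration (not from the GCC sources): the stack-temporary fallback
   above in plain C, with a fixed four-element integer array standing in
   for the rtl vector.  The whole vector is spilled to a local temporary
   and a single element is read back.  */

static int
extract_via_stack_sketch (const int vec[4], int elt)
{
  int mem[4];
  __builtin_memcpy (mem, vec, sizeof mem);	/* like emit_move_insn (mem, vec) */
  return mem[elt];				/* load the ELT'th element */
}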
/* Implements target hook vector_mode_supported_p.  */

ix86_vector_mode_supported_p (enum machine_mode mode)
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
/* Worker function for TARGET_MD_ASM_CLOBBERS.

   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */

ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
		      tree inputs ATTRIBUTE_UNUSED,
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
  clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
/* Worker function for REVERSE_CONDITION.  */

ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
  return (mode != CCFPmode && mode != CCFPUmode
	  ? reverse_condition (code)
	  : reverse_condition_maybe_unordered (code));
/* Output code to perform an x87 FP register move, from OPERANDS[1]

output_387_reg_move (rtx insn, rtx *operands)
  if (REG_P (operands[1])
      && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
      if (REGNO (operands[0]) == FIRST_STACK_REG
	  && TARGET_USE_FFREEP)
	return "ffreep\t%y0";
      return "fstp\t%y0";
  if (STACK_TOP_P (operands[0]))
    return "fld%z1\t%y1";
/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

ix86_emit_fp_unordered_jump (rtx label)
  rtx reg = gen_reg_rtx (HImode);

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_USE_SAHF)
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);

      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
			       gen_rtx_LABEL_REF (VOIDmode, label),
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
  emit_jump_insn (temp);
/* Output code to perform a log1p XFmode calculation.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);

  emit_insn (gen_absxf2 (tmp, op1));
  emit_insn (gen_cmpxf (tmp,
			CONST_DOUBLE_FROM_REAL_VALUE (
			  REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
  emit_jump_insn (gen_bge (label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));

  emit_label (label2);
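
/* Illustration (not from the GCC sources): the branch above in plain C.
   The x87 fyl2xp1 instruction is only specified for |x| < 1 - sqrt(2)/2,
   so small arguments take the accurate log2(1 + x) path while larger
   ones fall back to taking the log of the explicit sum 1 + x.  The two
   helpers only model the control flow with y = ln(2); they do not
   reproduce the extra precision of the hardware sequence.  */

static double
fyl2xp1_model (double x)
{
  return 0.6931471805599453 * __builtin_log2 (1.0 + x);	/* ln(2) * log2(1 + x) */
}

static double
fyl2x_model (double x)
{
  return 0.6931471805599453 * __builtin_log2 (x);		/* ln(2) * log2(x) */
}

static double
log1p_sketch (double x)
{
  if (__builtin_fabs (x) < 0.29289321881345247561810596348408353)
    return fyl2xp1_model (x);
  return fyl2x_model (1.0 + x);
}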
/* Solaris named-section hook.  Parameters are as for
   named_section_real.  */

i386_solaris_elf_named_section (const char *name, unsigned int flags,
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
      && strcmp (name, ".eh_frame") == 0)
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
  default_elf_asm_named_section (name, flags, decl);
#include "gt-i386.h"