1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.
   The cost arrays have five entries (QI, HI, SI, DI, plus one more),
   so anything else maps to the final slot.  */
#define MODE_INDEX(mode)			\
  ((mode) == QImode ? 0				\
   : (mode) == HImode ? 1			\
   : (mode) == SImode ? 2			\
   : (mode) == DImode ? 3			\
   : 4)
64 /* Processor costs (relative to an add) */
66 struct processor_costs size_cost
= { /* costs for tunning for size */
67 2, /* cost of an add instruction */
68 3, /* cost of a lea instruction */
69 2, /* variable shift costs */
70 3, /* constant shift costs */
71 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
72 0, /* cost of multiply per each bit set */
73 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
74 3, /* cost of movsx */
75 3, /* cost of movzx */
78 2, /* cost for loading QImode using movzbl */
79 {2, 2, 2}, /* cost of loading integer registers
80 in QImode, HImode and SImode.
81 Relative to reg-reg move (2). */
82 {2, 2, 2}, /* cost of storing integer registers */
83 2, /* cost of reg,reg fld/fst */
84 {2, 2, 2}, /* cost of loading fp registers
85 in SFmode, DFmode and XFmode */
86 {2, 2, 2}, /* cost of loading integer registers */
87 3, /* cost of moving MMX register */
88 {3, 3}, /* cost of loading MMX registers
89 in SImode and DImode */
90 {3, 3}, /* cost of storing MMX registers
91 in SImode and DImode */
92 3, /* cost of moving SSE register */
93 {3, 3, 3}, /* cost of loading SSE registers
94 in SImode, DImode and TImode */
95 {3, 3, 3}, /* cost of storing SSE registers
96 in SImode, DImode and TImode */
97 3, /* MMX or SSE register to integer */
98 0, /* size of prefetch block */
99 0, /* number of parallel prefetches */
101 2, /* cost of FADD and FSUB insns. */
102 2, /* cost of FMUL instruction. */
103 2, /* cost of FDIV instruction. */
104 2, /* cost of FABS instruction. */
105 2, /* cost of FCHS instruction. */
106 2, /* cost of FSQRT instruction. */
109 /* Processor costs (relative to an add) */
111 struct processor_costs i386_cost
= { /* 386 specific costs */
112 1, /* cost of an add instruction */
113 1, /* cost of a lea instruction */
114 3, /* variable shift costs */
115 2, /* constant shift costs */
116 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
117 1, /* cost of multiply per each bit set */
118 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
119 3, /* cost of movsx */
120 2, /* cost of movzx */
121 15, /* "large" insn */
123 4, /* cost for loading QImode using movzbl */
124 {2, 4, 2}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {8, 8, 8}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
131 {8, 8, 8}, /* cost of loading integer registers */
132 2, /* cost of moving MMX register */
133 {4, 8}, /* cost of loading MMX registers
134 in SImode and DImode */
135 {4, 8}, /* cost of storing MMX registers
136 in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
139 in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
141 in SImode, DImode and TImode */
142 3, /* MMX or SSE register to integer */
143 0, /* size of prefetch block */
144 0, /* number of parallel prefetches */
146 23, /* cost of FADD and FSUB insns. */
147 27, /* cost of FMUL instruction. */
148 88, /* cost of FDIV instruction. */
149 22, /* cost of FABS instruction. */
150 24, /* cost of FCHS instruction. */
151 122, /* cost of FSQRT instruction. */
155 struct processor_costs i486_cost
= { /* 486 specific costs */
156 1, /* cost of an add instruction */
157 1, /* cost of a lea instruction */
158 3, /* variable shift costs */
159 2, /* constant shift costs */
160 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
161 1, /* cost of multiply per each bit set */
162 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
163 3, /* cost of movsx */
164 2, /* cost of movzx */
165 15, /* "large" insn */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of loading integer registers */
176 2, /* cost of moving MMX register */
177 {4, 8}, /* cost of loading MMX registers
178 in SImode and DImode */
179 {4, 8}, /* cost of storing MMX registers
180 in SImode and DImode */
181 2, /* cost of moving SSE register */
182 {4, 8, 16}, /* cost of loading SSE registers
183 in SImode, DImode and TImode */
184 {4, 8, 16}, /* cost of storing SSE registers
185 in SImode, DImode and TImode */
186 3, /* MMX or SSE register to integer */
187 0, /* size of prefetch block */
188 0, /* number of parallel prefetches */
190 8, /* cost of FADD and FSUB insns. */
191 16, /* cost of FMUL instruction. */
192 73, /* cost of FDIV instruction. */
193 3, /* cost of FABS instruction. */
194 3, /* cost of FCHS instruction. */
195 83, /* cost of FSQRT instruction. */
199 struct processor_costs pentium_cost
= {
200 1, /* cost of an add instruction */
201 1, /* cost of a lea instruction */
202 4, /* variable shift costs */
203 1, /* constant shift costs */
204 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
205 0, /* cost of multiply per each bit set */
206 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
207 3, /* cost of movsx */
208 2, /* cost of movzx */
209 8, /* "large" insn */
211 6, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {2, 2, 6}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
219 {4, 4, 6}, /* cost of loading integer registers */
220 8, /* cost of moving MMX register */
221 {8, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {8, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
234 3, /* cost of FADD and FSUB insns. */
235 3, /* cost of FMUL instruction. */
236 39, /* cost of FDIV instruction. */
237 1, /* cost of FABS instruction. */
238 1, /* cost of FCHS instruction. */
239 70, /* cost of FSQRT instruction. */
243 struct processor_costs pentiumpro_cost
= {
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
246 1, /* variable shift costs */
247 1, /* constant shift costs */
248 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
249 0, /* cost of multiply per each bit set */
250 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
251 1, /* cost of movsx */
252 1, /* cost of movzx */
253 8, /* "large" insn */
255 2, /* cost for loading QImode using movzbl */
256 {4, 4, 4}, /* cost of loading integer registers
257 in QImode, HImode and SImode.
258 Relative to reg-reg move (2). */
259 {2, 2, 2}, /* cost of storing integer registers */
260 2, /* cost of reg,reg fld/fst */
261 {2, 2, 6}, /* cost of loading fp registers
262 in SFmode, DFmode and XFmode */
263 {4, 4, 6}, /* cost of loading integer registers */
264 2, /* cost of moving MMX register */
265 {2, 2}, /* cost of loading MMX registers
266 in SImode and DImode */
267 {2, 2}, /* cost of storing MMX registers
268 in SImode and DImode */
269 2, /* cost of moving SSE register */
270 {2, 2, 8}, /* cost of loading SSE registers
271 in SImode, DImode and TImode */
272 {2, 2, 8}, /* cost of storing SSE registers
273 in SImode, DImode and TImode */
274 3, /* MMX or SSE register to integer */
275 32, /* size of prefetch block */
276 6, /* number of parallel prefetches */
278 3, /* cost of FADD and FSUB insns. */
279 5, /* cost of FMUL instruction. */
280 56, /* cost of FDIV instruction. */
281 2, /* cost of FABS instruction. */
282 2, /* cost of FCHS instruction. */
283 56, /* cost of FSQRT instruction. */
287 struct processor_costs k6_cost
= {
288 1, /* cost of an add instruction */
289 2, /* cost of a lea instruction */
290 1, /* variable shift costs */
291 1, /* constant shift costs */
292 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
293 0, /* cost of multiply per each bit set */
294 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
295 2, /* cost of movsx */
296 2, /* cost of movzx */
297 8, /* "large" insn */
299 3, /* cost for loading QImode using movzbl */
300 {4, 5, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 3, 2}, /* cost of storing integer registers */
304 4, /* cost of reg,reg fld/fst */
305 {6, 6, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
307 {4, 4, 4}, /* cost of loading integer registers */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 6, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 1, /* number of parallel prefetches */
322 2, /* cost of FADD and FSUB insns. */
323 2, /* cost of FMUL instruction. */
324 56, /* cost of FDIV instruction. */
325 2, /* cost of FABS instruction. */
326 2, /* cost of FCHS instruction. */
327 56, /* cost of FSQRT instruction. */
331 struct processor_costs athlon_cost
= {
332 1, /* cost of an add instruction */
333 2, /* cost of a lea instruction */
334 1, /* variable shift costs */
335 1, /* constant shift costs */
336 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
337 0, /* cost of multiply per each bit set */
338 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
339 1, /* cost of movsx */
340 1, /* cost of movzx */
341 8, /* "large" insn */
343 4, /* cost for loading QImode using movzbl */
344 {3, 4, 3}, /* cost of loading integer registers
345 in QImode, HImode and SImode.
346 Relative to reg-reg move (2). */
347 {3, 4, 3}, /* cost of storing integer registers */
348 4, /* cost of reg,reg fld/fst */
349 {4, 4, 12}, /* cost of loading fp registers
350 in SFmode, DFmode and XFmode */
351 {6, 6, 8}, /* cost of loading integer registers */
352 2, /* cost of moving MMX register */
353 {4, 4}, /* cost of loading MMX registers
354 in SImode and DImode */
355 {4, 4}, /* cost of storing MMX registers
356 in SImode and DImode */
357 2, /* cost of moving SSE register */
358 {4, 4, 6}, /* cost of loading SSE registers
359 in SImode, DImode and TImode */
360 {4, 4, 5}, /* cost of storing SSE registers
361 in SImode, DImode and TImode */
362 5, /* MMX or SSE register to integer */
363 64, /* size of prefetch block */
364 6, /* number of parallel prefetches */
366 4, /* cost of FADD and FSUB insns. */
367 4, /* cost of FMUL instruction. */
368 24, /* cost of FDIV instruction. */
369 2, /* cost of FABS instruction. */
370 2, /* cost of FCHS instruction. */
371 35, /* cost of FSQRT instruction. */
375 struct processor_costs k8_cost
= {
376 1, /* cost of an add instruction */
377 2, /* cost of a lea instruction */
378 1, /* variable shift costs */
379 1, /* constant shift costs */
380 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
381 0, /* cost of multiply per each bit set */
382 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
383 1, /* cost of movsx */
384 1, /* cost of movzx */
385 8, /* "large" insn */
387 4, /* cost for loading QImode using movzbl */
388 {3, 4, 3}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {3, 4, 3}, /* cost of storing integer registers */
392 4, /* cost of reg,reg fld/fst */
393 {4, 4, 12}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {6, 6, 8}, /* cost of loading integer registers */
396 2, /* cost of moving MMX register */
397 {3, 3}, /* cost of loading MMX registers
398 in SImode and DImode */
399 {4, 4}, /* cost of storing MMX registers
400 in SImode and DImode */
401 2, /* cost of moving SSE register */
402 {4, 3, 6}, /* cost of loading SSE registers
403 in SImode, DImode and TImode */
404 {4, 4, 5}, /* cost of storing SSE registers
405 in SImode, DImode and TImode */
406 5, /* MMX or SSE register to integer */
407 64, /* size of prefetch block */
408 6, /* number of parallel prefetches */
410 4, /* cost of FADD and FSUB insns. */
411 4, /* cost of FMUL instruction. */
412 19, /* cost of FDIV instruction. */
413 2, /* cost of FABS instruction. */
414 2, /* cost of FCHS instruction. */
415 35, /* cost of FSQRT instruction. */
419 struct processor_costs pentium4_cost
= {
420 1, /* cost of an add instruction */
421 3, /* cost of a lea instruction */
422 4, /* variable shift costs */
423 4, /* constant shift costs */
424 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
425 0, /* cost of multiply per each bit set */
426 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
427 1, /* cost of movsx */
428 1, /* cost of movzx */
429 16, /* "large" insn */
431 2, /* cost for loading QImode using movzbl */
432 {4, 5, 4}, /* cost of loading integer registers
433 in QImode, HImode and SImode.
434 Relative to reg-reg move (2). */
435 {2, 3, 2}, /* cost of storing integer registers */
436 2, /* cost of reg,reg fld/fst */
437 {2, 2, 6}, /* cost of loading fp registers
438 in SFmode, DFmode and XFmode */
439 {4, 4, 6}, /* cost of loading integer registers */
440 2, /* cost of moving MMX register */
441 {2, 2}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {2, 2}, /* cost of storing MMX registers
444 in SImode and DImode */
445 12, /* cost of moving SSE register */
446 {12, 12, 12}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {2, 2, 8}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 10, /* MMX or SSE register to integer */
451 64, /* size of prefetch block */
452 6, /* number of parallel prefetches */
454 5, /* cost of FADD and FSUB insns. */
455 7, /* cost of FMUL instruction. */
456 43, /* cost of FDIV instruction. */
457 2, /* cost of FABS instruction. */
458 2, /* cost of FCHS instruction. */
459 43, /* cost of FSQRT instruction. */
463 struct processor_costs nocona_cost
= {
464 1, /* cost of an add instruction */
465 1, /* cost of a lea instruction */
466 1, /* variable shift costs */
467 1, /* constant shift costs */
468 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
469 0, /* cost of multiply per each bit set */
470 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
471 1, /* cost of movsx */
472 1, /* cost of movzx */
473 16, /* "large" insn */
475 4, /* cost for loading QImode using movzbl */
476 {4, 4, 4}, /* cost of loading integer registers
477 in QImode, HImode and SImode.
478 Relative to reg-reg move (2). */
479 {4, 4, 4}, /* cost of storing integer registers */
480 3, /* cost of reg,reg fld/fst */
481 {12, 12, 12}, /* cost of loading fp registers
482 in SFmode, DFmode and XFmode */
483 {4, 4, 4}, /* cost of loading integer registers */
484 6, /* cost of moving MMX register */
485 {12, 12}, /* cost of loading MMX registers
486 in SImode and DImode */
487 {12, 12}, /* cost of storing MMX registers
488 in SImode and DImode */
489 6, /* cost of moving SSE register */
490 {12, 12, 12}, /* cost of loading SSE registers
491 in SImode, DImode and TImode */
492 {12, 12, 12}, /* cost of storing SSE registers
493 in SImode, DImode and TImode */
494 8, /* MMX or SSE register to integer */
495 128, /* size of prefetch block */
496 8, /* number of parallel prefetches */
498 6, /* cost of FADD and FSUB insns. */
499 8, /* cost of FMUL instruction. */
500 40, /* cost of FDIV instruction. */
501 3, /* cost of FABS instruction. */
502 3, /* cost of FCHS instruction. */
503 44, /* cost of FSQRT instruction. */
/* Cost table currently in use.  Initialized here to the Pentium costs;
   NOTE(review): presumably repointed when the target/tune processor is
   selected -- confirm against the option-handling code.  */
const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  One bit per PROCESSOR_*
   enumerator; these are OR-ed together into the x86_* tuning masks
   below.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
/* Convenience mask covering both AMD Athlon and K8.  */
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
/* Tuning masks.  Each constant is a bitmask over the m_* processor
   bits above; ~X reads as "every processor except X", ~(0) as all
   processors and 0 as none.  NOTE(review): presumably a set bit
   enables the named behavior when tuning for that processor --
   confirm against the users of these flags.  */
const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_branch_hints = m_PENT4 | m_NOCONA;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
const int x86_qimode_math = ~(0);	/* all processors */
const int x86_promote_qi_regs = 0;	/* no processors */
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
const int x86_decompose_lea = m_PENT4 | m_NOCONA;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
/* Set for machines where the type and dependencies are resolved on SSE register
   parts instead of whole registers, so we may maintain just lower part of
   scalar values in proper format leaving the upper part undefined.  */
const int x86_sse_partial_regs = m_ATHLON_K8;
/* Athlon optimizes partial-register FPS special case, thus avoiding the
   need for extra instructions beforehand.  */
const int x86_sse_partial_regs_for_cvtsd2ss = 0;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
const int x86_inter_unit_moves = ~(m_ATHLON_K8);
const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
/* Some CPU cores are not able to predict more than 4 branch instructions in
   the 16 byte window.  */
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.
   The initializer macros are supplied by the target headers.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
588 /* Array of the smallest class containing reg number REGNO, indexed by
589 REGNO. Used by REGNO_REG_CLASS in i386.h. */
591 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
594 AREG
, DREG
, CREG
, BREG
,
596 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
598 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
599 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
602 /* flags, fpsr, dirflag, frame */
603 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
604 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
606 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
608 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
609 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
610 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
614 /* The "default" register map used in 32bit mode. */
616 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
618 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
619 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
620 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
621 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
622 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
623 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
624 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
627 static int const x86_64_int_parameter_registers
[6] =
629 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
630 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
633 static int const x86_64_int_return_registers
[4] =
635 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
638 /* The "default" register map used in 64bit mode. */
639 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
641 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
642 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
643 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
644 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
645 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
646 8,9,10,11,12,13,14,15, /* extended integer registers */
647 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
650 /* Define the register numbers to be used in Dwarf debugging information.
651 The SVR4 reference port C compiler uses the following register numbers
652 in its Dwarf output code:
653 0 for %eax (gcc regno = 0)
654 1 for %ecx (gcc regno = 2)
655 2 for %edx (gcc regno = 1)
656 3 for %ebx (gcc regno = 3)
657 4 for %esp (gcc regno = 7)
658 5 for %ebp (gcc regno = 6)
659 6 for %esi (gcc regno = 4)
660 7 for %edi (gcc regno = 5)
661 The following three DWARF register numbers are never generated by
662 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
663 believes these numbers have these meanings.
664 8 for %eip (no gcc equivalent)
665 9 for %eflags (gcc regno = 17)
666 10 for %trapno (no gcc equivalent)
667 It is not at all clear how we should number the FP stack registers
668 for the x86 architecture. If the version of SDB on x86/svr4 were
669 a bit less brain dead with respect to floating-point then we would
670 have a precedent to follow with respect to DWARF register numbers
671 for x86 FP registers, but the SDB on x86/svr4 is so completely
672 broken with respect to FP registers that it is hardly worth thinking
673 of it as something to strive for compatibility with.
674 The version of x86/svr4 SDB I have at the moment does (partially)
675 seem to believe that DWARF register number 11 is associated with
676 the x86 register %st(0), but that's about all. Higher DWARF
677 register numbers don't seem to be associated with anything in
678 particular, and even for DWARF regno 11, SDB only seems to under-
679 stand that it should say that a variable lives in %st(0) (when
680 asked via an `=' command) if we said it was in DWARF regno 11,
681 but SDB still prints garbage when asked for the value of the
682 variable in question (via a `/' command).
683 (Also note that the labels SDB prints for various FP stack regs
684 when doing an `x' command are all wrong.)
685 Note that these problems generally don't affect the native SVR4
686 C compiler because it doesn't allow the use of -O with -g and
687 because when it is *not* optimizing, it allocates a memory
688 location for each floating-point variable, and the memory
689 location is what gets described in the DWARF AT_location
690 attribute for the variable in question.
691 Regardless of the severe mental illness of the x86/svr4 SDB, we
692 do something sensible here and we use the following DWARF
693 register numbers. Note that these are all stack-top-relative
695 11 for %st(0) (gcc regno = 8)
696 12 for %st(1) (gcc regno = 9)
697 13 for %st(2) (gcc regno = 10)
698 14 for %st(3) (gcc regno = 11)
699 15 for %st(4) (gcc regno = 12)
700 16 for %st(5) (gcc regno = 13)
701 17 for %st(6) (gcc regno = 14)
702 18 for %st(7) (gcc regno = 15)
704 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
706 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
707 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
708 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
709 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
710 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
711 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
712 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
715 /* Test and compare insns in i386.md store the information needed to
716 generate branch and scc insns here. */
718 rtx ix86_compare_op0
= NULL_RTX
;
719 rtx ix86_compare_op1
= NULL_RTX
;
/* Number of distinct (mode, slot) stack locals cached per function.  */
#define MAX_386_STACK_LOCALS 3

/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
725 /* Define the structure for the machine field in struct function. */
727 struct stack_local_entry
GTY(())
732 struct stack_local_entry
*next
;
735 /* Structure describing stack frame layout.
736 Stack grows downward:
742 saved frame pointer if frame_pointer_needed
743 <- HARD_FRAME_POINTER
749 > to_allocate <- FRAME_POINTER
761 int outgoing_arguments_size
;
764 HOST_WIDE_INT to_allocate
;
765 /* The offsets relative to ARG_POINTER. */
766 HOST_WIDE_INT frame_pointer_offset
;
767 HOST_WIDE_INT hard_frame_pointer_offset
;
768 HOST_WIDE_INT stack_pointer_offset
;
770 /* When save_regs_using_mov is set, emit prologue using
771 move instead of push instructions. */
772 bool save_regs_using_mov
;
775 /* Used to enable/disable debugging features. */
776 const char *ix86_debug_arg_string
, *ix86_debug_addr_string
;
777 /* Code model option as passed by user. */
778 const char *ix86_cmodel_string
;
780 enum cmodel ix86_cmodel
;
782 const char *ix86_asm_string
;
783 enum asm_dialect ix86_asm_dialect
= ASM_ATT
;
785 const char *ix86_tls_dialect_string
;
786 enum tls_dialect ix86_tls_dialect
= TLS_DIALECT_GNU
;
788 /* Which unit we are generating floating point math for. */
789 enum fpmath_unit ix86_fpmath
;
791 /* Which cpu are we scheduling for. */
792 enum processor_type ix86_tune
;
793 /* Which instruction set architecture to use. */
794 enum processor_type ix86_arch
;
796 /* Strings to hold which cpu and instruction set architecture to use. */
797 const char *ix86_tune_string
; /* for -mtune=<xxx> */
798 const char *ix86_arch_string
; /* for -march=<xxx> */
799 const char *ix86_fpmath_string
; /* for -mfpmath=<xxx> */
801 /* # of registers to use to pass arguments. */
802 const char *ix86_regparm_string
;
804 /* true if sse prefetch instruction is not NOOP. */
805 int x86_prefetch_sse
;
807 /* ix86_regparm_string as a number */
810 /* Alignment to use for loops and jumps: */
812 /* Power of two alignment for loops. */
813 const char *ix86_align_loops_string
;
815 /* Power of two alignment for non-loop jumps. */
816 const char *ix86_align_jumps_string
;
818 /* Power of two alignment for stack boundary in bytes. */
819 const char *ix86_preferred_stack_boundary_string
;
821 /* Preferred alignment for stack boundary in bits. */
822 unsigned int ix86_preferred_stack_boundary
;
824 /* Values 1-5: see jump.c */
825 int ix86_branch_cost
;
826 const char *ix86_branch_cost_string
;
828 /* Power of two alignment for functions. */
829 const char *ix86_align_funcs_string
;
831 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
832 char internal_label_prefix
[16];
833 int internal_label_prefix_len
;
835 static void output_pic_addr_const (FILE *, rtx
, int);
836 static void put_condition_code (enum rtx_code
, enum machine_mode
,
838 static const char *get_some_local_dynamic_name (void);
839 static int get_some_local_dynamic_name_1 (rtx
*, void *);
840 static rtx
ix86_expand_int_compare (enum rtx_code
, rtx
, rtx
);
841 static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code
, rtx
*,
843 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
844 static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode
,
846 static rtx
get_thread_pointer (int);
847 static rtx
legitimize_tls_address (rtx
, enum tls_model
, int);
848 static void get_pc_thunk_name (char [32], unsigned int);
849 static rtx
gen_push (rtx
);
850 static int ix86_flags_dependant (rtx
, rtx
, enum attr_type
);
851 static int ix86_agi_dependant (rtx
, rtx
, enum attr_type
);
852 static struct machine_function
* ix86_init_machine_status (void);
853 static int ix86_split_to_parts (rtx
, rtx
*, enum machine_mode
);
854 static int ix86_nsaved_regs (void);
855 static void ix86_emit_save_regs (void);
856 static void ix86_emit_save_regs_using_mov (rtx
, HOST_WIDE_INT
);
857 static void ix86_emit_restore_regs_using_mov (rtx
, HOST_WIDE_INT
, int);
858 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT
);
859 static HOST_WIDE_INT
ix86_GOT_alias_set (void);
860 static void ix86_adjust_counter (rtx
, HOST_WIDE_INT
);
861 static rtx
ix86_expand_aligntest (rtx
, int);
862 static void ix86_expand_strlensi_unroll_1 (rtx
, rtx
, rtx
);
863 static int ix86_issue_rate (void);
864 static int ix86_adjust_cost (rtx
, rtx
, rtx
, int);
865 static int ia32_multipass_dfa_lookahead (void);
866 static void ix86_init_mmx_sse_builtins (void);
867 static rtx
x86_this_parameter (tree
);
868 static void x86_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
,
869 HOST_WIDE_INT
, tree
);
870 static bool x86_can_output_mi_thunk (tree
, HOST_WIDE_INT
, HOST_WIDE_INT
, tree
);
871 static void x86_file_start (void);
872 static void ix86_reorg (void);
873 static bool ix86_expand_carry_flag_compare (enum rtx_code
, rtx
, rtx
, rtx
*);
874 static tree
ix86_build_builtin_va_list (void);
875 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*, enum machine_mode
,
877 static tree
ix86_gimplify_va_arg (tree
, tree
, tree
*, tree
*);
878 static bool ix86_vector_mode_supported_p (enum machine_mode
);
880 static int ix86_address_cost (rtx
);
881 static bool ix86_cannot_force_const_mem (rtx
);
882 static rtx
ix86_delegitimize_address (rtx
);
884 struct builtin_description
;
885 static rtx
ix86_expand_sse_comi (const struct builtin_description
*,
887 static rtx
ix86_expand_sse_compare (const struct builtin_description
*,
889 static rtx
ix86_expand_unop1_builtin (enum insn_code
, tree
, rtx
);
890 static rtx
ix86_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
891 static rtx
ix86_expand_binop_builtin (enum insn_code
, tree
, rtx
);
892 static rtx
ix86_expand_store_builtin (enum insn_code
, tree
);
893 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
894 static rtx
ix86_expand_fp_compare (enum rtx_code
, rtx
, rtx
, rtx
, rtx
*, rtx
*);
895 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code
);
896 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code
);
897 static int ix86_fp_comparison_sahf_cost (enum rtx_code code
);
898 static int ix86_fp_comparison_cost (enum rtx_code code
);
899 static unsigned int ix86_select_alt_pic_regnum (void);
900 static int ix86_save_reg (unsigned int, int);
901 static void ix86_compute_frame_layout (struct ix86_frame
*);
902 static int ix86_comp_type_attributes (tree
, tree
);
903 static int ix86_function_regparm (tree
, tree
);
904 const struct attribute_spec ix86_attribute_table
[];
905 static bool ix86_function_ok_for_sibcall (tree
, tree
);
906 static tree
ix86_handle_cdecl_attribute (tree
*, tree
, tree
, int, bool *);
907 static tree
ix86_handle_regparm_attribute (tree
*, tree
, tree
, int, bool *);
908 static int ix86_value_regno (enum machine_mode
);
909 static bool contains_128bit_aligned_vector_p (tree
);
910 static rtx
ix86_struct_value_rtx (tree
, int);
911 static bool ix86_ms_bitfield_layout_p (tree
);
912 static tree
ix86_handle_struct_attribute (tree
*, tree
, tree
, int, bool *);
913 static int extended_reg_mentioned_1 (rtx
*, void *);
914 static bool ix86_rtx_costs (rtx
, int, int, int *);
915 static int min_insn_size (rtx
);
916 static tree
ix86_md_asm_clobbers (tree clobbers
);
917 static bool ix86_must_pass_in_stack (enum machine_mode mode
, tree type
);
918 static bool ix86_pass_by_reference (CUMULATIVE_ARGS
*, enum machine_mode
,
921 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
922 static void ix86_svr3_asm_out_constructor (rtx
, int);
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_MEMORY_CLASS
  };
/* Debug names for the classes above, indexed in enum order.  */
static const char * const x86_64_reg_class_name[] =
   {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
949 #define MAX_CLASSES 4
950 static int classify_argument (enum machine_mode
, tree
,
951 enum x86_64_reg_class
[MAX_CLASSES
], int);
952 static int examine_argument (enum machine_mode
, tree
, int, int *, int *);
953 static rtx
construct_container (enum machine_mode
, tree
, int, int, int,
955 static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class
,
956 enum x86_64_reg_class
);
958 /* Table of constants used by fldpi, fldln2, etc.... */
959 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
960 static bool ext_80387_constants_init
= 0;
961 static void init_ext_80387_constants (void);
963 /* Initialize the GCC target structure. */
964 #undef TARGET_ATTRIBUTE_TABLE
965 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
966 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
967 # undef TARGET_MERGE_DECL_ATTRIBUTES
968 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
971 #undef TARGET_COMP_TYPE_ATTRIBUTES
972 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
974 #undef TARGET_INIT_BUILTINS
975 #define TARGET_INIT_BUILTINS ix86_init_builtins
977 #undef TARGET_EXPAND_BUILTIN
978 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
980 #undef TARGET_ASM_FUNCTION_EPILOGUE
981 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
983 #undef TARGET_ASM_OPEN_PAREN
984 #define TARGET_ASM_OPEN_PAREN ""
985 #undef TARGET_ASM_CLOSE_PAREN
986 #define TARGET_ASM_CLOSE_PAREN ""
988 #undef TARGET_ASM_ALIGNED_HI_OP
989 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
990 #undef TARGET_ASM_ALIGNED_SI_OP
991 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
993 #undef TARGET_ASM_ALIGNED_DI_OP
994 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
997 #undef TARGET_ASM_UNALIGNED_HI_OP
998 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
999 #undef TARGET_ASM_UNALIGNED_SI_OP
1000 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1001 #undef TARGET_ASM_UNALIGNED_DI_OP
1002 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1004 #undef TARGET_SCHED_ADJUST_COST
1005 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1006 #undef TARGET_SCHED_ISSUE_RATE
1007 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1008 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1009 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1010 ia32_multipass_dfa_lookahead
1012 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1013 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1016 #undef TARGET_HAVE_TLS
1017 #define TARGET_HAVE_TLS true
1019 #undef TARGET_CANNOT_FORCE_CONST_MEM
1020 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1022 #undef TARGET_DELEGITIMIZE_ADDRESS
1023 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1025 #undef TARGET_MS_BITFIELD_LAYOUT_P
1026 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1028 #undef TARGET_ASM_OUTPUT_MI_THUNK
1029 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1030 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1031 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1033 #undef TARGET_ASM_FILE_START
1034 #define TARGET_ASM_FILE_START x86_file_start
1036 #undef TARGET_RTX_COSTS
1037 #define TARGET_RTX_COSTS ix86_rtx_costs
1038 #undef TARGET_ADDRESS_COST
1039 #define TARGET_ADDRESS_COST ix86_address_cost
1041 #undef TARGET_FIXED_CONDITION_CODE_REGS
1042 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1043 #undef TARGET_CC_MODES_COMPATIBLE
1044 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1046 #undef TARGET_MACHINE_DEPENDENT_REORG
1047 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1049 #undef TARGET_BUILD_BUILTIN_VA_LIST
1050 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1052 #undef TARGET_MD_ASM_CLOBBERS
1053 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1055 #undef TARGET_PROMOTE_PROTOTYPES
1056 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1057 #undef TARGET_STRUCT_VALUE_RTX
1058 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1059 #undef TARGET_SETUP_INCOMING_VARARGS
1060 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1061 #undef TARGET_MUST_PASS_IN_STACK
1062 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1063 #undef TARGET_PASS_BY_REFERENCE
1064 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1066 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1067 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1069 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1070 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1072 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1073 #undef TARGET_INSERT_ATTRIBUTES
1074 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1077 struct gcc_target targetm
= TARGET_INITIALIZER
;
/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif
1086 /* Sometimes certain combinations of command options do not make
1087 sense on a particular target machine. You can define a macro
1088 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1089 defined, is executed once just after all the command options have
1092 Don't use this macro to turn on various extra optimizations for
1093 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1096 override_options (void)
1099 int ix86_tune_defaulted
= 0;
1101 /* Comes from final.c -- no real reason to change it. */
1102 #define MAX_CODE_ALIGN 16
1106 const struct processor_costs
*cost
; /* Processor costs */
1107 const int target_enable
; /* Target flags to enable. */
1108 const int target_disable
; /* Target flags to disable. */
1109 const int align_loop
; /* Default alignments. */
1110 const int align_loop_max_skip
;
1111 const int align_jump
;
1112 const int align_jump_max_skip
;
1113 const int align_func
;
1115 const processor_target_table
[PROCESSOR_max
] =
1117 {&i386_cost
, 0, 0, 4, 3, 4, 3, 4},
1118 {&i486_cost
, 0, 0, 16, 15, 16, 15, 16},
1119 {&pentium_cost
, 0, 0, 16, 7, 16, 7, 16},
1120 {&pentiumpro_cost
, 0, 0, 16, 15, 16, 7, 16},
1121 {&k6_cost
, 0, 0, 32, 7, 32, 7, 32},
1122 {&athlon_cost
, 0, 0, 16, 7, 16, 7, 16},
1123 {&pentium4_cost
, 0, 0, 0, 0, 0, 0, 0},
1124 {&k8_cost
, 0, 0, 16, 7, 16, 7, 16},
1125 {&nocona_cost
, 0, 0, 0, 0, 0, 0, 0}
1128 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
1131 const char *const name
; /* processor name or nickname. */
1132 const enum processor_type processor
;
1133 const enum pta_flags
1139 PTA_PREFETCH_SSE
= 16,
1145 const processor_alias_table
[] =
1147 {"i386", PROCESSOR_I386
, 0},
1148 {"i486", PROCESSOR_I486
, 0},
1149 {"i586", PROCESSOR_PENTIUM
, 0},
1150 {"pentium", PROCESSOR_PENTIUM
, 0},
1151 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
1152 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
1153 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1154 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1155 {"c3-2", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_SSE
},
1156 {"i686", PROCESSOR_PENTIUMPRO
, 0},
1157 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
1158 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
1159 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1160 {"pentium3m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1161 {"pentium-m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
| PTA_SSE2
},
1162 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1163 | PTA_MMX
| PTA_PREFETCH_SSE
},
1164 {"pentium4m", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1165 | PTA_MMX
| PTA_PREFETCH_SSE
},
1166 {"prescott", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
1167 | PTA_MMX
| PTA_PREFETCH_SSE
},
1168 {"nocona", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_64BIT
1169 | PTA_MMX
| PTA_PREFETCH_SSE
},
1170 {"k6", PROCESSOR_K6
, PTA_MMX
},
1171 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1172 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1173 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1175 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
1176 | PTA_3DNOW
| PTA_3DNOW_A
},
1177 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1178 | PTA_3DNOW_A
| PTA_SSE
},
1179 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1180 | PTA_3DNOW_A
| PTA_SSE
},
1181 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1182 | PTA_3DNOW_A
| PTA_SSE
},
1183 {"x86-64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_64BIT
1184 | PTA_SSE
| PTA_SSE2
},
1185 {"k8", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1186 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1187 {"opteron", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1188 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1189 {"athlon64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1190 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1191 {"athlon-fx", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1192 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1195 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
1197 /* Set the default values for switches whose default depends on TARGET_64BIT
1198 in case they weren't overwritten by command line options. */
1201 if (flag_omit_frame_pointer
== 2)
1202 flag_omit_frame_pointer
= 1;
1203 if (flag_asynchronous_unwind_tables
== 2)
1204 flag_asynchronous_unwind_tables
= 1;
1205 if (flag_pcc_struct_return
== 2)
1206 flag_pcc_struct_return
= 0;
1210 if (flag_omit_frame_pointer
== 2)
1211 flag_omit_frame_pointer
= 0;
1212 if (flag_asynchronous_unwind_tables
== 2)
1213 flag_asynchronous_unwind_tables
= 0;
1214 if (flag_pcc_struct_return
== 2)
1215 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
1218 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1219 SUBTARGET_OVERRIDE_OPTIONS
;
1222 if (!ix86_tune_string
&& ix86_arch_string
)
1223 ix86_tune_string
= ix86_arch_string
;
1224 if (!ix86_tune_string
)
1226 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
1227 ix86_tune_defaulted
= 1;
1229 if (!ix86_arch_string
)
1230 ix86_arch_string
= TARGET_64BIT
? "x86-64" : "i386";
1232 if (ix86_cmodel_string
!= 0)
1234 if (!strcmp (ix86_cmodel_string
, "small"))
1235 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1237 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string
);
1238 else if (!strcmp (ix86_cmodel_string
, "32"))
1239 ix86_cmodel
= CM_32
;
1240 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
1241 ix86_cmodel
= CM_KERNEL
;
1242 else if (!strcmp (ix86_cmodel_string
, "medium") && !flag_pic
)
1243 ix86_cmodel
= CM_MEDIUM
;
1244 else if (!strcmp (ix86_cmodel_string
, "large") && !flag_pic
)
1245 ix86_cmodel
= CM_LARGE
;
1247 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
1251 ix86_cmodel
= CM_32
;
1253 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1255 if (ix86_asm_string
!= 0)
1257 if (!strcmp (ix86_asm_string
, "intel"))
1258 ix86_asm_dialect
= ASM_INTEL
;
1259 else if (!strcmp (ix86_asm_string
, "att"))
1260 ix86_asm_dialect
= ASM_ATT
;
1262 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
1264 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
1265 error ("code model `%s' not supported in the %s bit mode",
1266 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
1267 if (ix86_cmodel
== CM_LARGE
)
1268 sorry ("code model `large' not supported yet");
1269 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
1270 sorry ("%i-bit mode not compiled in",
1271 (target_flags
& MASK_64BIT
) ? 64 : 32);
1273 for (i
= 0; i
< pta_size
; i
++)
1274 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
1276 ix86_arch
= processor_alias_table
[i
].processor
;
1277 /* Default cpu tuning to the architecture. */
1278 ix86_tune
= ix86_arch
;
1279 if (processor_alias_table
[i
].flags
& PTA_MMX
1280 && !(target_flags_explicit
& MASK_MMX
))
1281 target_flags
|= MASK_MMX
;
1282 if (processor_alias_table
[i
].flags
& PTA_3DNOW
1283 && !(target_flags_explicit
& MASK_3DNOW
))
1284 target_flags
|= MASK_3DNOW
;
1285 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
1286 && !(target_flags_explicit
& MASK_3DNOW_A
))
1287 target_flags
|= MASK_3DNOW_A
;
1288 if (processor_alias_table
[i
].flags
& PTA_SSE
1289 && !(target_flags_explicit
& MASK_SSE
))
1290 target_flags
|= MASK_SSE
;
1291 if (processor_alias_table
[i
].flags
& PTA_SSE2
1292 && !(target_flags_explicit
& MASK_SSE2
))
1293 target_flags
|= MASK_SSE2
;
1294 if (processor_alias_table
[i
].flags
& PTA_SSE3
1295 && !(target_flags_explicit
& MASK_SSE3
))
1296 target_flags
|= MASK_SSE3
;
1297 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1298 x86_prefetch_sse
= true;
1299 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1301 if (ix86_tune_defaulted
)
1303 ix86_tune_string
= "x86-64";
1304 for (i
= 0; i
< pta_size
; i
++)
1305 if (! strcmp (ix86_tune_string
,
1306 processor_alias_table
[i
].name
))
1308 ix86_tune
= processor_alias_table
[i
].processor
;
1311 error ("CPU you selected does not support x86-64 "
1318 error ("bad value (%s) for -march= switch", ix86_arch_string
);
1320 for (i
= 0; i
< pta_size
; i
++)
1321 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
1323 ix86_tune
= processor_alias_table
[i
].processor
;
1324 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1325 error ("CPU you selected does not support x86-64 instruction set");
1327 /* Intel CPUs have always interpreted SSE prefetch instructions as
1328 NOPs; so, we can enable SSE prefetch instructions even when
1329 -mtune (rather than -march) points us to a processor that has them.
1330 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1331 higher processors. */
1332 if (TARGET_CMOVE
&& (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
))
1333 x86_prefetch_sse
= true;
1337 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
1340 ix86_cost
= &size_cost
;
1342 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
1343 target_flags
|= processor_target_table
[ix86_tune
].target_enable
;
1344 target_flags
&= ~processor_target_table
[ix86_tune
].target_disable
;
1346 /* Arrange to set up i386_stack_locals for all functions. */
1347 init_machine_status
= ix86_init_machine_status
;
1349 /* Validate -mregparm= value. */
1350 if (ix86_regparm_string
)
1352 i
= atoi (ix86_regparm_string
);
1353 if (i
< 0 || i
> REGPARM_MAX
)
1354 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
1360 ix86_regparm
= REGPARM_MAX
;
1362 /* If the user has provided any of the -malign-* options,
1363 warn and use that value only if -falign-* is not set.
1364 Remove this code in GCC 3.2 or later. */
1365 if (ix86_align_loops_string
)
1367 warning ("-malign-loops is obsolete, use -falign-loops");
1368 if (align_loops
== 0)
1370 i
= atoi (ix86_align_loops_string
);
1371 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1372 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1374 align_loops
= 1 << i
;
1378 if (ix86_align_jumps_string
)
1380 warning ("-malign-jumps is obsolete, use -falign-jumps");
1381 if (align_jumps
== 0)
1383 i
= atoi (ix86_align_jumps_string
);
1384 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1385 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1387 align_jumps
= 1 << i
;
1391 if (ix86_align_funcs_string
)
1393 warning ("-malign-functions is obsolete, use -falign-functions");
1394 if (align_functions
== 0)
1396 i
= atoi (ix86_align_funcs_string
);
1397 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1398 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1400 align_functions
= 1 << i
;
1404 /* Default align_* from the processor table. */
1405 if (align_loops
== 0)
1407 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
1408 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
1410 if (align_jumps
== 0)
1412 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
1413 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
1415 if (align_functions
== 0)
1417 align_functions
= processor_target_table
[ix86_tune
].align_func
;
1420 /* Validate -mpreferred-stack-boundary= value, or provide default.
1421 The default of 128 bits is for Pentium III's SSE __m128, but we
1422 don't want additional code to keep the stack aligned when
1423 optimizing for code size. */
1424 ix86_preferred_stack_boundary
= (optimize_size
1425 ? TARGET_64BIT
? 128 : 32
1427 if (ix86_preferred_stack_boundary_string
)
1429 i
= atoi (ix86_preferred_stack_boundary_string
);
1430 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
1431 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
1432 TARGET_64BIT
? 4 : 2);
1434 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
1437 /* Validate -mbranch-cost= value, or provide default. */
1438 ix86_branch_cost
= processor_target_table
[ix86_tune
].cost
->branch_cost
;
1439 if (ix86_branch_cost_string
)
1441 i
= atoi (ix86_branch_cost_string
);
1443 error ("-mbranch-cost=%d is not between 0 and 5", i
);
1445 ix86_branch_cost
= i
;
1448 if (ix86_tls_dialect_string
)
1450 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
1451 ix86_tls_dialect
= TLS_DIALECT_GNU
;
1452 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
1453 ix86_tls_dialect
= TLS_DIALECT_SUN
;
1455 error ("bad value (%s) for -mtls-dialect= switch",
1456 ix86_tls_dialect_string
);
1459 /* Keep nonleaf frame pointers. */
1460 if (TARGET_OMIT_LEAF_FRAME_POINTER
)
1461 flag_omit_frame_pointer
= 1;
1463 /* If we're doing fast math, we don't care about comparison order
1464 wrt NaNs. This lets us use a shorter comparison sequence. */
1465 if (flag_unsafe_math_optimizations
)
1466 target_flags
&= ~MASK_IEEE_FP
;
1468 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1469 since the insns won't need emulation. */
1470 if (x86_arch_always_fancy_math_387
& (1 << ix86_arch
))
1471 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
1473 /* Turn on SSE2 builtins for -msse3. */
1475 target_flags
|= MASK_SSE2
;
1477 /* Turn on SSE builtins for -msse2. */
1479 target_flags
|= MASK_SSE
;
1483 if (TARGET_ALIGN_DOUBLE
)
1484 error ("-malign-double makes no sense in the 64bit mode");
1486 error ("-mrtd calling convention not supported in the 64bit mode");
1487 /* Enable by default the SSE and MMX builtins. */
1488 target_flags
|= (MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
);
1489 ix86_fpmath
= FPMATH_SSE
;
1493 ix86_fpmath
= FPMATH_387
;
1494 /* i386 ABI does not specify red zone. It still makes sense to use it
1495 when programmer takes care to stack from being destroyed. */
1496 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
1497 target_flags
|= MASK_NO_RED_ZONE
;
1500 if (ix86_fpmath_string
!= 0)
1502 if (! strcmp (ix86_fpmath_string
, "387"))
1503 ix86_fpmath
= FPMATH_387
;
1504 else if (! strcmp (ix86_fpmath_string
, "sse"))
1508 warning ("SSE instruction set disabled, using 387 arithmetics");
1509 ix86_fpmath
= FPMATH_387
;
1512 ix86_fpmath
= FPMATH_SSE
;
1514 else if (! strcmp (ix86_fpmath_string
, "387,sse")
1515 || ! strcmp (ix86_fpmath_string
, "sse,387"))
1519 warning ("SSE instruction set disabled, using 387 arithmetics");
1520 ix86_fpmath
= FPMATH_387
;
1522 else if (!TARGET_80387
)
1524 warning ("387 instruction set disabled, using SSE arithmetics");
1525 ix86_fpmath
= FPMATH_SSE
;
1528 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
1531 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
1534 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1538 target_flags
|= MASK_MMX
;
1539 x86_prefetch_sse
= true;
1542 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1545 target_flags
|= MASK_MMX
;
1546 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1547 extensions it adds. */
1548 if (x86_3dnow_a
& (1 << ix86_arch
))
1549 target_flags
|= MASK_3DNOW_A
;
1551 if ((x86_accumulate_outgoing_args
& TUNEMASK
)
1552 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
1554 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
1556 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1559 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
1560 p
= strchr (internal_label_prefix
, 'X');
1561 internal_label_prefix_len
= p
- internal_label_prefix
;
1567 optimization_options (int level
, int size ATTRIBUTE_UNUSED
)
1569 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1570 make the problem with not enough registers even worse. */
1571 #ifdef INSN_SCHEDULING
1573 flag_schedule_insns
= 0;
1576 /* The default values of these switches depend on the TARGET_64BIT
1577 that is not known at this moment. Mark these values with 2 and
1578 let user the to override these. In case there is no command line option
1579 specifying them, we will set the defaults in override_options. */
1581 flag_omit_frame_pointer
= 2;
1582 flag_pcc_struct_return
= 2;
1583 flag_asynchronous_unwind_tables
= 2;
1586 /* Table of valid machine attributes. */
1587 const struct attribute_spec ix86_attribute_table
[] =
1589 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1590 /* Stdcall attribute says callee is responsible for popping arguments
1591 if they are not variable. */
1592 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1593 /* Fastcall attribute says callee is responsible for popping arguments
1594 if they are not variable. */
1595 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1596 /* Cdecl attribute says the callee is a normal C declaration */
1597 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1598 /* Regparm attribute specifies how many integer arguments are to be
1599 passed in registers. */
1600 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute
},
1601 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1602 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
},
1603 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
},
1604 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
1606 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
1607 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
1608 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1609 SUBTARGET_ATTRIBUTE_TABLE
,
1611 { NULL
, 0, 0, false, false, false, NULL
}
1614 /* Decide whether we can make a sibling call to a function. DECL is the
1615 declaration of the function being targeted by the call and EXP is the
1616 CALL_EXPR representing the call. */
1619 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
1621 /* If we are generating position-independent code, we cannot sibcall
1622 optimize any indirect call, or a direct call to a global function,
1623 as the PLT requires %ebx be live. */
1624 if (!TARGET_64BIT
&& flag_pic
&& (!decl
|| TREE_PUBLIC (decl
)))
1627 /* If we are returning floats on the 80387 register stack, we cannot
1628 make a sibcall from a function that doesn't return a float to a
1629 function that does or, conversely, from a function that does return
1630 a float to a function that doesn't; the necessary stack adjustment
1631 would not be executed. */
1632 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp
)))
1633 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)))))
1636 /* If this call is indirect, we'll need to be able to use a call-clobbered
1637 register for the address of the target function. Make sure that all
1638 such registers are not used for passing parameters. */
1639 if (!decl
&& !TARGET_64BIT
)
1643 /* We're looking at the CALL_EXPR, we need the type of the function. */
1644 type
= TREE_OPERAND (exp
, 0); /* pointer expression */
1645 type
= TREE_TYPE (type
); /* pointer type */
1646 type
= TREE_TYPE (type
); /* function type */
1648 if (ix86_function_regparm (type
, NULL
) >= 3)
1650 /* ??? Need to count the actual number of registers to be used,
1651 not the possible number of registers. Fix later. */
1656 /* Otherwise okay. That also includes certain types of indirect calls. */
1660 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1661 arguments as in struct attribute_spec.handler. */
1663 ix86_handle_cdecl_attribute (tree
*node
, tree name
,
1664 tree args ATTRIBUTE_UNUSED
,
1665 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
1667 if (TREE_CODE (*node
) != FUNCTION_TYPE
1668 && TREE_CODE (*node
) != METHOD_TYPE
1669 && TREE_CODE (*node
) != FIELD_DECL
1670 && TREE_CODE (*node
) != TYPE_DECL
)
1672 warning ("`%s' attribute only applies to functions",
1673 IDENTIFIER_POINTER (name
));
1674 *no_add_attrs
= true;
1678 if (is_attribute_p ("fastcall", name
))
1680 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
1682 error ("fastcall and stdcall attributes are not compatible");
1684 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
1686 error ("fastcall and regparm attributes are not compatible");
1689 else if (is_attribute_p ("stdcall", name
))
1691 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
1693 error ("fastcall and stdcall attributes are not compatible");
1700 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name
));
1701 *no_add_attrs
= true;
1707 /* Handle a "regparm" attribute;
1708 arguments as in struct attribute_spec.handler. */
1710 ix86_handle_regparm_attribute (tree
*node
, tree name
, tree args
,
1711 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
1713 if (TREE_CODE (*node
) != FUNCTION_TYPE
1714 && TREE_CODE (*node
) != METHOD_TYPE
1715 && TREE_CODE (*node
) != FIELD_DECL
1716 && TREE_CODE (*node
) != TYPE_DECL
)
1718 warning ("`%s' attribute only applies to functions",
1719 IDENTIFIER_POINTER (name
));
1720 *no_add_attrs
= true;
1726 cst
= TREE_VALUE (args
);
1727 if (TREE_CODE (cst
) != INTEGER_CST
)
1729 warning ("`%s' attribute requires an integer constant argument",
1730 IDENTIFIER_POINTER (name
));
1731 *no_add_attrs
= true;
1733 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
1735 warning ("argument to `%s' attribute larger than %d",
1736 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
1737 *no_add_attrs
= true;
1740 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
1742 error ("fastcall and regparm attributes are not compatible");
1749 /* Return 0 if the attributes for two types are incompatible, 1 if they
1750 are compatible, and 2 if they are nearly compatible (which causes a
1751 warning to be generated). */
1754 ix86_comp_type_attributes (tree type1
, tree type2
)
1756 /* Check for mismatch of non-default calling convention. */
1757 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
1759 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
1762 /* Check for mismatched fastcall types */
1763 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1
))
1764 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2
)))
1767 /* Check for mismatched return types (cdecl vs stdcall). */
1768 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
1769 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
1771 if (ix86_function_regparm (type1
, NULL
)
1772 != ix86_function_regparm (type2
, NULL
))
1777 /* Return the regparm value for a fuctio with the indicated TYPE and DECL.
1778 DECL may be NULL when calling function indirectly
1779 or considering a libcall. */
1782 ix86_function_regparm (tree type
, tree decl
)
1785 int regparm
= ix86_regparm
;
1786 bool user_convention
= false;
1790 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
1793 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
1794 user_convention
= true;
1797 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
1800 user_convention
= true;
1803 /* Use register calling convention for local functions when possible. */
1804 if (!TARGET_64BIT
&& !user_convention
&& decl
1805 && flag_unit_at_a_time
&& !profile_flag
)
1807 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
1810 /* We can't use regparm(3) for nested functions as these use
1811 static chain pointer in third argument. */
1812 if (DECL_CONTEXT (decl
) && !DECL_NO_STATIC_CHAIN (decl
))
1822 /* Return true if EAX is live at the start of the function. Used by
1823 ix86_expand_prologue to determine if we need special help before
1824 calling allocate_stack_worker. */
1827 ix86_eax_live_at_start_p (void)
1829 /* Cheat. Don't bother working forward from ix86_function_regparm
1830 to the function type to whether an actual argument is located in
1831 eax. Instead just look at cfg info, which is still close enough
1832 to correct at this point. This gives false positives for broken
1833 functions that might use uninitialized data that happens to be
1834 allocated in eax, but who cares? */
1835 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR
->global_live_at_end
, 0);
1838 /* Value is the number of bytes of arguments automatically
1839 popped when returning from a subroutine call.
1840 FUNDECL is the declaration node of the function (as a tree),
1841 FUNTYPE is the data type of the function (as a tree),
1842 or for a library call it is an identifier node for the subroutine name.
1843 SIZE is the number of bytes of arguments passed on the stack.
1845 On the 80386, the RTD insn may be used to pop them if the number
1846 of args is fixed, but if the number is variable then the caller
1847 must pop them all. RTD can't be used for library calls now
1848 because the library is compiled with the Unix compiler.
1849 Use of RTD is a selectable option, since it is incompatible with
1850 standard Unix calling sequences. If the option is not selected,
1851 the caller must always pop the args.
1853 The attribute stdcall is equivalent to RTD on a per module basis. */
1856 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
1858 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
1860 /* Cdecl functions override -mrtd, and never pop the stack. */
1861 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
1863 /* Stdcall and fastcall functions will pop the stack if not
1865 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
))
1866 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype
)))
1870 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
1871 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
1872 == void_type_node
)))
1876 /* Lose any fake structure return argument if it is passed on the stack. */
1877 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
1880 int nregs
= ix86_function_regparm (funtype
, fundecl
);
1883 return GET_MODE_SIZE (Pmode
);
1889 /* Argument support functions. */
1891 /* Return true when register may be used to pass function parameters. */
1893 ix86_function_arg_regno_p (int regno
)
1897 return (regno
< REGPARM_MAX
1898 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
1899 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
1901 /* RAX is used as hidden argument to va_arg functions. */
1904 for (i
= 0; i
< REGPARM_MAX
; i
++)
1905 if (regno
== x86_64_int_parameter_registers
[i
])
1910 /* Return if we do not know how to pass TYPE solely in registers. */
1913 ix86_must_pass_in_stack (enum machine_mode mode
, tree type
)
1915 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
1917 return (!TARGET_64BIT
&& type
&& mode
== TImode
);
1920 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1921 for a call to a function whose data type is FNTYPE.
1922 For a library call, FNTYPE is 0. */
1925 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
1926 tree fntype
, /* tree ptr for function decl */
1927 rtx libname
, /* SYMBOL_REF of library name or 0 */
1930 static CUMULATIVE_ARGS zero_cum
;
1931 tree param
, next_param
;
1933 if (TARGET_DEBUG_ARG
)
1935 fprintf (stderr
, "\ninit_cumulative_args (");
1937 fprintf (stderr
, "fntype code = %s, ret code = %s",
1938 tree_code_name
[(int) TREE_CODE (fntype
)],
1939 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
1941 fprintf (stderr
, "no fntype");
1944 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
1949 /* Set up the number of registers to use for passing arguments. */
1951 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
1953 cum
->nregs
= ix86_regparm
;
1955 cum
->sse_nregs
= SSE_REGPARM_MAX
;
1957 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
1958 cum
->warn_sse
= true;
1959 cum
->warn_mmx
= true;
1960 cum
->maybe_vaarg
= false;
1962 /* Use ecx and edx registers if function has fastcall attribute */
1963 if (fntype
&& !TARGET_64BIT
)
1965 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
1972 /* Determine if this function has variable arguments. This is
1973 indicated by the last argument being 'void_type_mode' if there
1974 are no variable arguments. If there are variable arguments, then
1975 we won't pass anything in registers in 32-bit mode. */
1977 if (cum
->nregs
|| cum
->mmx_nregs
|| cum
->sse_nregs
)
1979 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
1980 param
!= 0; param
= next_param
)
1982 next_param
= TREE_CHAIN (param
);
1983 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
1994 cum
->maybe_vaarg
= true;
1998 if ((!fntype
&& !libname
)
1999 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
2000 cum
->maybe_vaarg
= 1;
2002 if (TARGET_DEBUG_ARG
)
2003 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
2008 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2009 of this code is to classify each 8bytes of incoming argument by the register
2010 class and assign registers accordingly. */
2012 /* Return the union class of CLASS1 and CLASS2.
2013 See the x86-64 PS ABI for details. */
2015 static enum x86_64_reg_class
2016 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
2018 /* Rule #1: If both classes are equal, this is the resulting class. */
2019 if (class1
== class2
)
2022 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2024 if (class1
== X86_64_NO_CLASS
)
2026 if (class2
== X86_64_NO_CLASS
)
2029 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2030 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
2031 return X86_64_MEMORY_CLASS
;
2033 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2034 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
2035 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
2036 return X86_64_INTEGERSI_CLASS
;
2037 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
2038 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
2039 return X86_64_INTEGER_CLASS
;
2041 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
2042 if (class1
== X86_64_X87_CLASS
|| class1
== X86_64_X87UP_CLASS
2043 || class2
== X86_64_X87_CLASS
|| class2
== X86_64_X87UP_CLASS
)
2044 return X86_64_MEMORY_CLASS
;
2046 /* Rule #6: Otherwise class SSE is used. */
2047 return X86_64_SSE_CLASS
;
2050 /* Classify the argument of type TYPE and mode MODE.
2051 CLASSES will be filled by the register class used to pass each word
2052 of the operand. The number of words is returned. In case the parameter
2053 should be passed in memory, 0 is returned. As a special case for zero
2054 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2056 BIT_OFFSET is used internally for handling records and specifies offset
2057 of the offset in bits modulo 256 to avoid overflow cases.
2059 See the x86-64 PS ABI for details.
2063 classify_argument (enum machine_mode mode
, tree type
,
2064 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
2066 HOST_WIDE_INT bytes
=
2067 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2068 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2070 /* Variable sized entities are always passed/returned in memory. */
2074 if (mode
!= VOIDmode
2075 && targetm
.calls
.must_pass_in_stack (mode
, type
))
2078 if (type
&& AGGREGATE_TYPE_P (type
))
2082 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
2084 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2088 for (i
= 0; i
< words
; i
++)
2089 classes
[i
] = X86_64_NO_CLASS
;
2091 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2092 signalize memory class, so handle it as special case. */
2095 classes
[0] = X86_64_NO_CLASS
;
2099 /* Classify each field of record and merge classes. */
2100 if (TREE_CODE (type
) == RECORD_TYPE
)
2102 /* For classes first merge in the field of the subclasses. */
2103 if (TYPE_BINFO (type
))
2105 tree binfo
, base_binfo
;
2108 for (binfo
= TYPE_BINFO (type
), i
= 0;
2109 BINFO_BASE_ITERATE (binfo
, i
, base_binfo
); i
++)
2112 int offset
= tree_low_cst (BINFO_OFFSET (base_binfo
), 0) * 8;
2113 tree type
= BINFO_TYPE (base_binfo
);
2115 num
= classify_argument (TYPE_MODE (type
),
2117 (offset
+ bit_offset
) % 256);
2120 for (i
= 0; i
< num
; i
++)
2122 int pos
= (offset
+ (bit_offset
% 64)) / 8 / 8;
2124 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
2128 /* And now merge the fields of structure. */
2129 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
2131 if (TREE_CODE (field
) == FIELD_DECL
)
2135 /* Bitfields are always classified as integer. Handle them
2136 early, since later code would consider them to be
2137 misaligned integers. */
2138 if (DECL_BIT_FIELD (field
))
2140 for (i
= int_bit_position (field
) / 8 / 8;
2141 i
< (int_bit_position (field
)
2142 + tree_low_cst (DECL_SIZE (field
), 0)
2145 merge_classes (X86_64_INTEGER_CLASS
,
2150 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
2151 TREE_TYPE (field
), subclasses
,
2152 (int_bit_position (field
)
2153 + bit_offset
) % 256);
2156 for (i
= 0; i
< num
; i
++)
2159 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
2161 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
2167 /* Arrays are handled as small records. */
2168 else if (TREE_CODE (type
) == ARRAY_TYPE
)
2171 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
2172 TREE_TYPE (type
), subclasses
, bit_offset
);
2176 /* The partial classes are now full classes. */
2177 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
2178 subclasses
[0] = X86_64_SSE_CLASS
;
2179 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
2180 subclasses
[0] = X86_64_INTEGER_CLASS
;
2182 for (i
= 0; i
< words
; i
++)
2183 classes
[i
] = subclasses
[i
% num
];
2185 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2186 else if (TREE_CODE (type
) == UNION_TYPE
2187 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
2189 /* For classes first merge in the field of the subclasses. */
2190 if (TYPE_BINFO (type
))
2192 tree binfo
, base_binfo
;
2195 for (binfo
= TYPE_BINFO (type
), i
= 0;
2196 BINFO_BASE_ITERATE (binfo
, i
, base_binfo
); i
++)
2199 int offset
= tree_low_cst (BINFO_OFFSET (base_binfo
), 0) * 8;
2200 tree type
= BINFO_TYPE (base_binfo
);
2202 num
= classify_argument (TYPE_MODE (type
),
2204 (offset
+ (bit_offset
% 64)) % 256);
2207 for (i
= 0; i
< num
; i
++)
2209 int pos
= (offset
+ (bit_offset
% 64)) / 8 / 8;
2211 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
2215 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
2217 if (TREE_CODE (field
) == FIELD_DECL
)
2220 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
2221 TREE_TYPE (field
), subclasses
,
2225 for (i
= 0; i
< num
; i
++)
2226 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
2230 else if (TREE_CODE (type
) == SET_TYPE
)
2234 classes
[0] = X86_64_INTEGERSI_CLASS
;
2237 else if (bytes
<= 8)
2239 classes
[0] = X86_64_INTEGER_CLASS
;
2242 else if (bytes
<= 12)
2244 classes
[0] = X86_64_INTEGER_CLASS
;
2245 classes
[1] = X86_64_INTEGERSI_CLASS
;
2250 classes
[0] = X86_64_INTEGER_CLASS
;
2251 classes
[1] = X86_64_INTEGER_CLASS
;
2258 /* Final merger cleanup. */
2259 for (i
= 0; i
< words
; i
++)
2261 /* If one class is MEMORY, everything should be passed in
2263 if (classes
[i
] == X86_64_MEMORY_CLASS
)
2266 /* The X86_64_SSEUP_CLASS should be always preceded by
2267 X86_64_SSE_CLASS. */
2268 if (classes
[i
] == X86_64_SSEUP_CLASS
2269 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
2270 classes
[i
] = X86_64_SSE_CLASS
;
2272 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2273 if (classes
[i
] == X86_64_X87UP_CLASS
2274 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
2275 classes
[i
] = X86_64_SSE_CLASS
;
2280 /* Compute alignment needed. We align all types to natural boundaries with
2281 exception of XFmode that is aligned to 64bits. */
2282 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
2284 int mode_alignment
= GET_MODE_BITSIZE (mode
);
2287 mode_alignment
= 128;
2288 else if (mode
== XCmode
)
2289 mode_alignment
= 256;
2290 if (COMPLEX_MODE_P (mode
))
2291 mode_alignment
/= 2;
2292 /* Misaligned fields are always returned in memory. */
2293 if (bit_offset
% mode_alignment
)
2297 /* for V1xx modes, just use the base mode */
2298 if (VECTOR_MODE_P (mode
)
2299 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
2300 mode
= GET_MODE_INNER (mode
);
2302 /* Classification of atomic types. */
2312 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
2313 classes
[0] = X86_64_INTEGERSI_CLASS
;
2315 classes
[0] = X86_64_INTEGER_CLASS
;
2319 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
2324 if (!(bit_offset
% 64))
2325 classes
[0] = X86_64_SSESF_CLASS
;
2327 classes
[0] = X86_64_SSE_CLASS
;
2330 classes
[0] = X86_64_SSEDF_CLASS
;
2333 classes
[0] = X86_64_X87_CLASS
;
2334 classes
[1] = X86_64_X87UP_CLASS
;
2337 classes
[0] = X86_64_SSE_CLASS
;
2338 classes
[1] = X86_64_SSEUP_CLASS
;
2341 classes
[0] = X86_64_SSE_CLASS
;
2344 classes
[0] = X86_64_SSEDF_CLASS
;
2345 classes
[1] = X86_64_SSEDF_CLASS
;
2349 /* These modes are larger than 16 bytes. */
2357 classes
[0] = X86_64_SSE_CLASS
;
2358 classes
[1] = X86_64_SSEUP_CLASS
;
2364 classes
[0] = X86_64_SSE_CLASS
;
2370 if (VECTOR_MODE_P (mode
))
2374 if (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
)
2376 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
2377 classes
[0] = X86_64_INTEGERSI_CLASS
;
2379 classes
[0] = X86_64_INTEGER_CLASS
;
2380 classes
[1] = X86_64_INTEGER_CLASS
;
2381 return 1 + (bytes
> 8);
2388 /* Examine the argument and return set number of register required in each
2389 class. Return 0 iff parameter should be passed in memory. */
2391 examine_argument (enum machine_mode mode
, tree type
, int in_return
,
2392 int *int_nregs
, int *sse_nregs
)
2394 enum x86_64_reg_class
class[MAX_CLASSES
];
2395 int n
= classify_argument (mode
, type
, class, 0);
2401 for (n
--; n
>= 0; n
--)
2404 case X86_64_INTEGER_CLASS
:
2405 case X86_64_INTEGERSI_CLASS
:
2408 case X86_64_SSE_CLASS
:
2409 case X86_64_SSESF_CLASS
:
2410 case X86_64_SSEDF_CLASS
:
2413 case X86_64_NO_CLASS
:
2414 case X86_64_SSEUP_CLASS
:
2416 case X86_64_X87_CLASS
:
2417 case X86_64_X87UP_CLASS
:
2421 case X86_64_MEMORY_CLASS
:
2426 /* Construct container for the argument used by GCC interface. See
2427 FUNCTION_ARG for the detailed description. */
2429 construct_container (enum machine_mode mode
, tree type
, int in_return
,
2430 int nintregs
, int nsseregs
, const int * intreg
,
2433 enum machine_mode tmpmode
;
2435 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2436 enum x86_64_reg_class
class[MAX_CLASSES
];
2440 int needed_sseregs
, needed_intregs
;
2441 rtx exp
[MAX_CLASSES
];
2444 n
= classify_argument (mode
, type
, class, 0);
2445 if (TARGET_DEBUG_ARG
)
2448 fprintf (stderr
, "Memory class\n");
2451 fprintf (stderr
, "Classes:");
2452 for (i
= 0; i
< n
; i
++)
2454 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
2456 fprintf (stderr
, "\n");
2461 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
, &needed_sseregs
))
2463 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
2466 /* First construct simple cases. Avoid SCmode, since we want to use
2467 single register to pass this type. */
2468 if (n
== 1 && mode
!= SCmode
)
2471 case X86_64_INTEGER_CLASS
:
2472 case X86_64_INTEGERSI_CLASS
:
2473 return gen_rtx_REG (mode
, intreg
[0]);
2474 case X86_64_SSE_CLASS
:
2475 case X86_64_SSESF_CLASS
:
2476 case X86_64_SSEDF_CLASS
:
2477 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
2478 case X86_64_X87_CLASS
:
2479 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
2480 case X86_64_NO_CLASS
:
2481 /* Zero sized array, struct or class. */
2486 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
2488 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
2490 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
2491 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
2492 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
2493 && class[1] == X86_64_INTEGER_CLASS
2494 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
2495 && intreg
[0] + 1 == intreg
[1])
2496 return gen_rtx_REG (mode
, intreg
[0]);
2498 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
2499 && class[2] == X86_64_X87_CLASS
&& class[3] == X86_64_X87UP_CLASS
2501 return gen_rtx_REG (XCmode
, FIRST_STACK_REG
);
2503 /* Otherwise figure out the entries of the PARALLEL. */
2504 for (i
= 0; i
< n
; i
++)
2508 case X86_64_NO_CLASS
:
2510 case X86_64_INTEGER_CLASS
:
2511 case X86_64_INTEGERSI_CLASS
:
2512 /* Merge TImodes on aligned occasions here too. */
2513 if (i
* 8 + 8 > bytes
)
2514 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
2515 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
2519 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2520 if (tmpmode
== BLKmode
)
2522 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2523 gen_rtx_REG (tmpmode
, *intreg
),
2527 case X86_64_SSESF_CLASS
:
2528 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2529 gen_rtx_REG (SFmode
,
2530 SSE_REGNO (sse_regno
)),
2534 case X86_64_SSEDF_CLASS
:
2535 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2536 gen_rtx_REG (DFmode
,
2537 SSE_REGNO (sse_regno
)),
2541 case X86_64_SSE_CLASS
:
2542 if (i
< n
- 1 && class[i
+ 1] == X86_64_SSEUP_CLASS
)
2546 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2547 gen_rtx_REG (tmpmode
,
2548 SSE_REGNO (sse_regno
)),
2550 if (tmpmode
== TImode
)
2558 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
2559 for (i
= 0; i
< nexps
; i
++)
2560 XVECEXP (ret
, 0, i
) = exp
[i
];
2564 /* Update the data in CUM to advance over an argument
2565 of mode MODE and data type TYPE.
2566 (TYPE is null for libcalls where that information may not be available.) */
2569 function_arg_advance (CUMULATIVE_ARGS
*cum
, /* current arg information */
2570 enum machine_mode mode
, /* current arg mode */
2571 tree type
, /* type of the argument or 0 if lib support */
2572 int named
) /* whether or not the argument was named */
2575 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2576 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2578 if (TARGET_DEBUG_ARG
)
2580 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2581 words
, cum
->words
, cum
->nregs
, cum
->sse_nregs
, GET_MODE_NAME (mode
), named
);
2584 int int_nregs
, sse_nregs
;
2585 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
2586 cum
->words
+= words
;
2587 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
2589 cum
->nregs
-= int_nregs
;
2590 cum
->sse_nregs
-= sse_nregs
;
2591 cum
->regno
+= int_nregs
;
2592 cum
->sse_regno
+= sse_nregs
;
2595 cum
->words
+= words
;
2599 if (TARGET_SSE
&& SSE_REG_MODE_P (mode
)
2600 && (!type
|| !AGGREGATE_TYPE_P (type
)))
2602 cum
->sse_words
+= words
;
2603 cum
->sse_nregs
-= 1;
2604 cum
->sse_regno
+= 1;
2605 if (cum
->sse_nregs
<= 0)
2611 else if (TARGET_MMX
&& MMX_REG_MODE_P (mode
)
2612 && (!type
|| !AGGREGATE_TYPE_P (type
)))
2614 cum
->mmx_words
+= words
;
2615 cum
->mmx_nregs
-= 1;
2616 cum
->mmx_regno
+= 1;
2617 if (cum
->mmx_nregs
<= 0)
2625 cum
->words
+= words
;
2626 cum
->nregs
-= words
;
2627 cum
->regno
+= words
;
2629 if (cum
->nregs
<= 0)
2639 /* Define where to put the arguments to a function.
2640 Value is zero to push the argument on the stack,
2641 or a hard register in which to store the argument.
2643 MODE is the argument's machine mode.
2644 TYPE is the data type of the argument (as a tree).
2645 This is null for libcalls where that information may
2647 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2648 the preceding args and about the function being called.
2649 NAMED is nonzero if this argument is a named parameter
2650 (otherwise it is an extra parameter matching an ellipsis). */
2653 function_arg (CUMULATIVE_ARGS
*cum
, /* current arg information */
2654 enum machine_mode mode
, /* current arg mode */
2655 tree type
, /* type of the argument or 0 if lib support */
2656 int named
) /* != 0 for normal args, == 0 for ... args */
2660 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2661 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2662 static bool warnedsse
, warnedmmx
;
2664 /* To simplify the code below, represent vector types with a vector mode
2665 even if MMX/SSE are not active. */
2667 && TREE_CODE (type
) == VECTOR_TYPE
2668 && (bytes
== 8 || bytes
== 16)
2669 && GET_MODE_CLASS (TYPE_MODE (type
)) != MODE_VECTOR_INT
2670 && GET_MODE_CLASS (TYPE_MODE (type
)) != MODE_VECTOR_FLOAT
)
2672 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
2673 enum machine_mode newmode
2674 = TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
2675 ? MIN_MODE_VECTOR_FLOAT
: MIN_MODE_VECTOR_INT
;
2677 /* Get the mode which has this inner mode and number of units. */
2678 for (; newmode
!= VOIDmode
; newmode
= GET_MODE_WIDER_MODE (newmode
))
2679 if (GET_MODE_NUNITS (newmode
) == TYPE_VECTOR_SUBPARTS (type
)
2680 && GET_MODE_INNER (newmode
) == innermode
)
2687 /* Handle a hidden AL argument containing number of registers for varargs
2688 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2690 if (mode
== VOIDmode
)
2693 return GEN_INT (cum
->maybe_vaarg
2694 ? (cum
->sse_nregs
< 0
2702 ret
= construct_container (mode
, type
, 0, cum
->nregs
, cum
->sse_nregs
,
2703 &x86_64_int_parameter_registers
[cum
->regno
],
2708 /* For now, pass fp/complex values on the stack. */
2720 if (words
<= cum
->nregs
)
2722 int regno
= cum
->regno
;
2724 /* Fastcall allocates the first two DWORD (SImode) or
2725 smaller arguments to ECX and EDX. */
2728 if (mode
== BLKmode
|| mode
== DImode
)
2731 /* ECX not EAX is the first allocated register. */
2735 ret
= gen_rtx_REG (mode
, regno
);
2745 if (!type
|| !AGGREGATE_TYPE_P (type
))
2747 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
2750 warning ("SSE vector argument without SSE enabled "
2754 ret
= gen_rtx_REG (mode
, cum
->sse_regno
+ FIRST_SSE_REG
);
2761 if (!type
|| !AGGREGATE_TYPE_P (type
))
2763 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
2766 warning ("MMX vector argument without MMX enabled "
2770 ret
= gen_rtx_REG (mode
, cum
->mmx_regno
+ FIRST_MMX_REG
);
2775 if (TARGET_DEBUG_ARG
)
2778 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2779 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
2782 print_simple_rtl (stderr
, ret
);
2784 fprintf (stderr
, ", stack");
2786 fprintf (stderr
, " )\n");
2792 /* A C expression that indicates when an argument must be passed by
2793 reference. If nonzero for an argument, a copy of that argument is
2794 made in memory and a pointer to the argument is passed instead of
2795 the argument itself. The pointer is passed in whatever way is
2796 appropriate for passing a pointer to that type. */
2799 ix86_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
2800 enum machine_mode mode ATTRIBUTE_UNUSED
,
2801 tree type
, bool named ATTRIBUTE_UNUSED
)
2806 if (type
&& int_size_in_bytes (type
) == -1)
2808 if (TARGET_DEBUG_ARG
)
2809 fprintf (stderr
, "function_arg_pass_by_reference\n");
2816 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2817 ABI. Only called if TARGET_SSE. */
2819 contains_128bit_aligned_vector_p (tree type
)
2821 enum machine_mode mode
= TYPE_MODE (type
);
2822 if (SSE_REG_MODE_P (mode
)
2823 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
2825 if (TYPE_ALIGN (type
) < 128)
2828 if (AGGREGATE_TYPE_P (type
))
2830 /* Walk the aggregates recursively. */
2831 if (TREE_CODE (type
) == RECORD_TYPE
2832 || TREE_CODE (type
) == UNION_TYPE
2833 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
2837 if (TYPE_BINFO (type
))
2839 tree binfo
, base_binfo
;
2842 for (binfo
= TYPE_BINFO (type
), i
= 0;
2843 BINFO_BASE_ITERATE (binfo
, i
, base_binfo
); i
++)
2844 if (contains_128bit_aligned_vector_p (BINFO_TYPE (base_binfo
)))
2847 /* And now merge the fields of structure. */
2848 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
2850 if (TREE_CODE (field
) == FIELD_DECL
2851 && contains_128bit_aligned_vector_p (TREE_TYPE (field
)))
2855 /* Just for use if some languages passes arrays by value. */
2856 else if (TREE_CODE (type
) == ARRAY_TYPE
)
2858 if (contains_128bit_aligned_vector_p (TREE_TYPE (type
)))
2867 /* Gives the alignment boundary, in bits, of an argument with the
2868 specified mode and type. */
2871 ix86_function_arg_boundary (enum machine_mode mode
, tree type
)
2875 align
= TYPE_ALIGN (type
);
2877 align
= GET_MODE_ALIGNMENT (mode
);
2878 if (align
< PARM_BOUNDARY
)
2879 align
= PARM_BOUNDARY
;
2882 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2883 make an exception for SSE modes since these require 128bit
2886 The handling here differs from field_alignment. ICC aligns MMX
2887 arguments to 4 byte boundaries, while structure fields are aligned
2888 to 8 byte boundaries. */
2890 align
= PARM_BOUNDARY
;
2893 if (!SSE_REG_MODE_P (mode
))
2894 align
= PARM_BOUNDARY
;
2898 if (!contains_128bit_aligned_vector_p (type
))
2899 align
= PARM_BOUNDARY
;
2907 /* Return true if N is a possible register number of function value. */
2909 ix86_function_value_regno_p (int regno
)
2913 return ((regno
) == 0
2914 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
2915 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
));
2917 return ((regno
) == 0 || (regno
) == FIRST_FLOAT_REG
2918 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
)
2919 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
));
2922 /* Define how to find the value returned by a function.
2923 VALTYPE is the data type of the value (as a tree).
2924 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2925 otherwise, FUNC is 0. */
2927 ix86_function_value (tree valtype
)
2931 rtx ret
= construct_container (TYPE_MODE (valtype
), valtype
, 1,
2932 REGPARM_MAX
, SSE_REGPARM_MAX
,
2933 x86_64_int_return_registers
, 0);
2934 /* For zero sized structures, construct_container return NULL, but we need
2935 to keep rest of compiler happy by returning meaningful value. */
2937 ret
= gen_rtx_REG (TYPE_MODE (valtype
), 0);
2941 return gen_rtx_REG (TYPE_MODE (valtype
),
2942 ix86_value_regno (TYPE_MODE (valtype
)));
2945 /* Return false iff type is returned in memory. */
2947 ix86_return_in_memory (tree type
)
2949 int needed_intregs
, needed_sseregs
, size
;
2950 enum machine_mode mode
= TYPE_MODE (type
);
2953 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
2955 if (mode
== BLKmode
)
2958 size
= int_size_in_bytes (type
);
2960 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
2963 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
2965 /* User-created vectors small enough to fit in EAX. */
2969 /* MMX/3dNow values are returned on the stack, since we've
2970 got to EMMS/FEMMS before returning. */
2974 /* SSE values are returned in XMM0, except when it doesn't exist. */
2976 return (TARGET_SSE
? 0 : 1);
2987 /* When returning SSE vector types, we have a choice of either
2988 (1) being abi incompatible with a -march switch, or
2989 (2) generating an error.
2990 Given no good solution, I think the safest thing is one warning.
2991 The user won't be able to use -Werror, but....
2993 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
2994 called in response to actually generating a caller or callee that
2995 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
2996 via aggregate_value_p for general type probing from tree-ssa. */
2999 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
3003 if (!TARGET_SSE
&& type
&& !warned
)
3005 /* Look at the return type of the function, not the function type. */
3006 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
3009 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
3012 warning ("SSE vector return without SSE enabled changes the ABI");
3019 /* Define how to find the value returned by a library function
3020 assuming the value has mode MODE. */
3022 ix86_libcall_value (enum machine_mode mode
)
3033 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
3035 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
3040 return gen_rtx_REG (mode
, 0);
3044 return gen_rtx_REG (mode
, ix86_value_regno (mode
));
3047 /* Given a mode, return the register to use for a return value. */
3050 ix86_value_regno (enum machine_mode mode
)
3052 /* Floating point return values in %st(0). */
3053 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
&& TARGET_FLOAT_RETURNS_IN_80387
)
3054 return FIRST_FLOAT_REG
;
3055 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3056 we prevent this case when sse is not available. */
3057 if (mode
== TImode
|| (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
3058 return FIRST_SSE_REG
;
3059 /* Everything else in %eax. */
3063 /* Create the va_list data type. */
3066 ix86_build_builtin_va_list (void)
3068 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
3070 /* For i386 we use plain pointer to argument area. */
3072 return build_pointer_type (char_type_node
);
3074 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
3075 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
3077 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
3078 unsigned_type_node
);
3079 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
3080 unsigned_type_node
);
3081 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
3083 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
3086 DECL_FIELD_CONTEXT (f_gpr
) = record
;
3087 DECL_FIELD_CONTEXT (f_fpr
) = record
;
3088 DECL_FIELD_CONTEXT (f_ovf
) = record
;
3089 DECL_FIELD_CONTEXT (f_sav
) = record
;
3091 TREE_CHAIN (record
) = type_decl
;
3092 TYPE_NAME (record
) = type_decl
;
3093 TYPE_FIELDS (record
) = f_gpr
;
3094 TREE_CHAIN (f_gpr
) = f_fpr
;
3095 TREE_CHAIN (f_fpr
) = f_ovf
;
3096 TREE_CHAIN (f_ovf
) = f_sav
;
3098 layout_type (record
);
3100 /* The correct type is an array type of one element. */
3101 return build_array_type (record
, build_index_type (size_zero_node
));
3104 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3107 ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3108 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
3111 CUMULATIVE_ARGS next_cum
;
3112 rtx save_area
= NULL_RTX
, mem
;
3125 /* Indicate to allocate space on the stack for varargs save area. */
3126 ix86_save_varrargs_registers
= 1;
3128 cfun
->stack_alignment_needed
= 128;
3130 fntype
= TREE_TYPE (current_function_decl
);
3131 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
3132 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
3133 != void_type_node
));
3135 /* For varargs, we do not want to skip the dummy va_dcl argument.
3136 For stdargs, we do want to skip the last named argument. */
3139 function_arg_advance (&next_cum
, mode
, type
, 1);
3142 save_area
= frame_pointer_rtx
;
3144 set
= get_varargs_alias_set ();
3146 for (i
= next_cum
.regno
; i
< ix86_regparm
; i
++)
3148 mem
= gen_rtx_MEM (Pmode
,
3149 plus_constant (save_area
, i
* UNITS_PER_WORD
));
3150 set_mem_alias_set (mem
, set
);
3151 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
3152 x86_64_int_parameter_registers
[i
]));
3155 if (next_cum
.sse_nregs
)
3157 /* Now emit code to save SSE registers. The AX parameter contains number
3158 of SSE parameter registers used to call this function. We use
3159 sse_prologue_save insn template that produces computed jump across
3160 SSE saves. We need some preparation work to get this working. */
3162 label
= gen_label_rtx ();
3163 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
3165 /* Compute address to jump to :
3166 label - 5*eax + nnamed_sse_arguments*5 */
3167 tmp_reg
= gen_reg_rtx (Pmode
);
3168 nsse_reg
= gen_reg_rtx (Pmode
);
3169 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
3170 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
3171 gen_rtx_MULT (Pmode
, nsse_reg
,
3173 if (next_cum
.sse_regno
)
3176 gen_rtx_CONST (DImode
,
3177 gen_rtx_PLUS (DImode
,
3179 GEN_INT (next_cum
.sse_regno
* 4))));
3181 emit_move_insn (nsse_reg
, label_ref
);
3182 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
3184 /* Compute address of memory block we save into. We always use pointer
3185 pointing 127 bytes after first byte to store - this is needed to keep
3186 instruction size limited by 4 bytes. */
3187 tmp_reg
= gen_reg_rtx (Pmode
);
3188 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
3189 plus_constant (save_area
,
3190 8 * REGPARM_MAX
+ 127)));
3191 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
3192 set_mem_alias_set (mem
, set
);
3193 set_mem_align (mem
, BITS_PER_WORD
);
3195 /* And finally do the dirty job! */
3196 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
3197 GEN_INT (next_cum
.sse_regno
), label
));
3202 /* Implement va_start. */
3205 ix86_va_start (tree valist
, rtx nextarg
)
3207 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
3208 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
3209 tree gpr
, fpr
, ovf
, sav
, t
;
3211 /* Only 64bit target needs something special. */
3214 std_expand_builtin_va_start (valist
, nextarg
);
3218 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
3219 f_fpr
= TREE_CHAIN (f_gpr
);
3220 f_ovf
= TREE_CHAIN (f_fpr
);
3221 f_sav
= TREE_CHAIN (f_ovf
);
3223 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
3224 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
3225 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
3226 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
3227 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
3229 /* Count number of gp and fp argument registers used. */
3230 words
= current_function_args_info
.words
;
3231 n_gpr
= current_function_args_info
.regno
;
3232 n_fpr
= current_function_args_info
.sse_regno
;
3234 if (TARGET_DEBUG_ARG
)
3235 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3236 (int) words
, (int) n_gpr
, (int) n_fpr
);
3238 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
,
3239 build_int_cst (NULL_TREE
, n_gpr
* 8, 0));
3240 TREE_SIDE_EFFECTS (t
) = 1;
3241 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3243 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
,
3244 build_int_cst (NULL_TREE
, n_fpr
* 16 + 8*REGPARM_MAX
, 0));
3245 TREE_SIDE_EFFECTS (t
) = 1;
3246 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3248 /* Find the overflow area. */
3249 t
= make_tree (TREE_TYPE (ovf
), virtual_incoming_args_rtx
);
3251 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), t
,
3252 build_int_cst (NULL_TREE
, words
* UNITS_PER_WORD
, 0));
3253 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
3254 TREE_SIDE_EFFECTS (t
) = 1;
3255 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3257 /* Find the register save area.
3258 Prologue of the function save it right above stack frame. */
3259 t
= make_tree (TREE_TYPE (sav
), frame_pointer_rtx
);
3260 t
= build (MODIFY_EXPR
, TREE_TYPE (sav
), sav
, t
);
3261 TREE_SIDE_EFFECTS (t
) = 1;
3262 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3265 /* Implement va_arg. */
3268 ix86_gimplify_va_arg (tree valist
, tree type
, tree
*pre_p
, tree
*post_p
)
3270 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
3271 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
3272 tree gpr
, fpr
, ovf
, sav
, t
;
3274 tree lab_false
, lab_over
= NULL_TREE
;
3280 /* Only 64bit target needs something special. */
3282 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
3284 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
3285 f_fpr
= TREE_CHAIN (f_gpr
);
3286 f_ovf
= TREE_CHAIN (f_fpr
);
3287 f_sav
= TREE_CHAIN (f_ovf
);
3289 valist
= build_fold_indirect_ref (valist
);
3290 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
3291 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
3292 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
3293 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
3295 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
3297 type
= build_pointer_type (type
);
3298 size
= int_size_in_bytes (type
);
3299 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3301 container
= construct_container (TYPE_MODE (type
), type
, 0,
3302 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
3304 * Pull the value out of the saved registers ...
3307 addr
= create_tmp_var (ptr_type_node
, "addr");
3308 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
3312 int needed_intregs
, needed_sseregs
;
3314 tree int_addr
, sse_addr
;
3316 lab_false
= create_artificial_label ();
3317 lab_over
= create_artificial_label ();
3319 examine_argument (TYPE_MODE (type
), type
, 0,
3320 &needed_intregs
, &needed_sseregs
);
3322 need_temp
= (!REG_P (container
)
3323 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
3324 || TYPE_ALIGN (type
) > 128));
3326 /* In case we are passing structure, verify that it is consecutive block
3327 on the register save area. If not we need to do moves. */
3328 if (!need_temp
&& !REG_P (container
))
3330 /* Verify that all registers are strictly consecutive */
3331 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
3335 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
3337 rtx slot
= XVECEXP (container
, 0, i
);
3338 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
3339 || INTVAL (XEXP (slot
, 1)) != i
* 16)
3347 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
3349 rtx slot
= XVECEXP (container
, 0, i
);
3350 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
3351 || INTVAL (XEXP (slot
, 1)) != i
* 8)
3363 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
3364 DECL_POINTER_ALIAS_SET (int_addr
) = get_varargs_alias_set ();
3365 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
3366 DECL_POINTER_ALIAS_SET (sse_addr
) = get_varargs_alias_set ();
3368 /* First ensure that we fit completely in registers. */
3371 t
= build_int_cst (TREE_TYPE (gpr
),
3372 (REGPARM_MAX
- needed_intregs
+ 1) * 8, 0);
3373 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
3374 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
3375 t
= build (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
3376 gimplify_and_add (t
, pre_p
);
3380 t
= build_int_cst (TREE_TYPE (fpr
),
3381 (SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
3382 + REGPARM_MAX
* 8, 0);
3383 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
3384 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
3385 t
= build (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
3386 gimplify_and_add (t
, pre_p
);
3389 /* Compute index to start of area used for integer regs. */
3392 /* int_addr = gpr + sav; */
3393 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, gpr
);
3394 t
= build2 (MODIFY_EXPR
, void_type_node
, int_addr
, t
);
3395 gimplify_and_add (t
, pre_p
);
3399 /* sse_addr = fpr + sav; */
3400 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, fpr
);
3401 t
= build2 (MODIFY_EXPR
, void_type_node
, sse_addr
, t
);
3402 gimplify_and_add (t
, pre_p
);
3407 tree temp
= create_tmp_var (type
, "va_arg_tmp");
3410 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
3411 t
= build2 (MODIFY_EXPR
, void_type_node
, addr
, t
);
3412 gimplify_and_add (t
, pre_p
);
3414 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
3416 rtx slot
= XVECEXP (container
, 0, i
);
3417 rtx reg
= XEXP (slot
, 0);
3418 enum machine_mode mode
= GET_MODE (reg
);
3419 tree piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
3420 tree addr_type
= build_pointer_type (piece_type
);
3423 tree dest_addr
, dest
;
3425 if (SSE_REGNO_P (REGNO (reg
)))
3427 src_addr
= sse_addr
;
3428 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
3432 src_addr
= int_addr
;
3433 src_offset
= REGNO (reg
) * 8;
3435 src_addr
= fold_convert (addr_type
, src_addr
);
3436 src_addr
= fold (build2 (PLUS_EXPR
, addr_type
, src_addr
,
3437 size_int (src_offset
)));
3438 src
= build_fold_indirect_ref (src_addr
);
3440 dest_addr
= fold_convert (addr_type
, addr
);
3441 dest_addr
= fold (build2 (PLUS_EXPR
, addr_type
, dest_addr
,
3442 size_int (INTVAL (XEXP (slot
, 1)))));
3443 dest
= build_fold_indirect_ref (dest_addr
);
3445 t
= build2 (MODIFY_EXPR
, void_type_node
, dest
, src
);
3446 gimplify_and_add (t
, pre_p
);
3452 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
3453 build_int_cst (NULL_TREE
, needed_intregs
* 8, 0));
3454 t
= build2 (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
, t
);
3455 gimplify_and_add (t
, pre_p
);
3459 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
3460 build_int_cst (NULL_TREE
, needed_sseregs
* 16, 0));
3461 t
= build2 (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
, t
);
3462 gimplify_and_add (t
, pre_p
);
3465 t
= build1 (GOTO_EXPR
, void_type_node
, lab_over
);
3466 gimplify_and_add (t
, pre_p
);
3468 t
= build1 (LABEL_EXPR
, void_type_node
, lab_false
);
3469 append_to_statement_list (t
, pre_p
);
3472 /* ... otherwise out of the overflow area. */
3474 /* Care for on-stack alignment if needed. */
3475 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64)
3479 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
3480 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
,
3481 build_int_cst (NULL_TREE
, align
- 1, 0));
3482 t
= build (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
3483 build_int_cst (NULL_TREE
, -align
, -1));
3485 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
3487 t2
= build2 (MODIFY_EXPR
, void_type_node
, addr
, t
);
3488 gimplify_and_add (t2
, pre_p
);
3490 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
3491 build_int_cst (NULL_TREE
, rsize
* UNITS_PER_WORD
, 0));
3492 t
= build2 (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
3493 gimplify_and_add (t
, pre_p
);
3497 t
= build1 (LABEL_EXPR
, void_type_node
, lab_over
);
3498 append_to_statement_list (t
, pre_p
);
3501 ptrtype
= build_pointer_type (type
);
3502 addr
= fold_convert (ptrtype
, addr
);
3505 addr
= build_fold_indirect_ref (addr
);
3506 return build_fold_indirect_ref (addr
);
3509 /* Return nonzero if OPNUM's MEM should be matched
3510 in movabs* patterns. */
3513 ix86_check_movabs (rtx insn
, int opnum
)
3517 set
= PATTERN (insn
);
3518 if (GET_CODE (set
) == PARALLEL
)
3519 set
= XVECEXP (set
, 0, 0);
3520 if (GET_CODE (set
) != SET
)
3522 mem
= XEXP (set
, opnum
);
3523 while (GET_CODE (mem
) == SUBREG
)
3524 mem
= SUBREG_REG (mem
);
3525 if (GET_CODE (mem
) != MEM
)
3527 return (volatile_ok
|| !MEM_VOLATILE_P (mem
));
3530 /* Initialize the table of extra 80387 mathematical constants. */
3533 init_ext_80387_constants (void)
3535 static const char * cst
[5] =
3537 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
3538 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
3539 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
3540 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
3541 "3.1415926535897932385128089594061862044", /* 4: fldpi */
3545 for (i
= 0; i
< 5; i
++)
3547 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
3548 /* Ensure each constant is rounded to XFmode precision. */
3549 real_convert (&ext_80387_constants_table
[i
],
3550 XFmode
, &ext_80387_constants_table
[i
]);
3553 ext_80387_constants_init
= 1;
3556 /* Return true if the constant is something that can be loaded with
3557 a special instruction. */
3560 standard_80387_constant_p (rtx x
)
3562 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
3565 if (x
== CONST0_RTX (GET_MODE (x
)))
3567 if (x
== CONST1_RTX (GET_MODE (x
)))
3570 /* For XFmode constants, try to find a special 80387 instruction when
3571 optimizing for size or on those CPUs that benefit from them. */
3572 if (GET_MODE (x
) == XFmode
3573 && (optimize_size
|| x86_ext_80387_constants
& TUNEMASK
))
3578 if (! ext_80387_constants_init
)
3579 init_ext_80387_constants ();
3581 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
3582 for (i
= 0; i
< 5; i
++)
3583 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
3590 /* Return the opcode of the special instruction to be used to load
3594 standard_80387_constant_opcode (rtx x
)
3596 switch (standard_80387_constant_p (x
))
3616 /* Return the CONST_DOUBLE representing the 80387 constant that is
3617 loaded by the specified special instruction. The argument IDX
3618 matches the return value from standard_80387_constant_p. */
3621 standard_80387_constant_rtx (int idx
)
3625 if (! ext_80387_constants_init
)
3626 init_ext_80387_constants ();
3642 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
3646 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
3649 standard_sse_constant_p (rtx x
)
3651 if (x
== const0_rtx
)
3653 return (x
== CONST0_RTX (GET_MODE (x
)));
3656 /* Returns 1 if OP contains a symbol reference */
3659 symbolic_reference_mentioned_p (rtx op
)
3664 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
3667 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
3668 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
3674 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
3675 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
3679 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
3686 /* Return 1 if it is appropriate to emit `ret' instructions in the
3687 body of a function. Do this only if the epilogue is simple, needing a
3688 couple of insns. Prior to reloading, we can't tell how many registers
3689 must be saved, so return 0 then. Return 0 if there is no frame
3690 marker to de-allocate.
3692 If NON_SAVING_SETJMP is defined and true, then it is not possible
3693 for the epilogue to be simple, so return 0. This is a special case
3694 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3695 until final, but jump_optimize may need to know sooner if a
3699 ix86_can_use_return_insn_p (void)
3701 struct ix86_frame frame
;
3703 #ifdef NON_SAVING_SETJMP
3704 if (NON_SAVING_SETJMP
&& current_function_calls_setjmp
)
3708 if (! reload_completed
|| frame_pointer_needed
)
3711 /* Don't allow more than 32 pop, since that's all we can do
3712 with one instruction. */
3713 if (current_function_pops_args
3714 && current_function_args_size
>= 32768)
3717 ix86_compute_frame_layout (&frame
);
3718 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
3721 /* Value should be nonzero if functions must have frame pointers.
3722 Zero means the frame pointer need not be set up (and parms may
3723 be accessed via the stack pointer) in functions that seem suitable. */
3726 ix86_frame_pointer_required (void)
3728 /* If we accessed previous frames, then the generated code expects
3729 to be able to access the saved ebp value in our frame. */
3730 if (cfun
->machine
->accesses_prev_frame
)
3733 /* Several x86 os'es need a frame pointer for other reasons,
3734 usually pertaining to setjmp. */
3735 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
3738 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3739 the frame pointer by default. Turn it back on now if we've not
3740 got a leaf function. */
3741 if (TARGET_OMIT_LEAF_FRAME_POINTER
3742 && (!current_function_is_leaf
))
3745 if (current_function_profile
)
3751 /* Record that the current function accesses previous call frames. */
3754 ix86_setup_frame_addresses (void)
3756 cfun
->machine
->accesses_prev_frame
= 1;
3759 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3760 # define USE_HIDDEN_LINKONCE 1
3762 # define USE_HIDDEN_LINKONCE 0
3765 static int pic_labels_used
;
3767 /* Fills in the label name that should be used for a pc thunk for
3768 the given register. */
3771 get_pc_thunk_name (char name
[32], unsigned int regno
)
3773 if (USE_HIDDEN_LINKONCE
)
3774 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
3776 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
3780 /* This function generates code for -fpic that loads %ebx with
3781 the return address of the caller and then returns. */
3784 ix86_file_end (void)
3789 for (regno
= 0; regno
< 8; ++regno
)
3793 if (! ((pic_labels_used
>> regno
) & 1))
3796 get_pc_thunk_name (name
, regno
);
3798 if (USE_HIDDEN_LINKONCE
)
3802 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
3804 TREE_PUBLIC (decl
) = 1;
3805 TREE_STATIC (decl
) = 1;
3806 DECL_ONE_ONLY (decl
) = 1;
3808 (*targetm
.asm_out
.unique_section
) (decl
, 0);
3809 named_section (decl
, NULL
, 0);
3811 (*targetm
.asm_out
.globalize_label
) (asm_out_file
, name
);
3812 fputs ("\t.hidden\t", asm_out_file
);
3813 assemble_name (asm_out_file
, name
);
3814 fputc ('\n', asm_out_file
);
3815 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
3820 ASM_OUTPUT_LABEL (asm_out_file
, name
);
3823 xops
[0] = gen_rtx_REG (SImode
, regno
);
3824 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
3825 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
3826 output_asm_insn ("ret", xops
);
3829 if (NEED_INDICATE_EXEC_STACK
)
3830 file_end_indicate_exec_stack ();
3833 /* Emit code for the SET_GOT patterns. */
3836 output_set_got (rtx dest
)
3841 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
3843 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
3845 xops
[2] = gen_rtx_LABEL_REF (Pmode
, gen_label_rtx ());
3848 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
3850 output_asm_insn ("call\t%a2", xops
);
3853 /* Output the "canonical" label name ("Lxx$pb") here too. This
3854 is what will be referred to by the Mach-O PIC subsystem. */
3855 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
3857 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
3858 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
3861 output_asm_insn ("pop{l}\t%0", xops
);
3866 get_pc_thunk_name (name
, REGNO (dest
));
3867 pic_labels_used
|= 1 << REGNO (dest
);
3869 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
3870 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
3871 output_asm_insn ("call\t%X2", xops
);
3874 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
3875 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
3876 else if (!TARGET_MACHO
)
3877 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops
);
3882 /* Generate an "push" pattern for input ARG. */
3887 return gen_rtx_SET (VOIDmode
,
3889 gen_rtx_PRE_DEC (Pmode
,
3890 stack_pointer_rtx
)),
3894 /* Return >= 0 if there is an unused call-clobbered register available
3895 for the entire function. */
3898 ix86_select_alt_pic_regnum (void)
3900 if (current_function_is_leaf
&& !current_function_profile
)
3903 for (i
= 2; i
>= 0; --i
)
3904 if (!regs_ever_live
[i
])
3908 return INVALID_REGNUM
;
3911 /* Return 1 if we need to save REGNO. */
3913 ix86_save_reg (unsigned int regno
, int maybe_eh_return
)
3915 if (pic_offset_table_rtx
3916 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
3917 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
3918 || current_function_profile
3919 || current_function_calls_eh_return
3920 || current_function_uses_const_pool
))
3922 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
3927 if (current_function_calls_eh_return
&& maybe_eh_return
)
3932 unsigned test
= EH_RETURN_DATA_REGNO (i
);
3933 if (test
== INVALID_REGNUM
)
3940 return (regs_ever_live
[regno
]
3941 && !call_used_regs
[regno
]
3942 && !fixed_regs
[regno
]
3943 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
3946 /* Return number of registers to be saved on the stack. */
3949 ix86_nsaved_regs (void)
3954 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
3955 if (ix86_save_reg (regno
, true))
3960 /* Return the offset between two registers, one to be eliminated, and the other
3961 its replacement, at the start of a routine. */
3964 ix86_initial_elimination_offset (int from
, int to
)
3966 struct ix86_frame frame
;
3967 ix86_compute_frame_layout (&frame
);
3969 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
3970 return frame
.hard_frame_pointer_offset
;
3971 else if (from
== FRAME_POINTER_REGNUM
3972 && to
== HARD_FRAME_POINTER_REGNUM
)
3973 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
3976 if (to
!= STACK_POINTER_REGNUM
)
3978 else if (from
== ARG_POINTER_REGNUM
)
3979 return frame
.stack_pointer_offset
;
3980 else if (from
!= FRAME_POINTER_REGNUM
)
3983 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
3987 /* Fill structure ix86_frame about frame of currently computed function. */
/* NOTE(review): extraction-garbled.  Several guarding conditions (e.g. the
   one in front of the use_fast_prologue_epilogue_nregs recomputation, the
   TARGET_64BIT guard around the varargs save area, and the
   TARGET_DEBUG_ARG-style guard around the fprintf dump at the end) plus
   braces/else keywords appear to have been dropped -- verify against the
   original i386.c before relying on this text.  */
3990 ix86_compute_frame_layout (struct ix86_frame
*frame
)
3992 HOST_WIDE_INT total_size
;
3993 unsigned int stack_alignment_needed
;
3994 HOST_WIDE_INT offset
;
3995 unsigned int preferred_alignment
;
3996 HOST_WIDE_INT size
= get_frame_size ();
3998 frame
->nregs
= ix86_nsaved_regs ();
/* Convert the per-function alignment bookkeeping from bits to bytes.  */
4001 stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
4002 preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
4004 /* During reload iteration the amount of registers saved can change.
4005 Recompute the value as needed. Do not recompute when amount of registers
4006 didn't change as reload does mutiple calls to the function and does not
4007 expect the decision to change within single iteration. */
4009 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
4011 int count
= frame
->nregs
;
4013 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
4014 /* The fast prologue uses move instead of push to save registers. This
4015 is significantly longer, but also executes faster as modern hardware
4016 can execute the moves in parallel, but can't do that for push/pop.
4018 Be careful about choosing what prologue to emit: When function takes
4019 many instructions to execute we may use slow version as well as in
4020 case function is known to be outside hot spot (this is known with
4021 feedback only). Weight the size of function by number of registers
4022 to save as it is cheap to use one or two push instructions but very
4023 slow to use many of them. */
4025 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
4026 if (cfun
->function_frequency
< FUNCTION_FREQUENCY_NORMAL
4027 || (flag_branch_probabilities
4028 && cfun
->function_frequency
< FUNCTION_FREQUENCY_HOT
))
4029 cfun
->machine
->use_fast_prologue_epilogue
= false;
4031 cfun
->machine
->use_fast_prologue_epilogue
4032 = !expensive_function_p (count
);
4034 if (TARGET_PROLOGUE_USING_MOVE
4035 && cfun
->machine
->use_fast_prologue_epilogue
)
4036 frame
->save_regs_using_mov
= true;
4038 frame
->save_regs_using_mov
= false;
4041 /* Skip return address and saved base pointer. */
4042 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
4044 frame
->hard_frame_pointer_offset
= offset
;
4046 /* Do some sanity checking of stack_alignment_needed and
4047 preferred_alignment, since i386 port is the only using those features
4048 that may break easily. */
/* NOTE(review): the bodies of these four sanity checks (presumably
   abort() calls) were dropped by extraction.  */
4050 if (size
&& !stack_alignment_needed
)
4052 if (preferred_alignment
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
4054 if (preferred_alignment
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
4056 if (stack_alignment_needed
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
4059 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
4060 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
4062 /* Register save area */
4063 offset
+= frame
->nregs
* UNITS_PER_WORD
;
4066 if (ix86_save_varrargs_registers
)
4068 offset
+= X86_64_VARARGS_SIZE
;
4069 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
4072 frame
->va_arg_size
= 0;
4074 /* Align start of frame for local function. */
4075 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
4076 & -stack_alignment_needed
) - offset
;
4078 offset
+= frame
->padding1
;
4080 /* Frame pointer points here. */
4081 frame
->frame_pointer_offset
= offset
;
4085 /* Add outgoing arguments area. Can be skipped if we eliminated
4086 all the function calls as dead code.
4087 Skipping is however impossible when function calls alloca. Alloca
4088 expander assumes that last current_function_outgoing_args_size
4089 of stack frame are unused. */
4090 if (ACCUMULATE_OUTGOING_ARGS
4091 && (!current_function_is_leaf
|| current_function_calls_alloca
))
4093 offset
+= current_function_outgoing_args_size
;
4094 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
4097 frame
->outgoing_arguments_size
= 0;
4099 /* Align stack boundary. Only needed if we're calling another function
4101 if (!current_function_is_leaf
|| current_function_calls_alloca
)
4102 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
4103 & -preferred_alignment
) - offset
;
4105 frame
->padding2
= 0;
4107 offset
+= frame
->padding2
;
4109 /* We've reached end of stack frame. */
4110 frame
->stack_pointer_offset
= offset
;
4112 /* Size prologue needs to allocate. */
4113 frame
->to_allocate
=
4114 (size
+ frame
->padding1
+ frame
->padding2
4115 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
4117 if ((!frame
->to_allocate
&& frame
->nregs
<= 1)
4118 || (TARGET_64BIT
&& frame
->to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
4119 frame
->save_regs_using_mov
= false;
/* Red zone: a leaf function with an unchanging stack pointer may use the
   area below the stack pointer without allocating it.  */
4121 if (TARGET_RED_ZONE
&& current_function_sp_is_unchanging
4122 && current_function_is_leaf
)
4124 frame
->red_zone_size
= frame
->to_allocate
;
4125 if (frame
->save_regs_using_mov
)
4126 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
4127 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
4128 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
4131 frame
->red_zone_size
= 0;
4132 frame
->to_allocate
-= frame
->red_zone_size
;
4133 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
/* NOTE(review): debug dump of the computed layout; the enclosing debug
   guard was dropped by extraction.  */
4135 fprintf (stderr
, "nregs: %i\n", frame
->nregs
);
4136 fprintf (stderr
, "size: %i\n", size
);
4137 fprintf (stderr
, "alignment1: %i\n", stack_alignment_needed
);
4138 fprintf (stderr
, "padding1: %i\n", frame
->padding1
);
4139 fprintf (stderr
, "va_arg: %i\n", frame
->va_arg_size
);
4140 fprintf (stderr
, "padding2: %i\n", frame
->padding2
);
4141 fprintf (stderr
, "to_allocate: %i\n", frame
->to_allocate
);
4142 fprintf (stderr
, "red_zone_size: %i\n", frame
->red_zone_size
);
4143 fprintf (stderr
, "frame_pointer_offset: %i\n", frame
->frame_pointer_offset
);
4144 fprintf (stderr
, "hard_frame_pointer_offset: %i\n",
4145 frame
->hard_frame_pointer_offset
);
4146 fprintf (stderr
, "stack_pointer_offset: %i\n", frame
->stack_pointer_offset
);
4150 /* Emit code to save registers in the prologue. */
/* NOTE(review): extraction-garbled.  The locals (regno, insn) and braces
   were dropped.  Iterates over hard registers high-to-low and pushes each
   call-saved register that must be preserved; each push is marked frame
   related for unwind info.  */
4153 ix86_emit_save_regs (void)
4158 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
4159 if (ix86_save_reg (regno
, true))
4161 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
4162 RTX_FRAME_RELATED_P (insn
) = 1;
4166 /* Emit code to save registers using MOV insns. First register
4167 is restored from POINTER + OFFSET. */
/* NOTE(review): extraction-garbled.  The mode/offset arguments of the
   adjust_address call (line 4178 of the original) were dropped, as were
   the locals and braces.  Saves each required register with a move to
   POINTER + OFFSET, advancing OFFSET by a word each time.  */
4169 ix86_emit_save_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
)
4174 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4175 if (ix86_save_reg (regno
, true))
4177 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
4179 gen_rtx_REG (Pmode
, regno
));
4180 RTX_FRAME_RELATED_P (insn
) = 1;
4181 offset
+= UNITS_PER_WORD
;
4185 /* Expand prologue or epilogue stack adjustment.
4186 The pattern exist to put a dependency on all ebp-based memory accesses.
4187 STYLE should be negative if instructions should be marked as frame related,
4188 zero if %r11 register is live and cannot be freely used and positive
/* NOTE(review): extraction-garbled.  The leading "if (! TARGET_64BIT)"
   test, locals, braces and the trailing style checks were dropped.  Three
   code paths are visible: the 32-bit adjust pattern, the 64-bit pattern
   for immediates that fit, and the fallback that materializes the offset
   in %r11 first.  */
4192 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
, int style
)
4197 insn
= emit_insn (gen_pro_epilogue_adjust_stack_1 (dest
, src
, offset
));
4198 else if (x86_64_immediate_operand (offset
, DImode
))
4199 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest
, src
, offset
));
4203 /* r11 is used by indirect sibcall return as well, set before the
4204 epilogue and used after the epilogue. ATM indirect sibcall
4205 shouldn't be used together with huge frame sizes in one
4206 function because of the frame_size check in sibcall.c. */
4209 r11
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
4210 insn
= emit_insn (gen_rtx_SET (DImode
, r11
, offset
));
4212 RTX_FRAME_RELATED_P (insn
) = 1;
4213 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest
, src
, r11
,
4217 RTX_FRAME_RELATED_P (insn
) = 1;
4220 /* Expand the prologue into a bunch of separate insns. */
/* NOTE(review): extraction-garbled.  Locals (insn, pic_reg_used, t),
   several guards (e.g. "if (!allocate)" before the adjust-stack call,
   the TARGET_STACK_PROBE branch, "if (pic_reg_used)" around set_got)
   and braces were dropped -- verify against the original i386.c.  */
4223 ix86_expand_prologue (void)
4227 struct ix86_frame frame
;
4228 HOST_WIDE_INT allocate
;
4230 ix86_compute_frame_layout (&frame
);
4232 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4233 slower on all targets. Also sdb doesn't like it. */
4235 if (frame_pointer_needed
)
4237 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
4238 RTX_FRAME_RELATED_P (insn
) = 1;
4240 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
4241 RTX_FRAME_RELATED_P (insn
) = 1;
4244 allocate
= frame
.to_allocate
;
4246 if (!frame
.save_regs_using_mov
)
4247 ix86_emit_save_regs ();
4249 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
4251 /* When using red zone we may start register saving before allocating
4252 the stack frame saving one cycle of the prologue. */
4253 if (TARGET_RED_ZONE
&& frame
.save_regs_using_mov
)
4254 ix86_emit_save_regs_using_mov (frame_pointer_needed
? hard_frame_pointer_rtx
4255 : stack_pointer_rtx
,
4256 -frame
.nregs
* UNITS_PER_WORD
);
4260 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
4261 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
4262 GEN_INT (-allocate
), -1);
/* Large allocations on targets needing stack probing go through the
   allocate_stack_worker helper, which clobbers %eax.  */
4265 /* Only valid for Win32. */
4266 rtx eax
= gen_rtx_REG (SImode
, 0);
4267 bool eax_live
= ix86_eax_live_at_start_p ();
4274 emit_insn (gen_push (eax
));
4278 insn
= emit_move_insn (eax
, GEN_INT (allocate
));
4279 RTX_FRAME_RELATED_P (insn
) = 1;
4281 insn
= emit_insn (gen_allocate_stack_worker (eax
));
4282 RTX_FRAME_RELATED_P (insn
) = 1;
/* NOTE(review): this section restores the caller's %eax when it was live
   at function start; the "if (eax_live)" guard was dropped.  */
4287 if (frame_pointer_needed
)
4288 t
= plus_constant (hard_frame_pointer_rtx
,
4291 - frame
.nregs
* UNITS_PER_WORD
);
4293 t
= plus_constant (stack_pointer_rtx
, allocate
);
4294 emit_move_insn (eax
, gen_rtx_MEM (SImode
, t
));
4298 if (frame
.save_regs_using_mov
&& !TARGET_RED_ZONE
)
4300 if (!frame_pointer_needed
|| !frame
.to_allocate
)
4301 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
4303 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
4304 -frame
.nregs
* UNITS_PER_WORD
);
4307 pic_reg_used
= false;
4308 if (pic_offset_table_rtx
4309 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
4310 || current_function_profile
))
4312 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
4314 if (alt_pic_reg_used
!= INVALID_REGNUM
)
4315 REGNO (pic_offset_table_rtx
) = alt_pic_reg_used
;
4317 pic_reg_used
= true;
4322 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
4324 /* Even with accurate pre-reload life analysis, we can wind up
4325 deleting all references to the pic register after reload.
4326 Consider if cross-jumping unifies two sides of a branch
4327 controlled by a comparison vs the only read from a global.
4328 In which case, allow the set_got to be deleted, though we're
4329 too late to do anything about the ebx save in the prologue. */
4330 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
4333 /* Prevent function calls from be scheduled before the call to mcount.
4334 In the pic_reg_used case, make sure that the got load isn't deleted. */
4335 if (current_function_profile
)
4336 emit_insn (gen_blockage (pic_reg_used
? pic_offset_table_rtx
: const0_rtx
));
4339 /* Emit code to restore saved registers using MOV insns. First register
4340 is restored from POINTER + OFFSET. */
/* NOTE(review): extraction-garbled.  The locals (regno, r11), braces and
   the "offset = 0" rebase after switching the base to %r11 were dropped.
   Mirrors ix86_emit_save_regs_using_mov, but also handles 64-bit offsets
   that do not fit a 32-bit displacement by adding them into %r11 first.  */
4342 ix86_emit_restore_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
,
4343 int maybe_eh_return
)
4346 rtx base_address
= gen_rtx_MEM (Pmode
, pointer
);
4348 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4349 if (ix86_save_reg (regno
, maybe_eh_return
))
4351 /* Ensure that adjust_address won't be forced to produce pointer
4352 out of range allowed by x86-64 instruction set. */
4353 if (TARGET_64BIT
&& offset
!= trunc_int_for_mode (offset
, SImode
))
4357 r11
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
4358 emit_move_insn (r11
, GEN_INT (offset
));
4359 emit_insn (gen_adddi3 (r11
, r11
, pointer
));
4360 base_address
= gen_rtx_MEM (Pmode
, r11
);
4363 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
4364 adjust_address (base_address
, Pmode
, offset
));
4365 offset
+= UNITS_PER_WORD
;
4369 /* Restore function stack, frame, and registers. */
/* NOTE(review): extraction-garbled.  Numerous guards, else branches,
   GEN_INT arguments of some pro_epilogue_adjust_stack calls and the
   "if (style == 0) return;" sibcall exit were dropped -- verify against
   the original i386.c.  STYLE distinguishes normal return, sibcall and
   eh_return (style == 2) epilogues.  */
4372 ix86_expand_epilogue (int style
)
4375 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
4376 struct ix86_frame frame
;
4377 HOST_WIDE_INT offset
;
4379 ix86_compute_frame_layout (&frame
);
4381 /* Calculate start of saved registers relative to ebp. Special care
4382 must be taken for the normal return case of a function using
4383 eh_return: the eax and edx registers are marked as saved, but not
4384 restored along this path. */
4385 offset
= frame
.nregs
;
4386 if (current_function_calls_eh_return
&& style
!= 2)
4388 offset
*= -UNITS_PER_WORD
;
4390 /* If we're only restoring one register and sp is not valid then
4391 using a move instruction to restore the register since it's
4392 less work than reloading sp and popping the register.
4394 The default code result in stack adjustment using add/lea instruction,
4395 while this code results in LEAVE instruction (or discrete equivalent),
4396 so it is profitable in some other cases as well. Especially when there
4397 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4398 and there is exactly one register to pop. This heuristic may need some
4399 tuning in future. */
4400 if ((!sp_valid
&& frame
.nregs
<= 1)
4401 || (TARGET_EPILOGUE_USING_MOVE
4402 && cfun
->machine
->use_fast_prologue_epilogue
4403 && (frame
.nregs
> 1 || frame
.to_allocate
))
4404 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
4405 || (frame_pointer_needed
&& TARGET_USE_LEAVE
4406 && cfun
->machine
->use_fast_prologue_epilogue
4407 && frame
.nregs
== 1)
4408 || current_function_calls_eh_return
)
4410 /* Restore registers. We can use ebp or esp to address the memory
4411 locations. If both are available, default to ebp, since offsets
4412 are known to be small. Only exception is esp pointing directly to the
4413 end of block of saved registers, where we may simplify addressing
4416 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
4417 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
4418 frame
.to_allocate
, style
== 2);
4420 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
4421 offset
, style
== 2);
4423 /* eh_return epilogues need %ecx added to the stack pointer. */
/* NOTE(review): the "if (style == 2)" guard for this eh_return section
   was dropped by extraction.  */
4426 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
4428 if (frame_pointer_needed
)
4430 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
4431 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
4432 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
4434 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
4435 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
4437 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
4442 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
4443 tmp
= plus_constant (tmp
, (frame
.to_allocate
4444 + frame
.nregs
* UNITS_PER_WORD
));
4445 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
4448 else if (!frame_pointer_needed
)
4449 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
4450 GEN_INT (frame
.to_allocate
4451 + frame
.nregs
* UNITS_PER_WORD
),
4453 /* If not an i386, mov & pop is faster than "leave". */
4454 else if (TARGET_USE_LEAVE
|| optimize_size
4455 || !cfun
->machine
->use_fast_prologue_epilogue
)
4456 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
4459 pro_epilogue_adjust_stack (stack_pointer_rtx
,
4460 hard_frame_pointer_rtx
,
4463 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
4465 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
4470 /* First step is to deallocate the stack frame so that we can
4471 pop the registers. */
4474 if (!frame_pointer_needed
)
4476 pro_epilogue_adjust_stack (stack_pointer_rtx
,
4477 hard_frame_pointer_rtx
,
4478 GEN_INT (offset
), style
);
4480 else if (frame
.to_allocate
)
4481 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
4482 GEN_INT (frame
.to_allocate
), style
);
4484 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4485 if (ix86_save_reg (regno
, false))
4488 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
4490 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
4492 if (frame_pointer_needed
)
4494 /* Leave results in shorter dependency chains on CPUs that are
4495 able to grok it fast. */
4496 if (TARGET_USE_LEAVE
)
4497 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
4498 else if (TARGET_64BIT
)
4499 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
4501 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
4505 /* Sibcall epilogues don't want a return instruction. */
4509 if (current_function_pops_args
&& current_function_args_size
)
4511 rtx popc
= GEN_INT (current_function_pops_args
);
4513 /* i386 can only pop 64K bytes. If asked to pop more, pop
4514 return address, do explicit add, and jump indirectly to the
4517 if (current_function_pops_args
>= 65536)
4519 rtx ecx
= gen_rtx_REG (SImode
, 2);
4521 /* There is no "pascal" calling convention in 64bit ABI. */
4525 emit_insn (gen_popsi1 (ecx
));
4526 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
4527 emit_jump_insn (gen_return_indirect_internal (ecx
));
4530 emit_jump_insn (gen_return_pop_internal (popc
));
4533 emit_jump_insn (gen_return_internal ());
4536 /* Reset from the function's potential modifications. */
4539 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
4540 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
4542 if (pic_offset_table_rtx
)
4543 REGNO (pic_offset_table_rtx
) = REAL_PIC_OFFSET_TABLE_REGNUM
;
4546 /* Extract the parts of an RTL expression that is a valid memory address
4547 for an instruction. Return 0 if the structure of the address is
4548 grossly off. Return -1 if the address contains ASHIFT, so it is not
4549 strictly valid, but still used for computing length of lea instruction. */
/* NOTE(review): extraction-garbled.  Large portions of the PLUS-walking
   loop, many switch cases, the retval local and the final assignments
   into *OUT were dropped -- verify against the original i386.c.  */
4552 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
4554 rtx base
= NULL_RTX
;
4555 rtx index
= NULL_RTX
;
4556 rtx disp
= NULL_RTX
;
4557 HOST_WIDE_INT scale
= 1;
4558 rtx scale_rtx
= NULL_RTX
;
4560 enum ix86_address_seg seg
= SEG_DEFAULT
;
4562 if (GET_CODE (addr
) == REG
|| GET_CODE (addr
) == SUBREG
)
4564 else if (GET_CODE (addr
) == PLUS
)
/* Flatten the nested PLUS chain into the addends[] array.  */
4574 addends
[n
++] = XEXP (op
, 1);
4577 while (GET_CODE (op
) == PLUS
);
4582 for (i
= n
; i
>= 0; --i
)
4585 switch (GET_CODE (op
))
4590 index
= XEXP (op
, 0);
4591 scale_rtx
= XEXP (op
, 1);
4595 if (XINT (op
, 1) == UNSPEC_TP
4596 && TARGET_TLS_DIRECT_SEG_REFS
4597 && seg
== SEG_DEFAULT
)
4598 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
4627 else if (GET_CODE (addr
) == MULT
)
4629 index
= XEXP (addr
, 0); /* index*scale */
4630 scale_rtx
= XEXP (addr
, 1);
4632 else if (GET_CODE (addr
) == ASHIFT
)
4636 /* We're called for lea too, which implements ashift on occasion. */
4637 index
= XEXP (addr
, 0);
4638 tmp
= XEXP (addr
, 1);
4639 if (GET_CODE (tmp
) != CONST_INT
)
4641 scale
= INTVAL (tmp
);
4642 if ((unsigned HOST_WIDE_INT
) scale
> 3)
4648 disp
= addr
; /* displacement */
4650 /* Extract the integral value of scale. */
4653 if (GET_CODE (scale_rtx
) != CONST_INT
)
4655 scale
= INTVAL (scale_rtx
);
4658 /* Allow arg pointer and stack pointer as index if there is not scaling. */
4659 if (base
&& index
&& scale
== 1
4660 && (index
== arg_pointer_rtx
4661 || index
== frame_pointer_rtx
4662 || (REG_P (index
) && REGNO (index
) == STACK_POINTER_REGNUM
)))
4669 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4670 if ((base
== hard_frame_pointer_rtx
4671 || base
== frame_pointer_rtx
4672 || base
== arg_pointer_rtx
) && !disp
)
4675 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4676 Avoid this by transforming to [%esi+0]. */
4677 if (ix86_tune
== PROCESSOR_K6
&& !optimize_size
4678 && base
&& !index
&& !disp
4680 && REGNO_REG_CLASS (REGNO (base
)) == SIREG
)
4683 /* Special case: encode reg+reg instead of reg*2. */
4684 if (!base
&& index
&& scale
&& scale
== 2)
4685 base
= index
, scale
= 1;
4687 /* Special case: scaling cannot be encoded without base or displacement. */
4688 if (!base
&& !disp
&& index
&& scale
!= 1)
4700 /* Return cost of the memory address x.
4701 For i386, it is better to use a complex address than let gcc copy
4702 the address into a reg and make a new pseudo. But not if the address
4703 requires to two regs - that would mean more pseudos with longer
/* NOTE(review): extraction-garbled.  The cost local, its increments/
   decrements, several condition heads (e.g. the parts.base/parts.index
   null checks) and the final return were dropped -- verify against the
   original i386.c.  */
4706 ix86_address_cost (rtx x
)
4708 struct ix86_address parts
;
4711 if (!ix86_decompose_address (x
, &parts
))
4714 /* More complex memory references are better. */
4715 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
4717 if (parts
.seg
!= SEG_DEFAULT
)
4720 /* Attempt to minimize number of registers in the address. */
4722 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
4724 && (!REG_P (parts
.index
)
4725 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
4729 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
4731 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
4732 && parts
.base
!= parts
.index
)
4735 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
4736 since it's predecode logic can't detect the length of instructions
4737 and it degenerates to vector decoded. Increase cost of such
4738 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4739 to split such addresses or even refuse such addresses at all.
4741 Following addressing modes are affected:
4746 The first and last case may be avoidable by explicitly coding the zero in
4747 memory address, but I don't have AMD-K6 machine handy to check this
4751 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
4752 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
4753 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
4759 /* If X is a machine specific address (i.e. a symbol or label being
4760 referenced as a displacement from the GOT implemented using an
4761 UNSPEC), then return the base term. Otherwise return X. */
/* NOTE(review): extraction-garbled.  The TARGET_64BIT guard, the term
   local declaration, "return x;" fallbacks and "return term;" exits were
   dropped -- verify against the original i386.c.  Two strategies are
   visible: unwrapping a GOTPCREL unspec (64-bit) and falling back to
   ix86_delegitimize_address.  */
4764 ix86_find_base_term (rtx x
)
4770 if (GET_CODE (x
) != CONST
)
4773 if (GET_CODE (term
) == PLUS
4774 && (GET_CODE (XEXP (term
, 1)) == CONST_INT
4775 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
4776 term
= XEXP (term
, 0);
4777 if (GET_CODE (term
) != UNSPEC
4778 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
4781 term
= XVECEXP (term
, 0, 0);
4783 if (GET_CODE (term
) != SYMBOL_REF
4784 && GET_CODE (term
) != LABEL_REF
)
4790 term
= ix86_delegitimize_address (x
);
4792 if (GET_CODE (term
) != SYMBOL_REF
4793 && GET_CODE (term
) != LABEL_REF
)
4799 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
4800 this is used for to form addresses to local data when -fPIC is in
/* NOTE(review): extraction-garbled.  The return type/"static bool"
   header line, braces and both "return true;"/"return false;" statements
   were dropped.  Recognizes a MINUS of a label/symbol and the literal
   Mach-O "<pic base>" symbol.  */
4804 darwin_local_data_pic (rtx disp
)
4806 if (GET_CODE (disp
) == MINUS
)
4808 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
4809 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
4810 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
4812 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
4813 if (! strcmp (sym_name
, "<pic base>"))
4821 /* Determine if a given RTX is a valid constant. We already know this
4822 satisfies CONSTANT_P. */
/* NOTE(review): extraction-garbled.  The switch case labels
   (SYMBOL_REF/CONST/...), the unspec case labels, break/return
   statements and braces were dropped -- verify against the original
   i386.c.  */
4825 legitimate_constant_p (rtx x
)
4829 switch (GET_CODE (x
))
4832 /* TLS symbols are not constant. */
4833 if (tls_symbolic_operand (x
, Pmode
))
4838 inner
= XEXP (x
, 0);
4840 /* Offsets of TLS symbols are never valid.
4841 Discourage CSE from creating them. */
4842 if (GET_CODE (inner
) == PLUS
4843 && tls_symbolic_operand (XEXP (inner
, 0), Pmode
))
4846 if (GET_CODE (inner
) == PLUS
)
4848 if (GET_CODE (XEXP (inner
, 1)) != CONST_INT
)
4850 inner
= XEXP (inner
, 0);
4853 if (TARGET_MACHO
&& darwin_local_data_pic (inner
))
4856 if (GET_CODE (inner
) == MINUS
)
4858 if (GET_CODE (XEXP (inner
, 1)) != CONST_INT
)
4860 inner
= XEXP (inner
, 0);
4863 /* Only some unspecs are valid as "constants". */
4864 if (GET_CODE (inner
) == UNSPEC
)
4865 switch (XINT (inner
, 1))
4869 return local_exec_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
4871 return local_dynamic_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
4881 /* Otherwise we handle everything else in the move patterns. */
4885 /* Determine if it's legal to put X into the constant pool. This
4886 is not possible for the address of thread-local symbols, which
4887 is checked above. */
4890 ix86_cannot_force_const_mem (rtx x
)
4892 return !legitimate_constant_p (x
);
4895 /* Determine if a given RTX is a valid constant address. */
4898 constant_address_p (rtx x
)
4900 return CONSTANT_P (x
) && legitimate_address_p (Pmode
, x
, 1);
4903 /* Nonzero if the constant value X is a legitimate general operand
4904 when generating PIC code. It is given that flag_pic is on and
4905 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
/* NOTE(review): extraction-garbled.  Switch case labels (CONST,
   SYMBOL_REF, LABEL_REF, default), "return true;" statements and braces
   were dropped -- verify against the original i386.c.  */
4908 legitimate_pic_operand_p (rtx x
)
4912 switch (GET_CODE (x
))
4915 inner
= XEXP (x
, 0);
4917 /* Only some unspecs are valid as "constants". */
4918 if (GET_CODE (inner
) == UNSPEC
)
4919 switch (XINT (inner
, 1))
4922 return local_exec_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
4930 return legitimate_pic_address_disp_p (x
);
4937 /* Determine if a given CONST RTX is a valid memory displacement
/* NOTE(review): extraction-garbled.  The TARGET_64BIT guard, many
   return-0/return-1 statements, braces, and a number of switch case
   labels were dropped -- verify against the original i386.c.  */
4941 legitimate_pic_address_disp_p (rtx disp
)
4945 /* In 64bit mode we can allow direct addresses of symbols and labels
4946 when they are not dynamic symbols. */
4949 /* TLS references should always be enclosed in UNSPEC. */
4950 if (tls_symbolic_operand (disp
, GET_MODE (disp
)))
4952 if (GET_CODE (disp
) == SYMBOL_REF
4953 && ix86_cmodel
== CM_SMALL_PIC
4954 && SYMBOL_REF_LOCAL_P (disp
))
4956 if (GET_CODE (disp
) == LABEL_REF
)
4958 if (GET_CODE (disp
) == CONST
4959 && GET_CODE (XEXP (disp
, 0)) == PLUS
)
4961 rtx op0
= XEXP (XEXP (disp
, 0), 0);
4962 rtx op1
= XEXP (XEXP (disp
, 0), 1);
4964 /* TLS references should always be enclosed in UNSPEC. */
4965 if (tls_symbolic_operand (op0
, GET_MODE (op0
)))
/* Small-model PIC allows sym+offset so long as the offset stays within
   +/- 16MB of the symbol.  */
4967 if (((GET_CODE (op0
) == SYMBOL_REF
4968 && ix86_cmodel
== CM_SMALL_PIC
4969 && SYMBOL_REF_LOCAL_P (op0
))
4970 || GET_CODE (op0
) == LABEL_REF
)
4971 && GET_CODE (op1
) == CONST_INT
4972 && INTVAL (op1
) < 16*1024*1024
4973 && INTVAL (op1
) >= -16*1024*1024)
4977 if (GET_CODE (disp
) != CONST
)
4979 disp
= XEXP (disp
, 0);
4983 /* We are unsafe to allow PLUS expressions. This limit allowed distance
4984 of GOT tables. We should not need these anyway. */
4985 if (GET_CODE (disp
) != UNSPEC
4986 || XINT (disp
, 1) != UNSPEC_GOTPCREL
)
4989 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
4990 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
4996 if (GET_CODE (disp
) == PLUS
)
4998 if (GET_CODE (XEXP (disp
, 1)) != CONST_INT
)
5000 disp
= XEXP (disp
, 0);
5004 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
5007 if (GET_CODE (disp
) != UNSPEC
)
5010 switch (XINT (disp
, 1))
5015 return GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
;
5017 if (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
5018 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
5019 return local_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5021 case UNSPEC_GOTTPOFF
:
5022 case UNSPEC_GOTNTPOFF
:
5023 case UNSPEC_INDNTPOFF
:
5026 return initial_exec_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5028 return local_exec_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5030 return local_dynamic_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5036 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5037 memory address for an instruction. The MODE argument is the machine mode
5038 for the MEM expression that wants to use this address.
5040 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5041 convert common non-canonical forms to canonical form so that they will
/* NOTE(review): extraction-garbled.  The "goto report_error" statements,
   the report_error/is_legitimate_pic labels, several guards ("if (base)",
   "if (index)", "if (scale != 1 && !index)") plus return TRUE/FALSE
   statements were dropped -- verify against the original i386.c.  The
   visible structure validates base, index, scale and displacement in
   turn, recording a failure reason for debug output.  */
5045 legitimate_address_p (enum machine_mode mode
, rtx addr
, int strict
)
5047 struct ix86_address parts
;
5048 rtx base
, index
, disp
;
5049 HOST_WIDE_INT scale
;
5050 const char *reason
= NULL
;
5051 rtx reason_rtx
= NULL_RTX
;
5053 if (TARGET_DEBUG_ADDR
)
5056 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5057 GET_MODE_NAME (mode
), strict
);
5061 if (ix86_decompose_address (addr
, &parts
) <= 0)
5063 reason
= "decomposition failed";
5068 index
= parts
.index
;
5070 scale
= parts
.scale
;
5072 /* Validate base register.
5074 Don't allow SUBREG's here, it can lead to spill failures when the base
5075 is one word out of a two word structure, which is represented internally
5082 if (GET_CODE (base
) != REG
)
5084 reason
= "base is not a register";
5088 if (GET_MODE (base
) != Pmode
)
5090 reason
= "base is not in Pmode";
5094 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (base
))
5095 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (base
)))
5097 reason
= "base is not valid";
5102 /* Validate index register.
5104 Don't allow SUBREG's here, it can lead to spill failures when the index
5105 is one word out of a two word structure, which is represented internally
5112 if (GET_CODE (index
) != REG
)
5114 reason
= "index is not a register";
5118 if (GET_MODE (index
) != Pmode
)
5120 reason
= "index is not in Pmode";
5124 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (index
))
5125 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (index
)))
5127 reason
= "index is not valid";
5132 /* Validate scale factor. */
5135 reason_rtx
= GEN_INT (scale
);
5138 reason
= "scale without index";
5142 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
5144 reason
= "scale is not a valid multiplier";
5149 /* Validate displacement. */
5154 if (GET_CODE (disp
) == CONST
5155 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
5156 switch (XINT (XEXP (disp
, 0), 1))
5160 case UNSPEC_GOTPCREL
:
5163 goto is_legitimate_pic
;
5165 case UNSPEC_GOTTPOFF
:
5166 case UNSPEC_GOTNTPOFF
:
5167 case UNSPEC_INDNTPOFF
:
5173 reason
= "invalid address unspec";
5177 else if (flag_pic
&& (SYMBOLIC_CONST (disp
)
5179 && !machopic_operand_p (disp
)
5184 if (TARGET_64BIT
&& (index
|| base
))
5186 /* foo@dtpoff(%rX) is ok. */
5187 if (GET_CODE (disp
) != CONST
5188 || GET_CODE (XEXP (disp
, 0)) != PLUS
5189 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
5190 || GET_CODE (XEXP (XEXP (disp
, 0), 1)) != CONST_INT
5191 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
5192 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
5194 reason
= "non-constant pic memory reference";
5198 else if (! legitimate_pic_address_disp_p (disp
))
5200 reason
= "displacement is an invalid pic construct";
5204 /* This code used to verify that a symbolic pic displacement
5205 includes the pic_offset_table_rtx register.
5207 While this is good idea, unfortunately these constructs may
5208 be created by "adds using lea" optimization for incorrect
5217 This code is nonsensical, but results in addressing
5218 GOT table with pic_offset_table_rtx base. We can't
5219 just refuse it easily, since it gets matched by
5220 "addsi3" pattern, that later gets split to lea in the
5221 case output register differs from input. While this
5222 can be handled by separate addsi pattern for this case
5223 that never results in lea, this seems to be easier and
5224 correct fix for crash to disable this test. */
5226 else if (GET_CODE (disp
) != LABEL_REF
5227 && GET_CODE (disp
) != CONST_INT
5228 && (GET_CODE (disp
) != CONST
5229 || !legitimate_constant_p (disp
))
5230 && (GET_CODE (disp
) != SYMBOL_REF
5231 || !legitimate_constant_p (disp
)))
5233 reason
= "displacement is not constant";
5236 else if (TARGET_64BIT
5237 && !x86_64_immediate_operand (disp
, VOIDmode
))
5239 reason
= "displacement is out of range";
5244 /* Everything looks valid. */
5245 if (TARGET_DEBUG_ADDR
)
5246 fprintf (stderr
, "Success.\n");
5250 if (TARGET_DEBUG_ADDR
)
5252 fprintf (stderr
, "Error: %s\n", reason
);
5253 debug_rtx (reason_rtx
);
5258 /* Return an unique alias set for the GOT. */
/* NOTE(review): extraction-garbled.  The "if (set == -1)" guard around
   the lazy initialization and the "return set;" statement were dropped.
   The function lazily allocates and caches a single alias set.  */
5260 static HOST_WIDE_INT
5261 ix86_GOT_alias_set (void)
5263 static HOST_WIDE_INT set
= -1;
5265 set
= new_alias_set ();
5269 /* Return a legitimate reference for ORIG (an address) using the
5270 register REG. If REG is 0, a new pseudo is generated.
5272 There are two types of references that must be handled:
5274 1. Global data references must load the address from the GOT, via
5275 the PIC reg. An insn is emitted to do this load, and the reg is
5278 2. Static data references, constant pool addresses, and code labels
5279 compute the address as an offset from the GOT, whose base is in
5280 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
5281 differentiate them from global data objects. The returned
5282 address is the PIC reg + an unspec constant.
5284 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5285 reg also appears in the address. */
5288 legitimize_pic_address (rtx orig
, rtx reg
)
5296 reg
= gen_reg_rtx (Pmode
);
5297 /* Use the generic Mach-O PIC machinery. */
5298 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
5301 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
5303 else if (!TARGET_64BIT
&& local_symbolic_operand (addr
, Pmode
))
5305 /* This symbol may be referenced via a displacement from the PIC
5306 base address (@GOTOFF). */
5308 if (reload_in_progress
)
5309 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5310 if (GET_CODE (addr
) == CONST
)
5311 addr
= XEXP (addr
, 0);
5312 if (GET_CODE (addr
) == PLUS
)
5314 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
5315 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
5318 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
5319 new = gen_rtx_CONST (Pmode
, new);
5320 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5324 emit_move_insn (reg
, new);
5328 else if (GET_CODE (addr
) == SYMBOL_REF
)
5332 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
5333 new = gen_rtx_CONST (Pmode
, new);
5334 new = gen_const_mem (Pmode
, new);
5335 set_mem_alias_set (new, ix86_GOT_alias_set ());
5338 reg
= gen_reg_rtx (Pmode
);
5339 /* Use directly gen_movsi, otherwise the address is loaded
5340 into register for CSE. We don't want to CSE this addresses,
5341 instead we CSE addresses from the GOT table, so skip this. */
5342 emit_insn (gen_movsi (reg
, new));
5347 /* This symbol must be referenced via a load from the
5348 Global Offset Table (@GOT). */
5350 if (reload_in_progress
)
5351 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5352 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
5353 new = gen_rtx_CONST (Pmode
, new);
5354 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5355 new = gen_const_mem (Pmode
, new);
5356 set_mem_alias_set (new, ix86_GOT_alias_set ());
5359 reg
= gen_reg_rtx (Pmode
);
5360 emit_move_insn (reg
, new);
5366 if (GET_CODE (addr
) == CONST
)
5368 addr
= XEXP (addr
, 0);
5370 /* We must match stuff we generate before. Assume the only
5371 unspecs that can get here are ours. Not that we could do
5372 anything with them anyway.... */
5373 if (GET_CODE (addr
) == UNSPEC
5374 || (GET_CODE (addr
) == PLUS
5375 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
5377 if (GET_CODE (addr
) != PLUS
)
5380 if (GET_CODE (addr
) == PLUS
)
5382 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
5384 /* Check first to see if this is a constant offset from a @GOTOFF
5385 symbol reference. */
5386 if (local_symbolic_operand (op0
, Pmode
)
5387 && GET_CODE (op1
) == CONST_INT
)
5391 if (reload_in_progress
)
5392 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5393 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
5395 new = gen_rtx_PLUS (Pmode
, new, op1
);
5396 new = gen_rtx_CONST (Pmode
, new);
5397 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5401 emit_move_insn (reg
, new);
5407 if (INTVAL (op1
) < -16*1024*1024
5408 || INTVAL (op1
) >= 16*1024*1024)
5409 new = gen_rtx_PLUS (Pmode
, op0
, force_reg (Pmode
, op1
));
5414 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
5415 new = legitimize_pic_address (XEXP (addr
, 1),
5416 base
== reg
? NULL_RTX
: reg
);
5418 if (GET_CODE (new) == CONST_INT
)
5419 new = plus_constant (base
, INTVAL (new));
5422 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
5424 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
5425 new = XEXP (new, 1);
5427 new = gen_rtx_PLUS (Pmode
, base
, new);
5435 /* Load the thread pointer. If TO_REG is true, force it into a register. */
5438 get_thread_pointer (int to_reg
)
5442 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
5446 reg
= gen_reg_rtx (Pmode
);
5447 insn
= gen_rtx_SET (VOIDmode
, reg
, tp
);
5448 insn
= emit_insn (insn
);
5453 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
5454 false if we expect this to be used for a memory address and true if
5455 we expect to load the address into a register. */
5458 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
5460 rtx dest
, base
, off
, pic
;
5465 case TLS_MODEL_GLOBAL_DYNAMIC
:
5466 dest
= gen_reg_rtx (Pmode
);
5469 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
5472 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
5473 insns
= get_insns ();
5476 emit_libcall_block (insns
, dest
, rax
, x
);
5479 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
5482 case TLS_MODEL_LOCAL_DYNAMIC
:
5483 base
= gen_reg_rtx (Pmode
);
5486 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
5489 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
5490 insns
= get_insns ();
5493 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
5494 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
5495 emit_libcall_block (insns
, base
, rax
, note
);
5498 emit_insn (gen_tls_local_dynamic_base_32 (base
));
5500 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
5501 off
= gen_rtx_CONST (Pmode
, off
);
5503 return gen_rtx_PLUS (Pmode
, base
, off
);
5505 case TLS_MODEL_INITIAL_EXEC
:
5509 type
= UNSPEC_GOTNTPOFF
;
5513 if (reload_in_progress
)
5514 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5515 pic
= pic_offset_table_rtx
;
5516 type
= TARGET_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
5518 else if (!TARGET_GNU_TLS
)
5520 pic
= gen_reg_rtx (Pmode
);
5521 emit_insn (gen_set_got (pic
));
5522 type
= UNSPEC_GOTTPOFF
;
5527 type
= UNSPEC_INDNTPOFF
;
5530 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
5531 off
= gen_rtx_CONST (Pmode
, off
);
5533 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
5534 off
= gen_const_mem (Pmode
, off
);
5535 set_mem_alias_set (off
, ix86_GOT_alias_set ());
5537 if (TARGET_64BIT
|| TARGET_GNU_TLS
)
5539 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
5540 off
= force_reg (Pmode
, off
);
5541 return gen_rtx_PLUS (Pmode
, base
, off
);
5545 base
= get_thread_pointer (true);
5546 dest
= gen_reg_rtx (Pmode
);
5547 emit_insn (gen_subsi3 (dest
, base
, off
));
5551 case TLS_MODEL_LOCAL_EXEC
:
5552 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
5553 (TARGET_64BIT
|| TARGET_GNU_TLS
)
5554 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
5555 off
= gen_rtx_CONST (Pmode
, off
);
5557 if (TARGET_64BIT
|| TARGET_GNU_TLS
)
5559 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
5560 return gen_rtx_PLUS (Pmode
, base
, off
);
5564 base
= get_thread_pointer (true);
5565 dest
= gen_reg_rtx (Pmode
);
5566 emit_insn (gen_subsi3 (dest
, base
, off
));
5577 /* Try machine-dependent ways of modifying an illegitimate address
5578 to be legitimate. If we find one, return the new, valid address.
5579 This macro is used in only one place: `memory_address' in explow.c.
5581 OLDX is the address as it was before break_out_memory_refs was called.
5582 In some cases it is useful to look at this to decide what needs to be done.
5584 MODE and WIN are passed so that this macro can use
5585 GO_IF_LEGITIMATE_ADDRESS.
5587 It is always safe for this macro to do nothing. It exists to recognize
5588 opportunities to optimize the output.
5590 For the 80386, we handle X+REG by loading X into a register R and
5591 using R+REG. R will go in a general reg and indexing will be used.
5592 However, if REG is a broken-out memory address or multiplication,
5593 nothing needs to be done because REG can certainly go in a general reg.
5595 When -fpic is used, special handling is needed for symbolic references.
5596 See comments by legitimize_pic_address in i386.c for details. */
5599 legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
, enum machine_mode mode
)
5604 if (TARGET_DEBUG_ADDR
)
5606 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5607 GET_MODE_NAME (mode
));
5611 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
5613 return legitimize_tls_address (x
, log
, false);
5614 if (GET_CODE (x
) == CONST
5615 && GET_CODE (XEXP (x
, 0)) == PLUS
5616 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
5617 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
5619 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0), log
, false);
5620 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
5623 if (flag_pic
&& SYMBOLIC_CONST (x
))
5624 return legitimize_pic_address (x
, 0);
5626 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5627 if (GET_CODE (x
) == ASHIFT
5628 && GET_CODE (XEXP (x
, 1)) == CONST_INT
5629 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (x
, 1)))) < 4)
5632 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
5633 GEN_INT (1 << log
));
5636 if (GET_CODE (x
) == PLUS
)
5638 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5640 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
5641 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
5642 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)))) < 4)
5645 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
5646 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
5647 GEN_INT (1 << log
));
5650 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
5651 && GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
5652 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x
, 1), 1)))) < 4)
5655 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
5656 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
5657 GEN_INT (1 << log
));
5660 /* Put multiply first if it isn't already. */
5661 if (GET_CODE (XEXP (x
, 1)) == MULT
)
5663 rtx tmp
= XEXP (x
, 0);
5664 XEXP (x
, 0) = XEXP (x
, 1);
5669 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5670 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5671 created by virtual register instantiation, register elimination, and
5672 similar optimizations. */
5673 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
5676 x
= gen_rtx_PLUS (Pmode
,
5677 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
5678 XEXP (XEXP (x
, 1), 0)),
5679 XEXP (XEXP (x
, 1), 1));
5683 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5684 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5685 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
5686 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
5687 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
5688 && CONSTANT_P (XEXP (x
, 1)))
5691 rtx other
= NULL_RTX
;
5693 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
5695 constant
= XEXP (x
, 1);
5696 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
5698 else if (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 1), 1)) == CONST_INT
)
5700 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
5701 other
= XEXP (x
, 1);
5709 x
= gen_rtx_PLUS (Pmode
,
5710 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
5711 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
5712 plus_constant (other
, INTVAL (constant
)));
5716 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
5719 if (GET_CODE (XEXP (x
, 0)) == MULT
)
5722 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
5725 if (GET_CODE (XEXP (x
, 1)) == MULT
)
5728 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
5732 && GET_CODE (XEXP (x
, 1)) == REG
5733 && GET_CODE (XEXP (x
, 0)) == REG
)
5736 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
5739 x
= legitimize_pic_address (x
, 0);
5742 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
5745 if (GET_CODE (XEXP (x
, 0)) == REG
)
5747 rtx temp
= gen_reg_rtx (Pmode
);
5748 rtx val
= force_operand (XEXP (x
, 1), temp
);
5750 emit_move_insn (temp
, val
);
5756 else if (GET_CODE (XEXP (x
, 1)) == REG
)
5758 rtx temp
= gen_reg_rtx (Pmode
);
5759 rtx val
= force_operand (XEXP (x
, 0), temp
);
5761 emit_move_insn (temp
, val
);
5771 /* Print an integer constant expression in assembler syntax. Addition
5772 and subtraction are the only arithmetic that may appear in these
5773 expressions. FILE is the stdio stream to write to, X is the rtx, and
5774 CODE is the operand print code from the output string. */
5777 output_pic_addr_const (FILE *file
, rtx x
, int code
)
5781 switch (GET_CODE (x
))
5791 /* Mark the decl as referenced so that cgraph will output the function. */
5792 if (SYMBOL_REF_DECL (x
))
5793 mark_decl_referenced (SYMBOL_REF_DECL (x
));
5795 assemble_name (file
, XSTR (x
, 0));
5796 if (!TARGET_MACHO
&& code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
5797 fputs ("@PLT", file
);
5804 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
5805 assemble_name (asm_out_file
, buf
);
5809 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
5813 /* This used to output parentheses around the expression,
5814 but that does not work on the 386 (either ATT or BSD assembler). */
5815 output_pic_addr_const (file
, XEXP (x
, 0), code
);
5819 if (GET_MODE (x
) == VOIDmode
)
5821 /* We can use %d if the number is <32 bits and positive. */
5822 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
5823 fprintf (file
, "0x%lx%08lx",
5824 (unsigned long) CONST_DOUBLE_HIGH (x
),
5825 (unsigned long) CONST_DOUBLE_LOW (x
));
5827 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
5830 /* We can't handle floating point constants;
5831 PRINT_OPERAND must handle them. */
5832 output_operand_lossage ("floating constant misused");
5836 /* Some assemblers need integer constants to appear first. */
5837 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
)
5839 output_pic_addr_const (file
, XEXP (x
, 0), code
);
5841 output_pic_addr_const (file
, XEXP (x
, 1), code
);
5843 else if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
5845 output_pic_addr_const (file
, XEXP (x
, 1), code
);
5847 output_pic_addr_const (file
, XEXP (x
, 0), code
);
5855 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
5856 output_pic_addr_const (file
, XEXP (x
, 0), code
);
5858 output_pic_addr_const (file
, XEXP (x
, 1), code
);
5860 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
5864 if (XVECLEN (x
, 0) != 1)
5866 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
5867 switch (XINT (x
, 1))
5870 fputs ("@GOT", file
);
5873 fputs ("@GOTOFF", file
);
5875 case UNSPEC_GOTPCREL
:
5876 fputs ("@GOTPCREL(%rip)", file
);
5878 case UNSPEC_GOTTPOFF
:
5879 /* FIXME: This might be @TPOFF in Sun ld too. */
5880 fputs ("@GOTTPOFF", file
);
5883 fputs ("@TPOFF", file
);
5887 fputs ("@TPOFF", file
);
5889 fputs ("@NTPOFF", file
);
5892 fputs ("@DTPOFF", file
);
5894 case UNSPEC_GOTNTPOFF
:
5896 fputs ("@GOTTPOFF(%rip)", file
);
5898 fputs ("@GOTNTPOFF", file
);
5900 case UNSPEC_INDNTPOFF
:
5901 fputs ("@INDNTPOFF", file
);
5904 output_operand_lossage ("invalid UNSPEC as operand");
5910 output_operand_lossage ("invalid expression as operand");
5914 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5915 We need to handle our special PIC relocations. */
5918 i386_dwarf_output_addr_const (FILE *file
, rtx x
)
5921 fprintf (file
, "%s", TARGET_64BIT
? ASM_QUAD
: ASM_LONG
);
5925 fprintf (file
, "%s", ASM_LONG
);
5928 output_pic_addr_const (file
, x
, '\0');
5930 output_addr_const (file
, x
);
5934 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
5935 We need to emit DTP-relative relocations. */
5938 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
5940 fputs (ASM_LONG
, file
);
5941 output_addr_const (file
, x
);
5942 fputs ("@DTPOFF", file
);
5948 fputs (", 0", file
);
5955 /* In the name of slightly smaller debug output, and to cater to
5956 general assembler losage, recognize PIC+GOTOFF and turn it back
5957 into a direct symbol reference. */
5960 ix86_delegitimize_address (rtx orig_x
)
5964 if (GET_CODE (x
) == MEM
)
5969 if (GET_CODE (x
) != CONST
5970 || GET_CODE (XEXP (x
, 0)) != UNSPEC
5971 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
5972 || GET_CODE (orig_x
) != MEM
)
5974 return XVECEXP (XEXP (x
, 0), 0, 0);
5977 if (GET_CODE (x
) != PLUS
5978 || GET_CODE (XEXP (x
, 1)) != CONST
)
5981 if (GET_CODE (XEXP (x
, 0)) == REG
5982 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
5983 /* %ebx + GOT/GOTOFF */
5985 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
5987 /* %ebx + %reg * scale + GOT/GOTOFF */
5989 if (GET_CODE (XEXP (y
, 0)) == REG
5990 && REGNO (XEXP (y
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
5992 else if (GET_CODE (XEXP (y
, 1)) == REG
5993 && REGNO (XEXP (y
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
5997 if (GET_CODE (y
) != REG
5998 && GET_CODE (y
) != MULT
5999 && GET_CODE (y
) != ASHIFT
)
6005 x
= XEXP (XEXP (x
, 1), 0);
6006 if (GET_CODE (x
) == UNSPEC
6007 && ((XINT (x
, 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
6008 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& GET_CODE (orig_x
) != MEM
)))
6011 return gen_rtx_PLUS (Pmode
, y
, XVECEXP (x
, 0, 0));
6012 return XVECEXP (x
, 0, 0);
6015 if (GET_CODE (x
) == PLUS
6016 && GET_CODE (XEXP (x
, 0)) == UNSPEC
6017 && GET_CODE (XEXP (x
, 1)) == CONST_INT
6018 && ((XINT (XEXP (x
, 0), 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
6019 || (XINT (XEXP (x
, 0), 1) == UNSPEC_GOTOFF
6020 && GET_CODE (orig_x
) != MEM
)))
6022 x
= gen_rtx_PLUS (VOIDmode
, XVECEXP (XEXP (x
, 0), 0, 0), XEXP (x
, 1));
6024 return gen_rtx_PLUS (Pmode
, y
, x
);
6032 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
6037 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
6039 enum rtx_code second_code
, bypass_code
;
6040 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
6041 if (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
)
6043 code
= ix86_fp_compare_code_to_integer (code
);
6047 code
= reverse_condition (code
);
6058 if (mode
!= CCmode
&& mode
!= CCNOmode
&& mode
!= CCGCmode
)
6063 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6064 Those same assemblers have the same but opposite losage on cmov. */
6067 suffix
= fp
? "nbe" : "a";
6070 if (mode
== CCNOmode
|| mode
== CCGOCmode
)
6072 else if (mode
== CCmode
|| mode
== CCGCmode
)
6083 if (mode
== CCNOmode
|| mode
== CCGOCmode
)
6085 else if (mode
== CCmode
|| mode
== CCGCmode
)
6094 suffix
= fp
? "nb" : "ae";
6097 if (mode
!= CCmode
&& mode
!= CCGCmode
&& mode
!= CCNOmode
)
6107 suffix
= fp
? "u" : "p";
6110 suffix
= fp
? "nu" : "np";
6115 fputs (suffix
, file
);
6118 /* Print the name of register X to FILE based on its machine mode and number.
6119 If CODE is 'w', pretend the mode is HImode.
6120 If CODE is 'b', pretend the mode is QImode.
6121 If CODE is 'k', pretend the mode is SImode.
6122 If CODE is 'q', pretend the mode is DImode.
6123 If CODE is 'h', pretend the reg is the `high' byte register.
6124 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
6127 print_reg (rtx x
, int code
, FILE *file
)
6129 if (REGNO (x
) == ARG_POINTER_REGNUM
6130 || REGNO (x
) == FRAME_POINTER_REGNUM
6131 || REGNO (x
) == FLAGS_REG
6132 || REGNO (x
) == FPSR_REG
)
6135 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
6138 if (code
== 'w' || MMX_REG_P (x
))
6140 else if (code
== 'b')
6142 else if (code
== 'k')
6144 else if (code
== 'q')
6146 else if (code
== 'y')
6148 else if (code
== 'h')
6151 code
= GET_MODE_SIZE (GET_MODE (x
));
6153 /* Irritatingly, AMD extended registers use different naming convention
6154 from the normal registers. */
6155 if (REX_INT_REG_P (x
))
6162 error ("extended registers have no high halves");
6165 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6168 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6171 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6174 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6177 error ("unsupported operand size for extended register");
6185 if (STACK_TOP_P (x
))
6187 fputs ("st(0)", file
);
6194 if (! ANY_FP_REG_P (x
))
6195 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
6200 fputs (hi_reg_name
[REGNO (x
)], file
);
6203 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
6205 fputs (qi_reg_name
[REGNO (x
)], file
);
6208 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
6210 fputs (qi_high_reg_name
[REGNO (x
)], file
);
6217 /* Locate some local-dynamic symbol still in use by this function
6218 so that we can print its name in some tls_local_dynamic_base
6222 get_some_local_dynamic_name (void)
6226 if (cfun
->machine
->some_ld_name
)
6227 return cfun
->machine
->some_ld_name
;
6229 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
6231 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
6232 return cfun
->machine
->some_ld_name
;
6238 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
6242 if (GET_CODE (x
) == SYMBOL_REF
6243 && local_dynamic_symbolic_operand (x
, Pmode
))
6245 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
6253 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6254 C -- print opcode suffix for set/cmov insn.
6255 c -- like C, but print reversed condition
6256 F,f -- likewise, but for floating-point.
6257 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
6259 R -- print the prefix for register names.
6260 z -- print the opcode suffix for the size of the current operand.
6261 * -- print a star (in certain assembler syntax)
6262 A -- print an absolute memory reference.
6263 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6264 s -- print a shift double count, followed by the assemblers argument
6266 b -- print the QImode name of the register for the indicated operand.
6267 %b0 would print %al if operands[0] is reg 0.
6268 w -- likewise, print the HImode name of the register.
6269 k -- likewise, print the SImode name of the register.
6270 q -- likewise, print the DImode name of the register.
6271 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6272 y -- print "st(0)" instead of "st" as a register.
6273 D -- print condition for SSE cmp instruction.
6274 P -- if PIC, print an @PLT suffix.
6275 X -- don't print any sort of PIC '@' suffix for a symbol.
6276 & -- print some in-use local-dynamic symbol name.
6280 print_operand (FILE *file
, rtx x
, int code
)
6287 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6292 assemble_name (file
, get_some_local_dynamic_name ());
6296 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6298 else if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6300 /* Intel syntax. For absolute addresses, registers should not
6301 be surrounded by braces. */
6302 if (GET_CODE (x
) != REG
)
6305 PRINT_OPERAND (file
, x
, 0);
6313 PRINT_OPERAND (file
, x
, 0);
6318 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6323 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6328 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6333 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6338 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6343 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6348 /* 387 opcodes don't get size suffixes if the operands are
6350 if (STACK_REG_P (x
))
6353 /* Likewise if using Intel opcodes. */
6354 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6357 /* This is the size of op from size of operand. */
6358 switch (GET_MODE_SIZE (GET_MODE (x
)))
6361 #ifdef HAVE_GAS_FILDS_FISTS
6367 if (GET_MODE (x
) == SFmode
)
6382 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
6384 #ifdef GAS_MNEMONICS
6410 if (GET_CODE (x
) == CONST_INT
|| ! SHIFT_DOUBLE_OMITS_COUNT
)
6412 PRINT_OPERAND (file
, x
, 0);
6418 /* Little bit of braindamage here. The SSE compare instructions
6419 does use completely different names for the comparisons that the
6420 fp conditional moves. */
6421 switch (GET_CODE (x
))
6436 fputs ("unord", file
);
6440 fputs ("neq", file
);
6444 fputs ("nlt", file
);
6448 fputs ("nle", file
);
6451 fputs ("ord", file
);
6459 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6460 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6462 switch (GET_MODE (x
))
6464 case HImode
: putc ('w', file
); break;
6466 case SFmode
: putc ('l', file
); break;
6468 case DFmode
: putc ('q', file
); break;
6476 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
6479 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6480 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6483 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
6486 /* Like above, but reverse condition */
6488 /* Check to see if argument to %c is really a constant
6489 and not a condition code which needs to be reversed. */
6490 if (!COMPARISON_P (x
))
6492 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6495 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
6498 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6499 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6502 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
6508 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
6511 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
6514 int pred_val
= INTVAL (XEXP (x
, 0));
6516 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
6517 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
6519 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
6520 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
6522 /* Emit hints only in the case default branch prediction
6523 heuristics would fail. */
6524 if (taken
!= cputaken
)
6526 /* We use 3e (DS) prefix for taken branches and
6527 2e (CS) prefix for not taken branches. */
6529 fputs ("ds ; ", file
);
6531 fputs ("cs ; ", file
);
6538 output_operand_lossage ("invalid operand code `%c'", code
);
6542 if (GET_CODE (x
) == REG
)
6543 print_reg (x
, code
, file
);
6545 else if (GET_CODE (x
) == MEM
)
6547 /* No `byte ptr' prefix for call instructions. */
6548 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
6551 switch (GET_MODE_SIZE (GET_MODE (x
)))
6553 case 1: size
= "BYTE"; break;
6554 case 2: size
= "WORD"; break;
6555 case 4: size
= "DWORD"; break;
6556 case 8: size
= "QWORD"; break;
6557 case 12: size
= "XWORD"; break;
6558 case 16: size
= "XMMWORD"; break;
6563 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6566 else if (code
== 'w')
6568 else if (code
== 'k')
6572 fputs (" PTR ", file
);
6576 /* Avoid (%rip) for call operands. */
6577 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
6578 && GET_CODE (x
) != CONST_INT
)
6579 output_addr_const (file
, x
);
6580 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
6581 output_operand_lossage ("invalid constraints for operand");
6586 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
6591 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
6592 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
6594 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6596 fprintf (file
, "0x%08lx", l
);
6599 /* These float cases don't actually occur as immediate operands. */
6600 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
6604 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
6605 fprintf (file
, "%s", dstr
);
6608 else if (GET_CODE (x
) == CONST_DOUBLE
6609 && GET_MODE (x
) == XFmode
)
6613 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
6614 fprintf (file
, "%s", dstr
);
6621 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
6623 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6626 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
6627 || GET_CODE (x
) == LABEL_REF
)
6629 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6632 fputs ("OFFSET FLAT:", file
);
6635 if (GET_CODE (x
) == CONST_INT
)
6636 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
6638 output_pic_addr_const (file
, x
, code
);
6640 output_addr_const (file
, x
);
6644 /* Print a memory operand whose address is ADDR. */
6647 print_operand_address (FILE *file
, rtx addr
)
6649 struct ix86_address parts
;
6650 rtx base
, index
, disp
;
6653 if (! ix86_decompose_address (addr
, &parts
))
6657 index
= parts
.index
;
6659 scale
= parts
.scale
;
6667 if (USER_LABEL_PREFIX
[0] == 0)
6669 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
6675 if (!base
&& !index
)
6677 /* Displacement only requires special attention. */
6679 if (GET_CODE (disp
) == CONST_INT
)
6681 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
6683 if (USER_LABEL_PREFIX
[0] == 0)
6685 fputs ("ds:", file
);
6687 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
6690 output_pic_addr_const (file
, disp
, 0);
6692 output_addr_const (file
, disp
);
6694 /* Use one byte shorter RIP relative addressing for 64bit mode. */
6696 && ((GET_CODE (disp
) == SYMBOL_REF
6697 && ! tls_symbolic_operand (disp
, GET_MODE (disp
)))
6698 || GET_CODE (disp
) == LABEL_REF
6699 || (GET_CODE (disp
) == CONST
6700 && GET_CODE (XEXP (disp
, 0)) == PLUS
6701 && (GET_CODE (XEXP (XEXP (disp
, 0), 0)) == SYMBOL_REF
6702 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) == LABEL_REF
)
6703 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)))
6704 fputs ("(%rip)", file
);
6708 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6713 output_pic_addr_const (file
, disp
, 0);
6714 else if (GET_CODE (disp
) == LABEL_REF
)
6715 output_asm_label (disp
);
6717 output_addr_const (file
, disp
);
6722 print_reg (base
, 0, file
);
6726 print_reg (index
, 0, file
);
6728 fprintf (file
, ",%d", scale
);
6734 rtx offset
= NULL_RTX
;
6738 /* Pull out the offset of a symbol; print any symbol itself. */
6739 if (GET_CODE (disp
) == CONST
6740 && GET_CODE (XEXP (disp
, 0)) == PLUS
6741 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
6743 offset
= XEXP (XEXP (disp
, 0), 1);
6744 disp
= gen_rtx_CONST (VOIDmode
,
6745 XEXP (XEXP (disp
, 0), 0));
6749 output_pic_addr_const (file
, disp
, 0);
6750 else if (GET_CODE (disp
) == LABEL_REF
)
6751 output_asm_label (disp
);
6752 else if (GET_CODE (disp
) == CONST_INT
)
6755 output_addr_const (file
, disp
);
6761 print_reg (base
, 0, file
);
6764 if (INTVAL (offset
) >= 0)
6766 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
6770 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
6777 print_reg (index
, 0, file
);
6779 fprintf (file
, "*%d", scale
);
6787 output_addr_const_extra (FILE *file
, rtx x
)
6791 if (GET_CODE (x
) != UNSPEC
)
6794 op
= XVECEXP (x
, 0, 0);
6795 switch (XINT (x
, 1))
6797 case UNSPEC_GOTTPOFF
:
6798 output_addr_const (file
, op
);
6799 /* FIXME: This might be @TPOFF in Sun ld. */
6800 fputs ("@GOTTPOFF", file
);
6803 output_addr_const (file
, op
);
6804 fputs ("@TPOFF", file
);
6807 output_addr_const (file
, op
);
6809 fputs ("@TPOFF", file
);
6811 fputs ("@NTPOFF", file
);
6814 output_addr_const (file
, op
);
6815 fputs ("@DTPOFF", file
);
6817 case UNSPEC_GOTNTPOFF
:
6818 output_addr_const (file
, op
);
6820 fputs ("@GOTTPOFF(%rip)", file
);
6822 fputs ("@GOTNTPOFF", file
);
6824 case UNSPEC_INDNTPOFF
:
6825 output_addr_const (file
, op
);
6826 fputs ("@INDNTPOFF", file
);
6836 /* Split one or more DImode RTL references into pairs of SImode
6837 references. The RTL can be REG, offsettable MEM, integer constant, or
6838 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6839 split and "num" is its length. lo_half and hi_half are output arrays
6840 that parallel "operands". */
6843 split_di (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
6847 rtx op
= operands
[num
];
6849 /* simplify_subreg refuse to split volatile memory addresses,
6850 but we still have to handle it. */
6851 if (GET_CODE (op
) == MEM
)
6853 lo_half
[num
] = adjust_address (op
, SImode
, 0);
6854 hi_half
[num
] = adjust_address (op
, SImode
, 4);
6858 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
6859 GET_MODE (op
) == VOIDmode
6860 ? DImode
: GET_MODE (op
), 0);
6861 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
6862 GET_MODE (op
) == VOIDmode
6863 ? DImode
: GET_MODE (op
), 4);
6867 /* Split one or more TImode RTL references into pairs of SImode
6868 references. The RTL can be REG, offsettable MEM, integer constant, or
6869 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6870 split and "num" is its length. lo_half and hi_half are output arrays
6871 that parallel "operands". */
6874 split_ti (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
6878 rtx op
= operands
[num
];
6880 /* simplify_subreg refuse to split volatile memory addresses, but we
6881 still have to handle it. */
6882 if (GET_CODE (op
) == MEM
)
6884 lo_half
[num
] = adjust_address (op
, DImode
, 0);
6885 hi_half
[num
] = adjust_address (op
, DImode
, 8);
6889 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
6890 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
6895 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6896 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6897 is the expression of the binary operation. The output may either be
6898 emitted here, or returned to the caller, like all output_* functions.
6900 There is no guarantee that the operands are the same mode, as they
6901 might be within FLOAT or FLOAT_EXTEND expressions. */
6903 #ifndef SYSV386_COMPAT
6904 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6905 wants to fix the assemblers because that causes incompatibility
6906 with gcc. No-one wants to fix gcc because that causes
6907 incompatibility with assemblers... You can use the option of
6908 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6909 #define SYSV386_COMPAT 1
6913 output_387_binary_op (rtx insn
, rtx
*operands
)
6915 static char buf
[30];
6918 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]) | SSE_REG_P (operands
[2]);
6920 #ifdef ENABLE_CHECKING
6921 /* Even if we do not want to check the inputs, this documents input
6922 constraints. Which helps in understanding the following code. */
6923 if (STACK_REG_P (operands
[0])
6924 && ((REG_P (operands
[1])
6925 && REGNO (operands
[0]) == REGNO (operands
[1])
6926 && (STACK_REG_P (operands
[2]) || GET_CODE (operands
[2]) == MEM
))
6927 || (REG_P (operands
[2])
6928 && REGNO (operands
[0]) == REGNO (operands
[2])
6929 && (STACK_REG_P (operands
[1]) || GET_CODE (operands
[1]) == MEM
)))
6930 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
6936 switch (GET_CODE (operands
[3]))
6939 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
6940 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
6948 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
6949 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
6957 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
6958 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
6966 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
6967 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
6981 if (GET_MODE (operands
[0]) == SFmode
)
6982 strcat (buf
, "ss\t{%2, %0|%0, %2}");
6984 strcat (buf
, "sd\t{%2, %0|%0, %2}");
6989 switch (GET_CODE (operands
[3]))
6993 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
6995 rtx temp
= operands
[2];
6996 operands
[2] = operands
[1];
7000 /* know operands[0] == operands[1]. */
7002 if (GET_CODE (operands
[2]) == MEM
)
7008 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
7010 if (STACK_TOP_P (operands
[0]))
7011 /* How is it that we are storing to a dead operand[2]?
7012 Well, presumably operands[1] is dead too. We can't
7013 store the result to st(0) as st(0) gets popped on this
7014 instruction. Instead store to operands[2] (which I
7015 think has to be st(1)). st(1) will be popped later.
7016 gcc <= 2.8.1 didn't have this check and generated
7017 assembly code that the Unixware assembler rejected. */
7018 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7020 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7024 if (STACK_TOP_P (operands
[0]))
7025 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7027 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7032 if (GET_CODE (operands
[1]) == MEM
)
7038 if (GET_CODE (operands
[2]) == MEM
)
7044 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
7047 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7048 derived assemblers, confusingly reverse the direction of
7049 the operation for fsub{r} and fdiv{r} when the
7050 destination register is not st(0). The Intel assembler
7051 doesn't have this brain damage. Read !SYSV386_COMPAT to
7052 figure out what the hardware really does. */
7053 if (STACK_TOP_P (operands
[0]))
7054 p
= "{p\t%0, %2|rp\t%2, %0}";
7056 p
= "{rp\t%2, %0|p\t%0, %2}";
7058 if (STACK_TOP_P (operands
[0]))
7059 /* As above for fmul/fadd, we can't store to st(0). */
7060 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7062 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7067 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
7070 if (STACK_TOP_P (operands
[0]))
7071 p
= "{rp\t%0, %1|p\t%1, %0}";
7073 p
= "{p\t%1, %0|rp\t%0, %1}";
7075 if (STACK_TOP_P (operands
[0]))
7076 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7078 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7083 if (STACK_TOP_P (operands
[0]))
7085 if (STACK_TOP_P (operands
[1]))
7086 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7088 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7091 else if (STACK_TOP_P (operands
[1]))
7094 p
= "{\t%1, %0|r\t%0, %1}";
7096 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7102 p
= "{r\t%2, %0|\t%0, %2}";
7104 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7117 /* Output code to initialize control word copies used by
7118 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7119 is set to control word rounding downwards. */
7121 emit_i387_cw_initialization (rtx normal
, rtx round_down
)
7123 rtx reg
= gen_reg_rtx (HImode
);
7125 emit_insn (gen_x86_fnstcw_1 (normal
));
7126 emit_move_insn (reg
, normal
);
7127 if (!TARGET_PARTIAL_REG_STALL
&& !optimize_size
7129 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
7131 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0xc00)));
7132 emit_move_insn (round_down
, reg
);
7135 /* Output code for INSN to convert a float to a signed int. OPERANDS
7136 are the insn operands. The output may be [HSD]Imode and the input
7137 operand may be [SDX]Fmode. */
7140 output_fix_trunc (rtx insn
, rtx
*operands
)
7142 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
7143 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
7145 /* Jump through a hoop or two for DImode, since the hardware has no
7146 non-popping instruction. We used to do this a different way, but
7147 that was somewhat fragile and broke with post-reload splitters. */
7148 if (dimode_p
&& !stack_top_dies
)
7149 output_asm_insn ("fld\t%y1", operands
);
7151 if (!STACK_TOP_P (operands
[1]))
7154 if (GET_CODE (operands
[0]) != MEM
)
7157 output_asm_insn ("fldcw\t%3", operands
);
7158 if (stack_top_dies
|| dimode_p
)
7159 output_asm_insn ("fistp%z0\t%0", operands
);
7161 output_asm_insn ("fist%z0\t%0", operands
);
7162 output_asm_insn ("fldcw\t%2", operands
);
7167 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7168 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7169 when fucom should be used. */
7172 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
7175 rtx cmp_op0
= operands
[0];
7176 rtx cmp_op1
= operands
[1];
7177 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]);
7182 cmp_op1
= operands
[2];
7186 if (GET_MODE (operands
[0]) == SFmode
)
7188 return "ucomiss\t{%1, %0|%0, %1}";
7190 return "comiss\t{%1, %0|%0, %1}";
7193 return "ucomisd\t{%1, %0|%0, %1}";
7195 return "comisd\t{%1, %0|%0, %1}";
7198 if (! STACK_TOP_P (cmp_op0
))
7201 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
7203 if (STACK_REG_P (cmp_op1
)
7205 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
7206 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
7208 /* If both the top of the 387 stack dies, and the other operand
7209 is also a stack register that dies, then this must be a
7210 `fcompp' float compare */
7214 /* There is no double popping fcomi variant. Fortunately,
7215 eflags is immune from the fstp's cc clobbering. */
7217 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
7219 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
7220 return TARGET_USE_FFREEP
? "ffreep\t%y0" : "fstp\t%y0";
7227 return "fucompp\n\tfnstsw\t%0";
7229 return "fcompp\n\tfnstsw\t%0";
7242 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7244 static const char * const alt
[24] =
7256 "fcomi\t{%y1, %0|%0, %y1}",
7257 "fcomip\t{%y1, %0|%0, %y1}",
7258 "fucomi\t{%y1, %0|%0, %y1}",
7259 "fucomip\t{%y1, %0|%0, %y1}",
7266 "fcom%z2\t%y2\n\tfnstsw\t%0",
7267 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7268 "fucom%z2\t%y2\n\tfnstsw\t%0",
7269 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7271 "ficom%z2\t%y2\n\tfnstsw\t%0",
7272 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7280 mask
= eflags_p
<< 3;
7281 mask
|= (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
) << 2;
7282 mask
|= unordered_p
<< 1;
7283 mask
|= stack_top_dies
;
7296 ix86_output_addr_vec_elt (FILE *file
, int value
)
7298 const char *directive
= ASM_LONG
;
7303 directive
= ASM_QUAD
;
7309 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
7313 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
7316 fprintf (file
, "%s%s%d-%s%d\n",
7317 ASM_LONG
, LPREFIX
, value
, LPREFIX
, rel
);
7318 else if (HAVE_AS_GOTOFF_IN_DATA
)
7319 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
7321 else if (TARGET_MACHO
)
7323 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
7324 machopic_output_function_base_name (file
);
7325 fprintf(file
, "\n");
7329 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
7330 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
7333 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7337 ix86_expand_clear (rtx dest
)
7341 /* We play register width games, which are only valid after reload. */
7342 if (!reload_completed
)
7345 /* Avoid HImode and its attendant prefix byte. */
7346 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
7347 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
7349 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
7351 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7352 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
7354 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, 17));
7355 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
7361 /* X is an unchanging MEM. If it is a constant pool reference, return
7362 the constant pool rtx, else NULL. */
7365 maybe_get_pool_constant (rtx x
)
7367 x
= ix86_delegitimize_address (XEXP (x
, 0));
7369 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
7370 return get_pool_constant (x
);
7376 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
7378 int strict
= (reload_in_progress
|| reload_completed
);
7380 enum tls_model model
;
7385 model
= GET_CODE (op1
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (op1
) : 0;
7388 op1
= legitimize_tls_address (op1
, model
, true);
7389 op1
= force_operand (op1
, op0
);
7394 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
7399 rtx temp
= ((reload_in_progress
7400 || ((op0
&& GET_CODE (op0
) == REG
)
7402 ? op0
: gen_reg_rtx (Pmode
));
7403 op1
= machopic_indirect_data_reference (op1
, temp
);
7404 op1
= machopic_legitimize_pic_address (op1
, mode
,
7405 temp
== op1
? 0 : temp
);
7407 else if (MACHOPIC_INDIRECT
)
7408 op1
= machopic_indirect_data_reference (op1
, 0);
7412 if (GET_CODE (op0
) == MEM
)
7413 op1
= force_reg (Pmode
, op1
);
7415 op1
= legitimize_address (op1
, op1
, Pmode
);
7416 #endif /* TARGET_MACHO */
7420 if (GET_CODE (op0
) == MEM
7421 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
7422 || !push_operand (op0
, mode
))
7423 && GET_CODE (op1
) == MEM
)
7424 op1
= force_reg (mode
, op1
);
7426 if (push_operand (op0
, mode
)
7427 && ! general_no_elim_operand (op1
, mode
))
7428 op1
= copy_to_mode_reg (mode
, op1
);
7430 /* Force large constants in 64bit compilation into register
7431 to get them CSEed. */
7432 if (TARGET_64BIT
&& mode
== DImode
7433 && immediate_operand (op1
, mode
)
7434 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
7435 && !register_operand (op0
, mode
)
7436 && optimize
&& !reload_completed
&& !reload_in_progress
)
7437 op1
= copy_to_mode_reg (mode
, op1
);
7439 if (FLOAT_MODE_P (mode
))
7441 /* If we are loading a floating point constant to a register,
7442 force the value to memory now, since we'll get better code
7443 out the back end. */
7447 else if (GET_CODE (op1
) == CONST_DOUBLE
)
7449 op1
= validize_mem (force_const_mem (mode
, op1
));
7450 if (!register_operand (op0
, mode
))
7452 rtx temp
= gen_reg_rtx (mode
);
7453 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
7454 emit_move_insn (op0
, temp
);
7461 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
7465 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
7467 /* Force constants other than zero into memory. We do not know how
7468 the instructions used to build constants modify the upper 64 bits
7469 of the register, once we have that information we may be able
7470 to handle some of them more efficiently. */
7471 if ((reload_in_progress
| reload_completed
) == 0
7472 && register_operand (operands
[0], mode
)
7473 && CONSTANT_P (operands
[1]) && operands
[1] != CONST0_RTX (mode
))
7474 operands
[1] = validize_mem (force_const_mem (mode
, operands
[1]));
7476 /* Make operand1 a register if it isn't already. */
7478 && !register_operand (operands
[0], mode
)
7479 && !register_operand (operands
[1], mode
))
7481 rtx temp
= force_reg (GET_MODE (operands
[1]), operands
[1]);
7482 emit_move_insn (operands
[0], temp
);
7486 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], operands
[1]));
7489 /* Attempt to expand a binary operator. Make the expansion closer to the
7490 actual machine, then just general_operand, which will allow 3 separate
7491 memory references (one output, two input) in a single insn. */
7494 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
7497 int matching_memory
;
7498 rtx src1
, src2
, dst
, op
, clob
;
7504 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7505 if (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
7506 && (rtx_equal_p (dst
, src2
)
7507 || immediate_operand (src1
, mode
)))
7514 /* If the destination is memory, and we do not have matching source
7515 operands, do things in registers. */
7516 matching_memory
= 0;
7517 if (GET_CODE (dst
) == MEM
)
7519 if (rtx_equal_p (dst
, src1
))
7520 matching_memory
= 1;
7521 else if (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
7522 && rtx_equal_p (dst
, src2
))
7523 matching_memory
= 2;
7525 dst
= gen_reg_rtx (mode
);
7528 /* Both source operands cannot be in memory. */
7529 if (GET_CODE (src1
) == MEM
&& GET_CODE (src2
) == MEM
)
7531 if (matching_memory
!= 2)
7532 src2
= force_reg (mode
, src2
);
7534 src1
= force_reg (mode
, src1
);
7537 /* If the operation is not commutable, source 1 cannot be a constant
7538 or non-matching memory. */
7539 if ((CONSTANT_P (src1
)
7540 || (!matching_memory
&& GET_CODE (src1
) == MEM
))
7541 && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
7542 src1
= force_reg (mode
, src1
);
7544 /* If optimizing, copy to regs to improve CSE */
7545 if (optimize
&& ! no_new_pseudos
)
7547 if (GET_CODE (dst
) == MEM
)
7548 dst
= gen_reg_rtx (mode
);
7549 if (GET_CODE (src1
) == MEM
)
7550 src1
= force_reg (mode
, src1
);
7551 if (GET_CODE (src2
) == MEM
)
7552 src2
= force_reg (mode
, src2
);
7555 /* Emit the instruction. */
7557 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
7558 if (reload_in_progress
)
7560 /* Reload doesn't know about the flags register, and doesn't know that
7561 it doesn't want to clobber it. We can only do this with PLUS. */
7568 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
7569 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
7572 /* Fix up the destination if needed. */
7573 if (dst
!= operands
[0])
7574 emit_move_insn (operands
[0], dst
);
7577 /* Return TRUE or FALSE depending on whether the binary operator meets the
7578 appropriate constraints. */
7581 ix86_binary_operator_ok (enum rtx_code code
,
7582 enum machine_mode mode ATTRIBUTE_UNUSED
,
7585 /* Both source operands cannot be in memory. */
7586 if (GET_CODE (operands
[1]) == MEM
&& GET_CODE (operands
[2]) == MEM
)
7588 /* If the operation is not commutable, source 1 cannot be a constant. */
7589 if (CONSTANT_P (operands
[1]) && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
7591 /* If the destination is memory, we must have a matching source operand. */
7592 if (GET_CODE (operands
[0]) == MEM
7593 && ! (rtx_equal_p (operands
[0], operands
[1])
7594 || (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
7595 && rtx_equal_p (operands
[0], operands
[2]))))
7597 /* If the operation is not commutable and the source 1 is memory, we must
7598 have a matching destination. */
7599 if (GET_CODE (operands
[1]) == MEM
7600 && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
7601 && ! rtx_equal_p (operands
[0], operands
[1]))
7606 /* Attempt to expand a unary operator. Make the expansion closer to the
7607 actual machine, then just general_operand, which will allow 2 separate
7608 memory references (one output, one input) in a single insn. */
7611 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
7614 int matching_memory
;
7615 rtx src
, dst
, op
, clob
;
7620 /* If the destination is memory, and we do not have matching source
7621 operands, do things in registers. */
7622 matching_memory
= 0;
7623 if (GET_CODE (dst
) == MEM
)
7625 if (rtx_equal_p (dst
, src
))
7626 matching_memory
= 1;
7628 dst
= gen_reg_rtx (mode
);
7631 /* When source operand is memory, destination must match. */
7632 if (!matching_memory
&& GET_CODE (src
) == MEM
)
7633 src
= force_reg (mode
, src
);
7635 /* If optimizing, copy to regs to improve CSE */
7636 if (optimize
&& ! no_new_pseudos
)
7638 if (GET_CODE (dst
) == MEM
)
7639 dst
= gen_reg_rtx (mode
);
7640 if (GET_CODE (src
) == MEM
)
7641 src
= force_reg (mode
, src
);
7644 /* Emit the instruction. */
7646 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
7647 if (reload_in_progress
|| code
== NOT
)
7649 /* Reload doesn't know about the flags register, and doesn't know that
7650 it doesn't want to clobber it. */
7657 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
7658 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
7661 /* Fix up the destination if needed. */
7662 if (dst
!= operands
[0])
7663 emit_move_insn (operands
[0], dst
);
7666 /* Return TRUE or FALSE depending on whether the unary operator meets the
7667 appropriate constraints. */
7670 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
7671 enum machine_mode mode ATTRIBUTE_UNUSED
,
7672 rtx operands
[2] ATTRIBUTE_UNUSED
)
7674 /* If one of operands is memory, source and destination must match. */
7675 if ((GET_CODE (operands
[0]) == MEM
7676 || GET_CODE (operands
[1]) == MEM
)
7677 && ! rtx_equal_p (operands
[0], operands
[1]))
7682 /* Return TRUE or FALSE depending on whether the first SET in INSN
7683 has source and destination with matching CC modes, and that the
7684 CC mode is at least as constrained as REQ_MODE. */
7687 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
7690 enum machine_mode set_mode
;
7692 set
= PATTERN (insn
);
7693 if (GET_CODE (set
) == PARALLEL
)
7694 set
= XVECEXP (set
, 0, 0);
7695 if (GET_CODE (set
) != SET
)
7697 if (GET_CODE (SET_SRC (set
)) != COMPARE
)
7700 set_mode
= GET_MODE (SET_DEST (set
));
7704 if (req_mode
!= CCNOmode
7705 && (req_mode
!= CCmode
7706 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
7710 if (req_mode
== CCGCmode
)
7714 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
7718 if (req_mode
== CCZmode
)
7728 return (GET_MODE (SET_SRC (set
)) == set_mode
);
7731 /* Generate insn patterns to do an integer compare of OPERANDS. */
7734 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
7736 enum machine_mode cmpmode
;
7739 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
7740 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
7742 /* This is very simple, but making the interface the same as in the
7743 FP case makes the rest of the code easier. */
7744 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
7745 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
7747 /* Return the test that should be put into the flags user, i.e.
7748 the bcc, scc, or cmov instruction. */
7749 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
7752 /* Figure out whether to use ordered or unordered fp comparisons.
7753 Return the appropriate mode to use. */
7756 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
7758 /* ??? In order to make all comparisons reversible, we do all comparisons
7759 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7760 all forms trapping and nontrapping comparisons, we can make inequality
7761 comparisons trapping again, since it results in better code when using
7762 FCOM based compares. */
7763 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
7767 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
7769 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
7770 return ix86_fp_compare_mode (code
);
7773 /* Only zero flag is needed. */
7775 case NE
: /* ZF!=0 */
7777 /* Codes needing carry flag. */
7778 case GEU
: /* CF=0 */
7779 case GTU
: /* CF=0 & ZF=0 */
7780 case LTU
: /* CF=1 */
7781 case LEU
: /* CF=1 | ZF=1 */
7783 /* Codes possibly doable only with sign flag when
7784 comparing against zero. */
7785 case GE
: /* SF=OF or SF=0 */
7786 case LT
: /* SF<>OF or SF=1 */
7787 if (op1
== const0_rtx
)
7790 /* For other cases Carry flag is not required. */
7792 /* Codes doable only with sign flag when comparing
7793 against zero, but we miss jump instruction for it
7794 so we need to use relational tests against overflow
7795 that thus needs to be zero. */
7796 case GT
: /* ZF=0 & SF=OF */
7797 case LE
: /* ZF=1 | SF<>OF */
7798 if (op1
== const0_rtx
)
7802 /* strcmp pattern do (use flags) and combine may ask us for proper
7811 /* Return the fixed registers used for condition codes. */
7814 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
7821 /* If two condition code modes are compatible, return a condition code
7822 mode which is compatible with both. Otherwise, return
7825 static enum machine_mode
7826 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
7831 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
7834 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
7835 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
7863 /* These are only compatible with themselves, which we already
7869 /* Return true if we should use an FCOMI instruction for this fp comparison. */
7872 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED
)
7874 enum rtx_code swapped_code
= swap_condition (code
);
7875 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
7876 || (ix86_fp_comparison_cost (swapped_code
)
7877 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
7880 /* Swap, force into registers, or otherwise massage the two operands
7881 to a fp comparison. The operands are updated in place; the new
7882 comparison code is returned. */
7884 static enum rtx_code
7885 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
7887 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
7888 rtx op0
= *pop0
, op1
= *pop1
;
7889 enum machine_mode op_mode
= GET_MODE (op0
);
7890 int is_sse
= SSE_REG_P (op0
) | SSE_REG_P (op1
);
7892 /* All of the unordered compare instructions only work on registers.
7893 The same is true of the XFmode compare instructions. The same is
7894 true of the fcomi compare instructions. */
7897 && (fpcmp_mode
== CCFPUmode
7898 || op_mode
== XFmode
7899 || ix86_use_fcomi_compare (code
)))
7901 op0
= force_reg (op_mode
, op0
);
7902 op1
= force_reg (op_mode
, op1
);
7906 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7907 things around if they appear profitable, otherwise force op0
7910 if (standard_80387_constant_p (op0
) == 0
7911 || (GET_CODE (op0
) == MEM
7912 && ! (standard_80387_constant_p (op1
) == 0
7913 || GET_CODE (op1
) == MEM
)))
7916 tmp
= op0
, op0
= op1
, op1
= tmp
;
7917 code
= swap_condition (code
);
7920 if (GET_CODE (op0
) != REG
)
7921 op0
= force_reg (op_mode
, op0
);
7923 if (CONSTANT_P (op1
))
7925 if (standard_80387_constant_p (op1
))
7926 op1
= force_reg (op_mode
, op1
);
7928 op1
= validize_mem (force_const_mem (op_mode
, op1
));
7932 /* Try to rearrange the comparison to make it cheaper. */
7933 if (ix86_fp_comparison_cost (code
)
7934 > ix86_fp_comparison_cost (swap_condition (code
))
7935 && (GET_CODE (op1
) == REG
|| !no_new_pseudos
))
7938 tmp
= op0
, op0
= op1
, op1
= tmp
;
7939 code
= swap_condition (code
);
7940 if (GET_CODE (op0
) != REG
)
7941 op0
= force_reg (op_mode
, op0
);
7949 /* Convert comparison codes we use to represent FP comparison to integer
7950 code that will result in proper branch. Return UNKNOWN if no such code
7954 ix86_fp_compare_code_to_integer (enum rtx_code code
)
7983 /* Split comparison code CODE into comparisons we can do using branch
7984 instructions. BYPASS_CODE is comparison code for branch that will
7985 branch around FIRST_CODE and SECOND_CODE. If some of branches
7986 is not required, set value to UNKNOWN.
7987 We never require more than two branches. */
7990 ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*bypass_code
,
7991 enum rtx_code
*first_code
,
7992 enum rtx_code
*second_code
)
7995 *bypass_code
= UNKNOWN
;
7996 *second_code
= UNKNOWN
;
7998 /* The fcomi comparison sets flags as follows:
8008 case GT
: /* GTU - CF=0 & ZF=0 */
8009 case GE
: /* GEU - CF=0 */
8010 case ORDERED
: /* PF=0 */
8011 case UNORDERED
: /* PF=1 */
8012 case UNEQ
: /* EQ - ZF=1 */
8013 case UNLT
: /* LTU - CF=1 */
8014 case UNLE
: /* LEU - CF=1 | ZF=1 */
8015 case LTGT
: /* EQ - ZF=0 */
8017 case LT
: /* LTU - CF=1 - fails on unordered */
8019 *bypass_code
= UNORDERED
;
8021 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
8023 *bypass_code
= UNORDERED
;
8025 case EQ
: /* EQ - ZF=1 - fails on unordered */
8027 *bypass_code
= UNORDERED
;
8029 case NE
: /* NE - ZF=0 - fails on unordered */
8031 *second_code
= UNORDERED
;
8033 case UNGE
: /* GEU - CF=0 - fails on unordered */
8035 *second_code
= UNORDERED
;
8037 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
8039 *second_code
= UNORDERED
;
8044 if (!TARGET_IEEE_FP
)
8046 *second_code
= UNKNOWN
;
8047 *bypass_code
= UNKNOWN
;
8051 /* Return cost of comparison done fcom + arithmetics operations on AX.
8052 All following functions do use number of instructions as a cost metrics.
8053 In future this should be tweaked to compute bytes for optimize_size and
8054 take into account performance of various instructions on various CPUs. */
8056 ix86_fp_comparison_arithmetics_cost (enum rtx_code code
)
8058 if (!TARGET_IEEE_FP
)
8060 /* The cost of code output by ix86_expand_fp_compare. */
8088 /* Return cost of comparison done using fcomi operation.
8089 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8091 ix86_fp_comparison_fcomi_cost (enum rtx_code code
)
8093 enum rtx_code bypass_code
, first_code
, second_code
;
8094 /* Return arbitrarily high cost when instruction is not supported - this
8095 prevents gcc from using it. */
8098 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8099 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 2;
8102 /* Return cost of comparison done using sahf operation.
8103 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8105 ix86_fp_comparison_sahf_cost (enum rtx_code code
)
8107 enum rtx_code bypass_code
, first_code
, second_code
;
8108 /* Return arbitrarily high cost when instruction is not preferred - this
8109 avoids gcc from using it. */
8110 if (!TARGET_USE_SAHF
&& !optimize_size
)
8112 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8113 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 3;
8116 /* Compute cost of the comparison done using any method.
8117 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8119 ix86_fp_comparison_cost (enum rtx_code code
)
8121 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
8124 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
8125 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
8127 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
8128 if (min
> sahf_cost
)
8130 if (min
> fcomi_cost
)
8135 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8138 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
,
8139 rtx
*second_test
, rtx
*bypass_test
)
8141 enum machine_mode fpcmp_mode
, intcmp_mode
;
8143 int cost
= ix86_fp_comparison_cost (code
);
8144 enum rtx_code bypass_code
, first_code
, second_code
;
8146 fpcmp_mode
= ix86_fp_compare_mode (code
);
8147 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
8150 *second_test
= NULL_RTX
;
8152 *bypass_test
= NULL_RTX
;
8154 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8156 /* Do fcomi/sahf based test when profitable. */
8157 if ((bypass_code
== UNKNOWN
|| bypass_test
)
8158 && (second_code
== UNKNOWN
|| second_test
)
8159 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
8163 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
8164 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
8170 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
8171 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
8173 scratch
= gen_reg_rtx (HImode
);
8174 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
8175 emit_insn (gen_x86_sahf_1 (scratch
));
8178 /* The FP codes work out to act like unsigned. */
8179 intcmp_mode
= fpcmp_mode
;
8181 if (bypass_code
!= UNKNOWN
)
8182 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
8183 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
8185 if (second_code
!= UNKNOWN
)
8186 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
8187 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
8192 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8193 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
8194 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
8196 scratch
= gen_reg_rtx (HImode
);
8197 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
8199 /* In the unordered case, we have to check C2 for NaN's, which
8200 doesn't happen to work out to anything nice combination-wise.
8201 So do some bit twiddling on the value we've got in AH to come
8202 up with an appropriate set of condition codes. */
8204 intcmp_mode
= CCNOmode
;
8209 if (code
== GT
|| !TARGET_IEEE_FP
)
8211 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
8216 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8217 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
8218 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
8219 intcmp_mode
= CCmode
;
8225 if (code
== LT
&& TARGET_IEEE_FP
)
8227 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8228 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
8229 intcmp_mode
= CCmode
;
8234 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
8240 if (code
== GE
|| !TARGET_IEEE_FP
)
8242 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
8247 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8248 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
8255 if (code
== LE
&& TARGET_IEEE_FP
)
8257 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8258 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
8259 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
8260 intcmp_mode
= CCmode
;
8265 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
8271 if (code
== EQ
&& TARGET_IEEE_FP
)
8273 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8274 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
8275 intcmp_mode
= CCmode
;
8280 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
8287 if (code
== NE
&& TARGET_IEEE_FP
)
8289 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8290 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
8296 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
8302 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
8306 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
8315 /* Return the test that should be put into the flags user, i.e.
8316 the bcc, scc, or cmov instruction. */
8317 return gen_rtx_fmt_ee (code
, VOIDmode
,
8318 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
8323 ix86_expand_compare (enum rtx_code code
, rtx
*second_test
, rtx
*bypass_test
)
8326 op0
= ix86_compare_op0
;
8327 op1
= ix86_compare_op1
;
8330 *second_test
= NULL_RTX
;
8332 *bypass_test
= NULL_RTX
;
8334 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
8335 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
8336 second_test
, bypass_test
);
8338 ret
= ix86_expand_int_compare (code
, op0
, op1
);
8343 /* Return true if the CODE will result in nontrivial jump sequence. */
8345 ix86_fp_jump_nontrivial_p (enum rtx_code code
)
8347 enum rtx_code bypass_code
, first_code
, second_code
;
8350 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8351 return bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
;
8355 ix86_expand_branch (enum rtx_code code
, rtx label
)
8359 switch (GET_MODE (ix86_compare_op0
))
8365 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
8366 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
8367 gen_rtx_LABEL_REF (VOIDmode
, label
),
8369 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
8378 enum rtx_code bypass_code
, first_code
, second_code
;
8380 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
8383 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8385 /* Check whether we will use the natural sequence with one jump. If
8386 so, we can expand jump early. Otherwise delay expansion by
8387 creating compound insn to not confuse optimizers. */
8388 if (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
8391 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
8392 gen_rtx_LABEL_REF (VOIDmode
, label
),
8397 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
8398 ix86_compare_op0
, ix86_compare_op1
);
8399 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
8400 gen_rtx_LABEL_REF (VOIDmode
, label
),
8402 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
8404 use_fcomi
= ix86_use_fcomi_compare (code
);
8405 vec
= rtvec_alloc (3 + !use_fcomi
);
8406 RTVEC_ELT (vec
, 0) = tmp
;
8408 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
8410 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
8413 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
8415 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
8423 /* Expand DImode branch into multiple compare+branch. */
8425 rtx lo
[2], hi
[2], label2
;
8426 enum rtx_code code1
, code2
, code3
;
8428 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
8430 tmp
= ix86_compare_op0
;
8431 ix86_compare_op0
= ix86_compare_op1
;
8432 ix86_compare_op1
= tmp
;
8433 code
= swap_condition (code
);
8435 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
8436 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
8438 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8439 avoid two branches. This costs one extra insn, so disable when
8440 optimizing for size. */
8442 if ((code
== EQ
|| code
== NE
)
8444 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
8449 if (hi
[1] != const0_rtx
)
8450 xor1
= expand_binop (SImode
, xor_optab
, xor1
, hi
[1],
8451 NULL_RTX
, 0, OPTAB_WIDEN
);
8454 if (lo
[1] != const0_rtx
)
8455 xor0
= expand_binop (SImode
, xor_optab
, xor0
, lo
[1],
8456 NULL_RTX
, 0, OPTAB_WIDEN
);
8458 tmp
= expand_binop (SImode
, ior_optab
, xor1
, xor0
,
8459 NULL_RTX
, 0, OPTAB_WIDEN
);
8461 ix86_compare_op0
= tmp
;
8462 ix86_compare_op1
= const0_rtx
;
8463 ix86_expand_branch (code
, label
);
8467 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8468 op1 is a constant and the low word is zero, then we can just
8469 examine the high word. */
8471 if (GET_CODE (hi
[1]) == CONST_INT
&& lo
[1] == const0_rtx
)
8474 case LT
: case LTU
: case GE
: case GEU
:
8475 ix86_compare_op0
= hi
[0];
8476 ix86_compare_op1
= hi
[1];
8477 ix86_expand_branch (code
, label
);
8483 /* Otherwise, we need two or three jumps. */
8485 label2
= gen_label_rtx ();
8488 code2
= swap_condition (code
);
8489 code3
= unsigned_condition (code
);
8493 case LT
: case GT
: case LTU
: case GTU
:
8496 case LE
: code1
= LT
; code2
= GT
; break;
8497 case GE
: code1
= GT
; code2
= LT
; break;
8498 case LEU
: code1
= LTU
; code2
= GTU
; break;
8499 case GEU
: code1
= GTU
; code2
= LTU
; break;
8501 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
8502 case NE
: code2
= UNKNOWN
; break;
8510 * if (hi(a) < hi(b)) goto true;
8511 * if (hi(a) > hi(b)) goto false;
8512 * if (lo(a) < lo(b)) goto true;
8516 ix86_compare_op0
= hi
[0];
8517 ix86_compare_op1
= hi
[1];
8519 if (code1
!= UNKNOWN
)
8520 ix86_expand_branch (code1
, label
);
8521 if (code2
!= UNKNOWN
)
8522 ix86_expand_branch (code2
, label2
);
8524 ix86_compare_op0
= lo
[0];
8525 ix86_compare_op1
= lo
[1];
8526 ix86_expand_branch (code3
, label
);
8528 if (code2
!= UNKNOWN
)
8529 emit_label (label2
);
8538 /* Split branch based on floating point condition. */
8540 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
8541 rtx target1
, rtx target2
, rtx tmp
)
8544 rtx label
= NULL_RTX
;
8546 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
8549 if (target2
!= pc_rtx
)
8552 code
= reverse_condition_maybe_unordered (code
);
8557 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
8558 tmp
, &second
, &bypass
);
8560 if (split_branch_probability
>= 0)
8562 /* Distribute the probabilities across the jumps.
8563 Assume the BYPASS and SECOND to be always test
8565 probability
= split_branch_probability
;
8567 /* Value of 1 is low enough to make no need for probability
8568 to be updated. Later we may run some experiments and see
8569 if unordered values are more frequent in practice. */
8571 bypass_probability
= 1;
8573 second_probability
= 1;
8575 if (bypass
!= NULL_RTX
)
8577 label
= gen_label_rtx ();
8578 i
= emit_jump_insn (gen_rtx_SET
8580 gen_rtx_IF_THEN_ELSE (VOIDmode
,
8582 gen_rtx_LABEL_REF (VOIDmode
,
8585 if (bypass_probability
>= 0)
8587 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
8588 GEN_INT (bypass_probability
),
8591 i
= emit_jump_insn (gen_rtx_SET
8593 gen_rtx_IF_THEN_ELSE (VOIDmode
,
8594 condition
, target1
, target2
)));
8595 if (probability
>= 0)
8597 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
8598 GEN_INT (probability
),
8600 if (second
!= NULL_RTX
)
8602 i
= emit_jump_insn (gen_rtx_SET
8604 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
8606 if (second_probability
>= 0)
8608 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
8609 GEN_INT (second_probability
),
8612 if (label
!= NULL_RTX
)
8617 ix86_expand_setcc (enum rtx_code code
, rtx dest
)
8619 rtx ret
, tmp
, tmpreg
, equiv
;
8620 rtx second_test
, bypass_test
;
8622 if (GET_MODE (ix86_compare_op0
) == DImode
8624 return 0; /* FAIL */
8626 if (GET_MODE (dest
) != QImode
)
8629 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
8630 PUT_MODE (ret
, QImode
);
8635 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
8636 if (bypass_test
|| second_test
)
8638 rtx test
= second_test
;
8640 rtx tmp2
= gen_reg_rtx (QImode
);
8647 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
8649 PUT_MODE (test
, QImode
);
8650 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
8653 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
8655 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
8658 /* Attach a REG_EQUAL note describing the comparison result. */
8659 equiv
= simplify_gen_relational (code
, QImode
,
8660 GET_MODE (ix86_compare_op0
),
8661 ix86_compare_op0
, ix86_compare_op1
);
8662 set_unique_reg_note (get_last_insn (), REG_EQUAL
, equiv
);
8664 return 1; /* DONE */
8667 /* Expand comparison setting or clearing carry flag. Return true when
8668 successful and set pop for the operation. */
8670 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
8672 enum machine_mode mode
=
8673 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
8675 /* Do not handle DImode compares that go trought special path. Also we can't
8676 deal with FP compares yet. This is possible to add. */
8677 if ((mode
== DImode
&& !TARGET_64BIT
))
8679 if (FLOAT_MODE_P (mode
))
8681 rtx second_test
= NULL
, bypass_test
= NULL
;
8682 rtx compare_op
, compare_seq
;
8684 /* Shortcut: following common codes never translate into carry flag compares. */
8685 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
8686 || code
== ORDERED
|| code
== UNORDERED
)
8689 /* These comparisons require zero flag; swap operands so they won't. */
8690 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
8696 code
= swap_condition (code
);
8699 /* Try to expand the comparison and verify that we end up with carry flag
8700 based comparison. This is fails to be true only when we decide to expand
8701 comparison using arithmetic that is not too common scenario. */
8703 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
8704 &second_test
, &bypass_test
);
8705 compare_seq
= get_insns ();
8708 if (second_test
|| bypass_test
)
8710 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
8711 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
8712 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
8714 code
= GET_CODE (compare_op
);
8715 if (code
!= LTU
&& code
!= GEU
)
8717 emit_insn (compare_seq
);
8721 if (!INTEGRAL_MODE_P (mode
))
8729 /* Convert a==0 into (unsigned)a<1. */
8732 if (op1
!= const0_rtx
)
8735 code
= (code
== EQ
? LTU
: GEU
);
8738 /* Convert a>b into b<a or a>=b-1. */
8741 if (GET_CODE (op1
) == CONST_INT
)
8743 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
8744 /* Bail out on overflow. We still can swap operands but that
8745 would force loading of the constant into register. */
8746 if (op1
== const0_rtx
8747 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
8749 code
= (code
== GTU
? GEU
: LTU
);
8756 code
= (code
== GTU
? LTU
: GEU
);
8760 /* Convert a>=0 into (unsigned)a<0x80000000. */
8763 if (mode
== DImode
|| op1
!= const0_rtx
)
8765 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
8766 code
= (code
== LT
? GEU
: LTU
);
8770 if (mode
== DImode
|| op1
!= constm1_rtx
)
8772 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
8773 code
= (code
== LE
? GEU
: LTU
);
8779 /* Swapping operands may cause constant to appear as first operand. */
8780 if (!nonimmediate_operand (op0
, VOIDmode
))
8784 op0
= force_reg (mode
, op0
);
8786 ix86_compare_op0
= op0
;
8787 ix86_compare_op1
= op1
;
8788 *pop
= ix86_expand_compare (code
, NULL
, NULL
);
8789 if (GET_CODE (*pop
) != LTU
&& GET_CODE (*pop
) != GEU
)
8795 ix86_expand_int_movcc (rtx operands
[])
8797 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
8798 rtx compare_seq
, compare_op
;
8799 rtx second_test
, bypass_test
;
8800 enum machine_mode mode
= GET_MODE (operands
[0]);
8801 bool sign_bit_compare_p
= false;;
8804 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
8805 compare_seq
= get_insns ();
8808 compare_code
= GET_CODE (compare_op
);
8810 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
8811 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
8812 sign_bit_compare_p
= true;
8814 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8815 HImode insns, we'd be swallowed in word prefix ops. */
8817 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
8818 && (mode
!= DImode
|| TARGET_64BIT
)
8819 && GET_CODE (operands
[2]) == CONST_INT
8820 && GET_CODE (operands
[3]) == CONST_INT
)
8822 rtx out
= operands
[0];
8823 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
8824 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
8828 /* Sign bit compares are better done using shifts than we do by using
8830 if (sign_bit_compare_p
8831 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
8832 ix86_compare_op1
, &compare_op
))
8834 /* Detect overlap between destination and compare sources. */
8837 if (!sign_bit_compare_p
)
8841 compare_code
= GET_CODE (compare_op
);
8843 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
8844 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
8847 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
8850 /* To simplify rest of code, restrict to the GEU case. */
8851 if (compare_code
== LTU
)
8853 HOST_WIDE_INT tmp
= ct
;
8856 compare_code
= reverse_condition (compare_code
);
8857 code
= reverse_condition (code
);
8862 PUT_CODE (compare_op
,
8863 reverse_condition_maybe_unordered
8864 (GET_CODE (compare_op
)));
8866 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
8870 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
8871 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
8872 tmp
= gen_reg_rtx (mode
);
8875 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
8877 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
8881 if (code
== GT
|| code
== GE
)
8882 code
= reverse_condition (code
);
8885 HOST_WIDE_INT tmp
= ct
;
8890 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
8891 ix86_compare_op1
, VOIDmode
, 0, -1);
8904 tmp
= expand_simple_binop (mode
, PLUS
,
8906 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
8917 tmp
= expand_simple_binop (mode
, IOR
,
8919 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
8921 else if (diff
== -1 && ct
)
8931 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
8933 tmp
= expand_simple_binop (mode
, PLUS
,
8934 copy_rtx (tmp
), GEN_INT (cf
),
8935 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
8943 * andl cf - ct, dest
8953 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
8956 tmp
= expand_simple_binop (mode
, AND
,
8958 gen_int_mode (cf
- ct
, mode
),
8959 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
8961 tmp
= expand_simple_binop (mode
, PLUS
,
8962 copy_rtx (tmp
), GEN_INT (ct
),
8963 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
8966 if (!rtx_equal_p (tmp
, out
))
8967 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
8969 return 1; /* DONE */
8975 tmp
= ct
, ct
= cf
, cf
= tmp
;
8977 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
8979 /* We may be reversing unordered compare to normal compare, that
8980 is not valid in general (we may convert non-trapping condition
8981 to trapping one), however on i386 we currently emit all
8982 comparisons unordered. */
8983 compare_code
= reverse_condition_maybe_unordered (compare_code
);
8984 code
= reverse_condition_maybe_unordered (code
);
8988 compare_code
= reverse_condition (compare_code
);
8989 code
= reverse_condition (code
);
8993 compare_code
= UNKNOWN
;
8994 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
8995 && GET_CODE (ix86_compare_op1
) == CONST_INT
)
8997 if (ix86_compare_op1
== const0_rtx
8998 && (code
== LT
|| code
== GE
))
8999 compare_code
= code
;
9000 else if (ix86_compare_op1
== constm1_rtx
)
9004 else if (code
== GT
)
9009 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9010 if (compare_code
!= UNKNOWN
9011 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
9012 && (cf
== -1 || ct
== -1))
9014 /* If lea code below could be used, only optimize
9015 if it results in a 2 insn sequence. */
9017 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
9018 || diff
== 3 || diff
== 5 || diff
== 9)
9019 || (compare_code
== LT
&& ct
== -1)
9020 || (compare_code
== GE
&& cf
== -1))
9023 * notl op1 (if necessary)
9031 code
= reverse_condition (code
);
9034 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9035 ix86_compare_op1
, VOIDmode
, 0, -1);
9037 out
= expand_simple_binop (mode
, IOR
,
9039 out
, 1, OPTAB_DIRECT
);
9040 if (out
!= operands
[0])
9041 emit_move_insn (operands
[0], out
);
9043 return 1; /* DONE */
9048 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
9049 || diff
== 3 || diff
== 5 || diff
== 9)
9050 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
9052 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
9058 * lea cf(dest*(ct-cf)),dest
9062 * This also catches the degenerate setcc-only case.
9068 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9069 ix86_compare_op1
, VOIDmode
, 0, 1);
9072 /* On x86_64 the lea instruction operates on Pmode, so we need
9073 to get arithmetics done in proper mode to match. */
9075 tmp
= copy_rtx (out
);
9079 out1
= copy_rtx (out
);
9080 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
9084 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
9090 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
9093 if (!rtx_equal_p (tmp
, out
))
9096 out
= force_operand (tmp
, copy_rtx (out
));
9098 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
9100 if (!rtx_equal_p (out
, operands
[0]))
9101 emit_move_insn (operands
[0], copy_rtx (out
));
9103 return 1; /* DONE */
9107 * General case: Jumpful:
9108 * xorl dest,dest cmpl op1, op2
9109 * cmpl op1, op2 movl ct, dest
9111 * decl dest movl cf, dest
9112 * andl (cf-ct),dest 1:
9117 * This is reasonably steep, but branch mispredict costs are
9118 * high on modern cpus, so consider failing only if optimizing
9122 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
9123 && BRANCH_COST
>= 2)
9129 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
9130 /* We may be reversing unordered compare to normal compare,
9131 that is not valid in general (we may convert non-trapping
9132 condition to trapping one), however on i386 we currently
9133 emit all comparisons unordered. */
9134 code
= reverse_condition_maybe_unordered (code
);
9137 code
= reverse_condition (code
);
9138 if (compare_code
!= UNKNOWN
)
9139 compare_code
= reverse_condition (compare_code
);
9143 if (compare_code
!= UNKNOWN
)
9145 /* notl op1 (if needed)
9150 For x < 0 (resp. x <= -1) there will be no notl,
9151 so if possible swap the constants to get rid of the
9153 True/false will be -1/0 while code below (store flag
9154 followed by decrement) is 0/-1, so the constants need
9155 to be exchanged once more. */
9157 if (compare_code
== GE
|| !cf
)
9159 code
= reverse_condition (code
);
9164 HOST_WIDE_INT tmp
= cf
;
9169 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9170 ix86_compare_op1
, VOIDmode
, 0, -1);
9174 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9175 ix86_compare_op1
, VOIDmode
, 0, 1);
9177 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
9178 copy_rtx (out
), 1, OPTAB_DIRECT
);
9181 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
9182 gen_int_mode (cf
- ct
, mode
),
9183 copy_rtx (out
), 1, OPTAB_DIRECT
);
9185 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
9186 copy_rtx (out
), 1, OPTAB_DIRECT
);
9187 if (!rtx_equal_p (out
, operands
[0]))
9188 emit_move_insn (operands
[0], copy_rtx (out
));
9190 return 1; /* DONE */
9194 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
9196 /* Try a few things more with specific constants and a variable. */
9199 rtx var
, orig_out
, out
, tmp
;
9201 if (BRANCH_COST
<= 2)
9202 return 0; /* FAIL */
9204 /* If one of the two operands is an interesting constant, load a
9205 constant with the above and mask it in with a logical operation. */
9207 if (GET_CODE (operands
[2]) == CONST_INT
)
9210 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
9211 operands
[3] = constm1_rtx
, op
= and_optab
;
9212 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
9213 operands
[3] = const0_rtx
, op
= ior_optab
;
9215 return 0; /* FAIL */
9217 else if (GET_CODE (operands
[3]) == CONST_INT
)
9220 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
9221 operands
[2] = constm1_rtx
, op
= and_optab
;
9222 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
9223 operands
[2] = const0_rtx
, op
= ior_optab
;
9225 return 0; /* FAIL */
9228 return 0; /* FAIL */
9230 orig_out
= operands
[0];
9231 tmp
= gen_reg_rtx (mode
);
9234 /* Recurse to get the constant loaded. */
9235 if (ix86_expand_int_movcc (operands
) == 0)
9236 return 0; /* FAIL */
9238 /* Mask in the interesting variable. */
9239 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
9241 if (!rtx_equal_p (out
, orig_out
))
9242 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
9244 return 1; /* DONE */
9248 * For comparison with above,
9258 if (! nonimmediate_operand (operands
[2], mode
))
9259 operands
[2] = force_reg (mode
, operands
[2]);
9260 if (! nonimmediate_operand (operands
[3], mode
))
9261 operands
[3] = force_reg (mode
, operands
[3]);
9263 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
9265 rtx tmp
= gen_reg_rtx (mode
);
9266 emit_move_insn (tmp
, operands
[3]);
9269 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
9271 rtx tmp
= gen_reg_rtx (mode
);
9272 emit_move_insn (tmp
, operands
[2]);
9276 if (! register_operand (operands
[2], VOIDmode
)
9278 || ! register_operand (operands
[3], VOIDmode
)))
9279 operands
[2] = force_reg (mode
, operands
[2]);
9282 && ! register_operand (operands
[3], VOIDmode
))
9283 operands
[3] = force_reg (mode
, operands
[3]);
9285 emit_insn (compare_seq
);
9286 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9287 gen_rtx_IF_THEN_ELSE (mode
,
9288 compare_op
, operands
[2],
9291 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
9292 gen_rtx_IF_THEN_ELSE (mode
,
9294 copy_rtx (operands
[3]),
9295 copy_rtx (operands
[0]))));
9297 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
9298 gen_rtx_IF_THEN_ELSE (mode
,
9300 copy_rtx (operands
[2]),
9301 copy_rtx (operands
[0]))));
9303 return 1; /* DONE */
9307 ix86_expand_fp_movcc (rtx operands
[])
9311 rtx compare_op
, second_test
, bypass_test
;
9313 /* For SF/DFmode conditional moves based on comparisons
9314 in same mode, we may want to use SSE min/max instructions. */
9315 if (((TARGET_SSE_MATH
&& GET_MODE (operands
[0]) == SFmode
)
9316 || (TARGET_SSE2
&& TARGET_SSE_MATH
&& GET_MODE (operands
[0]) == DFmode
))
9317 && GET_MODE (ix86_compare_op0
) == GET_MODE (operands
[0])
9318 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
9320 || (GET_CODE (operands
[1]) != LTGT
&& GET_CODE (operands
[1]) != UNEQ
))
9321 /* We may be called from the post-reload splitter. */
9322 && (!REG_P (operands
[0])
9323 || SSE_REG_P (operands
[0])
9324 || REGNO (operands
[0]) >= FIRST_PSEUDO_REGISTER
))
9326 rtx op0
= ix86_compare_op0
, op1
= ix86_compare_op1
;
9327 code
= GET_CODE (operands
[1]);
9329 /* See if we have (cross) match between comparison operands and
9330 conditional move operands. */
9331 if (rtx_equal_p (operands
[2], op1
))
9336 code
= reverse_condition_maybe_unordered (code
);
9338 if (rtx_equal_p (operands
[2], op0
) && rtx_equal_p (operands
[3], op1
))
9340 /* Check for min operation. */
9341 if (code
== LT
|| code
== UNLE
)
9349 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
9350 if (memory_operand (op0
, VOIDmode
))
9351 op0
= force_reg (GET_MODE (operands
[0]), op0
);
9352 if (GET_MODE (operands
[0]) == SFmode
)
9353 emit_insn (gen_minsf3 (operands
[0], op0
, op1
));
9355 emit_insn (gen_mindf3 (operands
[0], op0
, op1
));
9358 /* Check for max operation. */
9359 if (code
== GT
|| code
== UNGE
)
9367 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
9368 if (memory_operand (op0
, VOIDmode
))
9369 op0
= force_reg (GET_MODE (operands
[0]), op0
);
9370 if (GET_MODE (operands
[0]) == SFmode
)
9371 emit_insn (gen_maxsf3 (operands
[0], op0
, op1
));
9373 emit_insn (gen_maxdf3 (operands
[0], op0
, op1
));
9377 /* Manage condition to be sse_comparison_operator. In case we are
9378 in non-ieee mode, try to canonicalize the destination operand
9379 to be first in the comparison - this helps reload to avoid extra
9381 if (!sse_comparison_operator (operands
[1], VOIDmode
)
9382 || (rtx_equal_p (operands
[0], ix86_compare_op1
) && !TARGET_IEEE_FP
))
9384 rtx tmp
= ix86_compare_op0
;
9385 ix86_compare_op0
= ix86_compare_op1
;
9386 ix86_compare_op1
= tmp
;
9387 operands
[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands
[1])),
9388 VOIDmode
, ix86_compare_op0
,
9391 /* Similarly try to manage result to be first operand of conditional
9392 move. We also don't support the NE comparison on SSE, so try to
9394 if ((rtx_equal_p (operands
[0], operands
[3])
9395 && (!TARGET_IEEE_FP
|| GET_CODE (operands
[1]) != EQ
))
9396 || (GET_CODE (operands
[1]) == NE
&& TARGET_IEEE_FP
))
9398 rtx tmp
= operands
[2];
9399 operands
[2] = operands
[3];
9401 operands
[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9402 (GET_CODE (operands
[1])),
9403 VOIDmode
, ix86_compare_op0
,
9406 if (GET_MODE (operands
[0]) == SFmode
)
9407 emit_insn (gen_sse_movsfcc (operands
[0], operands
[1],
9408 operands
[2], operands
[3],
9409 ix86_compare_op0
, ix86_compare_op1
));
9411 emit_insn (gen_sse_movdfcc (operands
[0], operands
[1],
9412 operands
[2], operands
[3],
9413 ix86_compare_op0
, ix86_compare_op1
));
9417 /* The floating point conditional move instructions don't directly
9418 support conditions resulting from a signed integer comparison. */
9420 code
= GET_CODE (operands
[1]);
9421 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9423 /* The floating point conditional move instructions don't directly
9424 support signed integer comparisons. */
9426 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
9428 if (second_test
!= NULL
|| bypass_test
!= NULL
)
9430 tmp
= gen_reg_rtx (QImode
);
9431 ix86_expand_setcc (code
, tmp
);
9433 ix86_compare_op0
= tmp
;
9434 ix86_compare_op1
= const0_rtx
;
9435 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9437 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
9439 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
9440 emit_move_insn (tmp
, operands
[3]);
9443 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
9445 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
9446 emit_move_insn (tmp
, operands
[2]);
9450 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9451 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
9456 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9457 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
9462 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9463 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
9471 /* Expand conditional increment or decrement using adb/sbb instructions.
9472 The default case using setcc followed by the conditional move can be
9473 done by generic code. */
9475 ix86_expand_int_addcc (rtx operands
[])
9477 enum rtx_code code
= GET_CODE (operands
[1]);
9479 rtx val
= const0_rtx
;
9481 enum machine_mode mode
= GET_MODE (operands
[0]);
9483 if (operands
[3] != const1_rtx
9484 && operands
[3] != constm1_rtx
)
9486 if (!ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
9487 ix86_compare_op1
, &compare_op
))
9489 code
= GET_CODE (compare_op
);
9491 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
9492 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
9495 code
= ix86_fp_compare_code_to_integer (code
);
9502 PUT_CODE (compare_op
,
9503 reverse_condition_maybe_unordered
9504 (GET_CODE (compare_op
)));
9506 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
9508 PUT_MODE (compare_op
, mode
);
9510 /* Construct either adc or sbb insn. */
9511 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
9513 switch (GET_MODE (operands
[0]))
9516 emit_insn (gen_subqi3_carry (operands
[0], operands
[2], val
, compare_op
));
9519 emit_insn (gen_subhi3_carry (operands
[0], operands
[2], val
, compare_op
));
9522 emit_insn (gen_subsi3_carry (operands
[0], operands
[2], val
, compare_op
));
9525 emit_insn (gen_subdi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
9533 switch (GET_MODE (operands
[0]))
9536 emit_insn (gen_addqi3_carry (operands
[0], operands
[2], val
, compare_op
));
9539 emit_insn (gen_addhi3_carry (operands
[0], operands
[2], val
, compare_op
));
9542 emit_insn (gen_addsi3_carry (operands
[0], operands
[2], val
, compare_op
));
9545 emit_insn (gen_adddi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
9551 return 1; /* DONE */
9555 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9556 works for floating pointer parameters and nonoffsetable memories.
9557 For pushes, it returns just stack offsets; the values will be saved
9558 in the right order. Maximally three parts are generated. */
9561 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
9566 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
9568 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
9570 if (GET_CODE (operand
) == REG
&& MMX_REGNO_P (REGNO (operand
)))
9572 if (size
< 2 || size
> 3)
9575 /* Optimize constant pool reference to immediates. This is used by fp
9576 moves, that force all constants to memory to allow combining. */
9577 if (GET_CODE (operand
) == MEM
&& MEM_READONLY_P (operand
))
9579 rtx tmp
= maybe_get_pool_constant (operand
);
9584 if (GET_CODE (operand
) == MEM
&& !offsettable_memref_p (operand
))
9586 /* The only non-offsetable memories we handle are pushes. */
9587 if (! push_operand (operand
, VOIDmode
))
9590 operand
= copy_rtx (operand
);
9591 PUT_MODE (operand
, Pmode
);
9592 parts
[0] = parts
[1] = parts
[2] = operand
;
9594 else if (!TARGET_64BIT
)
9597 split_di (&operand
, 1, &parts
[0], &parts
[1]);
9600 if (REG_P (operand
))
9602 if (!reload_completed
)
9604 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
9605 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
9607 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
9609 else if (offsettable_memref_p (operand
))
9611 operand
= adjust_address (operand
, SImode
, 0);
9613 parts
[1] = adjust_address (operand
, SImode
, 4);
9615 parts
[2] = adjust_address (operand
, SImode
, 8);
9617 else if (GET_CODE (operand
) == CONST_DOUBLE
)
9622 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
9626 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
9627 parts
[2] = gen_int_mode (l
[2], SImode
);
9630 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
9635 parts
[1] = gen_int_mode (l
[1], SImode
);
9636 parts
[0] = gen_int_mode (l
[0], SImode
);
9645 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
9646 if (mode
== XFmode
|| mode
== TFmode
)
9648 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
9649 if (REG_P (operand
))
9651 if (!reload_completed
)
9653 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
9654 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
9656 else if (offsettable_memref_p (operand
))
9658 operand
= adjust_address (operand
, DImode
, 0);
9660 parts
[1] = adjust_address (operand
, upper_mode
, 8);
9662 else if (GET_CODE (operand
) == CONST_DOUBLE
)
9667 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
9668 real_to_target (l
, &r
, mode
);
9669 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9670 if (HOST_BITS_PER_WIDE_INT
>= 64)
9673 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
9674 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
9677 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
9678 if (upper_mode
== SImode
)
9679 parts
[1] = gen_int_mode (l
[2], SImode
);
9680 else if (HOST_BITS_PER_WIDE_INT
>= 64)
9683 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
9684 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
9687 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
9697 /* Emit insns to perform a move or push of DI, DF, and XF values.
9698 Return false when normal moves are needed; true when all required
9699 insns have been emitted. Operands 2-4 contain the input values
9700 int the correct order; operands 5-7 contain the output values. */
9703 ix86_split_long_move (rtx operands
[])
9709 enum machine_mode mode
= GET_MODE (operands
[0]);
9711 /* The DFmode expanders may ask us to move double.
9712 For 64bit target this is single move. By hiding the fact
9713 here we simplify i386.md splitters. */
9714 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
9716 /* Optimize constant pool reference to immediates. This is used by
9717 fp moves, that force all constants to memory to allow combining. */
9719 if (GET_CODE (operands
[1]) == MEM
9720 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
9721 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
9722 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
9723 if (push_operand (operands
[0], VOIDmode
))
9725 operands
[0] = copy_rtx (operands
[0]);
9726 PUT_MODE (operands
[0], Pmode
);
9729 operands
[0] = gen_lowpart (DImode
, operands
[0]);
9730 operands
[1] = gen_lowpart (DImode
, operands
[1]);
9731 emit_move_insn (operands
[0], operands
[1]);
9735 /* The only non-offsettable memory we handle is push. */
9736 if (push_operand (operands
[0], VOIDmode
))
9738 else if (GET_CODE (operands
[0]) == MEM
9739 && ! offsettable_memref_p (operands
[0]))
9742 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
9743 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
9745 /* When emitting push, take care for source operands on the stack. */
9746 if (push
&& GET_CODE (operands
[1]) == MEM
9747 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
9750 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
9751 XEXP (part
[1][2], 0));
9752 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
9753 XEXP (part
[1][1], 0));
9756 /* We need to do copy in the right order in case an address register
9757 of the source overlaps the destination. */
9758 if (REG_P (part
[0][0]) && GET_CODE (part
[1][0]) == MEM
)
9760 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
9762 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
9765 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
9768 /* Collision in the middle part can be handled by reordering. */
9769 if (collisions
== 1 && nparts
== 3
9770 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
9773 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
9774 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
9777 /* If there are more collisions, we can't handle it by reordering.
9778 Do an lea to the last part and use only one colliding move. */
9779 else if (collisions
> 1)
9785 base
= part
[0][nparts
- 1];
9787 /* Handle the case when the last part isn't valid for lea.
9788 Happens in 64-bit mode storing the 12-byte XFmode. */
9789 if (GET_MODE (base
) != Pmode
)
9790 base
= gen_rtx_REG (Pmode
, REGNO (base
));
9792 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
9793 part
[1][0] = replace_equiv_address (part
[1][0], base
);
9794 part
[1][1] = replace_equiv_address (part
[1][1],
9795 plus_constant (base
, UNITS_PER_WORD
));
9797 part
[1][2] = replace_equiv_address (part
[1][2],
9798 plus_constant (base
, 8));
9808 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
9809 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, GEN_INT (-4)));
9810 emit_move_insn (part
[0][2], part
[1][2]);
9815 /* In 64bit mode we don't have 32bit push available. In case this is
9816 register, it is OK - we will just use larger counterpart. We also
9817 retype memory - these comes from attempt to avoid REX prefix on
9818 moving of second half of TFmode value. */
9819 if (GET_MODE (part
[1][1]) == SImode
)
9821 if (GET_CODE (part
[1][1]) == MEM
)
9822 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
9823 else if (REG_P (part
[1][1]))
9824 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
9827 if (GET_MODE (part
[1][0]) == SImode
)
9828 part
[1][0] = part
[1][1];
9831 emit_move_insn (part
[0][1], part
[1][1]);
9832 emit_move_insn (part
[0][0], part
[1][0]);
9836 /* Choose correct order to not overwrite the source before it is copied. */
9837 if ((REG_P (part
[0][0])
9838 && REG_P (part
[1][1])
9839 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
9841 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
9843 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
9847 operands
[2] = part
[0][2];
9848 operands
[3] = part
[0][1];
9849 operands
[4] = part
[0][0];
9850 operands
[5] = part
[1][2];
9851 operands
[6] = part
[1][1];
9852 operands
[7] = part
[1][0];
9856 operands
[2] = part
[0][1];
9857 operands
[3] = part
[0][0];
9858 operands
[5] = part
[1][1];
9859 operands
[6] = part
[1][0];
9866 operands
[2] = part
[0][0];
9867 operands
[3] = part
[0][1];
9868 operands
[4] = part
[0][2];
9869 operands
[5] = part
[1][0];
9870 operands
[6] = part
[1][1];
9871 operands
[7] = part
[1][2];
9875 operands
[2] = part
[0][0];
9876 operands
[3] = part
[0][1];
9877 operands
[5] = part
[1][0];
9878 operands
[6] = part
[1][1];
9881 emit_move_insn (operands
[2], operands
[5]);
9882 emit_move_insn (operands
[3], operands
[6]);
9884 emit_move_insn (operands
[4], operands
[7]);
9890 ix86_split_ashldi (rtx
*operands
, rtx scratch
)
9892 rtx low
[2], high
[2];
9895 if (GET_CODE (operands
[2]) == CONST_INT
)
9897 split_di (operands
, 2, low
, high
);
9898 count
= INTVAL (operands
[2]) & 63;
9902 emit_move_insn (high
[0], low
[1]);
9903 emit_move_insn (low
[0], const0_rtx
);
9906 emit_insn (gen_ashlsi3 (high
[0], high
[0], GEN_INT (count
- 32)));
9910 if (!rtx_equal_p (operands
[0], operands
[1]))
9911 emit_move_insn (operands
[0], operands
[1]);
9912 emit_insn (gen_x86_shld_1 (high
[0], low
[0], GEN_INT (count
)));
9913 emit_insn (gen_ashlsi3 (low
[0], low
[0], GEN_INT (count
)));
9918 if (!rtx_equal_p (operands
[0], operands
[1]))
9919 emit_move_insn (operands
[0], operands
[1]);
9921 split_di (operands
, 1, low
, high
);
9923 emit_insn (gen_x86_shld_1 (high
[0], low
[0], operands
[2]));
9924 emit_insn (gen_ashlsi3 (low
[0], low
[0], operands
[2]));
9926 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
9928 if (! no_new_pseudos
)
9929 scratch
= force_reg (SImode
, const0_rtx
);
9931 emit_move_insn (scratch
, const0_rtx
);
9933 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2],
9937 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
9942 ix86_split_ashrdi (rtx
*operands
, rtx scratch
)
9944 rtx low
[2], high
[2];
9947 if (GET_CODE (operands
[2]) == CONST_INT
)
9949 split_di (operands
, 2, low
, high
);
9950 count
= INTVAL (operands
[2]) & 63;
9954 emit_move_insn (high
[0], high
[1]);
9955 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (31)));
9956 emit_move_insn (low
[0], high
[0]);
9959 else if (count
>= 32)
9961 emit_move_insn (low
[0], high
[1]);
9963 if (! reload_completed
)
9964 emit_insn (gen_ashrsi3 (high
[0], low
[0], GEN_INT (31)));
9967 emit_move_insn (high
[0], low
[0]);
9968 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (31)));
9972 emit_insn (gen_ashrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
9976 if (!rtx_equal_p (operands
[0], operands
[1]))
9977 emit_move_insn (operands
[0], operands
[1]);
9978 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
9979 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (count
)));
9984 if (!rtx_equal_p (operands
[0], operands
[1]))
9985 emit_move_insn (operands
[0], operands
[1]);
9987 split_di (operands
, 1, low
, high
);
9989 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
9990 emit_insn (gen_ashrsi3 (high
[0], high
[0], operands
[2]));
9992 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
9994 if (! no_new_pseudos
)
9995 scratch
= gen_reg_rtx (SImode
);
9996 emit_move_insn (scratch
, high
[0]);
9997 emit_insn (gen_ashrsi3 (scratch
, scratch
, GEN_INT (31)));
9998 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
10002 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
10007 ix86_split_lshrdi (rtx
*operands
, rtx scratch
)
10009 rtx low
[2], high
[2];
10012 if (GET_CODE (operands
[2]) == CONST_INT
)
10014 split_di (operands
, 2, low
, high
);
10015 count
= INTVAL (operands
[2]) & 63;
10019 emit_move_insn (low
[0], high
[1]);
10020 emit_move_insn (high
[0], const0_rtx
);
10023 emit_insn (gen_lshrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
10027 if (!rtx_equal_p (operands
[0], operands
[1]))
10028 emit_move_insn (operands
[0], operands
[1]);
10029 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
10030 emit_insn (gen_lshrsi3 (high
[0], high
[0], GEN_INT (count
)));
10035 if (!rtx_equal_p (operands
[0], operands
[1]))
10036 emit_move_insn (operands
[0], operands
[1]);
10038 split_di (operands
, 1, low
, high
);
10040 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
10041 emit_insn (gen_lshrsi3 (high
[0], high
[0], operands
[2]));
10043 /* Heh. By reversing the arguments, we can reuse this pattern. */
10044 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
10046 if (! no_new_pseudos
)
10047 scratch
= force_reg (SImode
, const0_rtx
);
10049 emit_move_insn (scratch
, const0_rtx
);
10051 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
10055 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
10059 /* Helper function for the string operations below. Dest VARIABLE whether
10060 it is aligned to VALUE bytes. If true, jump to the label. */
10062 ix86_expand_aligntest (rtx variable
, int value
)
10064 rtx label
= gen_label_rtx ();
10065 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
10066 if (GET_MODE (variable
) == DImode
)
10067 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
10069 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
10070 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
10075 /* Adjust COUNTER by the VALUE. */
10077 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
10079 if (GET_MODE (countreg
) == DImode
)
10080 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
10082 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
10085 /* Zero extend possibly SImode EXP to Pmode register. */
10087 ix86_zero_extend_to_Pmode (rtx exp
)
10090 if (GET_MODE (exp
) == VOIDmode
)
10091 return force_reg (Pmode
, exp
);
10092 if (GET_MODE (exp
) == Pmode
)
10093 return copy_to_mode_reg (Pmode
, exp
);
10094 r
= gen_reg_rtx (Pmode
);
10095 emit_insn (gen_zero_extendsidi2 (r
, exp
));
10099 /* Expand string move (memcpy) operation. Use i386 string operations when
10100 profitable. expand_clrmem contains similar code. */
10102 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
)
10104 rtx srcreg
, destreg
, countreg
, srcexp
, destexp
;
10105 enum machine_mode counter_mode
;
10106 HOST_WIDE_INT align
= 0;
10107 unsigned HOST_WIDE_INT count
= 0;
10109 if (GET_CODE (align_exp
) == CONST_INT
)
10110 align
= INTVAL (align_exp
);
10112 /* Can't use any of this if the user has appropriated esi or edi. */
10113 if (global_regs
[4] || global_regs
[5])
10116 /* This simple hack avoids all inlining code and simplifies code below. */
10117 if (!TARGET_ALIGN_STRINGOPS
)
10120 if (GET_CODE (count_exp
) == CONST_INT
)
10122 count
= INTVAL (count_exp
);
10123 if (!TARGET_INLINE_ALL_STRINGOPS
&& count
> 64)
10127 /* Figure out proper mode for counter. For 32bits it is always SImode,
10128 for 64bits use SImode when possible, otherwise DImode.
10129 Set count to number of bytes copied when known at compile time. */
10131 || GET_MODE (count_exp
) == SImode
10132 || x86_64_zext_immediate_operand (count_exp
, VOIDmode
))
10133 counter_mode
= SImode
;
10135 counter_mode
= DImode
;
10137 if (counter_mode
!= SImode
&& counter_mode
!= DImode
)
10140 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
10141 if (destreg
!= XEXP (dst
, 0))
10142 dst
= replace_equiv_address_nv (dst
, destreg
);
10143 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
10144 if (srcreg
!= XEXP (src
, 0))
10145 src
= replace_equiv_address_nv (src
, srcreg
);
10147 /* When optimizing for size emit simple rep ; movsb instruction for
10148 counts not divisible by 4. */
10150 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
10152 emit_insn (gen_cld ());
10153 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
10154 destexp
= gen_rtx_PLUS (Pmode
, destreg
, countreg
);
10155 srcexp
= gen_rtx_PLUS (Pmode
, srcreg
, countreg
);
10156 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
, countreg
,
10160 /* For constant aligned (or small unaligned) copies use rep movsl
10161 followed by code copying the rest. For PentiumPro ensure 8 byte
10162 alignment to allow rep movsl acceleration. */
10164 else if (count
!= 0
10166 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
10167 || optimize_size
|| count
< (unsigned int) 64))
10169 unsigned HOST_WIDE_INT offset
= 0;
10170 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
10171 rtx srcmem
, dstmem
;
10173 emit_insn (gen_cld ());
10174 if (count
& ~(size
- 1))
10176 countreg
= copy_to_mode_reg (counter_mode
,
10177 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
10178 & (TARGET_64BIT
? -1 : 0x3fffffff)));
10179 countreg
= ix86_zero_extend_to_Pmode (countreg
);
10181 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
10182 GEN_INT (size
== 4 ? 2 : 3));
10183 srcexp
= gen_rtx_PLUS (Pmode
, destexp
, srcreg
);
10184 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
10186 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
,
10187 countreg
, destexp
, srcexp
));
10188 offset
= count
& ~(size
- 1);
10190 if (size
== 8 && (count
& 0x04))
10192 srcmem
= adjust_automodify_address_nv (src
, SImode
, srcreg
,
10194 dstmem
= adjust_automodify_address_nv (dst
, SImode
, destreg
,
10196 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10201 srcmem
= adjust_automodify_address_nv (src
, HImode
, srcreg
,
10203 dstmem
= adjust_automodify_address_nv (dst
, HImode
, destreg
,
10205 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10210 srcmem
= adjust_automodify_address_nv (src
, QImode
, srcreg
,
10212 dstmem
= adjust_automodify_address_nv (dst
, QImode
, destreg
,
10214 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10217 /* The generic code based on the glibc implementation:
10218 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10219 allowing accelerated copying there)
10220 - copy the data using rep movsl
10221 - copy the rest. */
10226 rtx srcmem
, dstmem
;
10227 int desired_alignment
= (TARGET_PENTIUMPRO
10228 && (count
== 0 || count
>= (unsigned int) 260)
10229 ? 8 : UNITS_PER_WORD
);
10230 /* Get rid of MEM_OFFSETs, they won't be accurate. */
10231 dst
= change_address (dst
, BLKmode
, destreg
);
10232 src
= change_address (src
, BLKmode
, srcreg
);
10234 /* In case we don't know anything about the alignment, default to
10235 library version, since it is usually equally fast and result in
10238 Also emit call when we know that the count is large and call overhead
10239 will not be important. */
10240 if (!TARGET_INLINE_ALL_STRINGOPS
10241 && (align
< UNITS_PER_WORD
|| !TARGET_REP_MOVL_OPTIMAL
))
10244 if (TARGET_SINGLE_STRINGOP
)
10245 emit_insn (gen_cld ());
10247 countreg2
= gen_reg_rtx (Pmode
);
10248 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
10250 /* We don't use loops to align destination and to copy parts smaller
10251 than 4 bytes, because gcc is able to optimize such code better (in
10252 the case the destination or the count really is aligned, gcc is often
10253 able to predict the branches) and also it is friendlier to the
10254 hardware branch prediction.
10256 Using loops is beneficial for generic case, because we can
10257 handle small counts using the loops. Many CPUs (such as Athlon)
10258 have large REP prefix setup costs.
10260 This is quite costly. Maybe we can revisit this decision later or
10261 add some customizability to this code. */
10263 if (count
== 0 && align
< desired_alignment
)
10265 label
= gen_label_rtx ();
10266 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
10267 LEU
, 0, counter_mode
, 1, label
);
10271 rtx label
= ix86_expand_aligntest (destreg
, 1);
10272 srcmem
= change_address (src
, QImode
, srcreg
);
10273 dstmem
= change_address (dst
, QImode
, destreg
);
10274 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10275 ix86_adjust_counter (countreg
, 1);
10276 emit_label (label
);
10277 LABEL_NUSES (label
) = 1;
10281 rtx label
= ix86_expand_aligntest (destreg
, 2);
10282 srcmem
= change_address (src
, HImode
, srcreg
);
10283 dstmem
= change_address (dst
, HImode
, destreg
);
10284 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10285 ix86_adjust_counter (countreg
, 2);
10286 emit_label (label
);
10287 LABEL_NUSES (label
) = 1;
10289 if (align
<= 4 && desired_alignment
> 4)
10291 rtx label
= ix86_expand_aligntest (destreg
, 4);
10292 srcmem
= change_address (src
, SImode
, srcreg
);
10293 dstmem
= change_address (dst
, SImode
, destreg
);
10294 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10295 ix86_adjust_counter (countreg
, 4);
10296 emit_label (label
);
10297 LABEL_NUSES (label
) = 1;
10300 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
10302 emit_label (label
);
10303 LABEL_NUSES (label
) = 1;
10306 if (!TARGET_SINGLE_STRINGOP
)
10307 emit_insn (gen_cld ());
10310 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
10312 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, GEN_INT (3));
10316 emit_insn (gen_lshrsi3 (countreg2
, countreg
, const2_rtx
));
10317 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, const2_rtx
);
10319 srcexp
= gen_rtx_PLUS (Pmode
, destexp
, srcreg
);
10320 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
10321 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
,
10322 countreg2
, destexp
, srcexp
));
10326 emit_label (label
);
10327 LABEL_NUSES (label
) = 1;
10329 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
10331 srcmem
= change_address (src
, SImode
, srcreg
);
10332 dstmem
= change_address (dst
, SImode
, destreg
);
10333 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10335 if ((align
<= 4 || count
== 0) && TARGET_64BIT
)
10337 rtx label
= ix86_expand_aligntest (countreg
, 4);
10338 srcmem
= change_address (src
, SImode
, srcreg
);
10339 dstmem
= change_address (dst
, SImode
, destreg
);
10340 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10341 emit_label (label
);
10342 LABEL_NUSES (label
) = 1;
10344 if (align
> 2 && count
!= 0 && (count
& 2))
10346 srcmem
= change_address (src
, HImode
, srcreg
);
10347 dstmem
= change_address (dst
, HImode
, destreg
);
10348 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10350 if (align
<= 2 || count
== 0)
10352 rtx label
= ix86_expand_aligntest (countreg
, 2);
10353 srcmem
= change_address (src
, HImode
, srcreg
);
10354 dstmem
= change_address (dst
, HImode
, destreg
);
10355 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10356 emit_label (label
);
10357 LABEL_NUSES (label
) = 1;
10359 if (align
> 1 && count
!= 0 && (count
& 1))
10361 srcmem
= change_address (src
, QImode
, srcreg
);
10362 dstmem
= change_address (dst
, QImode
, destreg
);
10363 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10365 if (align
<= 1 || count
== 0)
10367 rtx label
= ix86_expand_aligntest (countreg
, 1);
10368 srcmem
= change_address (src
, QImode
, srcreg
);
10369 dstmem
= change_address (dst
, QImode
, destreg
);
10370 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10371 emit_label (label
);
10372 LABEL_NUSES (label
) = 1;
10379 /* Expand string clear operation (bzero). Use i386 string operations when
10380 profitable. expand_movmem contains similar code. */
10382 ix86_expand_clrmem (rtx dst
, rtx count_exp
, rtx align_exp
)
10384 rtx destreg
, zeroreg
, countreg
, destexp
;
10385 enum machine_mode counter_mode
;
10386 HOST_WIDE_INT align
= 0;
10387 unsigned HOST_WIDE_INT count
= 0;
10389 if (GET_CODE (align_exp
) == CONST_INT
)
10390 align
= INTVAL (align_exp
);
10392 /* Can't use any of this if the user has appropriated esi. */
10393 if (global_regs
[4])
10396 /* This simple hack avoids all inlining code and simplifies code below. */
10397 if (!TARGET_ALIGN_STRINGOPS
)
10400 if (GET_CODE (count_exp
) == CONST_INT
)
10402 count
= INTVAL (count_exp
);
10403 if (!TARGET_INLINE_ALL_STRINGOPS
&& count
> 64)
10406 /* Figure out proper mode for counter. For 32bits it is always SImode,
10407 for 64bits use SImode when possible, otherwise DImode.
10408 Set count to number of bytes copied when known at compile time. */
10410 || GET_MODE (count_exp
) == SImode
10411 || x86_64_zext_immediate_operand (count_exp
, VOIDmode
))
10412 counter_mode
= SImode
;
10414 counter_mode
= DImode
;
10416 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
10417 if (destreg
!= XEXP (dst
, 0))
10418 dst
= replace_equiv_address_nv (dst
, destreg
);
10421 /* When optimizing for size emit simple rep ; movsb instruction for
10422 counts not divisible by 4. The movl $N, %ecx; rep; stosb
10423 sequence is 7 bytes long, so if optimizing for size and count is
10424 small enough that some stosl, stosw and stosb instructions without
10425 rep are shorter, fall back into the next if. */
10427 if ((!optimize
|| optimize_size
)
10430 && (!optimize_size
|| (count
& 0x03) + (count
>> 2) > 7))))
10432 emit_insn (gen_cld ());
10434 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
10435 zeroreg
= copy_to_mode_reg (QImode
, const0_rtx
);
10436 destexp
= gen_rtx_PLUS (Pmode
, destreg
, countreg
);
10437 emit_insn (gen_rep_stos (destreg
, countreg
, dst
, zeroreg
, destexp
));
10439 else if (count
!= 0
10441 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
10442 || optimize_size
|| count
< (unsigned int) 64))
10444 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
10445 unsigned HOST_WIDE_INT offset
= 0;
10447 emit_insn (gen_cld ());
10449 zeroreg
= copy_to_mode_reg (size
== 4 ? SImode
: DImode
, const0_rtx
);
10450 if (count
& ~(size
- 1))
10452 unsigned HOST_WIDE_INT repcount
;
10453 unsigned int max_nonrep
;
10455 repcount
= count
>> (size
== 4 ? 2 : 3);
10457 repcount
&= 0x3fffffff;
10459 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
10460 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
10461 bytes. In both cases the latter seems to be faster for small
10463 max_nonrep
= size
== 4 ? 7 : 4;
10464 if (!optimize_size
)
10467 case PROCESSOR_PENTIUM4
:
10468 case PROCESSOR_NOCONA
:
10475 if (repcount
<= max_nonrep
)
10476 while (repcount
-- > 0)
10478 rtx mem
= adjust_automodify_address_nv (dst
,
10479 GET_MODE (zeroreg
),
10481 emit_insn (gen_strset (destreg
, mem
, zeroreg
));
10486 countreg
= copy_to_mode_reg (counter_mode
, GEN_INT (repcount
));
10487 countreg
= ix86_zero_extend_to_Pmode (countreg
);
10488 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
10489 GEN_INT (size
== 4 ? 2 : 3));
10490 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
10491 emit_insn (gen_rep_stos (destreg
, countreg
, dst
, zeroreg
,
10493 offset
= count
& ~(size
- 1);
10496 if (size
== 8 && (count
& 0x04))
10498 rtx mem
= adjust_automodify_address_nv (dst
, SImode
, destreg
,
10500 emit_insn (gen_strset (destreg
, mem
,
10501 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
10506 rtx mem
= adjust_automodify_address_nv (dst
, HImode
, destreg
,
10508 emit_insn (gen_strset (destreg
, mem
,
10509 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10514 rtx mem
= adjust_automodify_address_nv (dst
, QImode
, destreg
,
10516 emit_insn (gen_strset (destreg
, mem
,
10517 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10524 /* Compute desired alignment of the string operation. */
10525 int desired_alignment
= (TARGET_PENTIUMPRO
10526 && (count
== 0 || count
>= (unsigned int) 260)
10527 ? 8 : UNITS_PER_WORD
);
10529 /* In case we don't know anything about the alignment, default to
10530 library version, since it is usually equally fast and result in
10533 Also emit call when we know that the count is large and call overhead
10534 will not be important. */
10535 if (!TARGET_INLINE_ALL_STRINGOPS
10536 && (align
< UNITS_PER_WORD
|| !TARGET_REP_MOVL_OPTIMAL
))
10539 if (TARGET_SINGLE_STRINGOP
)
10540 emit_insn (gen_cld ());
10542 countreg2
= gen_reg_rtx (Pmode
);
10543 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
10544 zeroreg
= copy_to_mode_reg (Pmode
, const0_rtx
);
10545 /* Get rid of MEM_OFFSET, it won't be accurate. */
10546 dst
= change_address (dst
, BLKmode
, destreg
);
10548 if (count
== 0 && align
< desired_alignment
)
10550 label
= gen_label_rtx ();
10551 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
10552 LEU
, 0, counter_mode
, 1, label
);
10556 rtx label
= ix86_expand_aligntest (destreg
, 1);
10557 emit_insn (gen_strset (destreg
, dst
,
10558 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10559 ix86_adjust_counter (countreg
, 1);
10560 emit_label (label
);
10561 LABEL_NUSES (label
) = 1;
10565 rtx label
= ix86_expand_aligntest (destreg
, 2);
10566 emit_insn (gen_strset (destreg
, dst
,
10567 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10568 ix86_adjust_counter (countreg
, 2);
10569 emit_label (label
);
10570 LABEL_NUSES (label
) = 1;
10572 if (align
<= 4 && desired_alignment
> 4)
10574 rtx label
= ix86_expand_aligntest (destreg
, 4);
10575 emit_insn (gen_strset (destreg
, dst
,
10577 ? gen_rtx_SUBREG (SImode
, zeroreg
, 0)
10579 ix86_adjust_counter (countreg
, 4);
10580 emit_label (label
);
10581 LABEL_NUSES (label
) = 1;
10584 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
10586 emit_label (label
);
10587 LABEL_NUSES (label
) = 1;
10591 if (!TARGET_SINGLE_STRINGOP
)
10592 emit_insn (gen_cld ());
10595 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
10597 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, GEN_INT (3));
10601 emit_insn (gen_lshrsi3 (countreg2
, countreg
, const2_rtx
));
10602 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, const2_rtx
);
10604 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
10605 emit_insn (gen_rep_stos (destreg
, countreg2
, dst
, zeroreg
, destexp
));
10609 emit_label (label
);
10610 LABEL_NUSES (label
) = 1;
10613 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
10614 emit_insn (gen_strset (destreg
, dst
,
10615 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
10616 if (TARGET_64BIT
&& (align
<= 4 || count
== 0))
10618 rtx label
= ix86_expand_aligntest (countreg
, 4);
10619 emit_insn (gen_strset (destreg
, dst
,
10620 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
10621 emit_label (label
);
10622 LABEL_NUSES (label
) = 1;
10624 if (align
> 2 && count
!= 0 && (count
& 2))
10625 emit_insn (gen_strset (destreg
, dst
,
10626 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10627 if (align
<= 2 || count
== 0)
10629 rtx label
= ix86_expand_aligntest (countreg
, 2);
10630 emit_insn (gen_strset (destreg
, dst
,
10631 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10632 emit_label (label
);
10633 LABEL_NUSES (label
) = 1;
10635 if (align
> 1 && count
!= 0 && (count
& 1))
10636 emit_insn (gen_strset (destreg
, dst
,
10637 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10638 if (align
<= 1 || count
== 0)
10640 rtx label
= ix86_expand_aligntest (countreg
, 1);
10641 emit_insn (gen_strset (destreg
, dst
,
10642 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10643 emit_label (label
);
10644 LABEL_NUSES (label
) = 1;
10650 /* Expand strlen. */
10652 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
10654 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
10656 /* The generic case of strlen expander is long. Avoid it's
10657 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
10659 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
10660 && !TARGET_INLINE_ALL_STRINGOPS
10662 && (GET_CODE (align
) != CONST_INT
|| INTVAL (align
) < 4))
10665 addr
= force_reg (Pmode
, XEXP (src
, 0));
10666 scratch1
= gen_reg_rtx (Pmode
);
10668 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
10671 /* Well it seems that some optimizer does not combine a call like
10672 foo(strlen(bar), strlen(bar));
10673 when the move and the subtraction is done here. It does calculate
10674 the length just once when these instructions are done inside of
10675 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10676 often used and I use one fewer register for the lifetime of
10677 output_strlen_unroll() this is better. */
10679 emit_move_insn (out
, addr
);
10681 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
10683 /* strlensi_unroll_1 returns the address of the zero at the end of
10684 the string, like memchr(), so compute the length by subtracting
10685 the start address. */
10687 emit_insn (gen_subdi3 (out
, out
, addr
));
10689 emit_insn (gen_subsi3 (out
, out
, addr
));
10694 scratch2
= gen_reg_rtx (Pmode
);
10695 scratch3
= gen_reg_rtx (Pmode
);
10696 scratch4
= force_reg (Pmode
, constm1_rtx
);
10698 emit_move_insn (scratch3
, addr
);
10699 eoschar
= force_reg (QImode
, eoschar
);
10701 emit_insn (gen_cld ());
10702 src
= replace_equiv_address_nv (src
, scratch3
);
10704 /* If .md starts supporting :P, this can be done in .md. */
10705 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
10706 scratch4
), UNSPEC_SCAS
);
10707 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
10710 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
10711 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
10715 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
10716 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
10722 /* Expand the appropriate insns for doing strlen if not just doing
10725 out = result, initialized with the start address
10726 align_rtx = alignment of the address.
10727 scratch = scratch register, initialized with the startaddress when
10728 not aligned, otherwise undefined
10730 This is just the body. It needs the initializations mentioned above and
10731 some address computing at the end. These things are done in i386.md. */
10734 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
10738 rtx align_2_label
= NULL_RTX
;
10739 rtx align_3_label
= NULL_RTX
;
10740 rtx align_4_label
= gen_label_rtx ();
10741 rtx end_0_label
= gen_label_rtx ();
10743 rtx tmpreg
= gen_reg_rtx (SImode
);
10744 rtx scratch
= gen_reg_rtx (SImode
);
10748 if (GET_CODE (align_rtx
) == CONST_INT
)
10749 align
= INTVAL (align_rtx
);
10751 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10753 /* Is there a known alignment and is it less than 4? */
10756 rtx scratch1
= gen_reg_rtx (Pmode
);
10757 emit_move_insn (scratch1
, out
);
10758 /* Is there a known alignment and is it not 2? */
10761 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
10762 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
10764 /* Leave just the 3 lower bits. */
10765 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
10766 NULL_RTX
, 0, OPTAB_WIDEN
);
10768 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
10769 Pmode
, 1, align_4_label
);
10770 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
10771 Pmode
, 1, align_2_label
);
10772 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
10773 Pmode
, 1, align_3_label
);
10777 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10778 check if is aligned to 4 - byte. */
10780 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
10781 NULL_RTX
, 0, OPTAB_WIDEN
);
10783 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
10784 Pmode
, 1, align_4_label
);
10787 mem
= change_address (src
, QImode
, out
);
10789 /* Now compare the bytes. */
10791 /* Compare the first n unaligned byte on a byte per byte basis. */
10792 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
10793 QImode
, 1, end_0_label
);
10795 /* Increment the address. */
10797 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
10799 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
10801 /* Not needed with an alignment of 2 */
10804 emit_label (align_2_label
);
10806 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
10810 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
10812 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
10814 emit_label (align_3_label
);
10817 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
10821 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
10823 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
10826 /* Generate loop to check 4 bytes at a time. It is not a good idea to
10827 align this loop. It gives only huge programs, but does not help to
10829 emit_label (align_4_label
);
10831 mem
= change_address (src
, SImode
, out
);
10832 emit_move_insn (scratch
, mem
);
10834 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
10836 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
10838 /* This formula yields a nonzero result iff one of the bytes is zero.
10839 This saves three branches inside loop and many cycles. */
10841 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
10842 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
10843 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
10844 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
10845 gen_int_mode (0x80808080, SImode
)));
10846 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
10851 rtx reg
= gen_reg_rtx (SImode
);
10852 rtx reg2
= gen_reg_rtx (Pmode
);
10853 emit_move_insn (reg
, tmpreg
);
10854 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
10856 /* If zero is not in the first two bytes, move two bytes forward. */
10857 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
10858 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
10859 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
10860 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
10861 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
10864 /* Emit lea manually to avoid clobbering of flags. */
10865 emit_insn (gen_rtx_SET (SImode
, reg2
,
10866 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
10868 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
10869 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
10870 emit_insn (gen_rtx_SET (VOIDmode
, out
,
10871 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
10878 rtx end_2_label
= gen_label_rtx ();
10879 /* Is zero in the first two bytes? */
10881 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
10882 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
10883 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
10884 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
10885 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
10887 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
10888 JUMP_LABEL (tmp
) = end_2_label
;
10890 /* Not in the first two. Move two bytes forward. */
10891 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
10893 emit_insn (gen_adddi3 (out
, out
, const2_rtx
));
10895 emit_insn (gen_addsi3 (out
, out
, const2_rtx
));
10897 emit_label (end_2_label
);
10901 /* Avoid branch in fixing the byte. */
10902 tmpreg
= gen_lowpart (QImode
, tmpreg
);
10903 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
10904 cmp
= gen_rtx_LTU (Pmode
, gen_rtx_REG (CCmode
, 17), const0_rtx
);
10906 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3), cmp
));
10908 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3), cmp
));
10910 emit_label (end_0_label
);
/* Emit the RTL for a call.  RETVAL is the register to receive the call's
   value (or null for a void call); FNADDR is a MEM wrapping the callee
   address; CALLARG1/CALLARG2 are argument-size operands; POP is the byte
   count the callee pops (const0_rtx meaning none); SIBCALL is nonzero
   for a sibling (tail) call.
   NOTE(review): this text is a lossy extraction -- braces, the
   TARGET_MACHO #if, an abort() and several assignments appear to be
   missing.  Verify against upstream gcc/config/i386/i386.c before
   relying on or editing this code.  */
10914 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
10915 rtx callarg2 ATTRIBUTE_UNUSED
,
10916 rtx pop
, int sibcall
)
10918 rtx use
= NULL
, call
;
/* Normalize "pop zero bytes" to no pop; 64-bit callees never pop.  */
10920 if (pop
== const0_rtx
)
10922 if (TARGET_64BIT
&& pop
)
/* Darwin (Macho) PIC: indirect the callee through the lazy stub.  */
10926 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
10927 fnaddr
= machopic_indirect_call_target (fnaddr
);
10929 /* Static functions and indirect calls don't need the pic register. */
10930 if (! TARGET_64BIT
&& flag_pic
10931 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
10932 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
10933 use_reg (&use
, pic_offset_table_rtx
);
/* x86-64 varargs ABI: pass the number of SSE registers used in %al.  */
10935 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
10937 rtx al
= gen_rtx_REG (QImode
, 0);
10938 emit_move_insn (al
, callarg2
);
10939 use_reg (&use
, al
);
10941 #endif /* TARGET_MACHO */
/* Force an address no call insn accepts directly into a register.  */
10943 if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
10945 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
10946 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
/* 64-bit sibcalls to a non-constant address go through r11 (a
   call-clobbered register not used for argument passing).  */
10948 if (sibcall
&& TARGET_64BIT
10949 && !constant_call_address_operand (XEXP (fnaddr
, 0), Pmode
))
10952 addr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
10953 fnaddr
= gen_rtx_REG (Pmode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
10954 emit_move_insn (fnaddr
, addr
);
10955 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
/* Build the CALL rtx, wrapping it in a SET when a value is returned and
   in a PARALLEL when the callee pops its arguments.  */
10958 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
10960 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
10963 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
10964 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
10965 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
10968 call
= emit_call_insn (call
);
/* Attach the registers (PIC reg, %al) the call implicitly uses.  */
10970 CALL_INSN_FUNCTION_USAGE (call
) = use
;
10974 /* Clear stack slot assignments remembered from previous functions.
10975 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocate a zeroed per-function machine_function record;
   use_fast_prologue_epilogue_nregs = -1 marks the field as not yet
   computed.  NOTE(review): the trailing "return f;" is missing from
   this extracted text -- confirm against upstream.  */
10978 static struct machine_function
*
10979 ix86_init_machine_status (void)
10981 struct machine_function
*f
;
10983 f
= ggc_alloc_cleared (sizeof (struct machine_function
));
10984 f
->use_fast_prologue_epilogue_nregs
= -1;
10989 /* Return a MEM corresponding to a stack slot with mode MODE.
10990 Allocate a new slot if necessary.
10992 The RTL for a function can have several slots available: N is
10993 which slot to use. */
/* Slots are cached per (mode, n) in the GC-allocated ix86_stack_locals
   list so repeated requests return the same MEM.
   NOTE(review): the range-check abort, cache-hit return and final
   "return s->rtl;" are missing from this extracted text.  */
10996 assign_386_stack_local (enum machine_mode mode
, int n
)
10998 struct stack_local_entry
*s
;
/* Reject slot indices outside [0, MAX_386_STACK_LOCALS).  */
11000 if (n
< 0 || n
>= MAX_386_STACK_LOCALS
)
/* Cache lookup: reuse an existing slot with matching mode and index.  */
11003 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
11004 if (s
->mode
== mode
&& s
->n
== n
)
/* Cache miss: allocate a new entry and a fresh stack slot, then push
   the entry on the front of the list.  */
11007 s
= (struct stack_local_entry
*)
11008 ggc_alloc (sizeof (struct stack_local_entry
));
11011 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
11013 s
->next
= ix86_stack_locals
;
11014 ix86_stack_locals
= s
;
11018 /* Construct the SYMBOL_REF for the tls_get_addr function. */
/* Cached across calls; GTY(()) keeps the rtx alive across GC.  */
11020 static GTY(()) rtx ix86_tls_symbol
;
/* Lazily build (once) and return the SYMBOL_REF for the TLS resolver:
   "___tls_get_addr" for 32-bit GNU TLS, "__tls_get_addr" otherwise.  */
11022 ix86_tls_get_addr (void)
11025 if (!ix86_tls_symbol
)
11027 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
11028 (TARGET_GNU_TLS
&& !TARGET_64BIT
)
11029 ? "___tls_get_addr"
11030 : "__tls_get_addr");
11033 return ix86_tls_symbol
;
11036 /* Calculate the length of the memory address in the instruction
11037 encoding. Does not include the one-byte modrm, opcode, or prefix. */
/* NOTE(review): the return statements and the autoinc fast path body are
   missing from this extracted text; only the classification structure
   survives.  Verify byte counts against upstream before editing.  */
11040 memory_address_length (rtx addr
)
11042 struct ix86_address parts
;
11043 rtx base
, index
, disp
;
/* Auto-inc/dec addressing encodes no explicit address bytes.  */
11046 if (GET_CODE (addr
) == PRE_DEC
11047 || GET_CODE (addr
) == POST_INC
11048 || GET_CODE (addr
) == PRE_MODIFY
11049 || GET_CODE (addr
) == POST_MODIFY
)
/* Split ADDR into base/index/scale/disp; failure here is fatal.  */
11052 if (! ix86_decompose_address (addr
, &parts
))
11056 index
= parts
.index
;
11061 - esp as the base always wants an index,
11062 - ebp as the base always wants a displacement. */
11064 /* Register Indirect. */
11065 if (base
&& !index
&& !disp
)
11067 /* esp (for its index) and ebp (for its displacement) need
11068 the two-byte modrm form. */
11069 if (addr
== stack_pointer_rtx
11070 || addr
== arg_pointer_rtx
11071 || addr
== frame_pointer_rtx
11072 || addr
== hard_frame_pointer_rtx
)
11076 /* Direct Addressing. */
11077 else if (disp
&& !base
&& !index
)
11082 /* Find the length of the displacement constant. */
/* 'K' constraint: value fits in a signed 8-bit displacement.  */
11085 if (GET_CODE (disp
) == CONST_INT
11086 && CONST_OK_FOR_LETTER_P (INTVAL (disp
), 'K')
11092 /* ebp always wants a displacement. */
11093 else if (base
== hard_frame_pointer_rtx
)
11096 /* An index requires the two-byte modrm form.... */
11098 /* ...like esp, which always wants an index. */
11099 || base
== stack_pointer_rtx
11100 || base
== arg_pointer_rtx
11101 || base
== frame_pointer_rtx
)
11108 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11109 is set, expect that insn have 8bit immediate alternative. */
/* Scans the insn's operands for a constant; when SHORTFORM and the
   constant satisfies the 'K' (signed 8-bit) constraint the immediate is
   one byte, otherwise its size follows the insn's mode.
   NOTE(review): the mode-switch case bodies and returns are missing
   from this extracted text.  */
11111 ix86_attr_length_immediate_default (rtx insn
, int shortform
)
11115 extract_insn_cached (insn
);
11116 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
11117 if (CONSTANT_P (recog_data
.operand
[i
]))
11122 && GET_CODE (recog_data
.operand
[i
]) == CONST_INT
11123 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data
.operand
[i
]), 'K'))
11127 switch (get_attr_mode (insn
))
11138 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
/* Unrecognized mode: internal error.  */
11143 fatal_insn ("unknown insn mode", insn
);
11149 /* Compute default value for "length_address" attribute. */
/* For LEA the address is the SET_SRC of the pattern; otherwise the
   first MEM operand's address is measured via memory_address_length.
   NOTE(review): the SET-branch body, ENABLE_CHECKING assertion and
   final return are missing from this extracted text.  */
11151 ix86_attr_length_address_default (rtx insn
)
11155 if (get_attr_type (insn
) == TYPE_LEA
)
11157 rtx set
= PATTERN (insn
);
11158 if (GET_CODE (set
) == SET
)
11160 else if (GET_CODE (set
) == PARALLEL
11161 && GET_CODE (XVECEXP (set
, 0, 0)) == SET
)
11162 set
= XVECEXP (set
, 0, 0);
11165 #ifdef ENABLE_CHECKING
11171 return memory_address_length (SET_SRC (set
));
/* Non-LEA: find the (at most one) MEM among the operands.  */
11174 extract_insn_cached (insn
);
11175 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
11176 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
11178 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
11184 /* Return the maximum number of instructions a cpu can issue. */
/* Dispatch on ix86_tune; the per-CPU return values and the default
   case are missing from this extracted text -- compare upstream.  */
11187 ix86_issue_rate (void)
11191 case PROCESSOR_PENTIUM
:
11195 case PROCESSOR_PENTIUMPRO
:
11196 case PROCESSOR_PENTIUM4
:
11197 case PROCESSOR_ATHLON
:
11199 case PROCESSOR_NOCONA
:
11207 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11208 by DEP_INSN and nothing set by DEP_INSN. */
11211 ix86_flags_dependant (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
11215 /* Simplify the test for uninteresting insns. */
/* Only conditional-ish consumers (setcc/cmov/fcmov/branch) can be
   flags-dependent; everything else returns false early.  */
11216 if (insn_type
!= TYPE_SETCC
11217 && insn_type
!= TYPE_ICMOV
11218 && insn_type
!= TYPE_FCMOV
11219 && insn_type
!= TYPE_IBR
)
/* A simple SET in DEP_INSN: its destination is the candidate flag reg.  */
11222 if ((set
= single_set (dep_insn
)) != 0)
11224 set
= SET_DEST (set
);
/* A two-SET PARALLEL (e.g. op + flag clobber-as-set): collect both
   destinations.  */
11227 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
11228 && XVECLEN (PATTERN (dep_insn
), 0) == 2
11229 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
11230 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
11232 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
/* NOTE(review): set2 is taken from PARALLEL element 0, identical to
   set above; element 1 looks intended (upstream uses ", 0, 1") --
   confirm against gcc/config/i386/i386.c.  */
11233 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
/* The first destination must be the flags register itself.  */
11238 if (GET_CODE (set
) != REG
|| REGNO (set
) != FLAGS_REG
)
11241 /* This test is true if the dependent insn reads the flags but
11242 not any other potentially set register. */
11243 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
11246 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
11252 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11253 address with operands set by DEP_INSN. */
11256 ix86_agi_dependant (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
/* LEA computes an address without a memory access; examine its
   SET_SRC directly.  */
11260 if (insn_type
== TYPE_LEA
11263 addr
= PATTERN (insn
);
11264 if (GET_CODE (addr
) == SET
)
11266 else if (GET_CODE (addr
) == PARALLEL
11267 && GET_CODE (XVECEXP (addr
, 0, 0)) == SET
)
11268 addr
= XVECEXP (addr
, 0, 0);
11271 addr
= SET_SRC (addr
);
/* Otherwise scan the operands for a MEM and take its address.
   NOTE(review): the "no MEM found -> return 0" path is missing from
   this extracted text.  */
11276 extract_insn_cached (insn
);
11277 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
11278 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
11280 addr
= XEXP (recog_data
.operand
[i
], 0);
/* True iff any register in ADDR is written by DEP_INSN.  */
11287 return modified_in_p (addr
, dep_insn
);
/* Scheduler hook (TARGET_SCHED_ADJUST_COST): adjust the latency COST of
   the dependence LINK between DEP_INSN (producer) and INSN (consumer),
   per ix86_tune.  Returns the adjusted cost.
   NOTE(review): the switch head, several case labels (e.g. K6), the
   cost assignments and the final return are missing from this
   extracted text -- verify against upstream i386.c.  */
11291 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
11293 enum attr_type insn_type
, dep_insn_type
;
11294 enum attr_memory memory
;
11296 int dep_insn_code_number
;
11298 /* Anti and output dependencies have zero cost on all CPUs. */
11299 if (REG_NOTE_KIND (link
) != 0)
11302 dep_insn_code_number
= recog_memoized (dep_insn
);
11304 /* If we can't recognize the insns, we can't really do anything. */
11305 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
11308 insn_type
= get_attr_type (insn
);
11309 dep_insn_type
= get_attr_type (dep_insn
);
11313 case PROCESSOR_PENTIUM
:
11314 /* Address Generation Interlock adds a cycle of latency. */
11315 if (ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11318 /* ??? Compares pair with jump/setcc. */
11319 if (ix86_flags_dependant (insn
, dep_insn
, insn_type
))
11322 /* Floating point stores require value to be ready one cycle earlier. */
11323 if (insn_type
== TYPE_FMOV
11324 && get_attr_memory (insn
) == MEMORY_STORE
11325 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11329 case PROCESSOR_PENTIUMPRO
:
11330 memory
= get_attr_memory (insn
);
11332 /* INT->FP conversion is expensive. */
11333 if (get_attr_fp_int_src (dep_insn
))
11336 /* There is one cycle extra latency between an FP op and a store. */
11337 if (insn_type
== TYPE_FMOV
11338 && (set
= single_set (dep_insn
)) != NULL_RTX
11339 && (set2
= single_set (insn
)) != NULL_RTX
11340 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
11341 && GET_CODE (SET_DEST (set2
)) == MEM
)
11344 /* Show ability of reorder buffer to hide latency of load by executing
11345 in parallel with previous instruction in case
11346 previous instruction is not needed to compute the address. */
11347 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
11348 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11350 /* Claim moves to take one cycle, as core can issue one load
11351 at time and the next load can start cycle later. */
11352 if (dep_insn_type
== TYPE_IMOV
11353 || dep_insn_type
== TYPE_FMOV
)
/* (Next CPU case -- its label is missing from this extracted text.)  */
11361 memory
= get_attr_memory (insn
);
11363 /* The esp dependency is resolved before the instruction is really
/* push/pop pairs: the implicit %esp update does not serialize them.  */
11365 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
11366 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
11369 /* INT->FP conversion is expensive. */
11370 if (get_attr_fp_int_src (dep_insn
))
11373 /* Show ability of reorder buffer to hide latency of load by executing
11374 in parallel with previous instruction in case
11375 previous instruction is not needed to compute the address. */
11376 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
11377 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11379 /* Claim moves to take one cycle, as core can issue one load
11380 at time and the next load can start cycle later. */
11381 if (dep_insn_type
== TYPE_IMOV
11382 || dep_insn_type
== TYPE_FMOV
)
11391 case PROCESSOR_ATHLON
:
11393 memory
= get_attr_memory (insn
);
11395 /* Show ability of reorder buffer to hide latency of load by executing
11396 in parallel with previous instruction in case
11397 previous instruction is not needed to compute the address. */
11398 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
11399 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11401 enum attr_unit unit
= get_attr_unit (insn
);
11404 /* Because of the difference between the length of integer and
11405 floating unit pipeline preparation stages, the memory operands
11406 for floating point are cheaper.
11408 ??? For Athlon it the difference is most probably 2. */
11409 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
11412 loadcost
= TARGET_ATHLON
? 2 : 0;
/* Subtract the hidden load latency but never go below zero.  */
11414 if (cost
>= loadcost
)
11427 /* How many alternative schedules to try. This should be as wide as the
11428 scheduling freedom in the DFA, but no wider. Making this value too
11429 large results extra work for the scheduler. */
/* Returns a per-CPU lookahead depth; the actual return values and the
   default case are missing from this extracted text.  */
11432 ia32_multipass_dfa_lookahead (void)
11434 if (ix86_tune
== PROCESSOR_PENTIUM
)
11437 if (ix86_tune
== PROCESSOR_PENTIUMPRO
11438 || ix86_tune
== PROCESSOR_K6
)
11446 /* Compute the alignment given to a constant that is being placed in memory.
11447 EXP is the constant and ALIGN is the alignment that the object would
11449 The value of this function is used instead of that alignment to align
/* Boosts doubles to 64-bit and 128-bit-mode constants to 128-bit
   alignment; long strings get word alignment when not optimizing for
   size.  NOTE(review): the return statements for the REAL_CST branches
   and the final "return align;" are missing from this extracted text.  */
11453 ix86_constant_alignment (tree exp
, int align
)
11455 if (TREE_CODE (exp
) == REAL_CST
)
11457 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
11459 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
11462 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
11463 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
11464 return BITS_PER_WORD
;
11469 /* Compute the alignment for a static variable.
11470 TYPE is the data type, and ALIGN is the alignment that
11471 the object would ordinarily have. The value of this function is used
11472 instead of that alignment to align the object. */
/* NOTE(review): the return statements and the TARGET_64BIT guard around
   the 128-bit aggregate rule are missing from this extracted text.  */
11475 ix86_data_alignment (tree type
, int align
)
/* Large aggregates (>= 256 bits, or with a nonzero high word of the
   size, i.e. huge) are raised to 256-bit alignment for cache lines.  */
11477 if (AGGREGATE_TYPE_P (type
)
11478 && TYPE_SIZE (type
)
11479 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
11480 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 256
11481 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 256)
11484 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11485 to 16byte boundary. */
11488 if (AGGREGATE_TYPE_P (type
)
11489 && TYPE_SIZE (type
)
11490 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
11491 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
11492 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
/* Element-type based boosts: arrays of doubles -> 64, arrays of
   128-bit-mode elements -> 128.  */
11496 if (TREE_CODE (type
) == ARRAY_TYPE
)
11498 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
11500 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
11503 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
11506 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
11508 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
/* Records/unions: decide from the mode of the first field.  */
11511 else if ((TREE_CODE (type
) == RECORD_TYPE
11512 || TREE_CODE (type
) == UNION_TYPE
11513 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
11514 && TYPE_FIELDS (type
))
11516 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
11518 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
11521 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
11522 || TREE_CODE (type
) == INTEGER_TYPE
)
11524 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
11526 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
11533 /* Compute the alignment for a local variable.
11534 TYPE is the data type, and ALIGN is the alignment that
11535 the object would ordinarily have. The value of this macro is used
11536 instead of that alignment to align the object. */
/* Same shape as ix86_data_alignment but with the stack-local threshold
   (>= 16 bits here vs 128 there).  NOTE(review): return statements and
   the TARGET_64BIT guard are missing from this extracted text.  */
11539 ix86_local_alignment (tree type
, int align
)
11541 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11542 to 16byte boundary. */
11545 if (AGGREGATE_TYPE_P (type
)
11546 && TYPE_SIZE (type
)
11547 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
11548 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
11549 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
/* Element-type based boosts, mirroring ix86_data_alignment.  */
11552 if (TREE_CODE (type
) == ARRAY_TYPE
)
11554 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
11556 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
11559 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
11561 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
11563 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
11566 else if ((TREE_CODE (type
) == RECORD_TYPE
11567 || TREE_CODE (type
) == UNION_TYPE
11568 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
11569 && TYPE_FIELDS (type
))
11571 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
11573 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
11576 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
11577 || TREE_CODE (type
) == INTEGER_TYPE
)
11580 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
11582 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
11588 /* Emit RTL insns to initialize the variable parts of a trampoline.
11589 FNADDR is an RTX for the address of the function's pure code.
11590 CXT is an RTX for the static chain value for the function. */
/* 32-bit layout: "movl $cxt,%ecx" (0xb9 + imm32) then "jmp rel32"
   (0xe9 + disp32).  64-bit layout: load fnaddr into r11
   (0x41 0xbb movl, or 0x49 0xbb movabs), cxt into r10 (0x49 0xba
   movabs), then "jmpq *%r11" (0x49 0xff 0xe3).
   NOTE(review): the TARGET_64BIT split, the offset bookkeeping and an
   else-branch appear to be missing from this extracted text --
   in particular the copy_to_mode_reg under the zext-immediate test
   looks misplaced; verify against upstream i386.c.  */
11592 x86_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
11596 /* Compute offset from the end of the jmp to the target function. */
11597 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
11598 plus_constant (tramp
, 10),
11599 NULL_RTX
, 1, OPTAB_DIRECT
);
11600 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
11601 gen_int_mode (0xb9, QImode
));
11602 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
11603 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
11604 gen_int_mode (0xe9, QImode
));
11605 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
11610 /* Try to load address using shorter movl instead of movabs.
11611 We may want to support movq for kernel mode, but kernel does not use
11612 trampolines at the moment. */
11613 if (x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
11615 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
11616 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
11617 gen_int_mode (0xbb41, HImode
));
11618 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
11619 gen_lowpart (SImode
, fnaddr
));
11624 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
11625 gen_int_mode (0xbb49, HImode
));
11626 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
11630 /* Load static chain using movabs to r10. */
11631 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
11632 gen_int_mode (0xba49, HImode
));
11633 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
11636 /* Jump to the r11 */
11637 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
11638 gen_int_mode (0xff49, HImode
));
11639 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
11640 gen_int_mode (0xe3, QImode
));
/* Sanity check: emitted bytes must fit the declared trampoline size.  */
11642 if (offset
> TRAMPOLINE_SIZE
)
11646 #ifdef ENABLE_EXECUTE_STACK
/* Some targets must mark the trampoline's stack page executable.  */
11647 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
11648 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
/* Register the md builtin NAME (with type TYPE and code CODE) only when
   every ISA bit in MASK is enabled in target_flags, and 64-bit-only
   builtins only on 64-bit targets.  No comments inside the macro: its
   backslash continuations must stay unbroken.  */
11652 #define def_builtin(MASK, NAME, TYPE, CODE) \
11654 if ((MASK) & target_flags \
11655 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
11656 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
11657 NULL, NULL_TREE); \
/* One table row per builtin: ISA mask, insn pattern, user-visible name
   (0 = registered elsewhere), builtin enum, comparison code for
   compare builtins, and a flag (e.g. swapped operands).  */
11660 struct builtin_description
11662 const unsigned int mask
;
11663 const enum insn_code icode
;
11664 const char *const name
;
11665 const enum ix86_builtins code
;
11666 const enum rtx_code comparison
;
11667 const unsigned int flag
;
/* (u)comiss/(u)comisd builtins: scalar FP compares that set EFLAGS.
   The eq/lt/le variants use unordered compare codes (UNEQ/UNLT/UNLE)
   and neq uses LTGT, matching comi NaN semantics; gt/ge use plain
   GT/GE.  NOTE(review): the closing "};" of this table is missing
   from this extracted text.  */
11670 static const struct builtin_description bdesc_comi
[] =
11672 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
11673 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
11674 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
11675 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
11676 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
11677 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
11678 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
11679 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
11680 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
11681 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
11682 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
11683 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
11684 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
11685 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
11686 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
11687 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
11688 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
11689 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
11690 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
11691 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
11692 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
11693 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
11694 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
11695 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
11698 static const struct builtin_description bdesc_2arg
[] =
11701 { MASK_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
11702 { MASK_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
11703 { MASK_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
11704 { MASK_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
11705 { MASK_SSE
, CODE_FOR_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
11706 { MASK_SSE
, CODE_FOR_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
11707 { MASK_SSE
, CODE_FOR_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
11708 { MASK_SSE
, CODE_FOR_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
11710 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
11711 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
11712 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
11713 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, 1 },
11714 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, 1 },
11715 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
11716 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, EQ
, 0 },
11717 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, LT
, 0 },
11718 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, LE
, 0 },
11719 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, LT
, 1 },
11720 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, LE
, 1 },
11721 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, UNORDERED
, 0 },
11722 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
11723 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
11724 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
11725 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
11726 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, EQ
, 0 },
11727 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, LT
, 0 },
11728 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, LE
, 0 },
11729 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
11731 { MASK_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
11732 { MASK_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
11733 { MASK_SSE
, CODE_FOR_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
11734 { MASK_SSE
, CODE_FOR_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
11736 { MASK_SSE
, CODE_FOR_sse_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
11737 { MASK_SSE
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
11738 { MASK_SSE
, CODE_FOR_sse_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
11739 { MASK_SSE
, CODE_FOR_sse_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
11741 { MASK_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
11742 { MASK_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
11743 { MASK_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
11744 { MASK_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
11745 { MASK_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
11748 { MASK_MMX
, CODE_FOR_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
11749 { MASK_MMX
, CODE_FOR_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
11750 { MASK_MMX
, CODE_FOR_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
11751 { MASK_MMX
, CODE_FOR_mmx_adddi3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, 0, 0 },
11752 { MASK_MMX
, CODE_FOR_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
11753 { MASK_MMX
, CODE_FOR_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
11754 { MASK_MMX
, CODE_FOR_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
11755 { MASK_MMX
, CODE_FOR_mmx_subdi3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, 0, 0 },
11757 { MASK_MMX
, CODE_FOR_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
11758 { MASK_MMX
, CODE_FOR_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
11759 { MASK_MMX
, CODE_FOR_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
11760 { MASK_MMX
, CODE_FOR_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
11761 { MASK_MMX
, CODE_FOR_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
11762 { MASK_MMX
, CODE_FOR_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
11763 { MASK_MMX
, CODE_FOR_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
11764 { MASK_MMX
, CODE_FOR_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
11766 { MASK_MMX
, CODE_FOR_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
11767 { MASK_MMX
, CODE_FOR_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
11768 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
11770 { MASK_MMX
, CODE_FOR_mmx_anddi3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
11771 { MASK_MMX
, CODE_FOR_mmx_nanddi3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
11772 { MASK_MMX
, CODE_FOR_mmx_iordi3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
11773 { MASK_MMX
, CODE_FOR_mmx_xordi3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
11775 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
11776 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
11778 { MASK_MMX
, CODE_FOR_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
11779 { MASK_MMX
, CODE_FOR_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
11780 { MASK_MMX
, CODE_FOR_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
11781 { MASK_MMX
, CODE_FOR_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
11782 { MASK_MMX
, CODE_FOR_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
11783 { MASK_MMX
, CODE_FOR_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
11785 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
11786 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
11787 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
11788 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
11790 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
11791 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
11792 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
11793 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
11794 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
11795 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
11798 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
11799 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
11800 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
11802 { MASK_SSE
, CODE_FOR_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
11803 { MASK_SSE
, CODE_FOR_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
11804 { MASK_SSE
| MASK_64BIT
, CODE_FOR_cvtsi2ssq
, 0, IX86_BUILTIN_CVTSI642SS
, 0, 0 },
11806 { MASK_MMX
, CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
11807 { MASK_MMX
, CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
11808 { MASK_MMX
, CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
11809 { MASK_MMX
, CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
11810 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
11811 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
11813 { MASK_MMX
, CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
11814 { MASK_MMX
, CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
11815 { MASK_MMX
, CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
11816 { MASK_MMX
, CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
11817 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
11818 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
11820 { MASK_MMX
, CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
11821 { MASK_MMX
, CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
11822 { MASK_MMX
, CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
11823 { MASK_MMX
, CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
11825 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
11826 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
11829 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
11830 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
11831 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
11832 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
11833 { MASK_SSE2
, CODE_FOR_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
11834 { MASK_SSE2
, CODE_FOR_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
11835 { MASK_SSE2
, CODE_FOR_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
11836 { MASK_SSE2
, CODE_FOR_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
11838 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
11839 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
11840 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
11841 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, 1 },
11842 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, 1 },
11843 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
11844 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, EQ
, 0 },
11845 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, LT
, 0 },
11846 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, LE
, 0 },
11847 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, LT
, 1 },
11848 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, LE
, 1 },
11849 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, UNORDERED
, 0 },
11850 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
11851 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
11852 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
11853 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
11854 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, EQ
, 0 },
11855 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, LT
, 0 },
11856 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, LE
, 0 },
11857 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, UNORDERED
, 0 },
11859 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
11860 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
11861 { MASK_SSE2
, CODE_FOR_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
11862 { MASK_SSE2
, CODE_FOR_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
11864 { MASK_SSE2
, CODE_FOR_sse2_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
11865 { MASK_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
11866 { MASK_SSE2
, CODE_FOR_sse2_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
11867 { MASK_SSE2
, CODE_FOR_sse2_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
11869 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
11870 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
11871 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
11874 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
11875 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
11876 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
11877 { MASK_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
11878 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
11879 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
11880 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
11881 { MASK_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
11883 { MASK_MMX
, CODE_FOR_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
11884 { MASK_MMX
, CODE_FOR_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
11885 { MASK_MMX
, CODE_FOR_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
11886 { MASK_MMX
, CODE_FOR_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
11887 { MASK_MMX
, CODE_FOR_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
11888 { MASK_MMX
, CODE_FOR_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
11889 { MASK_MMX
, CODE_FOR_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
11890 { MASK_MMX
, CODE_FOR_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
11892 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
11893 { MASK_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
11895 { MASK_SSE2
, CODE_FOR_sse2_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
11896 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
11897 { MASK_SSE2
, CODE_FOR_sse2_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
11898 { MASK_SSE2
, CODE_FOR_sse2_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
11900 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
11901 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
11903 { MASK_SSE2
, CODE_FOR_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
11904 { MASK_SSE2
, CODE_FOR_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
11905 { MASK_SSE2
, CODE_FOR_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
11906 { MASK_SSE2
, CODE_FOR_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
11907 { MASK_SSE2
, CODE_FOR_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
11908 { MASK_SSE2
, CODE_FOR_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
11910 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
11911 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
11912 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
11913 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
11915 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
11916 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
11917 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
11918 { MASK_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, 0, 0 },
11919 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
11920 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
11921 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
11922 { MASK_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, 0, 0 },
11924 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
11925 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
11926 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
11928 { MASK_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
11929 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
11931 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, 0, IX86_BUILTIN_PMULUDQ
, 0, 0 },
11932 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, 0, IX86_BUILTIN_PMULUDQ128
, 0, 0 },
11934 { MASK_SSE2
, CODE_FOR_ashlv8hi3_ti
, 0, IX86_BUILTIN_PSLLW128
, 0, 0 },
11935 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
11936 { MASK_SSE2
, CODE_FOR_ashlv4si3_ti
, 0, IX86_BUILTIN_PSLLD128
, 0, 0 },
11937 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
11938 { MASK_SSE2
, CODE_FOR_ashlv2di3_ti
, 0, IX86_BUILTIN_PSLLQ128
, 0, 0 },
11939 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
11941 { MASK_SSE2
, CODE_FOR_lshrv8hi3_ti
, 0, IX86_BUILTIN_PSRLW128
, 0, 0 },
11942 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
11943 { MASK_SSE2
, CODE_FOR_lshrv4si3_ti
, 0, IX86_BUILTIN_PSRLD128
, 0, 0 },
11944 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
11945 { MASK_SSE2
, CODE_FOR_lshrv2di3_ti
, 0, IX86_BUILTIN_PSRLQ128
, 0, 0 },
11946 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
11948 { MASK_SSE2
, CODE_FOR_ashrv8hi3_ti
, 0, IX86_BUILTIN_PSRAW128
, 0, 0 },
11949 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
11950 { MASK_SSE2
, CODE_FOR_ashrv4si3_ti
, 0, IX86_BUILTIN_PSRAD128
, 0, 0 },
11951 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
11953 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
11955 { MASK_SSE2
, CODE_FOR_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
11956 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_cvtsi2sdq
, 0, IX86_BUILTIN_CVTSI642SD
, 0, 0 },
11957 { MASK_SSE2
, CODE_FOR_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
11958 { MASK_SSE2
, CODE_FOR_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 },
11961 { MASK_SSE3
, CODE_FOR_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, 0, 0 },
11962 { MASK_SSE3
, CODE_FOR_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, 0, 0 },
11963 { MASK_SSE3
, CODE_FOR_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, 0, 0 },
11964 { MASK_SSE3
, CODE_FOR_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, 0, 0 },
11965 { MASK_SSE3
, CODE_FOR_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, 0, 0 },
11966 { MASK_SSE3
, CODE_FOR_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, 0, 0 }
11969 static const struct builtin_description bdesc_1arg
[] =
11971 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
11972 { MASK_SSE
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
11974 { MASK_SSE
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
11975 { MASK_SSE
, CODE_FOR_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
11976 { MASK_SSE
, CODE_FOR_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
11978 { MASK_SSE
, CODE_FOR_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
11979 { MASK_SSE
, CODE_FOR_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
11980 { MASK_SSE
| MASK_64BIT
, CODE_FOR_cvtss2siq
, 0, IX86_BUILTIN_CVTSS2SI64
, 0, 0 },
11981 { MASK_SSE
, CODE_FOR_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
11982 { MASK_SSE
, CODE_FOR_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 },
11983 { MASK_SSE
| MASK_64BIT
, CODE_FOR_cvttss2siq
, 0, IX86_BUILTIN_CVTTSS2SI64
, 0, 0 },
11985 { MASK_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, 0, 0 },
11986 { MASK_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, 0, 0 },
11987 { MASK_SSE2
, CODE_FOR_sse2_movq2dq
, 0, IX86_BUILTIN_MOVQ2DQ
, 0, 0 },
11988 { MASK_SSE2
, CODE_FOR_sse2_movdq2q
, 0, IX86_BUILTIN_MOVDQ2Q
, 0, 0 },
11990 { MASK_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, 0, 0 },
11992 { MASK_SSE2
, CODE_FOR_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, 0, 0 },
11993 { MASK_SSE2
, CODE_FOR_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, 0, 0 },
11995 { MASK_SSE2
, CODE_FOR_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, 0, 0 },
11996 { MASK_SSE2
, CODE_FOR_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, 0, 0 },
11997 { MASK_SSE2
, CODE_FOR_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, 0, 0 },
11998 { MASK_SSE2
, CODE_FOR_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, 0, 0 },
11999 { MASK_SSE2
, CODE_FOR_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, 0, 0 },
12001 { MASK_SSE2
, CODE_FOR_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, 0, 0 },
12003 { MASK_SSE2
, CODE_FOR_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, 0, 0 },
12004 { MASK_SSE2
, CODE_FOR_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, 0, 0 },
12005 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_cvtsd2siq
, 0, IX86_BUILTIN_CVTSD2SI64
, 0, 0 },
12006 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_cvttsd2siq
, 0, IX86_BUILTIN_CVTTSD2SI64
, 0, 0 },
12008 { MASK_SSE2
, CODE_FOR_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, 0, 0 },
12009 { MASK_SSE2
, CODE_FOR_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, 0, 0 },
12010 { MASK_SSE2
, CODE_FOR_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, 0, 0 },
12012 { MASK_SSE2
, CODE_FOR_sse2_movq
, 0, IX86_BUILTIN_MOVQ
, 0, 0 },
12015 { MASK_SSE3
, CODE_FOR_movshdup
, 0, IX86_BUILTIN_MOVSHDUP
, 0, 0 },
12016 { MASK_SSE3
, CODE_FOR_movsldup
, 0, IX86_BUILTIN_MOVSLDUP
, 0, 0 },
12017 { MASK_SSE3
, CODE_FOR_movddup
, 0, IX86_BUILTIN_MOVDDUP
, 0, 0 }
12021 ix86_init_builtins (void)
12024 ix86_init_mmx_sse_builtins ();
/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   parts of the machinery.  */
12031 ix86_init_mmx_sse_builtins (void)
12033 const struct builtin_description
* d
;
12036 tree V16QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V16QImode
);
12037 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
12038 tree V2SF_type_node
= build_vector_type_for_mode (float_type_node
, V2SFmode
);
12039 tree V2DI_type_node
= build_vector_type_for_mode (intDI_type_node
, V2DImode
);
12040 tree V2DF_type_node
= build_vector_type_for_mode (double_type_node
, V2DFmode
);
12041 tree V4SF_type_node
= build_vector_type_for_mode (float_type_node
, V4SFmode
);
12042 tree V4SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V4SImode
);
12043 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
12044 tree V8QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V8QImode
);
12045 tree V8HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V8HImode
);
12047 tree pchar_type_node
= build_pointer_type (char_type_node
);
12048 tree pcchar_type_node
= build_pointer_type (
12049 build_type_variant (char_type_node
, 1, 0));
12050 tree pfloat_type_node
= build_pointer_type (float_type_node
);
12051 tree pcfloat_type_node
= build_pointer_type (
12052 build_type_variant (float_type_node
, 1, 0));
12053 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
12054 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
12055 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
12058 tree int_ftype_v4sf_v4sf
12059 = build_function_type_list (integer_type_node
,
12060 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12061 tree v4si_ftype_v4sf_v4sf
12062 = build_function_type_list (V4SI_type_node
,
12063 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12064 /* MMX/SSE/integer conversions. */
12065 tree int_ftype_v4sf
12066 = build_function_type_list (integer_type_node
,
12067 V4SF_type_node
, NULL_TREE
);
12068 tree int64_ftype_v4sf
12069 = build_function_type_list (long_long_integer_type_node
,
12070 V4SF_type_node
, NULL_TREE
);
12071 tree int_ftype_v8qi
12072 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
12073 tree v4sf_ftype_v4sf_int
12074 = build_function_type_list (V4SF_type_node
,
12075 V4SF_type_node
, integer_type_node
, NULL_TREE
);
12076 tree v4sf_ftype_v4sf_int64
12077 = build_function_type_list (V4SF_type_node
,
12078 V4SF_type_node
, long_long_integer_type_node
,
12080 tree v4sf_ftype_v4sf_v2si
12081 = build_function_type_list (V4SF_type_node
,
12082 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
12083 tree int_ftype_v4hi_int
12084 = build_function_type_list (integer_type_node
,
12085 V4HI_type_node
, integer_type_node
, NULL_TREE
);
12086 tree v4hi_ftype_v4hi_int_int
12087 = build_function_type_list (V4HI_type_node
, V4HI_type_node
,
12088 integer_type_node
, integer_type_node
,
12090 /* Miscellaneous. */
12091 tree v8qi_ftype_v4hi_v4hi
12092 = build_function_type_list (V8QI_type_node
,
12093 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
12094 tree v4hi_ftype_v2si_v2si
12095 = build_function_type_list (V4HI_type_node
,
12096 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
12097 tree v4sf_ftype_v4sf_v4sf_int
12098 = build_function_type_list (V4SF_type_node
,
12099 V4SF_type_node
, V4SF_type_node
,
12100 integer_type_node
, NULL_TREE
);
12101 tree v2si_ftype_v4hi_v4hi
12102 = build_function_type_list (V2SI_type_node
,
12103 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
12104 tree v4hi_ftype_v4hi_int
12105 = build_function_type_list (V4HI_type_node
,
12106 V4HI_type_node
, integer_type_node
, NULL_TREE
);
12107 tree v4hi_ftype_v4hi_di
12108 = build_function_type_list (V4HI_type_node
,
12109 V4HI_type_node
, long_long_unsigned_type_node
,
12111 tree v2si_ftype_v2si_di
12112 = build_function_type_list (V2SI_type_node
,
12113 V2SI_type_node
, long_long_unsigned_type_node
,
12115 tree void_ftype_void
12116 = build_function_type (void_type_node
, void_list_node
);
12117 tree void_ftype_unsigned
12118 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
12119 tree void_ftype_unsigned_unsigned
12120 = build_function_type_list (void_type_node
, unsigned_type_node
,
12121 unsigned_type_node
, NULL_TREE
);
12122 tree void_ftype_pcvoid_unsigned_unsigned
12123 = build_function_type_list (void_type_node
, const_ptr_type_node
,
12124 unsigned_type_node
, unsigned_type_node
,
12126 tree unsigned_ftype_void
12127 = build_function_type (unsigned_type_node
, void_list_node
);
12129 = build_function_type (long_long_unsigned_type_node
, void_list_node
);
12130 tree v4sf_ftype_void
12131 = build_function_type (V4SF_type_node
, void_list_node
);
12132 tree v2si_ftype_v4sf
12133 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
12134 /* Loads/stores. */
12135 tree void_ftype_v8qi_v8qi_pchar
12136 = build_function_type_list (void_type_node
,
12137 V8QI_type_node
, V8QI_type_node
,
12138 pchar_type_node
, NULL_TREE
);
12139 tree v4sf_ftype_pcfloat
12140 = build_function_type_list (V4SF_type_node
, pcfloat_type_node
, NULL_TREE
);
12141 /* @@@ the type is bogus */
12142 tree v4sf_ftype_v4sf_pv2si
12143 = build_function_type_list (V4SF_type_node
,
12144 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
12145 tree void_ftype_pv2si_v4sf
12146 = build_function_type_list (void_type_node
,
12147 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
12148 tree void_ftype_pfloat_v4sf
12149 = build_function_type_list (void_type_node
,
12150 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
12151 tree void_ftype_pdi_di
12152 = build_function_type_list (void_type_node
,
12153 pdi_type_node
, long_long_unsigned_type_node
,
12155 tree void_ftype_pv2di_v2di
12156 = build_function_type_list (void_type_node
,
12157 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
12158 /* Normal vector unops. */
12159 tree v4sf_ftype_v4sf
12160 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12162 /* Normal vector binops. */
12163 tree v4sf_ftype_v4sf_v4sf
12164 = build_function_type_list (V4SF_type_node
,
12165 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12166 tree v8qi_ftype_v8qi_v8qi
12167 = build_function_type_list (V8QI_type_node
,
12168 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
12169 tree v4hi_ftype_v4hi_v4hi
12170 = build_function_type_list (V4HI_type_node
,
12171 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
12172 tree v2si_ftype_v2si_v2si
12173 = build_function_type_list (V2SI_type_node
,
12174 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
12175 tree di_ftype_di_di
12176 = build_function_type_list (long_long_unsigned_type_node
,
12177 long_long_unsigned_type_node
,
12178 long_long_unsigned_type_node
, NULL_TREE
);
12180 tree v2si_ftype_v2sf
12181 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
12182 tree v2sf_ftype_v2si
12183 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
12184 tree v2si_ftype_v2si
12185 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
12186 tree v2sf_ftype_v2sf
12187 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
12188 tree v2sf_ftype_v2sf_v2sf
12189 = build_function_type_list (V2SF_type_node
,
12190 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
12191 tree v2si_ftype_v2sf_v2sf
12192 = build_function_type_list (V2SI_type_node
,
12193 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
12194 tree pint_type_node
= build_pointer_type (integer_type_node
);
12195 tree pcint_type_node
= build_pointer_type (
12196 build_type_variant (integer_type_node
, 1, 0));
12197 tree pdouble_type_node
= build_pointer_type (double_type_node
);
12198 tree pcdouble_type_node
= build_pointer_type (
12199 build_type_variant (double_type_node
, 1, 0));
12200 tree int_ftype_v2df_v2df
12201 = build_function_type_list (integer_type_node
,
12202 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
12205 = build_function_type (intTI_type_node
, void_list_node
);
12206 tree v2di_ftype_void
12207 = build_function_type (V2DI_type_node
, void_list_node
);
12208 tree ti_ftype_ti_ti
12209 = build_function_type_list (intTI_type_node
,
12210 intTI_type_node
, intTI_type_node
, NULL_TREE
);
12211 tree void_ftype_pcvoid
12212 = build_function_type_list (void_type_node
, const_ptr_type_node
, NULL_TREE
);
12214 = build_function_type_list (V2DI_type_node
,
12215 long_long_unsigned_type_node
, NULL_TREE
);
12217 = build_function_type_list (long_long_unsigned_type_node
,
12218 V2DI_type_node
, NULL_TREE
);
12219 tree v4sf_ftype_v4si
12220 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
12221 tree v4si_ftype_v4sf
12222 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
12223 tree v2df_ftype_v4si
12224 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
12225 tree v4si_ftype_v2df
12226 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
12227 tree v2si_ftype_v2df
12228 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
12229 tree v4sf_ftype_v2df
12230 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
12231 tree v2df_ftype_v2si
12232 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
12233 tree v2df_ftype_v4sf
12234 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
12235 tree int_ftype_v2df
12236 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
12237 tree int64_ftype_v2df
12238 = build_function_type_list (long_long_integer_type_node
,
12239 V2DF_type_node
, NULL_TREE
);
12240 tree v2df_ftype_v2df_int
12241 = build_function_type_list (V2DF_type_node
,
12242 V2DF_type_node
, integer_type_node
, NULL_TREE
);
12243 tree v2df_ftype_v2df_int64
12244 = build_function_type_list (V2DF_type_node
,
12245 V2DF_type_node
, long_long_integer_type_node
,
12247 tree v4sf_ftype_v4sf_v2df
12248 = build_function_type_list (V4SF_type_node
,
12249 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
12250 tree v2df_ftype_v2df_v4sf
12251 = build_function_type_list (V2DF_type_node
,
12252 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
12253 tree v2df_ftype_v2df_v2df_int
12254 = build_function_type_list (V2DF_type_node
,
12255 V2DF_type_node
, V2DF_type_node
,
12258 tree v2df_ftype_v2df_pv2si
12259 = build_function_type_list (V2DF_type_node
,
12260 V2DF_type_node
, pv2si_type_node
, NULL_TREE
);
12261 tree void_ftype_pv2si_v2df
12262 = build_function_type_list (void_type_node
,
12263 pv2si_type_node
, V2DF_type_node
, NULL_TREE
);
12264 tree void_ftype_pdouble_v2df
12265 = build_function_type_list (void_type_node
,
12266 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
12267 tree void_ftype_pint_int
12268 = build_function_type_list (void_type_node
,
12269 pint_type_node
, integer_type_node
, NULL_TREE
);
12270 tree void_ftype_v16qi_v16qi_pchar
12271 = build_function_type_list (void_type_node
,
12272 V16QI_type_node
, V16QI_type_node
,
12273 pchar_type_node
, NULL_TREE
);
12274 tree v2df_ftype_pcdouble
12275 = build_function_type_list (V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
12276 tree v2df_ftype_v2df_v2df
12277 = build_function_type_list (V2DF_type_node
,
12278 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
12279 tree v16qi_ftype_v16qi_v16qi
12280 = build_function_type_list (V16QI_type_node
,
12281 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
12282 tree v8hi_ftype_v8hi_v8hi
12283 = build_function_type_list (V8HI_type_node
,
12284 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
12285 tree v4si_ftype_v4si_v4si
12286 = build_function_type_list (V4SI_type_node
,
12287 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
12288 tree v2di_ftype_v2di_v2di
12289 = build_function_type_list (V2DI_type_node
,
12290 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
12291 tree v2di_ftype_v2df_v2df
12292 = build_function_type_list (V2DI_type_node
,
12293 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
12294 tree v2df_ftype_v2df
12295 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
12296 tree v2df_ftype_double
12297 = build_function_type_list (V2DF_type_node
, double_type_node
, NULL_TREE
);
12298 tree v2df_ftype_double_double
12299 = build_function_type_list (V2DF_type_node
,
12300 double_type_node
, double_type_node
, NULL_TREE
);
12301 tree int_ftype_v8hi_int
12302 = build_function_type_list (integer_type_node
,
12303 V8HI_type_node
, integer_type_node
, NULL_TREE
);
12304 tree v8hi_ftype_v8hi_int_int
12305 = build_function_type_list (V8HI_type_node
,
12306 V8HI_type_node
, integer_type_node
,
12307 integer_type_node
, NULL_TREE
);
12308 tree v2di_ftype_v2di_int
12309 = build_function_type_list (V2DI_type_node
,
12310 V2DI_type_node
, integer_type_node
, NULL_TREE
);
12311 tree v4si_ftype_v4si_int
12312 = build_function_type_list (V4SI_type_node
,
12313 V4SI_type_node
, integer_type_node
, NULL_TREE
);
12314 tree v8hi_ftype_v8hi_int
12315 = build_function_type_list (V8HI_type_node
,
12316 V8HI_type_node
, integer_type_node
, NULL_TREE
);
12317 tree v8hi_ftype_v8hi_v2di
12318 = build_function_type_list (V8HI_type_node
,
12319 V8HI_type_node
, V2DI_type_node
, NULL_TREE
);
12320 tree v4si_ftype_v4si_v2di
12321 = build_function_type_list (V4SI_type_node
,
12322 V4SI_type_node
, V2DI_type_node
, NULL_TREE
);
12323 tree v4si_ftype_v8hi_v8hi
12324 = build_function_type_list (V4SI_type_node
,
12325 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
12326 tree di_ftype_v8qi_v8qi
12327 = build_function_type_list (long_long_unsigned_type_node
,
12328 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
12329 tree di_ftype_v2si_v2si
12330 = build_function_type_list (long_long_unsigned_type_node
,
12331 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
12332 tree v2di_ftype_v16qi_v16qi
12333 = build_function_type_list (V2DI_type_node
,
12334 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
12335 tree v2di_ftype_v4si_v4si
12336 = build_function_type_list (V2DI_type_node
,
12337 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
12338 tree int_ftype_v16qi
12339 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
12340 tree v16qi_ftype_pcchar
12341 = build_function_type_list (V16QI_type_node
, pcchar_type_node
, NULL_TREE
);
12342 tree void_ftype_pchar_v16qi
12343 = build_function_type_list (void_type_node
,
12344 pchar_type_node
, V16QI_type_node
, NULL_TREE
);
12345 tree v4si_ftype_pcint
12346 = build_function_type_list (V4SI_type_node
, pcint_type_node
, NULL_TREE
);
12347 tree void_ftype_pcint_v4si
12348 = build_function_type_list (void_type_node
,
12349 pcint_type_node
, V4SI_type_node
, NULL_TREE
);
12350 tree v2di_ftype_v2di
12351 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
12354 tree float128_type
;
12356 /* The __float80 type. */
12357 if (TYPE_MODE (long_double_type_node
) == XFmode
)
12358 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
12362 /* The __float80 type. */
12363 float80_type
= make_node (REAL_TYPE
);
12364 TYPE_PRECISION (float80_type
) = 80;
12365 layout_type (float80_type
);
12366 (*lang_hooks
.types
.register_builtin_type
) (float80_type
, "__float80");
12369 float128_type
= make_node (REAL_TYPE
);
12370 TYPE_PRECISION (float128_type
) = 128;
12371 layout_type (float128_type
);
12372 (*lang_hooks
.types
.register_builtin_type
) (float128_type
, "__float128");
12374 /* Add all builtins that are more or less simple operations on two
12376 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
12378 /* Use one of the operands; the target can have a different mode for
12379 mask-generating compares. */
12380 enum machine_mode mode
;
12385 mode
= insn_data
[d
->icode
].operand
[1].mode
;
12390 type
= v16qi_ftype_v16qi_v16qi
;
12393 type
= v8hi_ftype_v8hi_v8hi
;
12396 type
= v4si_ftype_v4si_v4si
;
12399 type
= v2di_ftype_v2di_v2di
;
12402 type
= v2df_ftype_v2df_v2df
;
12405 type
= ti_ftype_ti_ti
;
12408 type
= v4sf_ftype_v4sf_v4sf
;
12411 type
= v8qi_ftype_v8qi_v8qi
;
12414 type
= v4hi_ftype_v4hi_v4hi
;
12417 type
= v2si_ftype_v2si_v2si
;
12420 type
= di_ftype_di_di
;
12427 /* Override for comparisons. */
12428 if (d
->icode
== CODE_FOR_maskcmpv4sf3
12429 || d
->icode
== CODE_FOR_maskncmpv4sf3
12430 || d
->icode
== CODE_FOR_vmmaskcmpv4sf3
12431 || d
->icode
== CODE_FOR_vmmaskncmpv4sf3
)
12432 type
= v4si_ftype_v4sf_v4sf
;
12434 if (d
->icode
== CODE_FOR_maskcmpv2df3
12435 || d
->icode
== CODE_FOR_maskncmpv2df3
12436 || d
->icode
== CODE_FOR_vmmaskcmpv2df3
12437 || d
->icode
== CODE_FOR_vmmaskncmpv2df3
)
12438 type
= v2di_ftype_v2df_v2df
;
12440 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
12443 /* Add the remaining MMX insns with somewhat more complicated types. */
12444 def_builtin (MASK_MMX
, "__builtin_ia32_mmx_zero", di_ftype_void
, IX86_BUILTIN_MMX_ZERO
);
12445 def_builtin (MASK_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
12446 def_builtin (MASK_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
12447 def_builtin (MASK_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
12448 def_builtin (MASK_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
12450 def_builtin (MASK_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
12451 def_builtin (MASK_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
12452 def_builtin (MASK_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
12454 def_builtin (MASK_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
12455 def_builtin (MASK_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
12457 def_builtin (MASK_MMX
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
12458 def_builtin (MASK_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
12460 /* comi/ucomi insns. */
12461 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
12462 if (d
->mask
== MASK_SSE2
)
12463 def_builtin (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
12465 def_builtin (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
12467 def_builtin (MASK_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
12468 def_builtin (MASK_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
12469 def_builtin (MASK_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
12471 def_builtin (MASK_SSE
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
12472 def_builtin (MASK_SSE
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
12473 def_builtin (MASK_SSE
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
12474 def_builtin (MASK_SSE
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
12475 def_builtin (MASK_SSE
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
12476 def_builtin (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64
, IX86_BUILTIN_CVTSI642SS
);
12477 def_builtin (MASK_SSE
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
12478 def_builtin (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI64
);
12479 def_builtin (MASK_SSE
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
12480 def_builtin (MASK_SSE
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
12481 def_builtin (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI64
);
12483 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pextrw", int_ftype_v4hi_int
, IX86_BUILTIN_PEXTRW
);
12484 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int
, IX86_BUILTIN_PINSRW
);
12486 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
12488 def_builtin (MASK_SSE
, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADAPS
);
12489 def_builtin (MASK_SSE
, "__builtin_ia32_loadups", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADUPS
);
12490 def_builtin (MASK_SSE
, "__builtin_ia32_loadss", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADSS
);
12491 def_builtin (MASK_SSE
, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREAPS
);
12492 def_builtin (MASK_SSE
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
12493 def_builtin (MASK_SSE
, "__builtin_ia32_storess", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STORESS
);
12495 def_builtin (MASK_SSE
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
12496 def_builtin (MASK_SSE
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
12497 def_builtin (MASK_SSE
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
12498 def_builtin (MASK_SSE
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
12500 def_builtin (MASK_SSE
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
12501 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
12502 def_builtin (MASK_SSE
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
12503 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
12505 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
12507 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
12509 def_builtin (MASK_SSE
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
12510 def_builtin (MASK_SSE
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
12511 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
12512 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
12513 def_builtin (MASK_SSE
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
12514 def_builtin (MASK_SSE
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
12516 def_builtin (MASK_SSE
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
12518 /* Original 3DNow! */
12519 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
12520 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
12521 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
12522 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
12523 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
12524 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
12525 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
12526 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
12527 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
12528 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
12529 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
12530 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
12531 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
12532 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
12533 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
12534 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
12535 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
12536 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
12537 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
12538 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
12540 /* 3DNow! extension as used in the Athlon CPU. */
12541 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
12542 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
12543 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
12544 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
12545 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
12546 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
12548 def_builtin (MASK_SSE
, "__builtin_ia32_setzerops", v4sf_ftype_void
, IX86_BUILTIN_SSE_ZERO
);
12551 def_builtin (MASK_SSE2
, "__builtin_ia32_pextrw128", int_ftype_v8hi_int
, IX86_BUILTIN_PEXTRW128
);
12552 def_builtin (MASK_SSE2
, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int
, IX86_BUILTIN_PINSRW128
);
12554 def_builtin (MASK_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
12555 def_builtin (MASK_SSE2
, "__builtin_ia32_movq2dq", v2di_ftype_di
, IX86_BUILTIN_MOVQ2DQ
);
12556 def_builtin (MASK_SSE2
, "__builtin_ia32_movdq2q", di_ftype_v2di
, IX86_BUILTIN_MOVDQ2Q
);
12558 def_builtin (MASK_SSE2
, "__builtin_ia32_loadapd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADAPD
);
12559 def_builtin (MASK_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADUPD
);
12560 def_builtin (MASK_SSE2
, "__builtin_ia32_loadsd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADSD
);
12561 def_builtin (MASK_SSE2
, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREAPD
);
12562 def_builtin (MASK_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
12563 def_builtin (MASK_SSE2
, "__builtin_ia32_storesd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STORESD
);
12565 def_builtin (MASK_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si
, IX86_BUILTIN_LOADHPD
);
12566 def_builtin (MASK_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si
, IX86_BUILTIN_LOADLPD
);
12567 def_builtin (MASK_SSE2
, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df
, IX86_BUILTIN_STOREHPD
);
12568 def_builtin (MASK_SSE2
, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df
, IX86_BUILTIN_STORELPD
);
12570 def_builtin (MASK_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
12571 def_builtin (MASK_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
12572 def_builtin (MASK_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
12573 def_builtin (MASK_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
12574 def_builtin (MASK_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
12576 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
12577 def_builtin (MASK_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
12578 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
12579 def_builtin (MASK_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
12581 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
12582 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
12584 def_builtin (MASK_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
12586 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
12587 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
12589 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
12590 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
12591 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
12592 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
12593 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
12595 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
12597 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
12598 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
12599 def_builtin (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTSD2SI64
);
12600 def_builtin (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI64
);
12602 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
12603 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
12604 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
12606 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
12607 def_builtin (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64
, IX86_BUILTIN_CVTSI642SD
);
12608 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
12609 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
12611 def_builtin (MASK_SSE2
, "__builtin_ia32_setpd1", v2df_ftype_double
, IX86_BUILTIN_SETPD1
);
12612 def_builtin (MASK_SSE2
, "__builtin_ia32_setpd", v2df_ftype_double_double
, IX86_BUILTIN_SETPD
);
12613 def_builtin (MASK_SSE2
, "__builtin_ia32_setzeropd", ti_ftype_void
, IX86_BUILTIN_CLRPD
);
12614 def_builtin (MASK_SSE2
, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADPD1
);
12615 def_builtin (MASK_SSE2
, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADRPD
);
12616 def_builtin (MASK_SSE2
, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREPD1
);
12617 def_builtin (MASK_SSE2
, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STORERPD
);
12619 def_builtin (MASK_SSE2
, "__builtin_ia32_clflush", void_ftype_pcvoid
, IX86_BUILTIN_CLFLUSH
);
12620 def_builtin (MASK_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
12621 def_builtin (MASK_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
12623 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQA
);
12624 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQU
);
12625 def_builtin (MASK_SSE2
, "__builtin_ia32_loadd", v4si_ftype_pcint
, IX86_BUILTIN_LOADD
);
12626 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQA
);
12627 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQU
);
12628 def_builtin (MASK_SSE2
, "__builtin_ia32_stored", void_ftype_pcint_v4si
, IX86_BUILTIN_STORED
);
12629 def_builtin (MASK_SSE2
, "__builtin_ia32_movq", v2di_ftype_v2di
, IX86_BUILTIN_MOVQ
);
12631 def_builtin (MASK_SSE
, "__builtin_ia32_setzero128", v2di_ftype_void
, IX86_BUILTIN_CLRTI
);
12633 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si
, IX86_BUILTIN_PMULUDQ
);
12634 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si
, IX86_BUILTIN_PMULUDQ128
);
12636 def_builtin (MASK_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSLLW128
);
12637 def_builtin (MASK_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSLLD128
);
12638 def_builtin (MASK_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
12640 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRLW128
);
12641 def_builtin (MASK_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRLD128
);
12642 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
12644 def_builtin (MASK_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRAW128
);
12645 def_builtin (MASK_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRAD128
);
12647 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLDQI128
);
12648 def_builtin (MASK_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
12649 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
12650 def_builtin (MASK_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
12652 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
12653 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
12654 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
12655 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
12657 def_builtin (MASK_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
12658 def_builtin (MASK_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
12660 def_builtin (MASK_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
12662 /* Prescott New Instructions. */
12663 def_builtin (MASK_SSE3
, "__builtin_ia32_monitor",
12664 void_ftype_pcvoid_unsigned_unsigned
,
12665 IX86_BUILTIN_MONITOR
);
12666 def_builtin (MASK_SSE3
, "__builtin_ia32_mwait",
12667 void_ftype_unsigned_unsigned
,
12668 IX86_BUILTIN_MWAIT
);
12669 def_builtin (MASK_SSE3
, "__builtin_ia32_movshdup",
12671 IX86_BUILTIN_MOVSHDUP
);
12672 def_builtin (MASK_SSE3
, "__builtin_ia32_movsldup",
12674 IX86_BUILTIN_MOVSLDUP
);
12675 def_builtin (MASK_SSE3
, "__builtin_ia32_lddqu",
12676 v16qi_ftype_pcchar
, IX86_BUILTIN_LDDQU
);
12677 def_builtin (MASK_SSE3
, "__builtin_ia32_loadddup",
12678 v2df_ftype_pcdouble
, IX86_BUILTIN_LOADDDUP
);
12679 def_builtin (MASK_SSE3
, "__builtin_ia32_movddup",
12680 v2df_ftype_v2df
, IX86_BUILTIN_MOVDDUP
);
12683 /* Errors in the source file can cause expand_expr to return const0_rtx
12684 where we expect a vector. To avoid crashing, use one of the vector
12685 clear instructions. */
12687 safe_vector_operand (rtx x
, enum machine_mode mode
)
12689 if (x
!= const0_rtx
)
12691 x
= gen_reg_rtx (mode
);
12693 if (VALID_MMX_REG_MODE (mode
) || VALID_MMX_REG_MODE_3DNOW (mode
))
12694 emit_insn (gen_mmx_clrdi (mode
== DImode
? x
12695 : gen_rtx_SUBREG (DImode
, x
, 0)));
12697 emit_insn (gen_sse_clrv4sf (mode
== V4SFmode
? x
12698 : gen_rtx_SUBREG (V4SFmode
, x
, 0),
12699 CONST0_RTX (V4SFmode
)));
12703 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
12706 ix86_expand_binop_builtin (enum insn_code icode
, tree arglist
, rtx target
)
12709 tree arg0
= TREE_VALUE (arglist
);
12710 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12711 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12712 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12713 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
12714 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
12715 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
12717 if (VECTOR_MODE_P (mode0
))
12718 op0
= safe_vector_operand (op0
, mode0
);
12719 if (VECTOR_MODE_P (mode1
))
12720 op1
= safe_vector_operand (op1
, mode1
);
12723 || GET_MODE (target
) != tmode
12724 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
12725 target
= gen_reg_rtx (tmode
);
12727 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
12729 rtx x
= gen_reg_rtx (V4SImode
);
12730 emit_insn (gen_sse2_loadd (x
, op1
));
12731 op1
= gen_lowpart (TImode
, x
);
12734 /* In case the insn wants input operands in modes different from
12735 the result, abort. */
12736 if ((GET_MODE (op0
) != mode0
&& GET_MODE (op0
) != VOIDmode
)
12737 || (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
))
12740 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
12741 op0
= copy_to_mode_reg (mode0
, op0
);
12742 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
12743 op1
= copy_to_mode_reg (mode1
, op1
);
12745 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12746 yet one of the two must not be a memory. This is normally enforced
12747 by expanders, but we didn't bother to create one here. */
12748 if (GET_CODE (op0
) == MEM
&& GET_CODE (op1
) == MEM
)
12749 op0
= copy_to_mode_reg (mode0
, op0
);
12751 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
12758 /* Subroutine of ix86_expand_builtin to take care of stores. */
12761 ix86_expand_store_builtin (enum insn_code icode
, tree arglist
)
12764 tree arg0
= TREE_VALUE (arglist
);
12765 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12766 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12767 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12768 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
12769 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
12771 if (VECTOR_MODE_P (mode1
))
12772 op1
= safe_vector_operand (op1
, mode1
);
12774 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
12775 op1
= copy_to_mode_reg (mode1
, op1
);
12777 pat
= GEN_FCN (icode
) (op0
, op1
);
12783 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
12786 ix86_expand_unop_builtin (enum insn_code icode
, tree arglist
,
12787 rtx target
, int do_load
)
12790 tree arg0
= TREE_VALUE (arglist
);
12791 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12792 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
12793 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
12796 || GET_MODE (target
) != tmode
12797 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
12798 target
= gen_reg_rtx (tmode
);
12800 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
12803 if (VECTOR_MODE_P (mode0
))
12804 op0
= safe_vector_operand (op0
, mode0
);
12806 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
12807 op0
= copy_to_mode_reg (mode0
, op0
);
12810 pat
= GEN_FCN (icode
) (target
, op0
);
12817 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12818 sqrtss, rsqrtss, rcpss. */
12821 ix86_expand_unop1_builtin (enum insn_code icode
, tree arglist
, rtx target
)
12824 tree arg0
= TREE_VALUE (arglist
);
12825 rtx op1
, op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12826 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
12827 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
12830 || GET_MODE (target
) != tmode
12831 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
12832 target
= gen_reg_rtx (tmode
);
12834 if (VECTOR_MODE_P (mode0
))
12835 op0
= safe_vector_operand (op0
, mode0
);
12837 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
12838 op0
= copy_to_mode_reg (mode0
, op0
);
12841 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
12842 op1
= copy_to_mode_reg (mode0
, op1
);
12844 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
12851 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
12854 ix86_expand_sse_compare (const struct builtin_description
*d
, tree arglist
,
12858 tree arg0
= TREE_VALUE (arglist
);
12859 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12860 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12861 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12863 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
12864 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
12865 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
12866 enum rtx_code comparison
= d
->comparison
;
12868 if (VECTOR_MODE_P (mode0
))
12869 op0
= safe_vector_operand (op0
, mode0
);
12870 if (VECTOR_MODE_P (mode1
))
12871 op1
= safe_vector_operand (op1
, mode1
);
12873 /* Swap operands if we have a comparison that isn't available in
12877 rtx tmp
= gen_reg_rtx (mode1
);
12878 emit_move_insn (tmp
, op1
);
12884 || GET_MODE (target
) != tmode
12885 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
12886 target
= gen_reg_rtx (tmode
);
12888 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
12889 op0
= copy_to_mode_reg (mode0
, op0
);
12890 if (! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
12891 op1
= copy_to_mode_reg (mode1
, op1
);
12893 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
12894 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
12901 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
12904 ix86_expand_sse_comi (const struct builtin_description
*d
, tree arglist
,
12908 tree arg0
= TREE_VALUE (arglist
);
12909 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12910 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12911 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12913 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
12914 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
12915 enum rtx_code comparison
= d
->comparison
;
12917 if (VECTOR_MODE_P (mode0
))
12918 op0
= safe_vector_operand (op0
, mode0
);
12919 if (VECTOR_MODE_P (mode1
))
12920 op1
= safe_vector_operand (op1
, mode1
);
12922 /* Swap operands if we have a comparison that isn't available in
12931 target
= gen_reg_rtx (SImode
);
12932 emit_move_insn (target
, const0_rtx
);
12933 target
= gen_rtx_SUBREG (QImode
, target
, 0);
12935 if (! (*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
12936 op0
= copy_to_mode_reg (mode0
, op0
);
12937 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
12938 op1
= copy_to_mode_reg (mode1
, op1
);
12940 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
12941 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
12945 emit_insn (gen_rtx_SET (VOIDmode
,
12946 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
12947 gen_rtx_fmt_ee (comparison
, QImode
,
12951 return SUBREG_REG (target
);
12954 /* Expand an expression EXP that calls a built-in function,
12955 with result going to TARGET if that's convenient
12956 (and in mode MODE if that's convenient).
12957 SUBTARGET may be used as the target for computing one of EXP's operands.
12958 IGNORE is nonzero if the value is to be ignored. */
12961 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
12962 enum machine_mode mode ATTRIBUTE_UNUSED
,
12963 int ignore ATTRIBUTE_UNUSED
)
12965 const struct builtin_description
*d
;
12967 enum insn_code icode
;
12968 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
12969 tree arglist
= TREE_OPERAND (exp
, 1);
12970 tree arg0
, arg1
, arg2
;
12971 rtx op0
, op1
, op2
, pat
;
12972 enum machine_mode tmode
, mode0
, mode1
, mode2
;
12973 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
12977 case IX86_BUILTIN_EMMS
:
12978 emit_insn (gen_emms ());
12981 case IX86_BUILTIN_SFENCE
:
12982 emit_insn (gen_sfence ());
12985 case IX86_BUILTIN_PEXTRW
:
12986 case IX86_BUILTIN_PEXTRW128
:
12987 icode
= (fcode
== IX86_BUILTIN_PEXTRW
12988 ? CODE_FOR_mmx_pextrw
12989 : CODE_FOR_sse2_pextrw
);
12990 arg0
= TREE_VALUE (arglist
);
12991 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12992 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12993 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12994 tmode
= insn_data
[icode
].operand
[0].mode
;
12995 mode0
= insn_data
[icode
].operand
[1].mode
;
12996 mode1
= insn_data
[icode
].operand
[2].mode
;
12998 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
12999 op0
= copy_to_mode_reg (mode0
, op0
);
13000 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13002 error ("selector must be an integer constant in the range 0..%i",
13003 fcode
== IX86_BUILTIN_PEXTRW
? 3:7);
13004 return gen_reg_rtx (tmode
);
13007 || GET_MODE (target
) != tmode
13008 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13009 target
= gen_reg_rtx (tmode
);
13010 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13016 case IX86_BUILTIN_PINSRW
:
13017 case IX86_BUILTIN_PINSRW128
:
13018 icode
= (fcode
== IX86_BUILTIN_PINSRW
13019 ? CODE_FOR_mmx_pinsrw
13020 : CODE_FOR_sse2_pinsrw
);
13021 arg0
= TREE_VALUE (arglist
);
13022 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13023 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
13024 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13025 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13026 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
13027 tmode
= insn_data
[icode
].operand
[0].mode
;
13028 mode0
= insn_data
[icode
].operand
[1].mode
;
13029 mode1
= insn_data
[icode
].operand
[2].mode
;
13030 mode2
= insn_data
[icode
].operand
[3].mode
;
13032 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13033 op0
= copy_to_mode_reg (mode0
, op0
);
13034 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13035 op1
= copy_to_mode_reg (mode1
, op1
);
13036 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
13038 error ("selector must be an integer constant in the range 0..%i",
13039 fcode
== IX86_BUILTIN_PINSRW
? 15:255);
13043 || GET_MODE (target
) != tmode
13044 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13045 target
= gen_reg_rtx (tmode
);
13046 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
13052 case IX86_BUILTIN_MASKMOVQ
:
13053 case IX86_BUILTIN_MASKMOVDQU
:
13054 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
13055 ? (TARGET_64BIT
? CODE_FOR_mmx_maskmovq_rex
: CODE_FOR_mmx_maskmovq
)
13056 : (TARGET_64BIT
? CODE_FOR_sse2_maskmovdqu_rex64
13057 : CODE_FOR_sse2_maskmovdqu
));
13058 /* Note the arg order is different from the operand order. */
13059 arg1
= TREE_VALUE (arglist
);
13060 arg2
= TREE_VALUE (TREE_CHAIN (arglist
));
13061 arg0
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
13062 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13063 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13064 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
13065 mode0
= insn_data
[icode
].operand
[0].mode
;
13066 mode1
= insn_data
[icode
].operand
[1].mode
;
13067 mode2
= insn_data
[icode
].operand
[2].mode
;
13069 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
13070 op0
= copy_to_mode_reg (mode0
, op0
);
13071 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
13072 op1
= copy_to_mode_reg (mode1
, op1
);
13073 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
13074 op2
= copy_to_mode_reg (mode2
, op2
);
13075 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
13081 case IX86_BUILTIN_SQRTSS
:
13082 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2
, arglist
, target
);
13083 case IX86_BUILTIN_RSQRTSS
:
13084 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2
, arglist
, target
);
13085 case IX86_BUILTIN_RCPSS
:
13086 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2
, arglist
, target
);
13088 case IX86_BUILTIN_LOADAPS
:
13089 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps
, arglist
, target
, 1);
13091 case IX86_BUILTIN_LOADUPS
:
13092 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, arglist
, target
, 1);
13094 case IX86_BUILTIN_STOREAPS
:
13095 return ix86_expand_store_builtin (CODE_FOR_sse_movaps
, arglist
);
13097 case IX86_BUILTIN_STOREUPS
:
13098 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, arglist
);
13100 case IX86_BUILTIN_LOADSS
:
13101 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss
, arglist
, target
, 1);
13103 case IX86_BUILTIN_STORESS
:
13104 return ix86_expand_store_builtin (CODE_FOR_sse_storess
, arglist
);
13106 case IX86_BUILTIN_LOADHPS
:
13107 case IX86_BUILTIN_LOADLPS
:
13108 case IX86_BUILTIN_LOADHPD
:
13109 case IX86_BUILTIN_LOADLPD
:
13110 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_movhps
13111 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_movlps
13112 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_movhpd
13113 : CODE_FOR_sse2_movsd
);
13114 arg0
= TREE_VALUE (arglist
);
13115 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13116 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13117 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13118 tmode
= insn_data
[icode
].operand
[0].mode
;
13119 mode0
= insn_data
[icode
].operand
[1].mode
;
13120 mode1
= insn_data
[icode
].operand
[2].mode
;
13122 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13123 op0
= copy_to_mode_reg (mode0
, op0
);
13124 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
13126 || GET_MODE (target
) != tmode
13127 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13128 target
= gen_reg_rtx (tmode
);
13129 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13135 case IX86_BUILTIN_STOREHPS
:
13136 case IX86_BUILTIN_STORELPS
:
13137 case IX86_BUILTIN_STOREHPD
:
13138 case IX86_BUILTIN_STORELPD
:
13139 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_movhps
13140 : fcode
== IX86_BUILTIN_STORELPS
? CODE_FOR_sse_movlps
13141 : fcode
== IX86_BUILTIN_STOREHPD
? CODE_FOR_sse2_movhpd
13142 : CODE_FOR_sse2_movsd
);
13143 arg0
= TREE_VALUE (arglist
);
13144 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13145 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13146 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13147 mode0
= insn_data
[icode
].operand
[1].mode
;
13148 mode1
= insn_data
[icode
].operand
[2].mode
;
13150 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
13151 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13152 op1
= copy_to_mode_reg (mode1
, op1
);
13154 pat
= GEN_FCN (icode
) (op0
, op0
, op1
);
13160 case IX86_BUILTIN_MOVNTPS
:
13161 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, arglist
);
13162 case IX86_BUILTIN_MOVNTQ
:
13163 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, arglist
);
13165 case IX86_BUILTIN_LDMXCSR
:
13166 op0
= expand_expr (TREE_VALUE (arglist
), NULL_RTX
, VOIDmode
, 0);
13167 target
= assign_386_stack_local (SImode
, 0);
13168 emit_move_insn (target
, op0
);
13169 emit_insn (gen_ldmxcsr (target
));
13172 case IX86_BUILTIN_STMXCSR
:
13173 target
= assign_386_stack_local (SImode
, 0);
13174 emit_insn (gen_stmxcsr (target
));
13175 return copy_to_mode_reg (SImode
, target
);
13177 case IX86_BUILTIN_SHUFPS
:
13178 case IX86_BUILTIN_SHUFPD
:
13179 icode
= (fcode
== IX86_BUILTIN_SHUFPS
13180 ? CODE_FOR_sse_shufps
13181 : CODE_FOR_sse2_shufpd
);
13182 arg0
= TREE_VALUE (arglist
);
13183 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13184 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
13185 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13186 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13187 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
13188 tmode
= insn_data
[icode
].operand
[0].mode
;
13189 mode0
= insn_data
[icode
].operand
[1].mode
;
13190 mode1
= insn_data
[icode
].operand
[2].mode
;
13191 mode2
= insn_data
[icode
].operand
[3].mode
;
13193 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13194 op0
= copy_to_mode_reg (mode0
, op0
);
13195 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13196 op1
= copy_to_mode_reg (mode1
, op1
);
13197 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
13199 /* @@@ better error message */
13200 error ("mask must be an immediate");
13201 return gen_reg_rtx (tmode
);
13204 || GET_MODE (target
) != tmode
13205 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13206 target
= gen_reg_rtx (tmode
);
13207 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
13213 case IX86_BUILTIN_PSHUFW
:
13214 case IX86_BUILTIN_PSHUFD
:
13215 case IX86_BUILTIN_PSHUFHW
:
13216 case IX86_BUILTIN_PSHUFLW
:
13217 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
13218 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
13219 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
13220 : CODE_FOR_mmx_pshufw
);
13221 arg0
= TREE_VALUE (arglist
);
13222 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13223 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13224 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13225 tmode
= insn_data
[icode
].operand
[0].mode
;
13226 mode1
= insn_data
[icode
].operand
[1].mode
;
13227 mode2
= insn_data
[icode
].operand
[2].mode
;
13229 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
13230 op0
= copy_to_mode_reg (mode1
, op0
);
13231 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
13233 /* @@@ better error message */
13234 error ("mask must be an immediate");
13238 || GET_MODE (target
) != tmode
13239 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13240 target
= gen_reg_rtx (tmode
);
13241 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13247 case IX86_BUILTIN_PSLLDQI128
:
13248 case IX86_BUILTIN_PSRLDQI128
:
13249 icode
= ( fcode
== IX86_BUILTIN_PSLLDQI128
? CODE_FOR_sse2_ashlti3
13250 : CODE_FOR_sse2_lshrti3
);
13251 arg0
= TREE_VALUE (arglist
);
13252 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13253 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13254 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13255 tmode
= insn_data
[icode
].operand
[0].mode
;
13256 mode1
= insn_data
[icode
].operand
[1].mode
;
13257 mode2
= insn_data
[icode
].operand
[2].mode
;
13259 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
13261 op0
= copy_to_reg (op0
);
13262 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
13264 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
13266 error ("shift must be an immediate");
13269 target
= gen_reg_rtx (V2DImode
);
13270 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, V2DImode
, 0), op0
, op1
);
13276 case IX86_BUILTIN_FEMMS
:
13277 emit_insn (gen_femms ());
13280 case IX86_BUILTIN_PAVGUSB
:
13281 return ix86_expand_binop_builtin (CODE_FOR_pavgusb
, arglist
, target
);
13283 case IX86_BUILTIN_PF2ID
:
13284 return ix86_expand_unop_builtin (CODE_FOR_pf2id
, arglist
, target
, 0);
13286 case IX86_BUILTIN_PFACC
:
13287 return ix86_expand_binop_builtin (CODE_FOR_pfacc
, arglist
, target
);
13289 case IX86_BUILTIN_PFADD
:
13290 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3
, arglist
, target
);
13292 case IX86_BUILTIN_PFCMPEQ
:
13293 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3
, arglist
, target
);
13295 case IX86_BUILTIN_PFCMPGE
:
13296 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3
, arglist
, target
);
13298 case IX86_BUILTIN_PFCMPGT
:
13299 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3
, arglist
, target
);
13301 case IX86_BUILTIN_PFMAX
:
13302 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3
, arglist
, target
);
13304 case IX86_BUILTIN_PFMIN
:
13305 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3
, arglist
, target
);
13307 case IX86_BUILTIN_PFMUL
:
13308 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3
, arglist
, target
);
13310 case IX86_BUILTIN_PFRCP
:
13311 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2
, arglist
, target
, 0);
13313 case IX86_BUILTIN_PFRCPIT1
:
13314 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3
, arglist
, target
);
13316 case IX86_BUILTIN_PFRCPIT2
:
13317 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3
, arglist
, target
);
13319 case IX86_BUILTIN_PFRSQIT1
:
13320 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3
, arglist
, target
);
13322 case IX86_BUILTIN_PFRSQRT
:
13323 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2
, arglist
, target
, 0);
13325 case IX86_BUILTIN_PFSUB
:
13326 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3
, arglist
, target
);
13328 case IX86_BUILTIN_PFSUBR
:
13329 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3
, arglist
, target
);
13331 case IX86_BUILTIN_PI2FD
:
13332 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2
, arglist
, target
, 0);
13334 case IX86_BUILTIN_PMULHRW
:
13335 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3
, arglist
, target
);
13337 case IX86_BUILTIN_PF2IW
:
13338 return ix86_expand_unop_builtin (CODE_FOR_pf2iw
, arglist
, target
, 0);
13340 case IX86_BUILTIN_PFNACC
:
13341 return ix86_expand_binop_builtin (CODE_FOR_pfnacc
, arglist
, target
);
13343 case IX86_BUILTIN_PFPNACC
:
13344 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc
, arglist
, target
);
13346 case IX86_BUILTIN_PI2FW
:
13347 return ix86_expand_unop_builtin (CODE_FOR_pi2fw
, arglist
, target
, 0);
13349 case IX86_BUILTIN_PSWAPDSI
:
13350 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2
, arglist
, target
, 0);
13352 case IX86_BUILTIN_PSWAPDSF
:
13353 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2
, arglist
, target
, 0);
13355 case IX86_BUILTIN_SSE_ZERO
:
13356 target
= gen_reg_rtx (V4SFmode
);
13357 emit_insn (gen_sse_clrv4sf (target
, CONST0_RTX (V4SFmode
)));
13360 case IX86_BUILTIN_MMX_ZERO
:
13361 target
= gen_reg_rtx (DImode
);
13362 emit_insn (gen_mmx_clrdi (target
));
13365 case IX86_BUILTIN_CLRTI
:
13366 target
= gen_reg_rtx (V2DImode
);
13367 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode
, target
, V2DImode
, 0)));
13371 case IX86_BUILTIN_SQRTSD
:
13372 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2
, arglist
, target
);
13373 case IX86_BUILTIN_LOADAPD
:
13374 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd
, arglist
, target
, 1);
13375 case IX86_BUILTIN_LOADUPD
:
13376 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, arglist
, target
, 1);
13378 case IX86_BUILTIN_STOREAPD
:
13379 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
13380 case IX86_BUILTIN_STOREUPD
:
13381 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, arglist
);
13383 case IX86_BUILTIN_LOADSD
:
13384 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd
, arglist
, target
, 1);
13386 case IX86_BUILTIN_STORESD
:
13387 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd
, arglist
);
13389 case IX86_BUILTIN_SETPD1
:
13390 target
= assign_386_stack_local (DFmode
, 0);
13391 arg0
= TREE_VALUE (arglist
);
13392 emit_move_insn (adjust_address (target
, DFmode
, 0),
13393 expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0));
13394 op0
= gen_reg_rtx (V2DFmode
);
13395 emit_insn (gen_sse2_loadsd (op0
, adjust_address (target
, V2DFmode
, 0)));
13396 emit_insn (gen_sse2_shufpd (op0
, op0
, op0
, const0_rtx
));
13399 case IX86_BUILTIN_SETPD
:
13400 target
= assign_386_stack_local (V2DFmode
, 0);
13401 arg0
= TREE_VALUE (arglist
);
13402 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13403 emit_move_insn (adjust_address (target
, DFmode
, 0),
13404 expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0));
13405 emit_move_insn (adjust_address (target
, DFmode
, 8),
13406 expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0));
13407 op0
= gen_reg_rtx (V2DFmode
);
13408 emit_insn (gen_sse2_movapd (op0
, target
));
13411 case IX86_BUILTIN_LOADRPD
:
13412 target
= ix86_expand_unop_builtin (CODE_FOR_sse2_movapd
, arglist
,
13413 gen_reg_rtx (V2DFmode
), 1);
13414 emit_insn (gen_sse2_shufpd (target
, target
, target
, const1_rtx
));
13417 case IX86_BUILTIN_LOADPD1
:
13418 target
= ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd
, arglist
,
13419 gen_reg_rtx (V2DFmode
), 1);
13420 emit_insn (gen_sse2_shufpd (target
, target
, target
, const0_rtx
));
13423 case IX86_BUILTIN_STOREPD1
:
13424 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
13425 case IX86_BUILTIN_STORERPD
:
13426 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
13428 case IX86_BUILTIN_CLRPD
:
13429 target
= gen_reg_rtx (V2DFmode
);
13430 emit_insn (gen_sse_clrv2df (target
));
13433 case IX86_BUILTIN_MFENCE
:
13434 emit_insn (gen_sse2_mfence ());
13436 case IX86_BUILTIN_LFENCE
:
13437 emit_insn (gen_sse2_lfence ());
13440 case IX86_BUILTIN_CLFLUSH
:
13441 arg0
= TREE_VALUE (arglist
);
13442 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13443 icode
= CODE_FOR_sse2_clflush
;
13444 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, Pmode
))
13445 op0
= copy_to_mode_reg (Pmode
, op0
);
13447 emit_insn (gen_sse2_clflush (op0
));
13450 case IX86_BUILTIN_MOVNTPD
:
13451 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, arglist
);
13452 case IX86_BUILTIN_MOVNTDQ
:
13453 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, arglist
);
13454 case IX86_BUILTIN_MOVNTI
:
13455 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, arglist
);
13457 case IX86_BUILTIN_LOADDQA
:
13458 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa
, arglist
, target
, 1);
13459 case IX86_BUILTIN_LOADDQU
:
13460 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu
, arglist
, target
, 1);
13461 case IX86_BUILTIN_LOADD
:
13462 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd
, arglist
, target
, 1);
13464 case IX86_BUILTIN_STOREDQA
:
13465 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa
, arglist
);
13466 case IX86_BUILTIN_STOREDQU
:
13467 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu
, arglist
);
13468 case IX86_BUILTIN_STORED
:
13469 return ix86_expand_store_builtin (CODE_FOR_sse2_stored
, arglist
);
13471 case IX86_BUILTIN_MONITOR
:
13472 arg0
= TREE_VALUE (arglist
);
13473 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13474 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
13475 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13476 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13477 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
13479 op0
= copy_to_mode_reg (SImode
, op0
);
13481 op1
= copy_to_mode_reg (SImode
, op1
);
13483 op2
= copy_to_mode_reg (SImode
, op2
);
13484 emit_insn (gen_monitor (op0
, op1
, op2
));
13487 case IX86_BUILTIN_MWAIT
:
13488 arg0
= TREE_VALUE (arglist
);
13489 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13490 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13491 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13493 op0
= copy_to_mode_reg (SImode
, op0
);
13495 op1
= copy_to_mode_reg (SImode
, op1
);
13496 emit_insn (gen_mwait (op0
, op1
));
13499 case IX86_BUILTIN_LOADDDUP
:
13500 return ix86_expand_unop_builtin (CODE_FOR_loadddup
, arglist
, target
, 1);
13502 case IX86_BUILTIN_LDDQU
:
13503 return ix86_expand_unop_builtin (CODE_FOR_lddqu
, arglist
, target
,
13510 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
13511 if (d
->code
== fcode
)
13513 /* Compares are treated specially. */
13514 if (d
->icode
== CODE_FOR_maskcmpv4sf3
13515 || d
->icode
== CODE_FOR_vmmaskcmpv4sf3
13516 || d
->icode
== CODE_FOR_maskncmpv4sf3
13517 || d
->icode
== CODE_FOR_vmmaskncmpv4sf3
13518 || d
->icode
== CODE_FOR_maskcmpv2df3
13519 || d
->icode
== CODE_FOR_vmmaskcmpv2df3
13520 || d
->icode
== CODE_FOR_maskncmpv2df3
13521 || d
->icode
== CODE_FOR_vmmaskncmpv2df3
)
13522 return ix86_expand_sse_compare (d
, arglist
, target
);
13524 return ix86_expand_binop_builtin (d
->icode
, arglist
, target
);
13527 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
13528 if (d
->code
== fcode
)
13529 return ix86_expand_unop_builtin (d
->icode
, arglist
, target
, 0);
13531 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
13532 if (d
->code
== fcode
)
13533 return ix86_expand_sse_comi (d
, arglist
, target
);
13535 /* @@@ Should really do something sensible here. */
13539 /* Store OPERAND to the memory after reload is completed. This means
13540 that we can't easily use assign_stack_local. */
/* NOTE(review): this region is a lossy extraction — the embedded original
   line numbers (13542, 13545, 13547, ...) skip values, so dropped lines
   (braces, local declarations, the final return) are missing below.
   Code lines are kept byte-identical; only comments are added.
   Returns a MEM rtx through which OPERAND can be reloaded — confirm
   against the full source. */
13542 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
/* Must only run after reload: stack frame layout is already fixed. */
13545 if (!reload_completed
)
/* Red-zone case (x86-64 ABI): store into memory below the stack pointer
   without adjusting it — presumably at sp - RED_ZONE_SIZE; the address
   operand between 13550 and 13552 was dropped by the extraction. */
13547 if (TARGET_RED_ZONE
)
13549 result
= gen_rtx_MEM (mode
,
13550 gen_rtx_PLUS (Pmode
,
13552 GEN_INT (-RED_ZONE_SIZE
)));
13553 emit_move_insn (result
, operand
);
/* 64-bit without a red zone: push the value as one DImode word using a
   PRE_DEC of the stack pointer. */
13555 else if (!TARGET_RED_ZONE
&& TARGET_64BIT
)
13561 operand
= gen_lowpart (DImode
, operand
);
13565 gen_rtx_SET (VOIDmode
,
13566 gen_rtx_MEM (DImode
,
13567 gen_rtx_PRE_DEC (DImode
,
13568 stack_pointer_rtx
)),
/* The stored value now lives at the new top of stack. */
13574 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
/* 32-bit DImode case: split the operand into two SImode halves and emit
   two PRE_DEC pushes (the surrounding switch/case labels were dropped
   by the extraction). */
13583 split_di (&operand
, 1, operands
, operands
+ 1);
13585 gen_rtx_SET (VOIDmode
,
13586 gen_rtx_MEM (SImode
,
13587 gen_rtx_PRE_DEC (Pmode
,
13588 stack_pointer_rtx
)),
13591 gen_rtx_SET (VOIDmode
,
13592 gen_rtx_MEM (SImode
,
13593 gen_rtx_PRE_DEC (Pmode
,
13594 stack_pointer_rtx
)),
13599 /* It is better to store HImodes as SImodes. */
13600 if (!TARGET_PARTIAL_REG_STALL
)
13601 operand
= gen_lowpart (SImode
, operand
);
/* Push the (possibly widened) operand via PRE_DEC of the stack pointer. */
13605 gen_rtx_SET (VOIDmode
,
13606 gen_rtx_MEM (GET_MODE (operand
),
13607 gen_rtx_PRE_DEC (SImode
,
13608 stack_pointer_rtx
)),
/* Common exit for the push paths: a MEM at the adjusted stack top. */
13614 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
13619 /* Free operand from the memory. */
/* NOTE(review): counterpart of ix86_force_to_memory — pops the stack slot
   that function pushed. The extraction dropped lines here (original
   numbers skip 13620, 13624-13626, 13628, 13630-13632, the size
   assignments, and the expression tail after 13636), so the size chosen
   per mode and the closing GEN_INT/parens are not visible. Code lines
   are byte-identical; only comments are added. */
13621 ix86_free_from_memory (enum machine_mode mode
)
/* With a red zone nothing was pushed, so there is nothing to deallocate. */
13623 if (!TARGET_RED_ZONE
)
)
/* Size selection: DImode (or any mode on 64-bit) used an 8-byte push;
   HImode with partial-register stalls presumably used 2; otherwise 4 —
   the actual assignments were dropped by the extraction, confirm against
   the full source. */
13627 if (mode
== DImode
|| TARGET_64BIT
)
13629 else if (mode
== HImode
&& TARGET_PARTIAL_REG_STALL
)
13633 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13634 to pop or add instruction if registers are available. */
13635 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
13636 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
13641 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13642 QImode must go into class Q_REGS.
13643 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
13644 movdf to do mem-to-mem moves through integer regs. */
/* NOTE(review): lossy extraction — the return type, most return values
   (e.g. what is returned for CONST_VECTOR, for the SSE-constant case,
   and at the QImode tail), and several braces were dropped (original
   line numbers skip 13645, 13647, 13649, 13651, 13654, 13657, ...).
   Code lines are byte-identical; only comments are added. */
13646 ix86_preferred_reload_class (rtx x
, enum reg_class
class)
/* Non-zero vector constants cannot be materialized in registers directly;
   the dropped return presumably forces them elsewhere — confirm. */
13648 if (GET_CODE (x
) == CONST_VECTOR
&& x
!= CONST0_RTX (GET_MODE (x
)))
/* Floating-point CONST_DOUBLE (VOIDmode would mean an integer pair). */
13650 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
13652 /* SSE can't load any constant directly yet. */
13653 if (SSE_CLASS_P (class))
13655 /* Floats can load 0 and 1. */
13656 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x
))
13658 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13659 if (MAYBE_SSE_CLASS_P (class))
13660 return (reg_class_subset_p (class, GENERAL_REGS
)
13661 ? GENERAL_REGS
: FLOAT_REGS
);
13665 /* General regs can load everything. */
13666 if (reg_class_subset_p (class, GENERAL_REGS
))
13667 return GENERAL_REGS
;
13668 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13669 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
/* MMX registers cannot load constants either (dropped return follows). */
13672 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x
))
/* QImode must be narrowed to a class of byte-addressable registers
   (Q_REGS) — the dropped return presumably does that; confirm. */
13674 if (GET_MODE (x
) == QImode
&& ! reg_class_subset_p (class, Q_REGS
))
13679 /* If we are copying between general and FP registers, we need a memory
13680 location. The same is true for SSE and MMX registers.
13682 The macro can't work reliably when one of the CLASSES is class containing
13683 registers from multiple units (SSE, MMX, integer). We avoid this by never
13684 combining those units in single alternative in the machine description.
13685 Ensure that this constraint holds to avoid unexpected surprises.
13687 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13688 enforce these sanity checks. */
/* NOTE(review): lossy extraction — the return type, the body of the
   sanity-check branch (original lines 13699-13704, presumably an abort
   under STRICT), and braces were dropped. Code lines are byte-identical;
   only comments are added. */
13690 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
13691 enum machine_mode mode
, int strict
)
/* Sanity check: each class must be unambiguously FP, SSE, or MMX (or
   none) — mixed-unit classes would make the answer unreliable. */
13693 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
13694 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
13695 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
13696 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
13697 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
13698 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
/* Memory is needed when crossing the FP unit boundary, or when crossing
   the SSE/MMX <-> integer boundary unless the mode is SImode (or DImode
   on 64-bit) and direct inter-unit moves are enabled. */
13705 return (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
)
13706 || ((SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
)
13707 || MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
13708 && ((mode
!= SImode
&& (mode
!= DImode
|| !TARGET_64BIT
))
13709 || (!TARGET_INTER_UNIT_MOVES
&& !optimize_size
))));
13711 /* Return the cost of moving data from a register in class CLASS1 to
13712 one in class CLASS2.
13714 It is not required that the cost always equal 2 when FROM is the same as TO;
13715 on some machines it is expensive to move between registers if they are not
13716 general registers. */
/* NOTE(review): lossy extraction — the return type, the declaration and
   initialization of `cost', the `return cost;'/`return 20;' style exits
   for the secondary-memory and mismatch branches (original lines
   13726-13728, 13733, 13738-13739, 13744-13748), and the final default
   return were dropped. Code lines are byte-identical; comments only. */
13718 ix86_register_move_cost (enum machine_mode mode
, enum reg_class class1
,
13719 enum reg_class class2
)
13721 /* In case we require secondary memory, compute cost of the store followed
13722 by load. In order to avoid bad register allocation choices, we need
13723 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
/* strict == 0: we may be called before the sanity checks are valid. */
13725 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
/* Cost = worst-case store cost for class1 + worst-case load cost for
   class2 (MAX over the in/out direction of MEMORY_MOVE_COST). */
13729 cost
+= MAX (MEMORY_MOVE_COST (mode
, class1
, 0),
13730 MEMORY_MOVE_COST (mode
, class1
, 1));
13731 cost
+= MAX (MEMORY_MOVE_COST (mode
, class2
, 0),
13732 MEMORY_MOVE_COST (mode
, class2
, 1));
13734 /* In case of copying from general_purpose_register we may emit multiple
13735 stores followed by single load causing memory size mismatch stall.
13736 Count this as arbitrarily high cost of 20. */
13737 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
13740 /* In the case of FP/MMX moves, the registers actually overlap, and we
13741 have to switch modes in order to treat them differently. */
13742 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
13743 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
13749 /* Moves between SSE/MMX and integer unit are expensive. */
13750 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
13751 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
13752 return ix86_cost
->mmxsse_to_integer
;
/* Intra-unit moves: cost table entry for the unit class1 belongs to. */
13753 if (MAYBE_FLOAT_CLASS_P (class1
))
13754 return ix86_cost
->fp_move
;
13755 if (MAYBE_SSE_CLASS_P (class1
))
13756 return ix86_cost
->sse_move
;
13757 if (MAYBE_MMX_CLASS_P (class1
))
13758 return ix86_cost
->mmx_move
;
13762 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* NOTE(review): lossy extraction — the return type, some `return 0;'
   lines (e.g. after 13771 and 13782), and the `return (TARGET_MMX'
   head of the MMX ternary at 13777/13779 were dropped. Code lines are
   byte-identical; only comments are added. */
13764 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
13766 /* Flags and only flags can only hold CCmode values. */
13767 if (CC_REGNO_P (regno
))
13768 return GET_MODE_CLASS (mode
) == MODE_CC
;
/* CC/RANDOM/PARTIAL_INT modes are rejected for every other register
   (the dropped line presumably returns 0 here — confirm). */
13769 if (GET_MODE_CLASS (mode
) == MODE_CC
13770 || GET_MODE_CLASS (mode
) == MODE_RANDOM
13771 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
/* x87 stack registers. */
13773 if (FP_REGNO_P (regno
))
13774 return VALID_FP_MODE_P (mode
);
/* XMM registers require the SSE feature to be enabled. */
13775 if (SSE_REGNO_P (regno
))
13776 return (TARGET_SSE
? VALID_SSE_REG_MODE (mode
) : 0);
/* MMX registers: 3dNOW adds extra valid modes (condition head at the
   dropped original line 13778 — confirm it tests TARGET_MMX). */
13777 if (MMX_REGNO_P (regno
))
13779 ? VALID_MMX_REG_MODE (mode
) || VALID_MMX_REG_MODE_3DNOW (mode
) : 0);
13780 /* We handle both integer and floats in the general purpose registers.
13781 In future we should be able to handle vector modes as well. */
13782 if (!VALID_INT_MODE_P (mode
) && !VALID_FP_MODE_P (mode
))
13784 /* Take care for QImode values - they can be in non-QI regs, but then
13785 they do cause partial register stalls. */
13786 if (regno
< 4 || mode
!= QImode
|| TARGET_64BIT
)
/* QImode in a non-byte-addressable reg: allowed only during/after reload
   or when partial-register stalls are not a concern on this CPU. */
13788 return reload_in_progress
|| reload_completed
|| !TARGET_PARTIAL_REG_STALL
;
13791 /* Return the cost of moving data of mode M between a
13792 register and memory. A value of 2 is the default; this cost is
13793 relative to those in `REGISTER_MOVE_COST'.
13795 If moving between registers and memory is more expensive than
13796 between two registers, you should define this macro to express the
13799 Model also increased moving costs of QImode registers in non
/* NOTE(review): heavily lossy extraction — the return type, the `index'
   declarations, every switch `case'/`default' label and the index
   assignments inside the FLOAT/SSE/MMX switches (original lines
   13806-13821, 13828-13840, 13847-13856, 13860-13862, 13865, 13868-13869,
   13871, 13874, 13878) were dropped. The selector `in' distinguishes
   load (nonzero) from store. Code lines are byte-identical; comments
   only. */
13803 ix86_memory_move_cost (enum machine_mode mode
, enum reg_class
class, int in
)
/* x87 classes: index into fp_load/fp_store by operand size (dropped
   switch computes `index' — confirm against full source). */
13805 if (FLOAT_CLASS_P (class))
13822 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
/* SSE classes: same pattern with sse_load/sse_store tables. */
13824 if (SSE_CLASS_P (class))
13827 switch (GET_MODE_SIZE (mode
))
13841 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
/* MMX classes: mmx_load/mmx_store tables. */
13843 if (MMX_CLASS_P (class))
13846 switch (GET_MODE_SIZE (mode
))
13857 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
/* Integer classes, keyed by size (case labels dropped).
   Byte case: non-Q classes must load via movzbl and store with an extra
   penalty (+4) because they are not byte-addressable. */
13859 switch (GET_MODE_SIZE (mode
))
13863 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
13864 : ix86_cost
->movzbl_load
);
13866 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
13867 : ix86_cost
->int_store
[0] + 4);
/* 16-bit case. */
13870 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
13872 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
13873 if (mode
== TFmode
)
/* Default: cost of one 32-bit move times the number of words needed
   (size rounded up to UNITS_PER_WORD). */
13875 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
13876 * (((int) GET_MODE_SIZE (mode
)
13877 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
13881 /* Compute a (partial) cost for rtx X. Return true if the complete
13882 cost has been computed, and false if subexpressions should be
13883 scanned. In either case, *TOTAL contains the cost result. */
/* NOTE(review): this is the TARGET_RTX_COSTS hook. The extraction is
   extremely lossy here — the enclosing `switch (code)' statement, every
   `case' label (CONST_INT, CONST_DOUBLE, ZERO_EXTEND, SIGN_EXTEND,
   ASHIFT, MULT, DIV, PLUS, MINUS, NEG, ABS, SQRT, UNSPEC, ... inferred
   from the visible bodies but NOT visible themselves), most braces,
   `return true/false' lines, and several assignments were dropped
   (original line numbers skip heavily throughout). Code lines are kept
   byte-identical; only comments are added. Structural remarks below are
   reviewer inferences — confirm against the full source. */
13886 ix86_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
13888 enum machine_mode mode
= GET_MODE (x
);
/* Constant costs: 64-bit immediates that do not fit the sign/zero
   extended 32-bit forms are more expensive; PIC symbolic constants that
   are not local also cost more. */
13896 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
13898 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
13900 else if (flag_pic
&& SYMBOLIC_CONST (x
)
13902 || (!GET_CODE (x
) != LABEL_REF
13903 && (GET_CODE (x
) != SYMBOL_REF
13904 || !SYMBOL_REF_LOCAL_P (x
)))))
/* FP CONST_DOUBLE: standard 387 constants (0.0, 1.0, ...) are cheap. */
13911 if (mode
== VOIDmode
)
13914 switch (standard_80387_constant_p (x
))
13919 default: /* Other constants */
13924 /* Start with (MEM (SYMBOL_REF)), since that's where
13925 it'll probably end up. Add a penalty for size. */
13926 *total
= (COSTS_N_INSNS (1)
13927 + (flag_pic
!= 0 && !TARGET_64BIT
)
13928 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
13934 /* The zero extensions is often completely free on x86_64, so make
13935 it as cheap as possible. */
13936 if (TARGET_64BIT
&& mode
== DImode
13937 && GET_MODE (XEXP (x
, 0)) == SImode
)
13939 else if (TARGET_ZERO_EXTEND_WITH_AND
)
13940 *total
= COSTS_N_INSNS (ix86_cost
->add
);
13942 *total
= COSTS_N_INSNS (ix86_cost
->movzx
);
/* Sign extension (case label dropped). */
13946 *total
= COSTS_N_INSNS (ix86_cost
->movsx
);
/* Shift by constant: small left shifts may be done by LEA when that is
   at least as cheap as a shift. */
13950 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
13951 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
13953 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
13956 *total
= COSTS_N_INSNS (ix86_cost
->add
);
13959 if ((value
== 2 || value
== 3)
13960 && ix86_cost
->lea
<= ix86_cost
->shift_const
)
13962 *total
= COSTS_N_INSNS (ix86_cost
->lea
);
/* 32-bit DImode shifts need a two-instruction (shld/shrd style)
   sequence; variable counts are still more expensive. */
13972 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
13974 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
13976 if (INTVAL (XEXP (x
, 1)) > 32)
13977 *total
= COSTS_N_INSNS(ix86_cost
->shift_const
+ 2);
13979 *total
= COSTS_N_INSNS(ix86_cost
->shift_const
* 2);
13983 if (GET_CODE (XEXP (x
, 1)) == AND
)
13984 *total
= COSTS_N_INSNS(ix86_cost
->shift_var
* 2);
13986 *total
= COSTS_N_INSNS(ix86_cost
->shift_var
* 6 + 2);
13991 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
13992 *total
= COSTS_N_INSNS (ix86_cost
->shift_const
);
13994 *total
= COSTS_N_INSNS (ix86_cost
->shift_var
);
/* MULT: FP multiply uses the fmul table entry. */
13999 if (FLOAT_MODE_P (mode
))
14001 *total
= COSTS_N_INSNS (ix86_cost
->fmul
);
/* Integer multiply: cost depends on bits set in a constant multiplier
   (nbits popcount loop) and on widening-multiply recognition. */
14006 rtx op0
= XEXP (x
, 0);
14007 rtx op1
= XEXP (x
, 1);
14009 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
14011 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
14012 for (nbits
= 0; value
!= 0; value
&= value
- 1)
14016 /* This is arbitrary. */
14019 /* Compute costs correctly for widening multiplication. */
14020 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op1
) == ZERO_EXTEND
)
14021 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
14022 == GET_MODE_SIZE (mode
))
14024 int is_mulwiden
= 0;
14025 enum machine_mode inner_mode
= GET_MODE (op0
);
14027 if (GET_CODE (op0
) == GET_CODE (op1
))
14028 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
14029 else if (GET_CODE (op1
) == CONST_INT
)
/* A constant counts as widening if it fits the narrow mode with the
   matching signedness. */
14031 if (GET_CODE (op0
) == SIGN_EXTEND
)
14032 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
14035 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
/* For a widening multiply, cost the narrow mode instead. */
14039 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
14042 *total
= COSTS_N_INSNS (ix86_cost
->mult_init
[MODE_INDEX (mode
)]
14043 + nbits
* ix86_cost
->mult_bit
)
14044 + rtx_cost (op0
, outer_code
) + rtx_cost (op1
, outer_code
);
/* DIV/MOD (case labels dropped). */
14053 if (FLOAT_MODE_P (mode
))
14054 *total
= COSTS_N_INSNS (ix86_cost
->fdiv
);
14056 *total
= COSTS_N_INSNS (ix86_cost
->divide
[MODE_INDEX (mode
)]);
/* PLUS: several shapes can be folded into a single LEA. */
14060 if (FLOAT_MODE_P (mode
))
14061 *total
= COSTS_N_INSNS (ix86_cost
->fadd
);
14062 else if (GET_MODE_CLASS (mode
) == MODE_INT
14063 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
/* (plus (plus (mult X 2|4|8) Y) CONST) -> one LEA. */
14065 if (GET_CODE (XEXP (x
, 0)) == PLUS
14066 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
14067 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == CONST_INT
14068 && CONSTANT_P (XEXP (x
, 1)))
14070 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
14071 if (val
== 2 || val
== 4 || val
== 8)
14073 *total
= COSTS_N_INSNS (ix86_cost
->lea
);
14074 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
14075 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
14077 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
/* (plus (mult X 2|4|8) Y) -> one LEA. */
14081 else if (GET_CODE (XEXP (x
, 0)) == MULT
14082 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)
14084 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
14085 if (val
== 2 || val
== 4 || val
== 8)
14087 *total
= COSTS_N_INSNS (ix86_cost
->lea
);
14088 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
14089 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
/* (plus (plus X Y) Z) -> one LEA. */
14093 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
14095 *total
= COSTS_N_INSNS (ix86_cost
->lea
);
14096 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
14097 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
14098 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
/* MINUS (case label dropped): FP subtract uses fadd cost. */
14105 if (FLOAT_MODE_P (mode
))
14107 *total
= COSTS_N_INSNS (ix86_cost
->fadd
);
/* Logical ops on 32-bit DImode: done as two word operations; operands
   narrower than DImode are doubled in cost (the shift by the mode
   comparison). */
14115 if (!TARGET_64BIT
&& mode
== DImode
)
14117 *total
= (COSTS_N_INSNS (ix86_cost
->add
) * 2
14118 + (rtx_cost (XEXP (x
, 0), outer_code
)
14119 << (GET_MODE (XEXP (x
, 0)) != DImode
))
14120 + (rtx_cost (XEXP (x
, 1), outer_code
)
14121 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
/* NEG: FP negation uses fchs. */
14127 if (FLOAT_MODE_P (mode
))
14129 *total
= COSTS_N_INSNS (ix86_cost
->fchs
);
/* NOT (case label dropped): two adds on 32-bit DImode, one otherwise. */
14135 if (!TARGET_64BIT
&& mode
== DImode
)
14136 *total
= COSTS_N_INSNS (ix86_cost
->add
* 2);
14138 *total
= COSTS_N_INSNS (ix86_cost
->add
);
/* FLOAT/FIX-style conversion guard (case label dropped): only cheap if
   SSE math handles the mode — confirm against full source. */
14142 if (!TARGET_SSE_MATH
|| !VALID_SSE_REG_MODE (mode
))
/* ABS. */
14147 if (FLOAT_MODE_P (mode
))
14148 *total
= COSTS_N_INSNS (ix86_cost
->fabs
);
/* SQRT. */
14152 if (FLOAT_MODE_P (mode
))
14153 *total
= COSTS_N_INSNS (ix86_cost
->fsqrt
);
/* UNSPEC: thread-pointer reference (TLS) special-cased. */
14157 if (XINT (x
, 1) == UNSPEC_TP
)
14166 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
/* SVR3-style constructor output: emit `pushl $<symbol>' into the init
   section so the startup code can call each static constructor.
   NOTE(review): lossy extraction — the return type, `init_section ()'
   style preamble, braces, and the closing `#endif' for this #if block
   were dropped (original numbers skip 14167, 14169-14170, 14174-14177).
   Code lines are byte-identical; only comments are added. */
14168 ix86_svr3_asm_out_constructor (rtx symbol
, int priority ATTRIBUTE_UNUSED
)
14171 fputs ("\tpushl $", asm_out_file
);
14172 assemble_name (asm_out_file
, XSTR (symbol
, 0));
14173 fputc ('\n', asm_out_file
);
#if TARGET_MACHO

/* Counter used to generate unique local labels for lazy-pointer stubs.  */
static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  /* Build the binder and symbol names into stack buffers; the extra 32
     bytes leave room for the decoration the GEN_* macros add.  */
  length = strlen (stub);
  binder_name = alloca (length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = alloca (length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  /* Pure (PIC) stubs compute addresses relative to a fetched PC;
     impure stubs may reference the lazy pointer absolutely.  */
  if (MACHOPIC_PURE)
    machopic_picsymbol_stub_section ();
  else
    machopic_symbol_stub_section ();

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp %%edx\n");
    }
  else
    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);

  /* The binder: pushes the lazy pointer's address and jumps into dyld so
     the real symbol gets bound on first use.  */
  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl %%eax\n");
    }
  else
    fprintf (file, "\t pushl $%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp dyld_stub_binding_helper\n");

  /* Finally the lazy pointer itself, initialized to point at the binder.  */
  machopic_lazy_symbol_ptr_section ();
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}
#endif /* TARGET_MACHO */
14240 /* Order the registers for register allocator. */
14243 x86_order_regs_for_local_alloc (void)
14248 /* First allocate the local general purpose registers. */
14249 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
14250 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
14251 reg_alloc_order
[pos
++] = i
;
14253 /* Global general purpose registers. */
14254 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
14255 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
14256 reg_alloc_order
[pos
++] = i
;
14258 /* x87 registers come first in case we are doing FP math
14260 if (!TARGET_SSE_MATH
)
14261 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
14262 reg_alloc_order
[pos
++] = i
;
14264 /* SSE registers. */
14265 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
14266 reg_alloc_order
[pos
++] = i
;
14267 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
14268 reg_alloc_order
[pos
++] = i
;
14270 /* x87 registers. */
14271 if (TARGET_SSE_MATH
)
14272 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
14273 reg_alloc_order
[pos
++] = i
;
14275 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
14276 reg_alloc_order
[pos
++] = i
;
14278 /* Initialize the rest of array as we do not allocate some registers
14280 while (pos
< FIRST_PSEUDO_REGISTER
)
14281 reg_alloc_order
[pos
++] = 0;
14284 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
14285 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
14288 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
14289 struct attribute_spec.handler. */
14291 ix86_handle_struct_attribute (tree
*node
, tree name
,
14292 tree args ATTRIBUTE_UNUSED
,
14293 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
14296 if (DECL_P (*node
))
14298 if (TREE_CODE (*node
) == TYPE_DECL
)
14299 type
= &TREE_TYPE (*node
);
14304 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
14305 || TREE_CODE (*type
) == UNION_TYPE
)))
14307 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name
));
14308 *no_add_attrs
= true;
14311 else if ((is_attribute_p ("ms_struct", name
)
14312 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
14313 || ((is_attribute_p ("gcc_struct", name
)
14314 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
14316 warning ("`%s' incompatible attribute ignored",
14317 IDENTIFIER_POINTER (name
));
14318 *no_add_attrs
= true;
14325 ix86_ms_bitfield_layout_p (tree record_type
)
14327 return (TARGET_USE_MS_BITFIELD_LAYOUT
&&
14328 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
14329 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
14332 /* Returns an expression indicating where the this parameter is
14333 located on entry to the FUNCTION. */
14336 x86_this_parameter (tree function
)
14338 tree type
= TREE_TYPE (function
);
14342 int n
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
14343 return gen_rtx_REG (DImode
, x86_64_int_parameter_registers
[n
]);
14346 if (ix86_function_regparm (type
, function
) > 0)
14350 parm
= TYPE_ARG_TYPES (type
);
14351 /* Figure out whether or not the function has a variable number of
14353 for (; parm
; parm
= TREE_CHAIN (parm
))
14354 if (TREE_VALUE (parm
) == void_type_node
)
14356 /* If not, the this parameter is in the first argument. */
14360 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
14362 return gen_rtx_REG (SImode
, regno
);
14366 if (aggregate_value_p (TREE_TYPE (type
), type
))
14367 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 8));
14369 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 4));
14372 /* Determine whether x86_output_mi_thunk can succeed. */
14375 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED
,
14376 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
14377 HOST_WIDE_INT vcall_offset
, tree function
)
14379 /* 64-bit can handle anything. */
14383 /* For 32-bit, everything's fine if we have one free register. */
14384 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
14387 /* Need a free register for vcall_offset. */
14391 /* Need a free register for GOT references. */
14392 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
14395 /* Otherwise ok. */
14399 /* Output the assembler code for a thunk function. THUNK_DECL is the
14400 declaration for the thunk function itself, FUNCTION is the decl for
14401 the target function. DELTA is an immediate constant offset to be
14402 added to THIS. If VCALL_OFFSET is nonzero, the word at
14403 *(*this + vcall_offset) should be added to THIS. */
14406 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED
,
14407 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
14408 HOST_WIDE_INT vcall_offset
, tree function
)
14411 rtx
this = x86_this_parameter (function
);
14414 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
14415 pull it in now and let DELTA benefit. */
14418 else if (vcall_offset
)
14420 /* Put the this parameter into %eax. */
14422 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, 0);
14423 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
14426 this_reg
= NULL_RTX
;
14428 /* Adjust the this parameter by a fixed constant. */
14431 xops
[0] = GEN_INT (delta
);
14432 xops
[1] = this_reg
? this_reg
: this;
14435 if (!x86_64_general_operand (xops
[0], DImode
))
14437 tmp
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 2 /* R10 */);
14439 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
14443 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
14446 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
14449 /* Adjust the this parameter by a value stored in the vtable. */
14453 tmp
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 2 /* R10 */);
14456 int tmp_regno
= 2 /* ECX */;
14457 if (lookup_attribute ("fastcall",
14458 TYPE_ATTRIBUTES (TREE_TYPE (function
))))
14459 tmp_regno
= 0 /* EAX */;
14460 tmp
= gen_rtx_REG (SImode
, tmp_regno
);
14463 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
14466 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
14468 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
14470 /* Adjust the this parameter. */
14471 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
14472 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
14474 rtx tmp2
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
14475 xops
[0] = GEN_INT (vcall_offset
);
14477 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
14478 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
14480 xops
[1] = this_reg
;
14482 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
14484 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
14487 /* If necessary, drop THIS back to its stack slot. */
14488 if (this_reg
&& this_reg
!= this)
14490 xops
[0] = this_reg
;
14492 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
14495 xops
[0] = XEXP (DECL_RTL (function
), 0);
14498 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
14499 output_asm_insn ("jmp\t%P0", xops
);
14502 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, xops
[0]), UNSPEC_GOTPCREL
);
14503 tmp
= gen_rtx_CONST (Pmode
, tmp
);
14504 tmp
= gen_rtx_MEM (QImode
, tmp
);
14506 output_asm_insn ("jmp\t%A0", xops
);
14511 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
14512 output_asm_insn ("jmp\t%P0", xops
);
14517 rtx sym_ref
= XEXP (DECL_RTL (function
), 0);
14518 tmp
= (gen_rtx_SYMBOL_REF
14520 machopic_indirection_name (sym_ref
, /*stub_p=*/true)));
14521 tmp
= gen_rtx_MEM (QImode
, tmp
);
14523 output_asm_insn ("jmp\t%0", xops
);
14526 #endif /* TARGET_MACHO */
14528 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
14529 output_set_got (tmp
);
14532 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
14533 output_asm_insn ("jmp\t{*}%1", xops
);
14539 x86_file_start (void)
14541 default_file_start ();
14542 if (X86_FILE_START_VERSION_DIRECTIVE
)
14543 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
14544 if (X86_FILE_START_FLTUSED
)
14545 fputs ("\t.global\t__fltused\n", asm_out_file
);
14546 if (ix86_asm_dialect
== ASM_INTEL
)
14547 fputs ("\t.intel_syntax\n", asm_out_file
);
14551 x86_field_alignment (tree field
, int computed
)
14553 enum machine_mode mode
;
14554 tree type
= TREE_TYPE (field
);
14556 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
14558 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
14559 ? get_inner_array_type (type
) : type
);
14560 if (mode
== DFmode
|| mode
== DCmode
14561 || GET_MODE_CLASS (mode
) == MODE_INT
14562 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
14563 return MIN (32, computed
);
14567 /* Output assembler code to FILE to increment profiler label # LABELNO
14568 for profiling a function entry. */
14570 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
14575 #ifndef NO_PROFILE_COUNTERS
14576 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
14578 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
14582 #ifndef NO_PROFILE_COUNTERS
14583 fprintf (file
, "\tmovq\t$%sP%d,%%r11\n", LPREFIX
, labelno
);
14585 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
14589 #ifndef NO_PROFILE_COUNTERS
14590 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
14591 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
14593 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
14597 #ifndef NO_PROFILE_COUNTERS
14598 fprintf (file
, "\tmovl\t$%sP%d,%%%s\n", LPREFIX
, labelno
,
14599 PROFILE_COUNT_REGISTER
);
14601 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
14605 /* We don't have exact information about the insn sizes, but we may assume
14606 quite safely that we are informed about all 1 byte insns and memory
14607 address sizes. This is enough to eliminate unnecessary padding in
14611 min_insn_size (rtx insn
)
14615 if (!INSN_P (insn
) || !active_insn_p (insn
))
14618 /* Discard alignments we've emit and jump instructions. */
14619 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
14620 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
14622 if (GET_CODE (insn
) == JUMP_INSN
14623 && (GET_CODE (PATTERN (insn
)) == ADDR_VEC
14624 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
))
14627 /* Important case - calls are always 5 bytes.
14628 It is common to have many calls in the row. */
14629 if (GET_CODE (insn
) == CALL_INSN
14630 && symbolic_reference_mentioned_p (PATTERN (insn
))
14631 && !SIBLING_CALL_P (insn
))
14633 if (get_attr_length (insn
) <= 1)
14636 /* For normal instructions we may rely on the sizes of addresses
14637 and the presence of symbol to require 4 bytes of encoding.
14638 This is not the case for jumps where references are PC relative. */
14639 if (GET_CODE (insn
) != JUMP_INSN
)
14641 l
= get_attr_length_address (insn
);
14642 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
14651 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
14655 ix86_avoid_jump_misspredicts (void)
14657 rtx insn
, start
= get_insns ();
14658 int nbytes
= 0, njumps
= 0;
14661 /* Look for all minimal intervals of instructions containing 4 jumps.
14662 The intervals are bounded by START and INSN. NBYTES is the total
14663 size of instructions in the interval including INSN and not including
14664 START. When the NBYTES is smaller than 16 bytes, it is possible
14665 that the end of START and INSN ends up in the same 16byte page.
14667 The smallest offset in the page INSN can start is the case where START
14668 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
14669 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
14671 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
14674 nbytes
+= min_insn_size (insn
);
14676 fprintf(dump_file
, "Insn %i estimated to %i bytes\n",
14677 INSN_UID (insn
), min_insn_size (insn
));
14678 if ((GET_CODE (insn
) == JUMP_INSN
14679 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
14680 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
14681 || GET_CODE (insn
) == CALL_INSN
)
14688 start
= NEXT_INSN (start
);
14689 if ((GET_CODE (start
) == JUMP_INSN
14690 && GET_CODE (PATTERN (start
)) != ADDR_VEC
14691 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
14692 || GET_CODE (start
) == CALL_INSN
)
14693 njumps
--, isjump
= 1;
14696 nbytes
-= min_insn_size (start
);
14701 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
14702 INSN_UID (start
), INSN_UID (insn
), nbytes
);
14704 if (njumps
== 3 && isjump
&& nbytes
< 16)
14706 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
14709 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
14710 INSN_UID (insn
), padsize
);
14711 emit_insn_before (gen_align (GEN_INT (padsize
)), insn
);
14716 /* AMD Athlon works faster
14717 when RET is not destination of conditional jump or directly preceded
14718 by other jump instruction. We avoid the penalty by inserting NOP just
14719 before the RET instructions in such cases. */
14721 ix86_pad_returns (void)
14725 for (e
= EXIT_BLOCK_PTR
->pred
; e
; e
= e
->pred_next
)
14727 basic_block bb
= e
->src
;
14728 rtx ret
= BB_END (bb
);
14730 bool replace
= false;
14732 if (GET_CODE (ret
) != JUMP_INSN
|| GET_CODE (PATTERN (ret
)) != RETURN
14733 || !maybe_hot_bb_p (bb
))
14735 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
14736 if (active_insn_p (prev
) || GET_CODE (prev
) == CODE_LABEL
)
14738 if (prev
&& GET_CODE (prev
) == CODE_LABEL
)
14741 for (e
= bb
->pred
; e
; e
= e
->pred_next
)
14742 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
14743 && !(e
->flags
& EDGE_FALLTHRU
))
14748 prev
= prev_active_insn (ret
);
14750 && ((GET_CODE (prev
) == JUMP_INSN
&& any_condjump_p (prev
))
14751 || GET_CODE (prev
) == CALL_INSN
))
14753 /* Empty functions get branch mispredict even when the jump destination
14754 is not visible to us. */
14755 if (!prev
&& cfun
->function_frequency
> FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
14760 emit_insn_before (gen_return_internal_long (), ret
);
14766 /* Implement machine specific optimizations. We implement padding of returns
14767 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
14771 if (TARGET_ATHLON_K8
&& optimize
&& !optimize_size
)
14772 ix86_pad_returns ();
14773 if (TARGET_FOUR_JUMP_LIMIT
&& optimize
&& !optimize_size
)
14774 ix86_avoid_jump_misspredicts ();
14777 /* Return nonzero when QImode register that must be represented via REX prefix
14780 x86_extended_QIreg_mentioned_p (rtx insn
)
14783 extract_insn_cached (insn
);
14784 for (i
= 0; i
< recog_data
.n_operands
; i
++)
14785 if (REG_P (recog_data
.operand
[i
])
14786 && REGNO (recog_data
.operand
[i
]) >= 4)
14791 /* Return nonzero when P points to register encoded via REX prefix.
14792 Called via for_each_rtx. */
14794 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
14796 unsigned int regno
;
14799 regno
= REGNO (*p
);
14800 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
14803 /* Return true when INSN mentions register that must be encoded using REX
14806 x86_extended_reg_mentioned_p (rtx insn
)
14808 return for_each_rtx (&PATTERN (insn
), extended_reg_mentioned_1
, NULL
);
14811 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
14812 optabs would emit if we didn't have TFmode patterns. */
14815 x86_emit_floatuns (rtx operands
[2])
14817 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
14818 enum machine_mode mode
, inmode
;
14820 inmode
= GET_MODE (operands
[1]);
14821 if (inmode
!= SImode
14822 && inmode
!= DImode
)
14826 in
= force_reg (inmode
, operands
[1]);
14827 mode
= GET_MODE (out
);
14828 neglab
= gen_label_rtx ();
14829 donelab
= gen_label_rtx ();
14830 i1
= gen_reg_rtx (Pmode
);
14831 f0
= gen_reg_rtx (mode
);
14833 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, Pmode
, 0, neglab
);
14835 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_FLOAT (mode
, in
)));
14836 emit_jump_insn (gen_jump (donelab
));
14839 emit_label (neglab
);
14841 i0
= expand_simple_binop (Pmode
, LSHIFTRT
, in
, const1_rtx
, NULL
, 1, OPTAB_DIRECT
);
14842 i1
= expand_simple_binop (Pmode
, AND
, in
, const1_rtx
, NULL
, 1, OPTAB_DIRECT
);
14843 i0
= expand_simple_binop (Pmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
14844 expand_float (f0
, i0
, 0);
14845 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
14847 emit_label (donelab
);
14850 /* Initialize vector TARGET via VALS. */
14852 ix86_expand_vector_init (rtx target
, rtx vals
)
14854 enum machine_mode mode
= GET_MODE (target
);
14855 int elt_size
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
14856 int n_elts
= (GET_MODE_SIZE (mode
) / elt_size
);
14859 for (i
= n_elts
- 1; i
>= 0; i
--)
14860 if (GET_CODE (XVECEXP (vals
, 0, i
)) != CONST_INT
14861 && GET_CODE (XVECEXP (vals
, 0, i
)) != CONST_DOUBLE
)
14864 /* Few special cases first...
14865 ... constants are best loaded from constant pool. */
14868 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
14872 /* ... values where only first field is non-constant are best loaded
14873 from the pool and overwritten via move later. */
14876 rtx op
= simplify_gen_subreg (mode
, XVECEXP (vals
, 0, 0),
14877 GET_MODE_INNER (mode
), 0);
14879 op
= force_reg (mode
, op
);
14880 XVECEXP (vals
, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode
));
14881 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
14882 switch (GET_MODE (target
))
14885 emit_insn (gen_sse2_movsd (target
, target
, op
));
14888 emit_insn (gen_sse_movss (target
, target
, op
));
14896 /* And the busy sequence doing rotations. */
14897 switch (GET_MODE (target
))
14902 simplify_gen_subreg (V2DFmode
, XVECEXP (vals
, 0, 0), DFmode
, 0);
14904 simplify_gen_subreg (V2DFmode
, XVECEXP (vals
, 0, 1), DFmode
, 0);
14906 vecop0
= force_reg (V2DFmode
, vecop0
);
14907 vecop1
= force_reg (V2DFmode
, vecop1
);
14908 emit_insn (gen_sse2_unpcklpd (target
, vecop0
, vecop1
));
14914 simplify_gen_subreg (V4SFmode
, XVECEXP (vals
, 0, 0), SFmode
, 0);
14916 simplify_gen_subreg (V4SFmode
, XVECEXP (vals
, 0, 1), SFmode
, 0);
14918 simplify_gen_subreg (V4SFmode
, XVECEXP (vals
, 0, 2), SFmode
, 0);
14920 simplify_gen_subreg (V4SFmode
, XVECEXP (vals
, 0, 3), SFmode
, 0);
14921 rtx tmp1
= gen_reg_rtx (V4SFmode
);
14922 rtx tmp2
= gen_reg_rtx (V4SFmode
);
14924 vecop0
= force_reg (V4SFmode
, vecop0
);
14925 vecop1
= force_reg (V4SFmode
, vecop1
);
14926 vecop2
= force_reg (V4SFmode
, vecop2
);
14927 vecop3
= force_reg (V4SFmode
, vecop3
);
14928 emit_insn (gen_sse_unpcklps (tmp1
, vecop1
, vecop3
));
14929 emit_insn (gen_sse_unpcklps (tmp2
, vecop0
, vecop2
));
14930 emit_insn (gen_sse_unpcklps (target
, tmp2
, tmp1
));
14938 /* Implements target hook vector_mode_supported_p. */
14940 ix86_vector_mode_supported_p (enum machine_mode mode
)
14943 && VALID_SSE_REG_MODE (mode
))
14946 else if (TARGET_MMX
14947 && VALID_MMX_REG_MODE (mode
))
14950 else if (TARGET_3DNOW
14951 && VALID_MMX_REG_MODE_3DNOW (mode
))
14958 /* Worker function for TARGET_MD_ASM_CLOBBERS.
14960 We do this in the new i386 backend to maintain source compatibility
14961 with the old cc0-based compiler. */
14964 ix86_md_asm_clobbers (tree clobbers
)
14966 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
14968 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
14970 clobbers
= tree_cons (NULL_TREE
, build_string (7, "dirflag"),
14975 /* Worker function for REVERSE_CONDITION. */
14978 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
14980 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
14981 ? reverse_condition (code
)
14982 : reverse_condition_maybe_unordered (code
));
14985 /* Output code to perform an x87 FP register move, from OPERANDS[1]
14989 output_387_reg_move (rtx insn
, rtx
*operands
)
14991 if (REG_P (operands
[1])
14992 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
14994 if (REGNO (operands
[0]) == FIRST_STACK_REG
14995 && TARGET_USE_FFREEP
)
14996 return "ffreep\t%y0";
14997 return "fstp\t%y0";
14999 if (STACK_TOP_P (operands
[0]))
15000 return "fld%z1\t%y1";
15004 /* Output code to perform a conditional jump to LABEL, if C2 flag in
15005 FP status register is set. */
15008 ix86_emit_fp_unordered_jump (rtx label
)
15010 rtx reg
= gen_reg_rtx (HImode
);
15013 emit_insn (gen_x86_fnstsw_1 (reg
));
15015 if (TARGET_USE_SAHF
)
15017 emit_insn (gen_x86_sahf_1 (reg
));
15019 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
15020 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
15024 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
15026 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
15027 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
15030 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
15031 gen_rtx_LABEL_REF (VOIDmode
, label
),
15033 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
15034 emit_jump_insn (temp
);
15037 /* Output code to perform a log1p XFmode calculation. */
15039 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
15041 rtx label1
= gen_label_rtx ();
15042 rtx label2
= gen_label_rtx ();
15044 rtx tmp
= gen_reg_rtx (XFmode
);
15045 rtx tmp2
= gen_reg_rtx (XFmode
);
15047 emit_insn (gen_absxf2 (tmp
, op1
));
15048 emit_insn (gen_cmpxf (tmp
,
15049 CONST_DOUBLE_FROM_REAL_VALUE (
15050 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
15052 emit_jump_insn (gen_bge (label1
));
15054 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
15055 emit_insn (gen_fyl2xp1_xf3 (op0
, tmp2
, op1
));
15056 emit_jump (label2
);
15058 emit_label (label1
);
15059 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
15060 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
15061 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
15062 emit_insn (gen_fyl2x_xf3 (op0
, tmp2
, tmp
));
15064 emit_label (label2
);
15067 #include "gt-i386.h"