1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
/* Provide a default stack-probe limit when the target configuration
   has not defined one; -1 leaves the value unconstrained.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  The
   cost tables below have five entries (QI, HI, SI, DI, TI), so any
   other mode falls through to the last slot.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
64 /* Processor costs (relative to an add) */
66 struct processor_costs size_cost
= { /* costs for tunning for size */
67 2, /* cost of an add instruction */
68 3, /* cost of a lea instruction */
69 2, /* variable shift costs */
70 3, /* constant shift costs */
71 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
72 0, /* cost of multiply per each bit set */
73 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
74 3, /* cost of movsx */
75 3, /* cost of movzx */
78 2, /* cost for loading QImode using movzbl */
79 {2, 2, 2}, /* cost of loading integer registers
80 in QImode, HImode and SImode.
81 Relative to reg-reg move (2). */
82 {2, 2, 2}, /* cost of storing integer registers */
83 2, /* cost of reg,reg fld/fst */
84 {2, 2, 2}, /* cost of loading fp registers
85 in SFmode, DFmode and XFmode */
86 {2, 2, 2}, /* cost of loading integer registers */
87 3, /* cost of moving MMX register */
88 {3, 3}, /* cost of loading MMX registers
89 in SImode and DImode */
90 {3, 3}, /* cost of storing MMX registers
91 in SImode and DImode */
92 3, /* cost of moving SSE register */
93 {3, 3, 3}, /* cost of loading SSE registers
94 in SImode, DImode and TImode */
95 {3, 3, 3}, /* cost of storing SSE registers
96 in SImode, DImode and TImode */
97 3, /* MMX or SSE register to integer */
98 0, /* size of prefetch block */
99 0, /* number of parallel prefetches */
101 2, /* cost of FADD and FSUB insns. */
102 2, /* cost of FMUL instruction. */
103 2, /* cost of FDIV instruction. */
104 2, /* cost of FABS instruction. */
105 2, /* cost of FCHS instruction. */
106 2, /* cost of FSQRT instruction. */
109 /* Processor costs (relative to an add) */
111 struct processor_costs i386_cost
= { /* 386 specific costs */
112 1, /* cost of an add instruction */
113 1, /* cost of a lea instruction */
114 3, /* variable shift costs */
115 2, /* constant shift costs */
116 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
117 1, /* cost of multiply per each bit set */
118 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
119 3, /* cost of movsx */
120 2, /* cost of movzx */
121 15, /* "large" insn */
123 4, /* cost for loading QImode using movzbl */
124 {2, 4, 2}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {8, 8, 8}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
131 {8, 8, 8}, /* cost of loading integer registers */
132 2, /* cost of moving MMX register */
133 {4, 8}, /* cost of loading MMX registers
134 in SImode and DImode */
135 {4, 8}, /* cost of storing MMX registers
136 in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
139 in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
141 in SImode, DImode and TImode */
142 3, /* MMX or SSE register to integer */
143 0, /* size of prefetch block */
144 0, /* number of parallel prefetches */
146 23, /* cost of FADD and FSUB insns. */
147 27, /* cost of FMUL instruction. */
148 88, /* cost of FDIV instruction. */
149 22, /* cost of FABS instruction. */
150 24, /* cost of FCHS instruction. */
151 122, /* cost of FSQRT instruction. */
155 struct processor_costs i486_cost
= { /* 486 specific costs */
156 1, /* cost of an add instruction */
157 1, /* cost of a lea instruction */
158 3, /* variable shift costs */
159 2, /* constant shift costs */
160 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
161 1, /* cost of multiply per each bit set */
162 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
163 3, /* cost of movsx */
164 2, /* cost of movzx */
165 15, /* "large" insn */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of loading integer registers */
176 2, /* cost of moving MMX register */
177 {4, 8}, /* cost of loading MMX registers
178 in SImode and DImode */
179 {4, 8}, /* cost of storing MMX registers
180 in SImode and DImode */
181 2, /* cost of moving SSE register */
182 {4, 8, 16}, /* cost of loading SSE registers
183 in SImode, DImode and TImode */
184 {4, 8, 16}, /* cost of storing SSE registers
185 in SImode, DImode and TImode */
186 3, /* MMX or SSE register to integer */
187 0, /* size of prefetch block */
188 0, /* number of parallel prefetches */
190 8, /* cost of FADD and FSUB insns. */
191 16, /* cost of FMUL instruction. */
192 73, /* cost of FDIV instruction. */
193 3, /* cost of FABS instruction. */
194 3, /* cost of FCHS instruction. */
195 83, /* cost of FSQRT instruction. */
199 struct processor_costs pentium_cost
= {
200 1, /* cost of an add instruction */
201 1, /* cost of a lea instruction */
202 4, /* variable shift costs */
203 1, /* constant shift costs */
204 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
205 0, /* cost of multiply per each bit set */
206 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
207 3, /* cost of movsx */
208 2, /* cost of movzx */
209 8, /* "large" insn */
211 6, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {2, 2, 6}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
219 {4, 4, 6}, /* cost of loading integer registers */
220 8, /* cost of moving MMX register */
221 {8, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {8, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
234 3, /* cost of FADD and FSUB insns. */
235 3, /* cost of FMUL instruction. */
236 39, /* cost of FDIV instruction. */
237 1, /* cost of FABS instruction. */
238 1, /* cost of FCHS instruction. */
239 70, /* cost of FSQRT instruction. */
243 struct processor_costs pentiumpro_cost
= {
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
246 1, /* variable shift costs */
247 1, /* constant shift costs */
248 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
249 0, /* cost of multiply per each bit set */
250 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
251 1, /* cost of movsx */
252 1, /* cost of movzx */
253 8, /* "large" insn */
255 2, /* cost for loading QImode using movzbl */
256 {4, 4, 4}, /* cost of loading integer registers
257 in QImode, HImode and SImode.
258 Relative to reg-reg move (2). */
259 {2, 2, 2}, /* cost of storing integer registers */
260 2, /* cost of reg,reg fld/fst */
261 {2, 2, 6}, /* cost of loading fp registers
262 in SFmode, DFmode and XFmode */
263 {4, 4, 6}, /* cost of loading integer registers */
264 2, /* cost of moving MMX register */
265 {2, 2}, /* cost of loading MMX registers
266 in SImode and DImode */
267 {2, 2}, /* cost of storing MMX registers
268 in SImode and DImode */
269 2, /* cost of moving SSE register */
270 {2, 2, 8}, /* cost of loading SSE registers
271 in SImode, DImode and TImode */
272 {2, 2, 8}, /* cost of storing SSE registers
273 in SImode, DImode and TImode */
274 3, /* MMX or SSE register to integer */
275 32, /* size of prefetch block */
276 6, /* number of parallel prefetches */
278 3, /* cost of FADD and FSUB insns. */
279 5, /* cost of FMUL instruction. */
280 56, /* cost of FDIV instruction. */
281 2, /* cost of FABS instruction. */
282 2, /* cost of FCHS instruction. */
283 56, /* cost of FSQRT instruction. */
287 struct processor_costs k6_cost
= {
288 1, /* cost of an add instruction */
289 2, /* cost of a lea instruction */
290 1, /* variable shift costs */
291 1, /* constant shift costs */
292 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
293 0, /* cost of multiply per each bit set */
294 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
295 2, /* cost of movsx */
296 2, /* cost of movzx */
297 8, /* "large" insn */
299 3, /* cost for loading QImode using movzbl */
300 {4, 5, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 3, 2}, /* cost of storing integer registers */
304 4, /* cost of reg,reg fld/fst */
305 {6, 6, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
307 {4, 4, 4}, /* cost of loading integer registers */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 6, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 1, /* number of parallel prefetches */
322 2, /* cost of FADD and FSUB insns. */
323 2, /* cost of FMUL instruction. */
324 56, /* cost of FDIV instruction. */
325 2, /* cost of FABS instruction. */
326 2, /* cost of FCHS instruction. */
327 56, /* cost of FSQRT instruction. */
331 struct processor_costs athlon_cost
= {
332 1, /* cost of an add instruction */
333 2, /* cost of a lea instruction */
334 1, /* variable shift costs */
335 1, /* constant shift costs */
336 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
337 0, /* cost of multiply per each bit set */
338 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
339 1, /* cost of movsx */
340 1, /* cost of movzx */
341 8, /* "large" insn */
343 4, /* cost for loading QImode using movzbl */
344 {3, 4, 3}, /* cost of loading integer registers
345 in QImode, HImode and SImode.
346 Relative to reg-reg move (2). */
347 {3, 4, 3}, /* cost of storing integer registers */
348 4, /* cost of reg,reg fld/fst */
349 {4, 4, 12}, /* cost of loading fp registers
350 in SFmode, DFmode and XFmode */
351 {6, 6, 8}, /* cost of loading integer registers */
352 2, /* cost of moving MMX register */
353 {4, 4}, /* cost of loading MMX registers
354 in SImode and DImode */
355 {4, 4}, /* cost of storing MMX registers
356 in SImode and DImode */
357 2, /* cost of moving SSE register */
358 {4, 4, 6}, /* cost of loading SSE registers
359 in SImode, DImode and TImode */
360 {4, 4, 5}, /* cost of storing SSE registers
361 in SImode, DImode and TImode */
362 5, /* MMX or SSE register to integer */
363 64, /* size of prefetch block */
364 6, /* number of parallel prefetches */
366 4, /* cost of FADD and FSUB insns. */
367 4, /* cost of FMUL instruction. */
368 24, /* cost of FDIV instruction. */
369 2, /* cost of FABS instruction. */
370 2, /* cost of FCHS instruction. */
371 35, /* cost of FSQRT instruction. */
375 struct processor_costs k8_cost
= {
376 1, /* cost of an add instruction */
377 2, /* cost of a lea instruction */
378 1, /* variable shift costs */
379 1, /* constant shift costs */
380 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
381 0, /* cost of multiply per each bit set */
382 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
383 1, /* cost of movsx */
384 1, /* cost of movzx */
385 8, /* "large" insn */
387 4, /* cost for loading QImode using movzbl */
388 {3, 4, 3}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {3, 4, 3}, /* cost of storing integer registers */
392 4, /* cost of reg,reg fld/fst */
393 {4, 4, 12}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {6, 6, 8}, /* cost of loading integer registers */
396 2, /* cost of moving MMX register */
397 {3, 3}, /* cost of loading MMX registers
398 in SImode and DImode */
399 {4, 4}, /* cost of storing MMX registers
400 in SImode and DImode */
401 2, /* cost of moving SSE register */
402 {4, 3, 6}, /* cost of loading SSE registers
403 in SImode, DImode and TImode */
404 {4, 4, 5}, /* cost of storing SSE registers
405 in SImode, DImode and TImode */
406 5, /* MMX or SSE register to integer */
407 64, /* size of prefetch block */
408 6, /* number of parallel prefetches */
410 4, /* cost of FADD and FSUB insns. */
411 4, /* cost of FMUL instruction. */
412 19, /* cost of FDIV instruction. */
413 2, /* cost of FABS instruction. */
414 2, /* cost of FCHS instruction. */
415 35, /* cost of FSQRT instruction. */
419 struct processor_costs pentium4_cost
= {
420 1, /* cost of an add instruction */
421 3, /* cost of a lea instruction */
422 4, /* variable shift costs */
423 4, /* constant shift costs */
424 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
425 0, /* cost of multiply per each bit set */
426 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
427 1, /* cost of movsx */
428 1, /* cost of movzx */
429 16, /* "large" insn */
431 2, /* cost for loading QImode using movzbl */
432 {4, 5, 4}, /* cost of loading integer registers
433 in QImode, HImode and SImode.
434 Relative to reg-reg move (2). */
435 {2, 3, 2}, /* cost of storing integer registers */
436 2, /* cost of reg,reg fld/fst */
437 {2, 2, 6}, /* cost of loading fp registers
438 in SFmode, DFmode and XFmode */
439 {4, 4, 6}, /* cost of loading integer registers */
440 2, /* cost of moving MMX register */
441 {2, 2}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {2, 2}, /* cost of storing MMX registers
444 in SImode and DImode */
445 12, /* cost of moving SSE register */
446 {12, 12, 12}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {2, 2, 8}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 10, /* MMX or SSE register to integer */
451 64, /* size of prefetch block */
452 6, /* number of parallel prefetches */
454 5, /* cost of FADD and FSUB insns. */
455 7, /* cost of FMUL instruction. */
456 43, /* cost of FDIV instruction. */
457 2, /* cost of FABS instruction. */
458 2, /* cost of FCHS instruction. */
459 43, /* cost of FSQRT instruction. */
463 struct processor_costs nocona_cost
= {
464 1, /* cost of an add instruction */
465 1, /* cost of a lea instruction */
466 1, /* variable shift costs */
467 1, /* constant shift costs */
468 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
469 0, /* cost of multiply per each bit set */
470 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
471 1, /* cost of movsx */
472 1, /* cost of movzx */
473 16, /* "large" insn */
475 4, /* cost for loading QImode using movzbl */
476 {4, 4, 4}, /* cost of loading integer registers
477 in QImode, HImode and SImode.
478 Relative to reg-reg move (2). */
479 {4, 4, 4}, /* cost of storing integer registers */
480 3, /* cost of reg,reg fld/fst */
481 {12, 12, 12}, /* cost of loading fp registers
482 in SFmode, DFmode and XFmode */
483 {4, 4, 4}, /* cost of loading integer registers */
484 6, /* cost of moving MMX register */
485 {12, 12}, /* cost of loading MMX registers
486 in SImode and DImode */
487 {12, 12}, /* cost of storing MMX registers
488 in SImode and DImode */
489 6, /* cost of moving SSE register */
490 {12, 12, 12}, /* cost of loading SSE registers
491 in SImode, DImode and TImode */
492 {12, 12, 12}, /* cost of storing SSE registers
493 in SImode, DImode and TImode */
494 8, /* MMX or SSE register to integer */
495 128, /* size of prefetch block */
496 8, /* number of parallel prefetches */
498 6, /* cost of FADD and FSUB insns. */
499 8, /* cost of FMUL instruction. */
500 40, /* cost of FDIV instruction. */
501 3, /* cost of FABS instruction. */
502 3, /* cost of FCHS instruction. */
503 44, /* cost of FSQRT instruction. */
506 const struct processor_costs
*ix86_cost
= &pentium_cost
;
508 /* Processor feature/optimization bitmasks. */
509 #define m_386 (1<<PROCESSOR_I386)
510 #define m_486 (1<<PROCESSOR_I486)
511 #define m_PENT (1<<PROCESSOR_PENTIUM)
512 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
513 #define m_K6 (1<<PROCESSOR_K6)
514 #define m_ATHLON (1<<PROCESSOR_ATHLON)
515 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
516 #define m_K8 (1<<PROCESSOR_K8)
517 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
518 #define m_NOCONA (1<<PROCESSOR_NOCONA)
520 const int x86_use_leave
= m_386
| m_K6
| m_ATHLON_K8
;
521 const int x86_push_memory
= m_386
| m_K6
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
522 const int x86_zero_extend_with_and
= m_486
| m_PENT
;
523 const int x86_movx
= m_ATHLON_K8
| m_PPRO
| m_PENT4
| m_NOCONA
/* m_386 | m_K6 */;
524 const int x86_double_with_add
= ~m_386
;
525 const int x86_use_bit_test
= m_386
;
526 const int x86_unroll_strlen
= m_486
| m_PENT
| m_PPRO
| m_ATHLON_K8
| m_K6
;
527 const int x86_cmove
= m_PPRO
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
528 const int x86_3dnow_a
= m_ATHLON_K8
;
529 const int x86_deep_branch
= m_PPRO
| m_K6
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
530 const int x86_branch_hints
= m_PENT4
| m_NOCONA
;
531 const int x86_use_sahf
= m_PPRO
| m_K6
| m_PENT4
| m_NOCONA
;
532 const int x86_partial_reg_stall
= m_PPRO
;
533 const int x86_use_loop
= m_K6
;
534 const int x86_use_fiop
= ~(m_PPRO
| m_ATHLON_K8
| m_PENT
);
535 const int x86_use_mov0
= m_K6
;
536 const int x86_use_cltd
= ~(m_PENT
| m_K6
);
537 const int x86_read_modify_write
= ~m_PENT
;
538 const int x86_read_modify
= ~(m_PENT
| m_PPRO
);
539 const int x86_split_long_moves
= m_PPRO
;
540 const int x86_promote_QImode
= m_K6
| m_PENT
| m_386
| m_486
| m_ATHLON_K8
;
541 const int x86_fast_prefix
= ~(m_PENT
| m_486
| m_386
);
542 const int x86_single_stringop
= m_386
| m_PENT4
| m_NOCONA
;
543 const int x86_qimode_math
= ~(0);
544 const int x86_promote_qi_regs
= 0;
545 const int x86_himode_math
= ~(m_PPRO
);
546 const int x86_promote_hi_regs
= m_PPRO
;
547 const int x86_sub_esp_4
= m_ATHLON_K8
| m_PPRO
| m_PENT4
| m_NOCONA
;
548 const int x86_sub_esp_8
= m_ATHLON_K8
| m_PPRO
| m_386
| m_486
| m_PENT4
| m_NOCONA
;
549 const int x86_add_esp_4
= m_ATHLON_K8
| m_K6
| m_PENT4
| m_NOCONA
;
550 const int x86_add_esp_8
= m_ATHLON_K8
| m_PPRO
| m_K6
| m_386
| m_486
| m_PENT4
| m_NOCONA
;
551 const int x86_integer_DFmode_moves
= ~(m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_PPRO
);
552 const int x86_partial_reg_dependency
= m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
553 const int x86_memory_mismatch_stall
= m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
554 const int x86_accumulate_outgoing_args
= m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_PPRO
;
555 const int x86_prologue_using_move
= m_ATHLON_K8
| m_PPRO
;
556 const int x86_epilogue_using_move
= m_ATHLON_K8
| m_PPRO
;
557 const int x86_decompose_lea
= m_PENT4
| m_NOCONA
;
558 const int x86_shift1
= ~m_486
;
559 const int x86_arch_always_fancy_math_387
= m_PENT
| m_PPRO
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
560 const int x86_sse_partial_reg_dependency
= m_PENT4
| m_NOCONA
| m_PPRO
;
561 /* Set for machines where the type and dependencies are resolved on SSE register
562 parts instead of whole registers, so we may maintain just lower part of
563 scalar values in proper format leaving the upper part undefined. */
564 const int x86_sse_partial_regs
= m_ATHLON_K8
;
565 /* Athlon optimizes partial-register FPS special case, thus avoiding the
566 need for extra instructions beforehand */
567 const int x86_sse_partial_regs_for_cvtsd2ss
= 0;
568 const int x86_sse_typeless_stores
= m_ATHLON_K8
;
569 const int x86_sse_load0_by_pxor
= m_PPRO
| m_PENT4
| m_NOCONA
;
570 const int x86_use_ffreep
= m_ATHLON_K8
;
571 const int x86_rep_movl_optimal
= m_386
| m_PENT
| m_PPRO
| m_K6
;
572 const int x86_inter_unit_moves
= ~(m_ATHLON_K8
);
573 const int x86_ext_80387_constants
= m_K6
| m_ATHLON
| m_PENT4
| m_NOCONA
| m_PPRO
;
574 /* Some CPU cores are not able to predict more than 4 branch instructions in
575 the 16 byte window. */
576 const int x86_four_jump_limit
= m_PPRO
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue.  */
#define FAST_PROLOGUE_INSN_COUNT 20
583 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
584 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
585 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
586 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
588 /* Array of the smallest class containing reg number REGNO, indexed by
589 REGNO. Used by REGNO_REG_CLASS in i386.h. */
591 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
594 AREG
, DREG
, CREG
, BREG
,
596 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
598 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
599 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
602 /* flags, fpsr, dirflag, frame */
603 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
604 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
606 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
608 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
609 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
610 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
614 /* The "default" register map used in 32bit mode. */
616 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
618 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
619 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
620 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
621 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
622 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
623 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
624 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
627 static int const x86_64_int_parameter_registers
[6] =
629 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
630 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
/* Integer value-return registers in the x86-64 ABI, given as gcc
   register numbers (0=%rax, 1=%rdx, 5=%rdi, 4=%rsi — see the gcc-regno
   mapping documented with svr4_dbx_register_map below).  */
static int const x86_64_int_return_registers[4] =
{
  /* The original comment labeled regno 1 as "RDI"; that contradicts the
     register maps in this file (gcc regno 1 is %rdx) — fixed.  */
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
638 /* The "default" register map used in 64bit mode. */
639 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
641 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
642 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
643 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
644 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
645 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
646 8,9,10,11,12,13,14,15, /* extended integer registers */
647 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
650 /* Define the register numbers to be used in Dwarf debugging information.
651 The SVR4 reference port C compiler uses the following register numbers
652 in its Dwarf output code:
653 0 for %eax (gcc regno = 0)
654 1 for %ecx (gcc regno = 2)
655 2 for %edx (gcc regno = 1)
656 3 for %ebx (gcc regno = 3)
657 4 for %esp (gcc regno = 7)
658 5 for %ebp (gcc regno = 6)
659 6 for %esi (gcc regno = 4)
660 7 for %edi (gcc regno = 5)
661 The following three DWARF register numbers are never generated by
662 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
663 believes these numbers have these meanings.
664 8 for %eip (no gcc equivalent)
665 9 for %eflags (gcc regno = 17)
666 10 for %trapno (no gcc equivalent)
667 It is not at all clear how we should number the FP stack registers
668 for the x86 architecture. If the version of SDB on x86/svr4 were
669 a bit less brain dead with respect to floating-point then we would
670 have a precedent to follow with respect to DWARF register numbers
671 for x86 FP registers, but the SDB on x86/svr4 is so completely
672 broken with respect to FP registers that it is hardly worth thinking
673 of it as something to strive for compatibility with.
674 The version of x86/svr4 SDB I have at the moment does (partially)
675 seem to believe that DWARF register number 11 is associated with
676 the x86 register %st(0), but that's about all. Higher DWARF
677 register numbers don't seem to be associated with anything in
678 particular, and even for DWARF regno 11, SDB only seems to under-
679 stand that it should say that a variable lives in %st(0) (when
680 asked via an `=' command) if we said it was in DWARF regno 11,
681 but SDB still prints garbage when asked for the value of the
682 variable in question (via a `/' command).
683 (Also note that the labels SDB prints for various FP stack regs
684 when doing an `x' command are all wrong.)
685 Note that these problems generally don't affect the native SVR4
686 C compiler because it doesn't allow the use of -O with -g and
687 because when it is *not* optimizing, it allocates a memory
688 location for each floating-point variable, and the memory
689 location is what gets described in the DWARF AT_location
690 attribute for the variable in question.
691 Regardless of the severe mental illness of the x86/svr4 SDB, we
692 do something sensible here and we use the following DWARF
693 register numbers. Note that these are all stack-top-relative
695 11 for %st(0) (gcc regno = 8)
696 12 for %st(1) (gcc regno = 9)
697 13 for %st(2) (gcc regno = 10)
698 14 for %st(3) (gcc regno = 11)
699 15 for %st(4) (gcc regno = 12)
700 16 for %st(5) (gcc regno = 13)
701 17 for %st(6) (gcc regno = 14)
702 18 for %st(7) (gcc regno = 15)
704 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
706 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
707 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
708 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
709 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
710 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
711 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
712 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
715 /* Test and compare insns in i386.md store the information needed to
716 generate branch and scc insns here. */
718 rtx ix86_compare_op0
= NULL_RTX
;
719 rtx ix86_compare_op1
= NULL_RTX
;
/* Number of stack-local scratch slots tracked per function.  */
#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
725 /* Define the structure for the machine field in struct function. */
727 struct stack_local_entry
GTY(())
732 struct stack_local_entry
*next
;
735 /* Structure describing stack frame layout.
736 Stack grows downward:
742 saved frame pointer if frame_pointer_needed
743 <- HARD_FRAME_POINTER
749 > to_allocate <- FRAME_POINTER
761 int outgoing_arguments_size
;
764 HOST_WIDE_INT to_allocate
;
765 /* The offsets relative to ARG_POINTER. */
766 HOST_WIDE_INT frame_pointer_offset
;
767 HOST_WIDE_INT hard_frame_pointer_offset
;
768 HOST_WIDE_INT stack_pointer_offset
;
770 /* When save_regs_using_mov is set, emit prologue using
771 move instead of push instructions. */
772 bool save_regs_using_mov
;
775 /* Used to enable/disable debugging features. */
776 const char *ix86_debug_arg_string
, *ix86_debug_addr_string
;
777 /* Code model option as passed by user. */
778 const char *ix86_cmodel_string
;
780 enum cmodel ix86_cmodel
;
782 const char *ix86_asm_string
;
783 enum asm_dialect ix86_asm_dialect
= ASM_ATT
;
785 const char *ix86_tls_dialect_string
;
786 enum tls_dialect ix86_tls_dialect
= TLS_DIALECT_GNU
;
788 /* Which unit we are generating floating point math for. */
789 enum fpmath_unit ix86_fpmath
;
791 /* Which cpu are we scheduling for. */
792 enum processor_type ix86_tune
;
793 /* Which instruction set architecture to use. */
794 enum processor_type ix86_arch
;
796 /* Strings to hold which cpu and instruction set architecture to use. */
797 const char *ix86_tune_string
; /* for -mtune=<xxx> */
798 const char *ix86_arch_string
; /* for -march=<xxx> */
799 const char *ix86_fpmath_string
; /* for -mfpmath=<xxx> */
801 /* # of registers to use to pass arguments. */
802 const char *ix86_regparm_string
;
804 /* true if sse prefetch instruction is not NOOP. */
805 int x86_prefetch_sse
;
807 /* ix86_regparm_string as a number */
810 /* Alignment to use for loops and jumps: */
812 /* Power of two alignment for loops. */
813 const char *ix86_align_loops_string
;
815 /* Power of two alignment for non-loop jumps. */
816 const char *ix86_align_jumps_string
;
818 /* Power of two alignment for stack boundary in bytes. */
819 const char *ix86_preferred_stack_boundary_string
;
821 /* Preferred alignment for stack boundary in bits. */
822 unsigned int ix86_preferred_stack_boundary
;
824 /* Values 1-5: see jump.c */
825 int ix86_branch_cost
;
826 const char *ix86_branch_cost_string
;
828 /* Power of two alignment for functions. */
829 const char *ix86_align_funcs_string
;
831 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
832 char internal_label_prefix
[16];
833 int internal_label_prefix_len
;
835 static void output_pic_addr_const (FILE *, rtx
, int);
836 static void put_condition_code (enum rtx_code
, enum machine_mode
,
838 static const char *get_some_local_dynamic_name (void);
839 static int get_some_local_dynamic_name_1 (rtx
*, void *);
840 static rtx
ix86_expand_int_compare (enum rtx_code
, rtx
, rtx
);
841 static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code
, rtx
*,
843 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
844 static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode
,
846 static rtx
get_thread_pointer (int);
847 static rtx
legitimize_tls_address (rtx
, enum tls_model
, int);
848 static void get_pc_thunk_name (char [32], unsigned int);
849 static rtx
gen_push (rtx
);
850 static int ix86_flags_dependant (rtx
, rtx
, enum attr_type
);
851 static int ix86_agi_dependant (rtx
, rtx
, enum attr_type
);
852 static struct machine_function
* ix86_init_machine_status (void);
853 static int ix86_split_to_parts (rtx
, rtx
*, enum machine_mode
);
854 static int ix86_nsaved_regs (void);
855 static void ix86_emit_save_regs (void);
856 static void ix86_emit_save_regs_using_mov (rtx
, HOST_WIDE_INT
);
857 static void ix86_emit_restore_regs_using_mov (rtx
, HOST_WIDE_INT
, int);
858 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT
);
859 static HOST_WIDE_INT
ix86_GOT_alias_set (void);
860 static void ix86_adjust_counter (rtx
, HOST_WIDE_INT
);
861 static rtx
ix86_expand_aligntest (rtx
, int);
862 static void ix86_expand_strlensi_unroll_1 (rtx
, rtx
, rtx
);
863 static int ix86_issue_rate (void);
864 static int ix86_adjust_cost (rtx
, rtx
, rtx
, int);
865 static int ia32_multipass_dfa_lookahead (void);
866 static void ix86_init_mmx_sse_builtins (void);
867 static rtx
x86_this_parameter (tree
);
868 static void x86_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
,
869 HOST_WIDE_INT
, tree
);
870 static bool x86_can_output_mi_thunk (tree
, HOST_WIDE_INT
, HOST_WIDE_INT
, tree
);
871 static void x86_file_start (void);
872 static void ix86_reorg (void);
873 static bool ix86_expand_carry_flag_compare (enum rtx_code
, rtx
, rtx
, rtx
*);
874 static tree
ix86_build_builtin_va_list (void);
875 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*, enum machine_mode
,
877 static tree
ix86_gimplify_va_arg (tree
, tree
, tree
*, tree
*);
879 static int ix86_address_cost (rtx
);
880 static bool ix86_cannot_force_const_mem (rtx
);
881 static rtx
ix86_delegitimize_address (rtx
);
883 struct builtin_description
;
884 static rtx
ix86_expand_sse_comi (const struct builtin_description
*,
886 static rtx
ix86_expand_sse_compare (const struct builtin_description
*,
888 static rtx
ix86_expand_unop1_builtin (enum insn_code
, tree
, rtx
);
889 static rtx
ix86_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
890 static rtx
ix86_expand_binop_builtin (enum insn_code
, tree
, rtx
);
891 static rtx
ix86_expand_store_builtin (enum insn_code
, tree
);
892 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
893 static rtx
ix86_expand_fp_compare (enum rtx_code
, rtx
, rtx
, rtx
, rtx
*, rtx
*);
894 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code
);
895 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code
);
896 static int ix86_fp_comparison_sahf_cost (enum rtx_code code
);
897 static int ix86_fp_comparison_cost (enum rtx_code code
);
898 static unsigned int ix86_select_alt_pic_regnum (void);
899 static int ix86_save_reg (unsigned int, int);
900 static void ix86_compute_frame_layout (struct ix86_frame
*);
901 static int ix86_comp_type_attributes (tree
, tree
);
902 static int ix86_function_regparm (tree
, tree
);
903 const struct attribute_spec ix86_attribute_table
[];
904 static bool ix86_function_ok_for_sibcall (tree
, tree
);
905 static tree
ix86_handle_cdecl_attribute (tree
*, tree
, tree
, int, bool *);
906 static tree
ix86_handle_regparm_attribute (tree
*, tree
, tree
, int, bool *);
907 static int ix86_value_regno (enum machine_mode
);
908 static bool contains_128bit_aligned_vector_p (tree
);
909 static rtx
ix86_struct_value_rtx (tree
, int);
910 static bool ix86_ms_bitfield_layout_p (tree
);
911 static tree
ix86_handle_struct_attribute (tree
*, tree
, tree
, int, bool *);
912 static int extended_reg_mentioned_1 (rtx
*, void *);
913 static bool ix86_rtx_costs (rtx
, int, int, int *);
914 static int min_insn_size (rtx
);
915 static tree
ix86_md_asm_clobbers (tree clobbers
);
916 static bool ix86_must_pass_in_stack (enum machine_mode mode
, tree type
);
917 static bool ix86_pass_by_reference (CUMULATIVE_ARGS
*, enum machine_mode
,
920 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
921 static void ix86_svr3_asm_out_constructor (rtx
, int);
924 /* Register class used for passing given 64bit part of the argument.
925 These represent classes as documented by the PS ABI, with the exception
926 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
927 use SF or DFmode move instead of DImode to avoid reformatting penalties.
929 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
930 whenever possible (upper half does contain padding).
932 enum x86_64_reg_class
935 X86_64_INTEGER_CLASS
,
936 X86_64_INTEGERSI_CLASS
,
/* Human-readable names for the x86_64_reg_class enumerators, indexed by
   class value; used for debug dumps of argument classification.  */
static const char * const x86_64_reg_class_name[] =
  {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
948 #define MAX_CLASSES 4
949 static int classify_argument (enum machine_mode
, tree
,
950 enum x86_64_reg_class
[MAX_CLASSES
], int);
951 static int examine_argument (enum machine_mode
, tree
, int, int *, int *);
952 static rtx
construct_container (enum machine_mode
, tree
, int, int, int,
954 static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class
,
955 enum x86_64_reg_class
);
957 /* Table of constants used by fldpi, fldln2, etc.... */
958 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
959 static bool ext_80387_constants_init
= 0;
960 static void init_ext_80387_constants (void);
962 /* Initialize the GCC target structure. */
963 #undef TARGET_ATTRIBUTE_TABLE
964 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
965 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
966 # undef TARGET_MERGE_DECL_ATTRIBUTES
967 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
970 #undef TARGET_COMP_TYPE_ATTRIBUTES
971 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
973 #undef TARGET_INIT_BUILTINS
974 #define TARGET_INIT_BUILTINS ix86_init_builtins
976 #undef TARGET_EXPAND_BUILTIN
977 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
979 #undef TARGET_ASM_FUNCTION_EPILOGUE
980 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
982 #undef TARGET_ASM_OPEN_PAREN
983 #define TARGET_ASM_OPEN_PAREN ""
984 #undef TARGET_ASM_CLOSE_PAREN
985 #define TARGET_ASM_CLOSE_PAREN ""
987 #undef TARGET_ASM_ALIGNED_HI_OP
988 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
989 #undef TARGET_ASM_ALIGNED_SI_OP
990 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
992 #undef TARGET_ASM_ALIGNED_DI_OP
993 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
996 #undef TARGET_ASM_UNALIGNED_HI_OP
997 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
998 #undef TARGET_ASM_UNALIGNED_SI_OP
999 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1000 #undef TARGET_ASM_UNALIGNED_DI_OP
1001 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1003 #undef TARGET_SCHED_ADJUST_COST
1004 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1005 #undef TARGET_SCHED_ISSUE_RATE
1006 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1007 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1008 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1009 ia32_multipass_dfa_lookahead
1011 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1012 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1015 #undef TARGET_HAVE_TLS
1016 #define TARGET_HAVE_TLS true
1018 #undef TARGET_CANNOT_FORCE_CONST_MEM
1019 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1021 #undef TARGET_DELEGITIMIZE_ADDRESS
1022 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1024 #undef TARGET_MS_BITFIELD_LAYOUT_P
1025 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1027 #undef TARGET_ASM_OUTPUT_MI_THUNK
1028 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1029 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1030 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1032 #undef TARGET_ASM_FILE_START
1033 #define TARGET_ASM_FILE_START x86_file_start
1035 #undef TARGET_RTX_COSTS
1036 #define TARGET_RTX_COSTS ix86_rtx_costs
1037 #undef TARGET_ADDRESS_COST
1038 #define TARGET_ADDRESS_COST ix86_address_cost
1040 #undef TARGET_FIXED_CONDITION_CODE_REGS
1041 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1042 #undef TARGET_CC_MODES_COMPATIBLE
1043 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1045 #undef TARGET_MACHINE_DEPENDENT_REORG
1046 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1048 #undef TARGET_BUILD_BUILTIN_VA_LIST
1049 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1051 #undef TARGET_MD_ASM_CLOBBERS
1052 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1054 #undef TARGET_PROMOTE_PROTOTYPES
1055 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1056 #undef TARGET_STRUCT_VALUE_RTX
1057 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1058 #undef TARGET_SETUP_INCOMING_VARARGS
1059 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1060 #undef TARGET_MUST_PASS_IN_STACK
1061 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1062 #undef TARGET_PASS_BY_REFERENCE
1063 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1065 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1066 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1068 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1069 #undef TARGET_INSERT_ATTRIBUTES
1070 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1073 struct gcc_target targetm
= TARGET_INITIALIZER
;
/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  (The matching #endif was missing in the recovered listing;
   restored to balance the #ifndef.)  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif
1082 /* Sometimes certain combinations of command options do not make
1083 sense on a particular target machine. You can define a macro
1084 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1085 defined, is executed once just after all the command options have
1088 Don't use this macro to turn on various extra optimizations for
1089 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1092 override_options (void)
1095 int ix86_tune_defaulted
= 0;
1097 /* Comes from final.c -- no real reason to change it. */
1098 #define MAX_CODE_ALIGN 16
1102 const struct processor_costs
*cost
; /* Processor costs */
1103 const int target_enable
; /* Target flags to enable. */
1104 const int target_disable
; /* Target flags to disable. */
1105 const int align_loop
; /* Default alignments. */
1106 const int align_loop_max_skip
;
1107 const int align_jump
;
1108 const int align_jump_max_skip
;
1109 const int align_func
;
1111 const processor_target_table
[PROCESSOR_max
] =
1113 {&i386_cost
, 0, 0, 4, 3, 4, 3, 4},
1114 {&i486_cost
, 0, 0, 16, 15, 16, 15, 16},
1115 {&pentium_cost
, 0, 0, 16, 7, 16, 7, 16},
1116 {&pentiumpro_cost
, 0, 0, 16, 15, 16, 7, 16},
1117 {&k6_cost
, 0, 0, 32, 7, 32, 7, 32},
1118 {&athlon_cost
, 0, 0, 16, 7, 16, 7, 16},
1119 {&pentium4_cost
, 0, 0, 0, 0, 0, 0, 0},
1120 {&k8_cost
, 0, 0, 16, 7, 16, 7, 16},
1121 {&nocona_cost
, 0, 0, 0, 0, 0, 0, 0}
1124 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
1127 const char *const name
; /* processor name or nickname. */
1128 const enum processor_type processor
;
1129 const enum pta_flags
1135 PTA_PREFETCH_SSE
= 16,
1141 const processor_alias_table
[] =
1143 {"i386", PROCESSOR_I386
, 0},
1144 {"i486", PROCESSOR_I486
, 0},
1145 {"i586", PROCESSOR_PENTIUM
, 0},
1146 {"pentium", PROCESSOR_PENTIUM
, 0},
1147 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
1148 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
1149 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1150 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1151 {"c3-2", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_SSE
},
1152 {"i686", PROCESSOR_PENTIUMPRO
, 0},
1153 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
1154 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
1155 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1156 {"pentium3m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1157 {"pentium-m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
| PTA_SSE2
},
1158 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1159 | PTA_MMX
| PTA_PREFETCH_SSE
},
1160 {"pentium4m", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1161 | PTA_MMX
| PTA_PREFETCH_SSE
},
1162 {"prescott", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
1163 | PTA_MMX
| PTA_PREFETCH_SSE
},
1164 {"nocona", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_64BIT
1165 | PTA_MMX
| PTA_PREFETCH_SSE
},
1166 {"k6", PROCESSOR_K6
, PTA_MMX
},
1167 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1168 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1169 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1171 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
1172 | PTA_3DNOW
| PTA_3DNOW_A
},
1173 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1174 | PTA_3DNOW_A
| PTA_SSE
},
1175 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1176 | PTA_3DNOW_A
| PTA_SSE
},
1177 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1178 | PTA_3DNOW_A
| PTA_SSE
},
1179 {"x86-64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_64BIT
1180 | PTA_SSE
| PTA_SSE2
},
1181 {"k8", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1182 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1183 {"opteron", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1184 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1185 {"athlon64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1186 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1187 {"athlon-fx", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1188 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1191 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
1193 /* Set the default values for switches whose default depends on TARGET_64BIT
1194 in case they weren't overwritten by command line options. */
1197 if (flag_omit_frame_pointer
== 2)
1198 flag_omit_frame_pointer
= 1;
1199 if (flag_asynchronous_unwind_tables
== 2)
1200 flag_asynchronous_unwind_tables
= 1;
1201 if (flag_pcc_struct_return
== 2)
1202 flag_pcc_struct_return
= 0;
1206 if (flag_omit_frame_pointer
== 2)
1207 flag_omit_frame_pointer
= 0;
1208 if (flag_asynchronous_unwind_tables
== 2)
1209 flag_asynchronous_unwind_tables
= 0;
1210 if (flag_pcc_struct_return
== 2)
1211 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
1214 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1215 SUBTARGET_OVERRIDE_OPTIONS
;
1218 if (!ix86_tune_string
&& ix86_arch_string
)
1219 ix86_tune_string
= ix86_arch_string
;
1220 if (!ix86_tune_string
)
1222 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
1223 ix86_tune_defaulted
= 1;
1225 if (!ix86_arch_string
)
1226 ix86_arch_string
= TARGET_64BIT
? "x86-64" : "i386";
1228 if (ix86_cmodel_string
!= 0)
1230 if (!strcmp (ix86_cmodel_string
, "small"))
1231 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1233 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string
);
1234 else if (!strcmp (ix86_cmodel_string
, "32"))
1235 ix86_cmodel
= CM_32
;
1236 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
1237 ix86_cmodel
= CM_KERNEL
;
1238 else if (!strcmp (ix86_cmodel_string
, "medium") && !flag_pic
)
1239 ix86_cmodel
= CM_MEDIUM
;
1240 else if (!strcmp (ix86_cmodel_string
, "large") && !flag_pic
)
1241 ix86_cmodel
= CM_LARGE
;
1243 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
1247 ix86_cmodel
= CM_32
;
1249 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1251 if (ix86_asm_string
!= 0)
1253 if (!strcmp (ix86_asm_string
, "intel"))
1254 ix86_asm_dialect
= ASM_INTEL
;
1255 else if (!strcmp (ix86_asm_string
, "att"))
1256 ix86_asm_dialect
= ASM_ATT
;
1258 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
1260 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
1261 error ("code model `%s' not supported in the %s bit mode",
1262 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
1263 if (ix86_cmodel
== CM_LARGE
)
1264 sorry ("code model `large' not supported yet");
1265 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
1266 sorry ("%i-bit mode not compiled in",
1267 (target_flags
& MASK_64BIT
) ? 64 : 32);
1269 for (i
= 0; i
< pta_size
; i
++)
1270 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
1272 ix86_arch
= processor_alias_table
[i
].processor
;
1273 /* Default cpu tuning to the architecture. */
1274 ix86_tune
= ix86_arch
;
1275 if (processor_alias_table
[i
].flags
& PTA_MMX
1276 && !(target_flags_explicit
& MASK_MMX
))
1277 target_flags
|= MASK_MMX
;
1278 if (processor_alias_table
[i
].flags
& PTA_3DNOW
1279 && !(target_flags_explicit
& MASK_3DNOW
))
1280 target_flags
|= MASK_3DNOW
;
1281 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
1282 && !(target_flags_explicit
& MASK_3DNOW_A
))
1283 target_flags
|= MASK_3DNOW_A
;
1284 if (processor_alias_table
[i
].flags
& PTA_SSE
1285 && !(target_flags_explicit
& MASK_SSE
))
1286 target_flags
|= MASK_SSE
;
1287 if (processor_alias_table
[i
].flags
& PTA_SSE2
1288 && !(target_flags_explicit
& MASK_SSE2
))
1289 target_flags
|= MASK_SSE2
;
1290 if (processor_alias_table
[i
].flags
& PTA_SSE3
1291 && !(target_flags_explicit
& MASK_SSE3
))
1292 target_flags
|= MASK_SSE3
;
1293 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1294 x86_prefetch_sse
= true;
1295 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1297 if (ix86_tune_defaulted
)
1299 ix86_tune_string
= "x86-64";
1300 for (i
= 0; i
< pta_size
; i
++)
1301 if (! strcmp (ix86_tune_string
,
1302 processor_alias_table
[i
].name
))
1304 ix86_tune
= processor_alias_table
[i
].processor
;
1307 error ("CPU you selected does not support x86-64 "
1314 error ("bad value (%s) for -march= switch", ix86_arch_string
);
1316 for (i
= 0; i
< pta_size
; i
++)
1317 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
1319 ix86_tune
= processor_alias_table
[i
].processor
;
1320 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1321 error ("CPU you selected does not support x86-64 instruction set");
1323 /* Intel CPUs have always interpreted SSE prefetch instructions as
1324 NOPs; so, we can enable SSE prefetch instructions even when
1325 -mtune (rather than -march) points us to a processor that has them.
1326 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1327 higher processors. */
1328 if (TARGET_CMOVE
&& (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
))
1329 x86_prefetch_sse
= true;
1333 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
1336 ix86_cost
= &size_cost
;
1338 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
1339 target_flags
|= processor_target_table
[ix86_tune
].target_enable
;
1340 target_flags
&= ~processor_target_table
[ix86_tune
].target_disable
;
1342 /* Arrange to set up i386_stack_locals for all functions. */
1343 init_machine_status
= ix86_init_machine_status
;
1345 /* Validate -mregparm= value. */
1346 if (ix86_regparm_string
)
1348 i
= atoi (ix86_regparm_string
);
1349 if (i
< 0 || i
> REGPARM_MAX
)
1350 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
1356 ix86_regparm
= REGPARM_MAX
;
1358 /* If the user has provided any of the -malign-* options,
1359 warn and use that value only if -falign-* is not set.
1360 Remove this code in GCC 3.2 or later. */
1361 if (ix86_align_loops_string
)
1363 warning ("-malign-loops is obsolete, use -falign-loops");
1364 if (align_loops
== 0)
1366 i
= atoi (ix86_align_loops_string
);
1367 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1368 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1370 align_loops
= 1 << i
;
1374 if (ix86_align_jumps_string
)
1376 warning ("-malign-jumps is obsolete, use -falign-jumps");
1377 if (align_jumps
== 0)
1379 i
= atoi (ix86_align_jumps_string
);
1380 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1381 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1383 align_jumps
= 1 << i
;
1387 if (ix86_align_funcs_string
)
1389 warning ("-malign-functions is obsolete, use -falign-functions");
1390 if (align_functions
== 0)
1392 i
= atoi (ix86_align_funcs_string
);
1393 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1394 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1396 align_functions
= 1 << i
;
1400 /* Default align_* from the processor table. */
1401 if (align_loops
== 0)
1403 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
1404 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
1406 if (align_jumps
== 0)
1408 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
1409 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
1411 if (align_functions
== 0)
1413 align_functions
= processor_target_table
[ix86_tune
].align_func
;
1416 /* Validate -mpreferred-stack-boundary= value, or provide default.
1417 The default of 128 bits is for Pentium III's SSE __m128, but we
1418 don't want additional code to keep the stack aligned when
1419 optimizing for code size. */
1420 ix86_preferred_stack_boundary
= (optimize_size
1421 ? TARGET_64BIT
? 128 : 32
1423 if (ix86_preferred_stack_boundary_string
)
1425 i
= atoi (ix86_preferred_stack_boundary_string
);
1426 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
1427 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
1428 TARGET_64BIT
? 4 : 2);
1430 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
1433 /* Validate -mbranch-cost= value, or provide default. */
1434 ix86_branch_cost
= processor_target_table
[ix86_tune
].cost
->branch_cost
;
1435 if (ix86_branch_cost_string
)
1437 i
= atoi (ix86_branch_cost_string
);
1439 error ("-mbranch-cost=%d is not between 0 and 5", i
);
1441 ix86_branch_cost
= i
;
1444 if (ix86_tls_dialect_string
)
1446 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
1447 ix86_tls_dialect
= TLS_DIALECT_GNU
;
1448 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
1449 ix86_tls_dialect
= TLS_DIALECT_SUN
;
1451 error ("bad value (%s) for -mtls-dialect= switch",
1452 ix86_tls_dialect_string
);
1455 /* Keep nonleaf frame pointers. */
1456 if (TARGET_OMIT_LEAF_FRAME_POINTER
)
1457 flag_omit_frame_pointer
= 1;
1459 /* If we're doing fast math, we don't care about comparison order
1460 wrt NaNs. This lets us use a shorter comparison sequence. */
1461 if (flag_unsafe_math_optimizations
)
1462 target_flags
&= ~MASK_IEEE_FP
;
1464 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1465 since the insns won't need emulation. */
1466 if (x86_arch_always_fancy_math_387
& (1 << ix86_arch
))
1467 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
1469 /* Turn on SSE2 builtins for -msse3. */
1471 target_flags
|= MASK_SSE2
;
1473 /* Turn on SSE builtins for -msse2. */
1475 target_flags
|= MASK_SSE
;
1479 if (TARGET_ALIGN_DOUBLE
)
1480 error ("-malign-double makes no sense in the 64bit mode");
1482 error ("-mrtd calling convention not supported in the 64bit mode");
1483 /* Enable by default the SSE and MMX builtins. */
1484 target_flags
|= (MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
);
1485 ix86_fpmath
= FPMATH_SSE
;
1489 ix86_fpmath
= FPMATH_387
;
1490 /* i386 ABI does not specify red zone. It still makes sense to use it
1491 when programmer takes care to stack from being destroyed. */
1492 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
1493 target_flags
|= MASK_NO_RED_ZONE
;
1496 if (ix86_fpmath_string
!= 0)
1498 if (! strcmp (ix86_fpmath_string
, "387"))
1499 ix86_fpmath
= FPMATH_387
;
1500 else if (! strcmp (ix86_fpmath_string
, "sse"))
1504 warning ("SSE instruction set disabled, using 387 arithmetics");
1505 ix86_fpmath
= FPMATH_387
;
1508 ix86_fpmath
= FPMATH_SSE
;
1510 else if (! strcmp (ix86_fpmath_string
, "387,sse")
1511 || ! strcmp (ix86_fpmath_string
, "sse,387"))
1515 warning ("SSE instruction set disabled, using 387 arithmetics");
1516 ix86_fpmath
= FPMATH_387
;
1518 else if (!TARGET_80387
)
1520 warning ("387 instruction set disabled, using SSE arithmetics");
1521 ix86_fpmath
= FPMATH_SSE
;
1524 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
1527 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
1530 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1534 target_flags
|= MASK_MMX
;
1535 x86_prefetch_sse
= true;
1538 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1541 target_flags
|= MASK_MMX
;
1542 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1543 extensions it adds. */
1544 if (x86_3dnow_a
& (1 << ix86_arch
))
1545 target_flags
|= MASK_3DNOW_A
;
1547 if ((x86_accumulate_outgoing_args
& TUNEMASK
)
1548 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
1550 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
1552 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1555 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
1556 p
= strchr (internal_label_prefix
, 'X');
1557 internal_label_prefix_len
= p
- internal_label_prefix
;
1563 optimization_options (int level
, int size ATTRIBUTE_UNUSED
)
1565 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1566 make the problem with not enough registers even worse. */
1567 #ifdef INSN_SCHEDULING
1569 flag_schedule_insns
= 0;
1572 /* The default values of these switches depend on the TARGET_64BIT
1573 that is not known at this moment. Mark these values with 2 and
1574 let user the to override these. In case there is no command line option
1575 specifying them, we will set the defaults in override_options. */
1577 flag_omit_frame_pointer
= 2;
1578 flag_pcc_struct_return
= 2;
1579 flag_asynchronous_unwind_tables
= 2;
1582 /* Table of valid machine attributes. */
1583 const struct attribute_spec ix86_attribute_table
[] =
1585 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1586 /* Stdcall attribute says callee is responsible for popping arguments
1587 if they are not variable. */
1588 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1589 /* Fastcall attribute says callee is responsible for popping arguments
1590 if they are not variable. */
1591 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1592 /* Cdecl attribute says the callee is a normal C declaration */
1593 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1594 /* Regparm attribute specifies how many integer arguments are to be
1595 passed in registers. */
1596 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute
},
1597 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1598 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
},
1599 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
},
1600 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
1602 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
1603 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
1604 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1605 SUBTARGET_ATTRIBUTE_TABLE
,
1607 { NULL
, 0, 0, false, false, false, NULL
}
1610 /* Decide whether we can make a sibling call to a function. DECL is the
1611 declaration of the function being targeted by the call and EXP is the
1612 CALL_EXPR representing the call. */
1615 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
1617 /* If we are generating position-independent code, we cannot sibcall
1618 optimize any indirect call, or a direct call to a global function,
1619 as the PLT requires %ebx be live. */
1620 if (!TARGET_64BIT
&& flag_pic
&& (!decl
|| TREE_PUBLIC (decl
)))
1623 /* If we are returning floats on the 80387 register stack, we cannot
1624 make a sibcall from a function that doesn't return a float to a
1625 function that does or, conversely, from a function that does return
1626 a float to a function that doesn't; the necessary stack adjustment
1627 would not be executed. */
1628 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp
)))
1629 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)))))
1632 /* If this call is indirect, we'll need to be able to use a call-clobbered
1633 register for the address of the target function. Make sure that all
1634 such registers are not used for passing parameters. */
1635 if (!decl
&& !TARGET_64BIT
)
1639 /* We're looking at the CALL_EXPR, we need the type of the function. */
1640 type
= TREE_OPERAND (exp
, 0); /* pointer expression */
1641 type
= TREE_TYPE (type
); /* pointer type */
1642 type
= TREE_TYPE (type
); /* function type */
1644 if (ix86_function_regparm (type
, NULL
) >= 3)
1646 /* ??? Need to count the actual number of registers to be used,
1647 not the possible number of registers. Fix later. */
1652 /* Otherwise okay. That also includes certain types of indirect calls. */
1656 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1657 arguments as in struct attribute_spec.handler. */
1659 ix86_handle_cdecl_attribute (tree
*node
, tree name
,
1660 tree args ATTRIBUTE_UNUSED
,
1661 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
1663 if (TREE_CODE (*node
) != FUNCTION_TYPE
1664 && TREE_CODE (*node
) != METHOD_TYPE
1665 && TREE_CODE (*node
) != FIELD_DECL
1666 && TREE_CODE (*node
) != TYPE_DECL
)
1668 warning ("`%s' attribute only applies to functions",
1669 IDENTIFIER_POINTER (name
));
1670 *no_add_attrs
= true;
1674 if (is_attribute_p ("fastcall", name
))
1676 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
1678 error ("fastcall and stdcall attributes are not compatible");
1680 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
1682 error ("fastcall and regparm attributes are not compatible");
1685 else if (is_attribute_p ("stdcall", name
))
1687 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
1689 error ("fastcall and stdcall attributes are not compatible");
1696 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name
));
1697 *no_add_attrs
= true;
1703 /* Handle a "regparm" attribute;
1704 arguments as in struct attribute_spec.handler. */
1706 ix86_handle_regparm_attribute (tree
*node
, tree name
, tree args
,
1707 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
1709 if (TREE_CODE (*node
) != FUNCTION_TYPE
1710 && TREE_CODE (*node
) != METHOD_TYPE
1711 && TREE_CODE (*node
) != FIELD_DECL
1712 && TREE_CODE (*node
) != TYPE_DECL
)
1714 warning ("`%s' attribute only applies to functions",
1715 IDENTIFIER_POINTER (name
));
1716 *no_add_attrs
= true;
1722 cst
= TREE_VALUE (args
);
1723 if (TREE_CODE (cst
) != INTEGER_CST
)
1725 warning ("`%s' attribute requires an integer constant argument",
1726 IDENTIFIER_POINTER (name
));
1727 *no_add_attrs
= true;
1729 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
1731 warning ("argument to `%s' attribute larger than %d",
1732 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
1733 *no_add_attrs
= true;
1736 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
1738 error ("fastcall and regparm attributes are not compatible");
1745 /* Return 0 if the attributes for two types are incompatible, 1 if they
1746 are compatible, and 2 if they are nearly compatible (which causes a
1747 warning to be generated). */
1750 ix86_comp_type_attributes (tree type1
, tree type2
)
1752 /* Check for mismatch of non-default calling convention. */
1753 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
1755 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
1758 /* Check for mismatched fastcall types */
1759 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1
))
1760 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2
)))
1763 /* Check for mismatched return types (cdecl vs stdcall). */
1764 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
1765 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
1767 if (ix86_function_regparm (type1
, NULL
)
1768 != ix86_function_regparm (type2
, NULL
))
1773 /* Return the regparm value for a fuctio with the indicated TYPE and DECL.
1774 DECL may be NULL when calling function indirectly
1775 or considering a libcall. */
1778 ix86_function_regparm (tree type
, tree decl
)
1781 int regparm
= ix86_regparm
;
1782 bool user_convention
= false;
1786 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
1789 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
1790 user_convention
= true;
1793 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
1796 user_convention
= true;
1799 /* Use register calling convention for local functions when possible. */
1800 if (!TARGET_64BIT
&& !user_convention
&& decl
1801 && flag_unit_at_a_time
&& !profile_flag
)
1803 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
1806 /* We can't use regparm(3) for nested functions as these use
1807 static chain pointer in third argument. */
1808 if (DECL_CONTEXT (decl
) && !DECL_NO_STATIC_CHAIN (decl
))
1818 /* Return true if EAX is live at the start of the function. Used by
1819 ix86_expand_prologue to determine if we need special help before
1820 calling allocate_stack_worker. */
1823 ix86_eax_live_at_start_p (void)
1825 /* Cheat. Don't bother working forward from ix86_function_regparm
1826 to the function type to whether an actual argument is located in
1827 eax. Instead just look at cfg info, which is still close enough
1828 to correct at this point. This gives false positives for broken
1829 functions that might use uninitialized data that happens to be
1830 allocated in eax, but who cares? */
1831 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR
->global_live_at_end
, 0);
1834 /* Value is the number of bytes of arguments automatically
1835 popped when returning from a subroutine call.
1836 FUNDECL is the declaration node of the function (as a tree),
1837 FUNTYPE is the data type of the function (as a tree),
1838 or for a library call it is an identifier node for the subroutine name.
1839 SIZE is the number of bytes of arguments passed on the stack.
1841 On the 80386, the RTD insn may be used to pop them if the number
1842 of args is fixed, but if the number is variable then the caller
1843 must pop them all. RTD can't be used for library calls now
1844 because the library is compiled with the Unix compiler.
1845 Use of RTD is a selectable option, since it is incompatible with
1846 standard Unix calling sequences. If the option is not selected,
1847 the caller must always pop the args.
1849 The attribute stdcall is equivalent to RTD on a per module basis. */
1852 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
1854 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
1856 /* Cdecl functions override -mrtd, and never pop the stack. */
1857 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
1859 /* Stdcall and fastcall functions will pop the stack if not
1861 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
))
1862 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype
)))
1866 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
1867 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
1868 == void_type_node
)))
1872 /* Lose any fake structure return argument if it is passed on the stack. */
1873 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
1876 int nregs
= ix86_function_regparm (funtype
, fundecl
);
1879 return GET_MODE_SIZE (Pmode
);
1885 /* Argument support functions. */
1887 /* Return true when register may be used to pass function parameters. */
1889 ix86_function_arg_regno_p (int regno
)
1893 return (regno
< REGPARM_MAX
1894 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
1895 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
1897 /* RAX is used as hidden argument to va_arg functions. */
1900 for (i
= 0; i
< REGPARM_MAX
; i
++)
1901 if (regno
== x86_64_int_parameter_registers
[i
])
1906 /* Return if we do not know how to pass TYPE solely in registers. */
1909 ix86_must_pass_in_stack (enum machine_mode mode
, tree type
)
1911 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
1913 return (!TARGET_64BIT
&& type
&& mode
== TImode
);
1916 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1917 for a call to a function whose data type is FNTYPE.
1918 For a library call, FNTYPE is 0. */
1921 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
1922 tree fntype
, /* tree ptr for function decl */
1923 rtx libname
, /* SYMBOL_REF of library name or 0 */
1926 static CUMULATIVE_ARGS zero_cum
;
1927 tree param
, next_param
;
1929 if (TARGET_DEBUG_ARG
)
1931 fprintf (stderr
, "\ninit_cumulative_args (");
1933 fprintf (stderr
, "fntype code = %s, ret code = %s",
1934 tree_code_name
[(int) TREE_CODE (fntype
)],
1935 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
1937 fprintf (stderr
, "no fntype");
1940 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
1945 /* Set up the number of registers to use for passing arguments. */
1947 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
1949 cum
->nregs
= ix86_regparm
;
1951 cum
->sse_nregs
= SSE_REGPARM_MAX
;
1953 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
1954 cum
->warn_sse
= true;
1955 cum
->warn_mmx
= true;
1956 cum
->maybe_vaarg
= false;
1958 /* Use ecx and edx registers if function has fastcall attribute */
1959 if (fntype
&& !TARGET_64BIT
)
1961 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
1968 /* Determine if this function has variable arguments. This is
1969 indicated by the last argument being 'void_type_mode' if there
1970 are no variable arguments. If there are variable arguments, then
1971 we won't pass anything in registers in 32-bit mode. */
1973 if (cum
->nregs
|| cum
->mmx_nregs
|| cum
->sse_nregs
)
1975 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
1976 param
!= 0; param
= next_param
)
1978 next_param
= TREE_CHAIN (param
);
1979 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
1990 cum
->maybe_vaarg
= true;
1994 if ((!fntype
&& !libname
)
1995 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
1996 cum
->maybe_vaarg
= 1;
1998 if (TARGET_DEBUG_ARG
)
1999 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
2004 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2005 of this code is to classify each 8bytes of incoming argument by the register
2006 class and assign registers accordingly. */
2008 /* Return the union class of CLASS1 and CLASS2.
2009 See the x86-64 PS ABI for details. */
2011 static enum x86_64_reg_class
2012 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
2014 /* Rule #1: If both classes are equal, this is the resulting class. */
2015 if (class1
== class2
)
2018 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2020 if (class1
== X86_64_NO_CLASS
)
2022 if (class2
== X86_64_NO_CLASS
)
2025 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2026 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
2027 return X86_64_MEMORY_CLASS
;
2029 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2030 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
2031 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
2032 return X86_64_INTEGERSI_CLASS
;
2033 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
2034 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
2035 return X86_64_INTEGER_CLASS
;
2037 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
2038 if (class1
== X86_64_X87_CLASS
|| class1
== X86_64_X87UP_CLASS
2039 || class2
== X86_64_X87_CLASS
|| class2
== X86_64_X87UP_CLASS
)
2040 return X86_64_MEMORY_CLASS
;
2042 /* Rule #6: Otherwise class SSE is used. */
2043 return X86_64_SSE_CLASS
;
2046 /* Classify the argument of type TYPE and mode MODE.
2047 CLASSES will be filled by the register class used to pass each word
2048 of the operand. The number of words is returned. In case the parameter
2049 should be passed in memory, 0 is returned. As a special case for zero
2050 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2052 BIT_OFFSET is used internally for handling records and specifies offset
2053 of the offset in bits modulo 256 to avoid overflow cases.
2055 See the x86-64 PS ABI for details.
2059 classify_argument (enum machine_mode mode
, tree type
,
2060 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
2062 HOST_WIDE_INT bytes
=
2063 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2064 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2066 /* Variable sized entities are always passed/returned in memory. */
2070 if (mode
!= VOIDmode
2071 && targetm
.calls
.must_pass_in_stack (mode
, type
))
2074 if (type
&& AGGREGATE_TYPE_P (type
))
2078 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
2080 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2084 for (i
= 0; i
< words
; i
++)
2085 classes
[i
] = X86_64_NO_CLASS
;
2087 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2088 signalize memory class, so handle it as special case. */
2091 classes
[0] = X86_64_NO_CLASS
;
2095 /* Classify each field of record and merge classes. */
2096 if (TREE_CODE (type
) == RECORD_TYPE
)
2098 /* For classes first merge in the field of the subclasses. */
2099 if (TYPE_BINFO (type
))
2101 tree binfo
, base_binfo
;
2104 for (binfo
= TYPE_BINFO (type
), i
= 0;
2105 BINFO_BASE_ITERATE (binfo
, i
, base_binfo
); i
++)
2108 int offset
= tree_low_cst (BINFO_OFFSET (base_binfo
), 0) * 8;
2109 tree type
= BINFO_TYPE (base_binfo
);
2111 num
= classify_argument (TYPE_MODE (type
),
2113 (offset
+ bit_offset
) % 256);
2116 for (i
= 0; i
< num
; i
++)
2118 int pos
= (offset
+ (bit_offset
% 64)) / 8 / 8;
2120 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
2124 /* And now merge the fields of structure. */
2125 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
2127 if (TREE_CODE (field
) == FIELD_DECL
)
2131 /* Bitfields are always classified as integer. Handle them
2132 early, since later code would consider them to be
2133 misaligned integers. */
2134 if (DECL_BIT_FIELD (field
))
2136 for (i
= int_bit_position (field
) / 8 / 8;
2137 i
< (int_bit_position (field
)
2138 + tree_low_cst (DECL_SIZE (field
), 0)
2141 merge_classes (X86_64_INTEGER_CLASS
,
2146 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
2147 TREE_TYPE (field
), subclasses
,
2148 (int_bit_position (field
)
2149 + bit_offset
) % 256);
2152 for (i
= 0; i
< num
; i
++)
2155 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
2157 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
2163 /* Arrays are handled as small records. */
2164 else if (TREE_CODE (type
) == ARRAY_TYPE
)
2167 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
2168 TREE_TYPE (type
), subclasses
, bit_offset
);
2172 /* The partial classes are now full classes. */
2173 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
2174 subclasses
[0] = X86_64_SSE_CLASS
;
2175 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
2176 subclasses
[0] = X86_64_INTEGER_CLASS
;
2178 for (i
= 0; i
< words
; i
++)
2179 classes
[i
] = subclasses
[i
% num
];
2181 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2182 else if (TREE_CODE (type
) == UNION_TYPE
2183 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
2185 /* For classes first merge in the field of the subclasses. */
2186 if (TYPE_BINFO (type
))
2188 tree binfo
, base_binfo
;
2191 for (binfo
= TYPE_BINFO (type
), i
= 0;
2192 BINFO_BASE_ITERATE (binfo
, i
, base_binfo
); i
++)
2195 int offset
= tree_low_cst (BINFO_OFFSET (base_binfo
), 0) * 8;
2196 tree type
= BINFO_TYPE (base_binfo
);
2198 num
= classify_argument (TYPE_MODE (type
),
2200 (offset
+ (bit_offset
% 64)) % 256);
2203 for (i
= 0; i
< num
; i
++)
2205 int pos
= (offset
+ (bit_offset
% 64)) / 8 / 8;
2207 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
2211 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
2213 if (TREE_CODE (field
) == FIELD_DECL
)
2216 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
2217 TREE_TYPE (field
), subclasses
,
2221 for (i
= 0; i
< num
; i
++)
2222 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
2226 else if (TREE_CODE (type
) == SET_TYPE
)
2230 classes
[0] = X86_64_INTEGERSI_CLASS
;
2233 else if (bytes
<= 8)
2235 classes
[0] = X86_64_INTEGER_CLASS
;
2238 else if (bytes
<= 12)
2240 classes
[0] = X86_64_INTEGER_CLASS
;
2241 classes
[1] = X86_64_INTEGERSI_CLASS
;
2246 classes
[0] = X86_64_INTEGER_CLASS
;
2247 classes
[1] = X86_64_INTEGER_CLASS
;
2254 /* Final merger cleanup. */
2255 for (i
= 0; i
< words
; i
++)
2257 /* If one class is MEMORY, everything should be passed in
2259 if (classes
[i
] == X86_64_MEMORY_CLASS
)
2262 /* The X86_64_SSEUP_CLASS should be always preceded by
2263 X86_64_SSE_CLASS. */
2264 if (classes
[i
] == X86_64_SSEUP_CLASS
2265 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
2266 classes
[i
] = X86_64_SSE_CLASS
;
2268 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2269 if (classes
[i
] == X86_64_X87UP_CLASS
2270 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
2271 classes
[i
] = X86_64_SSE_CLASS
;
2276 /* Compute alignment needed. We align all types to natural boundaries with
2277 exception of XFmode that is aligned to 64bits. */
2278 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
2280 int mode_alignment
= GET_MODE_BITSIZE (mode
);
2283 mode_alignment
= 128;
2284 else if (mode
== XCmode
)
2285 mode_alignment
= 256;
2286 if (COMPLEX_MODE_P (mode
))
2287 mode_alignment
/= 2;
2288 /* Misaligned fields are always returned in memory. */
2289 if (bit_offset
% mode_alignment
)
2293 /* for V1xx modes, just use the base mode */
2294 if (VECTOR_MODE_P (mode
)
2295 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
2296 mode
= GET_MODE_INNER (mode
);
2298 /* Classification of atomic types. */
2308 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
2309 classes
[0] = X86_64_INTEGERSI_CLASS
;
2311 classes
[0] = X86_64_INTEGER_CLASS
;
2315 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
2320 if (!(bit_offset
% 64))
2321 classes
[0] = X86_64_SSESF_CLASS
;
2323 classes
[0] = X86_64_SSE_CLASS
;
2326 classes
[0] = X86_64_SSEDF_CLASS
;
2329 classes
[0] = X86_64_X87_CLASS
;
2330 classes
[1] = X86_64_X87UP_CLASS
;
2333 classes
[0] = X86_64_SSE_CLASS
;
2334 classes
[1] = X86_64_SSEUP_CLASS
;
2337 classes
[0] = X86_64_SSE_CLASS
;
2340 classes
[0] = X86_64_SSEDF_CLASS
;
2341 classes
[1] = X86_64_SSEDF_CLASS
;
2345 /* These modes are larger than 16 bytes. */
2353 classes
[0] = X86_64_SSE_CLASS
;
2354 classes
[1] = X86_64_SSEUP_CLASS
;
2360 classes
[0] = X86_64_SSE_CLASS
;
2366 if (VECTOR_MODE_P (mode
))
2370 if (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
)
2372 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
2373 classes
[0] = X86_64_INTEGERSI_CLASS
;
2375 classes
[0] = X86_64_INTEGER_CLASS
;
2376 classes
[1] = X86_64_INTEGER_CLASS
;
2377 return 1 + (bytes
> 8);
2384 /* Examine the argument and return set number of register required in each
2385 class. Return 0 iff parameter should be passed in memory. */
2387 examine_argument (enum machine_mode mode
, tree type
, int in_return
,
2388 int *int_nregs
, int *sse_nregs
)
2390 enum x86_64_reg_class
class[MAX_CLASSES
];
2391 int n
= classify_argument (mode
, type
, class, 0);
2397 for (n
--; n
>= 0; n
--)
2400 case X86_64_INTEGER_CLASS
:
2401 case X86_64_INTEGERSI_CLASS
:
2404 case X86_64_SSE_CLASS
:
2405 case X86_64_SSESF_CLASS
:
2406 case X86_64_SSEDF_CLASS
:
2409 case X86_64_NO_CLASS
:
2410 case X86_64_SSEUP_CLASS
:
2412 case X86_64_X87_CLASS
:
2413 case X86_64_X87UP_CLASS
:
2417 case X86_64_MEMORY_CLASS
:
2422 /* Construct container for the argument used by GCC interface. See
2423 FUNCTION_ARG for the detailed description. */
2425 construct_container (enum machine_mode mode
, tree type
, int in_return
,
2426 int nintregs
, int nsseregs
, const int * intreg
,
2429 enum machine_mode tmpmode
;
2431 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2432 enum x86_64_reg_class
class[MAX_CLASSES
];
2436 int needed_sseregs
, needed_intregs
;
2437 rtx exp
[MAX_CLASSES
];
2440 n
= classify_argument (mode
, type
, class, 0);
2441 if (TARGET_DEBUG_ARG
)
2444 fprintf (stderr
, "Memory class\n");
2447 fprintf (stderr
, "Classes:");
2448 for (i
= 0; i
< n
; i
++)
2450 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
2452 fprintf (stderr
, "\n");
2457 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
, &needed_sseregs
))
2459 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
2462 /* First construct simple cases. Avoid SCmode, since we want to use
2463 single register to pass this type. */
2464 if (n
== 1 && mode
!= SCmode
)
2467 case X86_64_INTEGER_CLASS
:
2468 case X86_64_INTEGERSI_CLASS
:
2469 return gen_rtx_REG (mode
, intreg
[0]);
2470 case X86_64_SSE_CLASS
:
2471 case X86_64_SSESF_CLASS
:
2472 case X86_64_SSEDF_CLASS
:
2473 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
2474 case X86_64_X87_CLASS
:
2475 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
2476 case X86_64_NO_CLASS
:
2477 /* Zero sized array, struct or class. */
2482 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
2484 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
2486 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
2487 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
2488 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
2489 && class[1] == X86_64_INTEGER_CLASS
2490 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
2491 && intreg
[0] + 1 == intreg
[1])
2492 return gen_rtx_REG (mode
, intreg
[0]);
2494 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
2495 && class[2] == X86_64_X87_CLASS
&& class[3] == X86_64_X87UP_CLASS
2497 return gen_rtx_REG (XCmode
, FIRST_STACK_REG
);
2499 /* Otherwise figure out the entries of the PARALLEL. */
2500 for (i
= 0; i
< n
; i
++)
2504 case X86_64_NO_CLASS
:
2506 case X86_64_INTEGER_CLASS
:
2507 case X86_64_INTEGERSI_CLASS
:
2508 /* Merge TImodes on aligned occasions here too. */
2509 if (i
* 8 + 8 > bytes
)
2510 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
2511 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
2515 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2516 if (tmpmode
== BLKmode
)
2518 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2519 gen_rtx_REG (tmpmode
, *intreg
),
2523 case X86_64_SSESF_CLASS
:
2524 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2525 gen_rtx_REG (SFmode
,
2526 SSE_REGNO (sse_regno
)),
2530 case X86_64_SSEDF_CLASS
:
2531 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2532 gen_rtx_REG (DFmode
,
2533 SSE_REGNO (sse_regno
)),
2537 case X86_64_SSE_CLASS
:
2538 if (i
< n
- 1 && class[i
+ 1] == X86_64_SSEUP_CLASS
)
2542 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2543 gen_rtx_REG (tmpmode
,
2544 SSE_REGNO (sse_regno
)),
2546 if (tmpmode
== TImode
)
2554 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
2555 for (i
= 0; i
< nexps
; i
++)
2556 XVECEXP (ret
, 0, i
) = exp
[i
];
2560 /* Update the data in CUM to advance over an argument
2561 of mode MODE and data type TYPE.
2562 (TYPE is null for libcalls where that information may not be available.) */
2565 function_arg_advance (CUMULATIVE_ARGS
*cum
, /* current arg information */
2566 enum machine_mode mode
, /* current arg mode */
2567 tree type
, /* type of the argument or 0 if lib support */
2568 int named
) /* whether or not the argument was named */
2571 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2572 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2574 if (TARGET_DEBUG_ARG
)
2576 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2577 words
, cum
->words
, cum
->nregs
, cum
->sse_nregs
, GET_MODE_NAME (mode
), named
);
2580 int int_nregs
, sse_nregs
;
2581 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
2582 cum
->words
+= words
;
2583 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
2585 cum
->nregs
-= int_nregs
;
2586 cum
->sse_nregs
-= sse_nregs
;
2587 cum
->regno
+= int_nregs
;
2588 cum
->sse_regno
+= sse_nregs
;
2591 cum
->words
+= words
;
2595 if (TARGET_SSE
&& SSE_REG_MODE_P (mode
)
2596 && (!type
|| !AGGREGATE_TYPE_P (type
)))
2598 cum
->sse_words
+= words
;
2599 cum
->sse_nregs
-= 1;
2600 cum
->sse_regno
+= 1;
2601 if (cum
->sse_nregs
<= 0)
2607 else if (TARGET_MMX
&& MMX_REG_MODE_P (mode
)
2608 && (!type
|| !AGGREGATE_TYPE_P (type
)))
2610 cum
->mmx_words
+= words
;
2611 cum
->mmx_nregs
-= 1;
2612 cum
->mmx_regno
+= 1;
2613 if (cum
->mmx_nregs
<= 0)
2621 cum
->words
+= words
;
2622 cum
->nregs
-= words
;
2623 cum
->regno
+= words
;
2625 if (cum
->nregs
<= 0)
2635 /* Define where to put the arguments to a function.
2636 Value is zero to push the argument on the stack,
2637 or a hard register in which to store the argument.
2639 MODE is the argument's machine mode.
2640 TYPE is the data type of the argument (as a tree).
2641 This is null for libcalls where that information may
2643 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2644 the preceding args and about the function being called.
2645 NAMED is nonzero if this argument is a named parameter
2646 (otherwise it is an extra parameter matching an ellipsis). */
2649 function_arg (CUMULATIVE_ARGS
*cum
, /* current arg information */
2650 enum machine_mode mode
, /* current arg mode */
2651 tree type
, /* type of the argument or 0 if lib support */
2652 int named
) /* != 0 for normal args, == 0 for ... args */
2656 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2657 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2658 static bool warnedsse
, warnedmmx
;
2660 /* To simplify the code below, represent vector types with a vector mode
2661 even if MMX/SSE are not active. */
2663 && TREE_CODE (type
) == VECTOR_TYPE
2664 && (bytes
== 8 || bytes
== 16)
2665 && GET_MODE_CLASS (TYPE_MODE (type
)) != MODE_VECTOR_INT
2666 && GET_MODE_CLASS (TYPE_MODE (type
)) != MODE_VECTOR_FLOAT
)
2668 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
2669 enum machine_mode newmode
2670 = TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
2671 ? MIN_MODE_VECTOR_FLOAT
: MIN_MODE_VECTOR_INT
;
2673 /* Get the mode which has this inner mode and number of units. */
2674 for (; newmode
!= VOIDmode
; newmode
= GET_MODE_WIDER_MODE (newmode
))
2675 if (GET_MODE_NUNITS (newmode
) == TYPE_VECTOR_SUBPARTS (type
)
2676 && GET_MODE_INNER (newmode
) == innermode
)
2683 /* Handle a hidden AL argument containing number of registers for varargs
2684 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2686 if (mode
== VOIDmode
)
2689 return GEN_INT (cum
->maybe_vaarg
2690 ? (cum
->sse_nregs
< 0
2698 ret
= construct_container (mode
, type
, 0, cum
->nregs
, cum
->sse_nregs
,
2699 &x86_64_int_parameter_registers
[cum
->regno
],
2704 /* For now, pass fp/complex values on the stack. */
2716 if (words
<= cum
->nregs
)
2718 int regno
= cum
->regno
;
2720 /* Fastcall allocates the first two DWORD (SImode) or
2721 smaller arguments to ECX and EDX. */
2724 if (mode
== BLKmode
|| mode
== DImode
)
2727 /* ECX not EAX is the first allocated register. */
2731 ret
= gen_rtx_REG (mode
, regno
);
2741 if (!type
|| !AGGREGATE_TYPE_P (type
))
2743 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
2746 warning ("SSE vector argument without SSE enabled "
2750 ret
= gen_rtx_REG (mode
, cum
->sse_regno
+ FIRST_SSE_REG
);
2757 if (!type
|| !AGGREGATE_TYPE_P (type
))
2759 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
2762 warning ("MMX vector argument without MMX enabled "
2766 ret
= gen_rtx_REG (mode
, cum
->mmx_regno
+ FIRST_MMX_REG
);
2771 if (TARGET_DEBUG_ARG
)
2774 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2775 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
2778 print_simple_rtl (stderr
, ret
);
2780 fprintf (stderr
, ", stack");
2782 fprintf (stderr
, " )\n");
2788 /* A C expression that indicates when an argument must be passed by
2789 reference. If nonzero for an argument, a copy of that argument is
2790 made in memory and a pointer to the argument is passed instead of
2791 the argument itself. The pointer is passed in whatever way is
2792 appropriate for passing a pointer to that type. */
2795 ix86_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
2796 enum machine_mode mode ATTRIBUTE_UNUSED
,
2797 tree type
, bool named ATTRIBUTE_UNUSED
)
2802 if (type
&& int_size_in_bytes (type
) == -1)
2804 if (TARGET_DEBUG_ARG
)
2805 fprintf (stderr
, "function_arg_pass_by_reference\n");
2812 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2813 ABI. Only called if TARGET_SSE. */
2815 contains_128bit_aligned_vector_p (tree type
)
2817 enum machine_mode mode
= TYPE_MODE (type
);
2818 if (SSE_REG_MODE_P (mode
)
2819 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
2821 if (TYPE_ALIGN (type
) < 128)
2824 if (AGGREGATE_TYPE_P (type
))
2826 /* Walk the aggregates recursively. */
2827 if (TREE_CODE (type
) == RECORD_TYPE
2828 || TREE_CODE (type
) == UNION_TYPE
2829 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
2833 if (TYPE_BINFO (type
))
2835 tree binfo
, base_binfo
;
2838 for (binfo
= TYPE_BINFO (type
), i
= 0;
2839 BINFO_BASE_ITERATE (binfo
, i
, base_binfo
); i
++)
2840 if (contains_128bit_aligned_vector_p (BINFO_TYPE (base_binfo
)))
2843 /* And now merge the fields of structure. */
2844 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
2846 if (TREE_CODE (field
) == FIELD_DECL
2847 && contains_128bit_aligned_vector_p (TREE_TYPE (field
)))
2851 /* Just for use if some languages passes arrays by value. */
2852 else if (TREE_CODE (type
) == ARRAY_TYPE
)
2854 if (contains_128bit_aligned_vector_p (TREE_TYPE (type
)))
2863 /* Gives the alignment boundary, in bits, of an argument with the
2864 specified mode and type. */
2867 ix86_function_arg_boundary (enum machine_mode mode
, tree type
)
2871 align
= TYPE_ALIGN (type
);
2873 align
= GET_MODE_ALIGNMENT (mode
);
2874 if (align
< PARM_BOUNDARY
)
2875 align
= PARM_BOUNDARY
;
2878 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2879 make an exception for SSE modes since these require 128bit
2882 The handling here differs from field_alignment. ICC aligns MMX
2883 arguments to 4 byte boundaries, while structure fields are aligned
2884 to 8 byte boundaries. */
2886 align
= PARM_BOUNDARY
;
2889 if (!SSE_REG_MODE_P (mode
))
2890 align
= PARM_BOUNDARY
;
2894 if (!contains_128bit_aligned_vector_p (type
))
2895 align
= PARM_BOUNDARY
;
2903 /* Return true if N is a possible register number of function value. */
2905 ix86_function_value_regno_p (int regno
)
2909 return ((regno
) == 0
2910 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
2911 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
));
2913 return ((regno
) == 0 || (regno
) == FIRST_FLOAT_REG
2914 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
)
2915 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
));
2918 /* Define how to find the value returned by a function.
2919 VALTYPE is the data type of the value (as a tree).
2920 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2921 otherwise, FUNC is 0. */
2923 ix86_function_value (tree valtype
)
2927 rtx ret
= construct_container (TYPE_MODE (valtype
), valtype
, 1,
2928 REGPARM_MAX
, SSE_REGPARM_MAX
,
2929 x86_64_int_return_registers
, 0);
2930 /* For zero sized structures, construct_container return NULL, but we need
2931 to keep rest of compiler happy by returning meaningful value. */
2933 ret
= gen_rtx_REG (TYPE_MODE (valtype
), 0);
2937 return gen_rtx_REG (TYPE_MODE (valtype
),
2938 ix86_value_regno (TYPE_MODE (valtype
)));
2941 /* Return false iff type is returned in memory. */
2943 ix86_return_in_memory (tree type
)
2945 int needed_intregs
, needed_sseregs
, size
;
2946 enum machine_mode mode
= TYPE_MODE (type
);
2949 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
2951 if (mode
== BLKmode
)
2954 size
= int_size_in_bytes (type
);
2956 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
2959 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
2961 /* User-created vectors small enough to fit in EAX. */
2965 /* MMX/3dNow values are returned on the stack, since we've
2966 got to EMMS/FEMMS before returning. */
2970 /* SSE values are returned in XMM0, except when it doesn't exist. */
2972 return (TARGET_SSE
? 0 : 1);
2983 /* When returning SSE vector types, we have a choice of either
2984 (1) being abi incompatible with a -march switch, or
2985 (2) generating an error.
2986 Given no good solution, I think the safest thing is one warning.
2987 The user won't be able to use -Werror, but....
2989 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
2990 called in response to actually generating a caller or callee that
2991 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
2992 via aggregate_value_p for general type probing from tree-ssa. */
2995 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
2999 if (!TARGET_SSE
&& type
&& !warned
)
3001 /* Look at the return type of the function, not the function type. */
3002 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
3005 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
3008 warning ("SSE vector return without SSE enabled changes the ABI");
3015 /* Define how to find the value returned by a library function
3016 assuming the value has mode MODE. */
3018 ix86_libcall_value (enum machine_mode mode
)
3029 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
3031 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
3036 return gen_rtx_REG (mode
, 0);
3040 return gen_rtx_REG (mode
, ix86_value_regno (mode
));
3043 /* Given a mode, return the register to use for a return value. */
3046 ix86_value_regno (enum machine_mode mode
)
3048 /* Floating point return values in %st(0). */
3049 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
&& TARGET_FLOAT_RETURNS_IN_80387
)
3050 return FIRST_FLOAT_REG
;
3051 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3052 we prevent this case when sse is not available. */
3053 if (mode
== TImode
|| (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
3054 return FIRST_SSE_REG
;
3055 /* Everything else in %eax. */
3059 /* Create the va_list data type. */
3062 ix86_build_builtin_va_list (void)
3064 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
3066 /* For i386 we use plain pointer to argument area. */
3068 return build_pointer_type (char_type_node
);
3070 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
3071 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
3073 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
3074 unsigned_type_node
);
3075 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
3076 unsigned_type_node
);
3077 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
3079 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
3082 DECL_FIELD_CONTEXT (f_gpr
) = record
;
3083 DECL_FIELD_CONTEXT (f_fpr
) = record
;
3084 DECL_FIELD_CONTEXT (f_ovf
) = record
;
3085 DECL_FIELD_CONTEXT (f_sav
) = record
;
3087 TREE_CHAIN (record
) = type_decl
;
3088 TYPE_NAME (record
) = type_decl
;
3089 TYPE_FIELDS (record
) = f_gpr
;
3090 TREE_CHAIN (f_gpr
) = f_fpr
;
3091 TREE_CHAIN (f_fpr
) = f_ovf
;
3092 TREE_CHAIN (f_ovf
) = f_sav
;
3094 layout_type (record
);
3096 /* The correct type is an array type of one element. */
3097 return build_array_type (record
, build_index_type (size_zero_node
));
3100 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3103 ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3104 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
3107 CUMULATIVE_ARGS next_cum
;
3108 rtx save_area
= NULL_RTX
, mem
;
3121 /* Indicate to allocate space on the stack for varargs save area. */
3122 ix86_save_varrargs_registers
= 1;
3124 cfun
->stack_alignment_needed
= 128;
3126 fntype
= TREE_TYPE (current_function_decl
);
3127 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
3128 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
3129 != void_type_node
));
3131 /* For varargs, we do not want to skip the dummy va_dcl argument.
3132 For stdargs, we do want to skip the last named argument. */
3135 function_arg_advance (&next_cum
, mode
, type
, 1);
3138 save_area
= frame_pointer_rtx
;
3140 set
= get_varargs_alias_set ();
3142 for (i
= next_cum
.regno
; i
< ix86_regparm
; i
++)
3144 mem
= gen_rtx_MEM (Pmode
,
3145 plus_constant (save_area
, i
* UNITS_PER_WORD
));
3146 set_mem_alias_set (mem
, set
);
3147 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
3148 x86_64_int_parameter_registers
[i
]));
3151 if (next_cum
.sse_nregs
)
3153 /* Now emit code to save SSE registers. The AX parameter contains number
3154 of SSE parameter registers used to call this function. We use
3155 sse_prologue_save insn template that produces computed jump across
3156 SSE saves. We need some preparation work to get this working. */
3158 label
= gen_label_rtx ();
3159 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
3161 /* Compute address to jump to :
3162 label - 5*eax + nnamed_sse_arguments*5 */
3163 tmp_reg
= gen_reg_rtx (Pmode
);
3164 nsse_reg
= gen_reg_rtx (Pmode
);
3165 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
3166 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
3167 gen_rtx_MULT (Pmode
, nsse_reg
,
3169 if (next_cum
.sse_regno
)
3172 gen_rtx_CONST (DImode
,
3173 gen_rtx_PLUS (DImode
,
3175 GEN_INT (next_cum
.sse_regno
* 4))));
3177 emit_move_insn (nsse_reg
, label_ref
);
3178 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
3180 /* Compute address of memory block we save into. We always use pointer
3181 pointing 127 bytes after first byte to store - this is needed to keep
3182 instruction size limited by 4 bytes. */
3183 tmp_reg
= gen_reg_rtx (Pmode
);
3184 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
3185 plus_constant (save_area
,
3186 8 * REGPARM_MAX
+ 127)));
3187 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
3188 set_mem_alias_set (mem
, set
);
3189 set_mem_align (mem
, BITS_PER_WORD
);
3191 /* And finally do the dirty job! */
3192 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
3193 GEN_INT (next_cum
.sse_regno
), label
));
3198 /* Implement va_start. */
3201 ix86_va_start (tree valist
, rtx nextarg
)
3203 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
3204 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
3205 tree gpr
, fpr
, ovf
, sav
, t
;
3207 /* Only 64bit target needs something special. */
3210 std_expand_builtin_va_start (valist
, nextarg
);
3214 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
3215 f_fpr
= TREE_CHAIN (f_gpr
);
3216 f_ovf
= TREE_CHAIN (f_fpr
);
3217 f_sav
= TREE_CHAIN (f_ovf
);
3219 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
3220 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
3221 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
3222 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
3223 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
3225 /* Count number of gp and fp argument registers used. */
3226 words
= current_function_args_info
.words
;
3227 n_gpr
= current_function_args_info
.regno
;
3228 n_fpr
= current_function_args_info
.sse_regno
;
3230 if (TARGET_DEBUG_ARG
)
3231 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3232 (int) words
, (int) n_gpr
, (int) n_fpr
);
3234 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
,
3235 build_int_cst (NULL_TREE
, n_gpr
* 8, 0));
3236 TREE_SIDE_EFFECTS (t
) = 1;
3237 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3239 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
,
3240 build_int_cst (NULL_TREE
, n_fpr
* 16 + 8*REGPARM_MAX
, 0));
3241 TREE_SIDE_EFFECTS (t
) = 1;
3242 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3244 /* Find the overflow area. */
3245 t
= make_tree (TREE_TYPE (ovf
), virtual_incoming_args_rtx
);
3247 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), t
,
3248 build_int_cst (NULL_TREE
, words
* UNITS_PER_WORD
, 0));
3249 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
3250 TREE_SIDE_EFFECTS (t
) = 1;
3251 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3253 /* Find the register save area.
3254 Prologue of the function save it right above stack frame. */
3255 t
= make_tree (TREE_TYPE (sav
), frame_pointer_rtx
);
3256 t
= build (MODIFY_EXPR
, TREE_TYPE (sav
), sav
, t
);
3257 TREE_SIDE_EFFECTS (t
) = 1;
3258 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3261 /* Implement va_arg. */
3264 ix86_gimplify_va_arg (tree valist
, tree type
, tree
*pre_p
, tree
*post_p
)
3266 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
3267 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
3268 tree gpr
, fpr
, ovf
, sav
, t
;
3270 tree lab_false
, lab_over
= NULL_TREE
;
3276 /* Only 64bit target needs something special. */
3278 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
3280 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
3281 f_fpr
= TREE_CHAIN (f_gpr
);
3282 f_ovf
= TREE_CHAIN (f_fpr
);
3283 f_sav
= TREE_CHAIN (f_ovf
);
3285 valist
= build_fold_indirect_ref (valist
);
3286 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
3287 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
3288 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
3289 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
3291 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
3293 type
= build_pointer_type (type
);
3294 size
= int_size_in_bytes (type
);
3295 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3297 container
= construct_container (TYPE_MODE (type
), type
, 0,
3298 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
3300 * Pull the value out of the saved registers ...
3303 addr
= create_tmp_var (ptr_type_node
, "addr");
3304 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
3308 int needed_intregs
, needed_sseregs
;
3310 tree int_addr
, sse_addr
;
3312 lab_false
= create_artificial_label ();
3313 lab_over
= create_artificial_label ();
3315 examine_argument (TYPE_MODE (type
), type
, 0,
3316 &needed_intregs
, &needed_sseregs
);
3318 need_temp
= (!REG_P (container
)
3319 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
3320 || TYPE_ALIGN (type
) > 128));
3322 /* In case we are passing structure, verify that it is consecutive block
3323 on the register save area. If not we need to do moves. */
3324 if (!need_temp
&& !REG_P (container
))
3326 /* Verify that all registers are strictly consecutive */
3327 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
3331 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
3333 rtx slot
= XVECEXP (container
, 0, i
);
3334 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
3335 || INTVAL (XEXP (slot
, 1)) != i
* 16)
3343 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
3345 rtx slot
= XVECEXP (container
, 0, i
);
3346 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
3347 || INTVAL (XEXP (slot
, 1)) != i
* 8)
3359 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
3360 DECL_POINTER_ALIAS_SET (int_addr
) = get_varargs_alias_set ();
3361 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
3362 DECL_POINTER_ALIAS_SET (sse_addr
) = get_varargs_alias_set ();
3364 /* First ensure that we fit completely in registers. */
3367 t
= build_int_cst (TREE_TYPE (gpr
),
3368 (REGPARM_MAX
- needed_intregs
+ 1) * 8, 0);
3369 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
3370 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
3371 t
= build (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
3372 gimplify_and_add (t
, pre_p
);
3376 t
= build_int_cst (TREE_TYPE (fpr
),
3377 (SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
3378 + REGPARM_MAX
* 8, 0);
3379 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
3380 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
3381 t
= build (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
3382 gimplify_and_add (t
, pre_p
);
3385 /* Compute index to start of area used for integer regs. */
3388 /* int_addr = gpr + sav; */
3389 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, gpr
);
3390 t
= build2 (MODIFY_EXPR
, void_type_node
, int_addr
, t
);
3391 gimplify_and_add (t
, pre_p
);
3395 /* sse_addr = fpr + sav; */
3396 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, fpr
);
3397 t
= build2 (MODIFY_EXPR
, void_type_node
, sse_addr
, t
);
3398 gimplify_and_add (t
, pre_p
);
3403 tree temp
= create_tmp_var (type
, "va_arg_tmp");
3406 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
3407 t
= build2 (MODIFY_EXPR
, void_type_node
, addr
, t
);
3408 gimplify_and_add (t
, pre_p
);
3410 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
3412 rtx slot
= XVECEXP (container
, 0, i
);
3413 rtx reg
= XEXP (slot
, 0);
3414 enum machine_mode mode
= GET_MODE (reg
);
3415 tree piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
3416 tree addr_type
= build_pointer_type (piece_type
);
3419 tree dest_addr
, dest
;
3421 if (SSE_REGNO_P (REGNO (reg
)))
3423 src_addr
= sse_addr
;
3424 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
3428 src_addr
= int_addr
;
3429 src_offset
= REGNO (reg
) * 8;
3431 src_addr
= fold_convert (addr_type
, src_addr
);
3432 src_addr
= fold (build2 (PLUS_EXPR
, addr_type
, src_addr
,
3433 size_int (src_offset
)));
3434 src
= build_fold_indirect_ref (src_addr
);
3436 dest_addr
= fold_convert (addr_type
, addr
);
3437 dest_addr
= fold (build2 (PLUS_EXPR
, addr_type
, dest_addr
,
3438 size_int (INTVAL (XEXP (slot
, 1)))));
3439 dest
= build_fold_indirect_ref (dest_addr
);
3441 t
= build2 (MODIFY_EXPR
, void_type_node
, dest
, src
);
3442 gimplify_and_add (t
, pre_p
);
3448 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
3449 build_int_cst (NULL_TREE
, needed_intregs
* 8, 0));
3450 t
= build2 (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
, t
);
3451 gimplify_and_add (t
, pre_p
);
3455 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
3456 build_int_cst (NULL_TREE
, needed_sseregs
* 16, 0));
3457 t
= build2 (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
, t
);
3458 gimplify_and_add (t
, pre_p
);
3461 t
= build1 (GOTO_EXPR
, void_type_node
, lab_over
);
3462 gimplify_and_add (t
, pre_p
);
3464 t
= build1 (LABEL_EXPR
, void_type_node
, lab_false
);
3465 append_to_statement_list (t
, pre_p
);
3468 /* ... otherwise out of the overflow area. */
3470 /* Care for on-stack alignment if needed. */
3471 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64)
3475 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
3476 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
,
3477 build_int_cst (NULL_TREE
, align
- 1, 0));
3478 t
= build (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
3479 build_int_cst (NULL_TREE
, -align
, -1));
3481 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
3483 t2
= build2 (MODIFY_EXPR
, void_type_node
, addr
, t
);
3484 gimplify_and_add (t2
, pre_p
);
3486 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
3487 build_int_cst (NULL_TREE
, rsize
* UNITS_PER_WORD
, 0));
3488 t
= build2 (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
3489 gimplify_and_add (t
, pre_p
);
3493 t
= build1 (LABEL_EXPR
, void_type_node
, lab_over
);
3494 append_to_statement_list (t
, pre_p
);
3497 ptrtype
= build_pointer_type (type
);
3498 addr
= fold_convert (ptrtype
, addr
);
3501 addr
= build_fold_indirect_ref (addr
);
3502 return build_fold_indirect_ref (addr
);
3505 /* Return nonzero if OPNUM's MEM should be matched
3506 in movabs* patterns. */
3509 ix86_check_movabs (rtx insn
, int opnum
)
3513 set
= PATTERN (insn
);
3514 if (GET_CODE (set
) == PARALLEL
)
3515 set
= XVECEXP (set
, 0, 0);
3516 if (GET_CODE (set
) != SET
)
3518 mem
= XEXP (set
, opnum
);
3519 while (GET_CODE (mem
) == SUBREG
)
3520 mem
= SUBREG_REG (mem
);
3521 if (GET_CODE (mem
) != MEM
)
3523 return (volatile_ok
|| !MEM_VOLATILE_P (mem
));
3526 /* Initialize the table of extra 80387 mathematical constants. */
3529 init_ext_80387_constants (void)
3531 static const char * cst
[5] =
3533 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
3534 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
3535 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
3536 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
3537 "3.1415926535897932385128089594061862044", /* 4: fldpi */
3541 for (i
= 0; i
< 5; i
++)
3543 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
3544 /* Ensure each constant is rounded to XFmode precision. */
3545 real_convert (&ext_80387_constants_table
[i
],
3546 XFmode
, &ext_80387_constants_table
[i
]);
3549 ext_80387_constants_init
= 1;
3552 /* Return true if the constant is something that can be loaded with
3553 a special instruction. */
3556 standard_80387_constant_p (rtx x
)
3558 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
3561 if (x
== CONST0_RTX (GET_MODE (x
)))
3563 if (x
== CONST1_RTX (GET_MODE (x
)))
3566 /* For XFmode constants, try to find a special 80387 instruction when
3567 optimizing for size or on those CPUs that benefit from them. */
3568 if (GET_MODE (x
) == XFmode
3569 && (optimize_size
|| x86_ext_80387_constants
& TUNEMASK
))
3574 if (! ext_80387_constants_init
)
3575 init_ext_80387_constants ();
3577 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
3578 for (i
= 0; i
< 5; i
++)
3579 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
3586 /* Return the opcode of the special instruction to be used to load
3590 standard_80387_constant_opcode (rtx x
)
3592 switch (standard_80387_constant_p (x
))
3612 /* Return the CONST_DOUBLE representing the 80387 constant that is
3613 loaded by the specified special instruction. The argument IDX
3614 matches the return value from standard_80387_constant_p. */
3617 standard_80387_constant_rtx (int idx
)
3621 if (! ext_80387_constants_init
)
3622 init_ext_80387_constants ();
3638 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
3642 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
3645 standard_sse_constant_p (rtx x
)
3647 if (x
== const0_rtx
)
3649 return (x
== CONST0_RTX (GET_MODE (x
)));
3652 /* Returns 1 if OP contains a symbol reference */
3655 symbolic_reference_mentioned_p (rtx op
)
3660 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
3663 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
3664 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
3670 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
3671 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
3675 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
3682 /* Return 1 if it is appropriate to emit `ret' instructions in the
3683 body of a function. Do this only if the epilogue is simple, needing a
3684 couple of insns. Prior to reloading, we can't tell how many registers
3685 must be saved, so return 0 then. Return 0 if there is no frame
3686 marker to de-allocate.
3688 If NON_SAVING_SETJMP is defined and true, then it is not possible
3689 for the epilogue to be simple, so return 0. This is a special case
3690 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3691 until final, but jump_optimize may need to know sooner if a
3695 ix86_can_use_return_insn_p (void)
3697 struct ix86_frame frame
;
3699 #ifdef NON_SAVING_SETJMP
3700 if (NON_SAVING_SETJMP
&& current_function_calls_setjmp
)
3704 if (! reload_completed
|| frame_pointer_needed
)
3707 /* Don't allow more than 32 pop, since that's all we can do
3708 with one instruction. */
3709 if (current_function_pops_args
3710 && current_function_args_size
>= 32768)
3713 ix86_compute_frame_layout (&frame
);
3714 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
3717 /* Value should be nonzero if functions must have frame pointers.
3718 Zero means the frame pointer need not be set up (and parms may
3719 be accessed via the stack pointer) in functions that seem suitable. */
3722 ix86_frame_pointer_required (void)
3724 /* If we accessed previous frames, then the generated code expects
3725 to be able to access the saved ebp value in our frame. */
3726 if (cfun
->machine
->accesses_prev_frame
)
3729 /* Several x86 os'es need a frame pointer for other reasons,
3730 usually pertaining to setjmp. */
3731 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
3734 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3735 the frame pointer by default. Turn it back on now if we've not
3736 got a leaf function. */
3737 if (TARGET_OMIT_LEAF_FRAME_POINTER
3738 && (!current_function_is_leaf
))
3741 if (current_function_profile
)
3747 /* Record that the current function accesses previous call frames. */
3750 ix86_setup_frame_addresses (void)
3752 cfun
->machine
->accesses_prev_frame
= 1;
3755 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3756 # define USE_HIDDEN_LINKONCE 1
3758 # define USE_HIDDEN_LINKONCE 0
3761 static int pic_labels_used
;
3763 /* Fills in the label name that should be used for a pc thunk for
3764 the given register. */
3767 get_pc_thunk_name (char name
[32], unsigned int regno
)
3769 if (USE_HIDDEN_LINKONCE
)
3770 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
3772 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
3776 /* This function generates code for -fpic that loads %ebx with
3777 the return address of the caller and then returns. */
3780 ix86_file_end (void)
3785 for (regno
= 0; regno
< 8; ++regno
)
3789 if (! ((pic_labels_used
>> regno
) & 1))
3792 get_pc_thunk_name (name
, regno
);
3794 if (USE_HIDDEN_LINKONCE
)
3798 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
3800 TREE_PUBLIC (decl
) = 1;
3801 TREE_STATIC (decl
) = 1;
3802 DECL_ONE_ONLY (decl
) = 1;
3804 (*targetm
.asm_out
.unique_section
) (decl
, 0);
3805 named_section (decl
, NULL
, 0);
3807 (*targetm
.asm_out
.globalize_label
) (asm_out_file
, name
);
3808 fputs ("\t.hidden\t", asm_out_file
);
3809 assemble_name (asm_out_file
, name
);
3810 fputc ('\n', asm_out_file
);
3811 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
3816 ASM_OUTPUT_LABEL (asm_out_file
, name
);
3819 xops
[0] = gen_rtx_REG (SImode
, regno
);
3820 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
3821 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
3822 output_asm_insn ("ret", xops
);
3825 if (NEED_INDICATE_EXEC_STACK
)
3826 file_end_indicate_exec_stack ();
3829 /* Emit code for the SET_GOT patterns. */
3832 output_set_got (rtx dest
)
3837 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
3839 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
3841 xops
[2] = gen_rtx_LABEL_REF (Pmode
, gen_label_rtx ());
3844 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
3846 output_asm_insn ("call\t%a2", xops
);
3849 /* Output the "canonical" label name ("Lxx$pb") here too. This
3850 is what will be referred to by the Mach-O PIC subsystem. */
3851 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
3853 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
3854 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
3857 output_asm_insn ("pop{l}\t%0", xops
);
3862 get_pc_thunk_name (name
, REGNO (dest
));
3863 pic_labels_used
|= 1 << REGNO (dest
);
3865 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
3866 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
3867 output_asm_insn ("call\t%X2", xops
);
3870 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
3871 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
3872 else if (!TARGET_MACHO
)
3873 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops
);
3878 /* Generate an "push" pattern for input ARG. */
3883 return gen_rtx_SET (VOIDmode
,
3885 gen_rtx_PRE_DEC (Pmode
,
3886 stack_pointer_rtx
)),
3890 /* Return >= 0 if there is an unused call-clobbered register available
3891 for the entire function. */
3894 ix86_select_alt_pic_regnum (void)
3896 if (current_function_is_leaf
&& !current_function_profile
)
3899 for (i
= 2; i
>= 0; --i
)
3900 if (!regs_ever_live
[i
])
3904 return INVALID_REGNUM
;
3907 /* Return 1 if we need to save REGNO. */
3909 ix86_save_reg (unsigned int regno
, int maybe_eh_return
)
3911 if (pic_offset_table_rtx
3912 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
3913 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
3914 || current_function_profile
3915 || current_function_calls_eh_return
3916 || current_function_uses_const_pool
))
3918 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
3923 if (current_function_calls_eh_return
&& maybe_eh_return
)
3928 unsigned test
= EH_RETURN_DATA_REGNO (i
);
3929 if (test
== INVALID_REGNUM
)
3936 return (regs_ever_live
[regno
]
3937 && !call_used_regs
[regno
]
3938 && !fixed_regs
[regno
]
3939 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
3942 /* Return number of registers to be saved on the stack. */
3945 ix86_nsaved_regs (void)
3950 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
3951 if (ix86_save_reg (regno
, true))
3956 /* Return the offset between two registers, one to be eliminated, and the other
3957 its replacement, at the start of a routine. */
3960 ix86_initial_elimination_offset (int from
, int to
)
3962 struct ix86_frame frame
;
3963 ix86_compute_frame_layout (&frame
);
3965 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
3966 return frame
.hard_frame_pointer_offset
;
3967 else if (from
== FRAME_POINTER_REGNUM
3968 && to
== HARD_FRAME_POINTER_REGNUM
)
3969 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
3972 if (to
!= STACK_POINTER_REGNUM
)
3974 else if (from
== ARG_POINTER_REGNUM
)
3975 return frame
.stack_pointer_offset
;
3976 else if (from
!= FRAME_POINTER_REGNUM
)
3979 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
3983 /* Fill structure ix86_frame about frame of currently computed function. */
3986 ix86_compute_frame_layout (struct ix86_frame
*frame
)
3988 HOST_WIDE_INT total_size
;
3989 unsigned int stack_alignment_needed
;
3990 HOST_WIDE_INT offset
;
3991 unsigned int preferred_alignment
;
3992 HOST_WIDE_INT size
= get_frame_size ();
3994 frame
->nregs
= ix86_nsaved_regs ();
3997 stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
3998 preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
4000 /* During reload iteration the amount of registers saved can change.
4001 Recompute the value as needed. Do not recompute when amount of registers
4002 didn't change as reload does mutiple calls to the function and does not
4003 expect the decision to change within single iteration. */
4005 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
4007 int count
= frame
->nregs
;
4009 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
4010 /* The fast prologue uses move instead of push to save registers. This
4011 is significantly longer, but also executes faster as modern hardware
4012 can execute the moves in parallel, but can't do that for push/pop.
4014 Be careful about choosing what prologue to emit: When function takes
4015 many instructions to execute we may use slow version as well as in
4016 case function is known to be outside hot spot (this is known with
4017 feedback only). Weight the size of function by number of registers
4018 to save as it is cheap to use one or two push instructions but very
4019 slow to use many of them. */
4021 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
4022 if (cfun
->function_frequency
< FUNCTION_FREQUENCY_NORMAL
4023 || (flag_branch_probabilities
4024 && cfun
->function_frequency
< FUNCTION_FREQUENCY_HOT
))
4025 cfun
->machine
->use_fast_prologue_epilogue
= false;
4027 cfun
->machine
->use_fast_prologue_epilogue
4028 = !expensive_function_p (count
);
4030 if (TARGET_PROLOGUE_USING_MOVE
4031 && cfun
->machine
->use_fast_prologue_epilogue
)
4032 frame
->save_regs_using_mov
= true;
4034 frame
->save_regs_using_mov
= false;
4037 /* Skip return address and saved base pointer. */
4038 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
4040 frame
->hard_frame_pointer_offset
= offset
;
4042 /* Do some sanity checking of stack_alignment_needed and
4043 preferred_alignment, since i386 port is the only using those features
4044 that may break easily. */
4046 if (size
&& !stack_alignment_needed
)
4048 if (preferred_alignment
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
4050 if (preferred_alignment
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
4052 if (stack_alignment_needed
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
4055 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
4056 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
4058 /* Register save area */
4059 offset
+= frame
->nregs
* UNITS_PER_WORD
;
4062 if (ix86_save_varrargs_registers
)
4064 offset
+= X86_64_VARARGS_SIZE
;
4065 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
4068 frame
->va_arg_size
= 0;
4070 /* Align start of frame for local function. */
4071 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
4072 & -stack_alignment_needed
) - offset
;
4074 offset
+= frame
->padding1
;
4076 /* Frame pointer points here. */
4077 frame
->frame_pointer_offset
= offset
;
4081 /* Add outgoing arguments area. Can be skipped if we eliminated
4082 all the function calls as dead code.
4083 Skipping is however impossible when function calls alloca. Alloca
4084 expander assumes that last current_function_outgoing_args_size
4085 of stack frame are unused. */
4086 if (ACCUMULATE_OUTGOING_ARGS
4087 && (!current_function_is_leaf
|| current_function_calls_alloca
))
4089 offset
+= current_function_outgoing_args_size
;
4090 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
4093 frame
->outgoing_arguments_size
= 0;
4095 /* Align stack boundary. Only needed if we're calling another function
4097 if (!current_function_is_leaf
|| current_function_calls_alloca
)
4098 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
4099 & -preferred_alignment
) - offset
;
4101 frame
->padding2
= 0;
4103 offset
+= frame
->padding2
;
4105 /* We've reached end of stack frame. */
4106 frame
->stack_pointer_offset
= offset
;
4108 /* Size prologue needs to allocate. */
4109 frame
->to_allocate
=
4110 (size
+ frame
->padding1
+ frame
->padding2
4111 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
4113 if ((!frame
->to_allocate
&& frame
->nregs
<= 1)
4114 || (TARGET_64BIT
&& frame
->to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
4115 frame
->save_regs_using_mov
= false;
4117 if (TARGET_RED_ZONE
&& current_function_sp_is_unchanging
4118 && current_function_is_leaf
)
4120 frame
->red_zone_size
= frame
->to_allocate
;
4121 if (frame
->save_regs_using_mov
)
4122 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
4123 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
4124 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
4127 frame
->red_zone_size
= 0;
4128 frame
->to_allocate
-= frame
->red_zone_size
;
4129 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
4131 fprintf (stderr
, "nregs: %i\n", frame
->nregs
);
4132 fprintf (stderr
, "size: %i\n", size
);
4133 fprintf (stderr
, "alignment1: %i\n", stack_alignment_needed
);
4134 fprintf (stderr
, "padding1: %i\n", frame
->padding1
);
4135 fprintf (stderr
, "va_arg: %i\n", frame
->va_arg_size
);
4136 fprintf (stderr
, "padding2: %i\n", frame
->padding2
);
4137 fprintf (stderr
, "to_allocate: %i\n", frame
->to_allocate
);
4138 fprintf (stderr
, "red_zone_size: %i\n", frame
->red_zone_size
);
4139 fprintf (stderr
, "frame_pointer_offset: %i\n", frame
->frame_pointer_offset
);
4140 fprintf (stderr
, "hard_frame_pointer_offset: %i\n",
4141 frame
->hard_frame_pointer_offset
);
4142 fprintf (stderr
, "stack_pointer_offset: %i\n", frame
->stack_pointer_offset
);
4146 /* Emit code to save registers in the prologue. */
4149 ix86_emit_save_regs (void)
4154 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
4155 if (ix86_save_reg (regno
, true))
4157 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
4158 RTX_FRAME_RELATED_P (insn
) = 1;
4162 /* Emit code to save registers using MOV insns. First register
4163 is restored from POINTER + OFFSET. */
4165 ix86_emit_save_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
)
4170 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4171 if (ix86_save_reg (regno
, true))
4173 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
4175 gen_rtx_REG (Pmode
, regno
));
4176 RTX_FRAME_RELATED_P (insn
) = 1;
4177 offset
+= UNITS_PER_WORD
;
4181 /* Expand prologue or epilogue stack adjustment.
4182 The pattern exist to put a dependency on all ebp-based memory accesses.
4183 STYLE should be negative if instructions should be marked as frame related,
4184 zero if %r11 register is live and cannot be freely used and positive
4188 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
, int style
)
4193 insn
= emit_insn (gen_pro_epilogue_adjust_stack_1 (dest
, src
, offset
));
4194 else if (x86_64_immediate_operand (offset
, DImode
))
4195 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest
, src
, offset
));
4199 /* r11 is used by indirect sibcall return as well, set before the
4200 epilogue and used after the epilogue. ATM indirect sibcall
4201 shouldn't be used together with huge frame sizes in one
4202 function because of the frame_size check in sibcall.c. */
4205 r11
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
4206 insn
= emit_insn (gen_rtx_SET (DImode
, r11
, offset
));
4208 RTX_FRAME_RELATED_P (insn
) = 1;
4209 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest
, src
, r11
,
4213 RTX_FRAME_RELATED_P (insn
) = 1;
4216 /* Expand the prologue into a bunch of separate insns. */
4219 ix86_expand_prologue (void)
4223 struct ix86_frame frame
;
4224 HOST_WIDE_INT allocate
;
4226 ix86_compute_frame_layout (&frame
);
4228 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4229 slower on all targets. Also sdb doesn't like it. */
4231 if (frame_pointer_needed
)
4233 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
4234 RTX_FRAME_RELATED_P (insn
) = 1;
4236 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
4237 RTX_FRAME_RELATED_P (insn
) = 1;
4240 allocate
= frame
.to_allocate
;
4242 if (!frame
.save_regs_using_mov
)
4243 ix86_emit_save_regs ();
4245 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
4247 /* When using red zone we may start register saving before allocating
4248 the stack frame saving one cycle of the prologue. */
4249 if (TARGET_RED_ZONE
&& frame
.save_regs_using_mov
)
4250 ix86_emit_save_regs_using_mov (frame_pointer_needed
? hard_frame_pointer_rtx
4251 : stack_pointer_rtx
,
4252 -frame
.nregs
* UNITS_PER_WORD
);
4256 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
4257 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
4258 GEN_INT (-allocate
), -1);
4261 /* Only valid for Win32. */
4262 rtx eax
= gen_rtx_REG (SImode
, 0);
4263 bool eax_live
= ix86_eax_live_at_start_p ();
4270 emit_insn (gen_push (eax
));
4274 insn
= emit_move_insn (eax
, GEN_INT (allocate
));
4275 RTX_FRAME_RELATED_P (insn
) = 1;
4277 insn
= emit_insn (gen_allocate_stack_worker (eax
));
4278 RTX_FRAME_RELATED_P (insn
) = 1;
4283 if (frame_pointer_needed
)
4284 t
= plus_constant (hard_frame_pointer_rtx
,
4287 - frame
.nregs
* UNITS_PER_WORD
);
4289 t
= plus_constant (stack_pointer_rtx
, allocate
);
4290 emit_move_insn (eax
, gen_rtx_MEM (SImode
, t
));
4294 if (frame
.save_regs_using_mov
&& !TARGET_RED_ZONE
)
4296 if (!frame_pointer_needed
|| !frame
.to_allocate
)
4297 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
4299 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
4300 -frame
.nregs
* UNITS_PER_WORD
);
4303 pic_reg_used
= false;
4304 if (pic_offset_table_rtx
4305 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
4306 || current_function_profile
))
4308 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
4310 if (alt_pic_reg_used
!= INVALID_REGNUM
)
4311 REGNO (pic_offset_table_rtx
) = alt_pic_reg_used
;
4313 pic_reg_used
= true;
4318 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
4320 /* Even with accurate pre-reload life analysis, we can wind up
4321 deleting all references to the pic register after reload.
4322 Consider if cross-jumping unifies two sides of a branch
4323 controlled by a comparison vs the only read from a global.
4324 In which case, allow the set_got to be deleted, though we're
4325 too late to do anything about the ebx save in the prologue. */
4326 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
4329 /* Prevent function calls from be scheduled before the call to mcount.
4330 In the pic_reg_used case, make sure that the got load isn't deleted. */
4331 if (current_function_profile
)
4332 emit_insn (gen_blockage (pic_reg_used
? pic_offset_table_rtx
: const0_rtx
));
4335 /* Emit code to restore saved registers using MOV insns. First register
4336 is restored from POINTER + OFFSET. */
4338 ix86_emit_restore_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
,
4339 int maybe_eh_return
)
4342 rtx base_address
= gen_rtx_MEM (Pmode
, pointer
);
4344 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4345 if (ix86_save_reg (regno
, maybe_eh_return
))
4347 /* Ensure that adjust_address won't be forced to produce pointer
4348 out of range allowed by x86-64 instruction set. */
4349 if (TARGET_64BIT
&& offset
!= trunc_int_for_mode (offset
, SImode
))
4353 r11
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
4354 emit_move_insn (r11
, GEN_INT (offset
));
4355 emit_insn (gen_adddi3 (r11
, r11
, pointer
));
4356 base_address
= gen_rtx_MEM (Pmode
, r11
);
4359 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
4360 adjust_address (base_address
, Pmode
, offset
));
4361 offset
+= UNITS_PER_WORD
;
4365 /* Restore function stack, frame, and registers. */
4368 ix86_expand_epilogue (int style
)
4371 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
4372 struct ix86_frame frame
;
4373 HOST_WIDE_INT offset
;
4375 ix86_compute_frame_layout (&frame
);
4377 /* Calculate start of saved registers relative to ebp. Special care
4378 must be taken for the normal return case of a function using
4379 eh_return: the eax and edx registers are marked as saved, but not
4380 restored along this path. */
4381 offset
= frame
.nregs
;
4382 if (current_function_calls_eh_return
&& style
!= 2)
4384 offset
*= -UNITS_PER_WORD
;
4386 /* If we're only restoring one register and sp is not valid then
4387 using a move instruction to restore the register since it's
4388 less work than reloading sp and popping the register.
4390 The default code result in stack adjustment using add/lea instruction,
4391 while this code results in LEAVE instruction (or discrete equivalent),
4392 so it is profitable in some other cases as well. Especially when there
4393 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4394 and there is exactly one register to pop. This heuristic may need some
4395 tuning in future. */
4396 if ((!sp_valid
&& frame
.nregs
<= 1)
4397 || (TARGET_EPILOGUE_USING_MOVE
4398 && cfun
->machine
->use_fast_prologue_epilogue
4399 && (frame
.nregs
> 1 || frame
.to_allocate
))
4400 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
4401 || (frame_pointer_needed
&& TARGET_USE_LEAVE
4402 && cfun
->machine
->use_fast_prologue_epilogue
4403 && frame
.nregs
== 1)
4404 || current_function_calls_eh_return
)
4406 /* Restore registers. We can use ebp or esp to address the memory
4407 locations. If both are available, default to ebp, since offsets
4408 are known to be small. Only exception is esp pointing directly to the
4409 end of block of saved registers, where we may simplify addressing
4412 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
4413 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
4414 frame
.to_allocate
, style
== 2);
4416 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
4417 offset
, style
== 2);
4419 /* eh_return epilogues need %ecx added to the stack pointer. */
4422 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
4424 if (frame_pointer_needed
)
4426 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
4427 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
4428 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
4430 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
4431 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
4433 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
4438 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
4439 tmp
= plus_constant (tmp
, (frame
.to_allocate
4440 + frame
.nregs
* UNITS_PER_WORD
));
4441 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
4444 else if (!frame_pointer_needed
)
4445 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
4446 GEN_INT (frame
.to_allocate
4447 + frame
.nregs
* UNITS_PER_WORD
),
4449 /* If not an i386, mov & pop is faster than "leave". */
4450 else if (TARGET_USE_LEAVE
|| optimize_size
4451 || !cfun
->machine
->use_fast_prologue_epilogue
)
4452 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
4455 pro_epilogue_adjust_stack (stack_pointer_rtx
,
4456 hard_frame_pointer_rtx
,
4459 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
4461 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
4466 /* First step is to deallocate the stack frame so that we can
4467 pop the registers. */
4470 if (!frame_pointer_needed
)
4472 pro_epilogue_adjust_stack (stack_pointer_rtx
,
4473 hard_frame_pointer_rtx
,
4474 GEN_INT (offset
), style
);
4476 else if (frame
.to_allocate
)
4477 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
4478 GEN_INT (frame
.to_allocate
), style
);
4480 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4481 if (ix86_save_reg (regno
, false))
4484 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
4486 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
4488 if (frame_pointer_needed
)
4490 /* Leave results in shorter dependency chains on CPUs that are
4491 able to grok it fast. */
4492 if (TARGET_USE_LEAVE
)
4493 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
4494 else if (TARGET_64BIT
)
4495 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
4497 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
4501 /* Sibcall epilogues don't want a return instruction. */
4505 if (current_function_pops_args
&& current_function_args_size
)
4507 rtx popc
= GEN_INT (current_function_pops_args
);
4509 /* i386 can only pop 64K bytes. If asked to pop more, pop
4510 return address, do explicit add, and jump indirectly to the
4513 if (current_function_pops_args
>= 65536)
4515 rtx ecx
= gen_rtx_REG (SImode
, 2);
4517 /* There is no "pascal" calling convention in 64bit ABI. */
4521 emit_insn (gen_popsi1 (ecx
));
4522 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
4523 emit_jump_insn (gen_return_indirect_internal (ecx
));
4526 emit_jump_insn (gen_return_pop_internal (popc
));
4529 emit_jump_insn (gen_return_internal ());
4532 /* Reset from the function's potential modifications. */
4535 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
4536 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
4538 if (pic_offset_table_rtx
)
4539 REGNO (pic_offset_table_rtx
) = REAL_PIC_OFFSET_TABLE_REGNUM
;
4542 /* Extract the parts of an RTL expression that is a valid memory address
4543 for an instruction. Return 0 if the structure of the address is
4544 grossly off. Return -1 if the address contains ASHIFT, so it is not
4545 strictly valid, but still used for computing length of lea instruction. */
4548 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
4550 rtx base
= NULL_RTX
;
4551 rtx index
= NULL_RTX
;
4552 rtx disp
= NULL_RTX
;
4553 HOST_WIDE_INT scale
= 1;
4554 rtx scale_rtx
= NULL_RTX
;
4556 enum ix86_address_seg seg
= SEG_DEFAULT
;
4558 if (GET_CODE (addr
) == REG
|| GET_CODE (addr
) == SUBREG
)
4560 else if (GET_CODE (addr
) == PLUS
)
4570 addends
[n
++] = XEXP (op
, 1);
4573 while (GET_CODE (op
) == PLUS
);
4578 for (i
= n
; i
>= 0; --i
)
4581 switch (GET_CODE (op
))
4586 index
= XEXP (op
, 0);
4587 scale_rtx
= XEXP (op
, 1);
4591 if (XINT (op
, 1) == UNSPEC_TP
4592 && TARGET_TLS_DIRECT_SEG_REFS
4593 && seg
== SEG_DEFAULT
)
4594 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
4623 else if (GET_CODE (addr
) == MULT
)
4625 index
= XEXP (addr
, 0); /* index*scale */
4626 scale_rtx
= XEXP (addr
, 1);
4628 else if (GET_CODE (addr
) == ASHIFT
)
4632 /* We're called for lea too, which implements ashift on occasion. */
4633 index
= XEXP (addr
, 0);
4634 tmp
= XEXP (addr
, 1);
4635 if (GET_CODE (tmp
) != CONST_INT
)
4637 scale
= INTVAL (tmp
);
4638 if ((unsigned HOST_WIDE_INT
) scale
> 3)
4644 disp
= addr
; /* displacement */
4646 /* Extract the integral value of scale. */
4649 if (GET_CODE (scale_rtx
) != CONST_INT
)
4651 scale
= INTVAL (scale_rtx
);
4654 /* Allow arg pointer and stack pointer as index if there is not scaling. */
4655 if (base
&& index
&& scale
== 1
4656 && (index
== arg_pointer_rtx
4657 || index
== frame_pointer_rtx
4658 || (REG_P (index
) && REGNO (index
) == STACK_POINTER_REGNUM
)))
4665 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4666 if ((base
== hard_frame_pointer_rtx
4667 || base
== frame_pointer_rtx
4668 || base
== arg_pointer_rtx
) && !disp
)
4671 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4672 Avoid this by transforming to [%esi+0]. */
4673 if (ix86_tune
== PROCESSOR_K6
&& !optimize_size
4674 && base
&& !index
&& !disp
4676 && REGNO_REG_CLASS (REGNO (base
)) == SIREG
)
4679 /* Special case: encode reg+reg instead of reg*2. */
4680 if (!base
&& index
&& scale
&& scale
== 2)
4681 base
= index
, scale
= 1;
4683 /* Special case: scaling cannot be encoded without base or displacement. */
4684 if (!base
&& !disp
&& index
&& scale
!= 1)
4696 /* Return cost of the memory address x.
4697 For i386, it is better to use a complex address than let gcc copy
4698 the address into a reg and make a new pseudo. But not if the address
4699 requires to two regs - that would mean more pseudos with longer
4702 ix86_address_cost (rtx x
)
4704 struct ix86_address parts
;
4707 if (!ix86_decompose_address (x
, &parts
))
4710 /* More complex memory references are better. */
4711 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
4713 if (parts
.seg
!= SEG_DEFAULT
)
4716 /* Attempt to minimize number of registers in the address. */
4718 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
4720 && (!REG_P (parts
.index
)
4721 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
4725 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
4727 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
4728 && parts
.base
!= parts
.index
)
4731 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
4732 since it's predecode logic can't detect the length of instructions
4733 and it degenerates to vector decoded. Increase cost of such
4734 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4735 to split such addresses or even refuse such addresses at all.
4737 Following addressing modes are affected:
4742 The first and last case may be avoidable by explicitly coding the zero in
4743 memory address, but I don't have AMD-K6 machine handy to check this
4747 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
4748 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
4749 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
4755 /* If X is a machine specific address (i.e. a symbol or label being
4756 referenced as a displacement from the GOT implemented using an
4757 UNSPEC), then return the base term. Otherwise return X. */
4760 ix86_find_base_term (rtx x
)
4766 if (GET_CODE (x
) != CONST
)
4769 if (GET_CODE (term
) == PLUS
4770 && (GET_CODE (XEXP (term
, 1)) == CONST_INT
4771 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
4772 term
= XEXP (term
, 0);
4773 if (GET_CODE (term
) != UNSPEC
4774 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
4777 term
= XVECEXP (term
, 0, 0);
4779 if (GET_CODE (term
) != SYMBOL_REF
4780 && GET_CODE (term
) != LABEL_REF
)
4786 term
= ix86_delegitimize_address (x
);
4788 if (GET_CODE (term
) != SYMBOL_REF
4789 && GET_CODE (term
) != LABEL_REF
)
4795 /* Determine if a given RTX is a valid constant. We already know this
4796 satisfies CONSTANT_P. */
4799 legitimate_constant_p (rtx x
)
4803 switch (GET_CODE (x
))
4806 /* TLS symbols are not constant. */
4807 if (tls_symbolic_operand (x
, Pmode
))
4812 inner
= XEXP (x
, 0);
4814 /* Offsets of TLS symbols are never valid.
4815 Discourage CSE from creating them. */
4816 if (GET_CODE (inner
) == PLUS
4817 && tls_symbolic_operand (XEXP (inner
, 0), Pmode
))
4820 if (GET_CODE (inner
) == PLUS
4821 || GET_CODE (inner
) == MINUS
)
4823 if (GET_CODE (XEXP (inner
, 1)) != CONST_INT
)
4825 inner
= XEXP (inner
, 0);
4828 /* Only some unspecs are valid as "constants". */
4829 if (GET_CODE (inner
) == UNSPEC
)
4830 switch (XINT (inner
, 1))
4834 return local_exec_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
4836 return local_dynamic_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
4846 /* Otherwise we handle everything else in the move patterns. */
4850 /* Determine if it's legal to put X into the constant pool. This
4851 is not possible for the address of thread-local symbols, which
4852 is checked above. */
4855 ix86_cannot_force_const_mem (rtx x
)
4857 return !legitimate_constant_p (x
);
4860 /* Determine if a given RTX is a valid constant address. */
4863 constant_address_p (rtx x
)
4865 return CONSTANT_P (x
) && legitimate_address_p (Pmode
, x
, 1);
4868 /* Nonzero if the constant value X is a legitimate general operand
4869 when generating PIC code. It is given that flag_pic is on and
4870 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
4873 legitimate_pic_operand_p (rtx x
)
4877 switch (GET_CODE (x
))
4880 inner
= XEXP (x
, 0);
4882 /* Only some unspecs are valid as "constants". */
4883 if (GET_CODE (inner
) == UNSPEC
)
4884 switch (XINT (inner
, 1))
4887 return local_exec_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
4895 return legitimate_pic_address_disp_p (x
);
4902 /* Determine if a given CONST RTX is a valid memory displacement
4906 legitimate_pic_address_disp_p (rtx disp
)
4910 /* In 64bit mode we can allow direct addresses of symbols and labels
4911 when they are not dynamic symbols. */
4914 /* TLS references should always be enclosed in UNSPEC. */
4915 if (tls_symbolic_operand (disp
, GET_MODE (disp
)))
4917 if (GET_CODE (disp
) == SYMBOL_REF
4918 && ix86_cmodel
== CM_SMALL_PIC
4919 && SYMBOL_REF_LOCAL_P (disp
))
4921 if (GET_CODE (disp
) == LABEL_REF
)
4923 if (GET_CODE (disp
) == CONST
4924 && GET_CODE (XEXP (disp
, 0)) == PLUS
)
4926 rtx op0
= XEXP (XEXP (disp
, 0), 0);
4927 rtx op1
= XEXP (XEXP (disp
, 0), 1);
4929 /* TLS references should always be enclosed in UNSPEC. */
4930 if (tls_symbolic_operand (op0
, GET_MODE (op0
)))
4932 if (((GET_CODE (op0
) == SYMBOL_REF
4933 && ix86_cmodel
== CM_SMALL_PIC
4934 && SYMBOL_REF_LOCAL_P (op0
))
4935 || GET_CODE (op0
) == LABEL_REF
)
4936 && GET_CODE (op1
) == CONST_INT
4937 && INTVAL (op1
) < 16*1024*1024
4938 && INTVAL (op1
) >= -16*1024*1024)
4942 if (GET_CODE (disp
) != CONST
)
4944 disp
= XEXP (disp
, 0);
4948 /* We are unsafe to allow PLUS expressions. This limit allowed distance
4949 of GOT tables. We should not need these anyway. */
4950 if (GET_CODE (disp
) != UNSPEC
4951 || XINT (disp
, 1) != UNSPEC_GOTPCREL
)
4954 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
4955 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
4961 if (GET_CODE (disp
) == PLUS
)
4963 if (GET_CODE (XEXP (disp
, 1)) != CONST_INT
)
4965 disp
= XEXP (disp
, 0);
4969 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
4970 if (TARGET_MACHO
&& GET_CODE (disp
) == MINUS
)
4972 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
4973 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
4974 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
4976 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
4977 if (! strcmp (sym_name
, "<pic base>"))
4982 if (GET_CODE (disp
) != UNSPEC
)
4985 switch (XINT (disp
, 1))
4990 return GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
;
4992 if (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
4993 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
4994 return local_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
4996 case UNSPEC_GOTTPOFF
:
4997 case UNSPEC_GOTNTPOFF
:
4998 case UNSPEC_INDNTPOFF
:
5001 return initial_exec_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5003 return local_exec_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5005 return local_dynamic_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5011 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5012 memory address for an instruction. The MODE argument is the machine mode
5013 for the MEM expression that wants to use this address.
5015 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5016 convert common non-canonical forms to canonical form so that they will
5020 legitimate_address_p (enum machine_mode mode
, rtx addr
, int strict
)
5022 struct ix86_address parts
;
5023 rtx base
, index
, disp
;
5024 HOST_WIDE_INT scale
;
5025 const char *reason
= NULL
;
5026 rtx reason_rtx
= NULL_RTX
;
5028 if (TARGET_DEBUG_ADDR
)
5031 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5032 GET_MODE_NAME (mode
), strict
);
5036 if (ix86_decompose_address (addr
, &parts
) <= 0)
5038 reason
= "decomposition failed";
5043 index
= parts
.index
;
5045 scale
= parts
.scale
;
5047 /* Validate base register.
5049 Don't allow SUBREG's here, it can lead to spill failures when the base
5050 is one word out of a two word structure, which is represented internally
5057 if (GET_CODE (base
) != REG
)
5059 reason
= "base is not a register";
5063 if (GET_MODE (base
) != Pmode
)
5065 reason
= "base is not in Pmode";
5069 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (base
))
5070 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (base
)))
5072 reason
= "base is not valid";
5077 /* Validate index register.
5079 Don't allow SUBREG's here, it can lead to spill failures when the index
5080 is one word out of a two word structure, which is represented internally
5087 if (GET_CODE (index
) != REG
)
5089 reason
= "index is not a register";
5093 if (GET_MODE (index
) != Pmode
)
5095 reason
= "index is not in Pmode";
5099 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (index
))
5100 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (index
)))
5102 reason
= "index is not valid";
5107 /* Validate scale factor. */
5110 reason_rtx
= GEN_INT (scale
);
5113 reason
= "scale without index";
5117 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
5119 reason
= "scale is not a valid multiplier";
5124 /* Validate displacement. */
5129 if (GET_CODE (disp
) == CONST
5130 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
5131 switch (XINT (XEXP (disp
, 0), 1))
5135 case UNSPEC_GOTPCREL
:
5138 goto is_legitimate_pic
;
5140 case UNSPEC_GOTTPOFF
:
5141 case UNSPEC_GOTNTPOFF
:
5142 case UNSPEC_INDNTPOFF
:
5148 reason
= "invalid address unspec";
5152 else if (flag_pic
&& (SYMBOLIC_CONST (disp
)
5154 && !machopic_operand_p (disp
)
5159 if (TARGET_64BIT
&& (index
|| base
))
5161 /* foo@dtpoff(%rX) is ok. */
5162 if (GET_CODE (disp
) != CONST
5163 || GET_CODE (XEXP (disp
, 0)) != PLUS
5164 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
5165 || GET_CODE (XEXP (XEXP (disp
, 0), 1)) != CONST_INT
5166 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
5167 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
5169 reason
= "non-constant pic memory reference";
5173 else if (! legitimate_pic_address_disp_p (disp
))
5175 reason
= "displacement is an invalid pic construct";
5179 /* This code used to verify that a symbolic pic displacement
5180 includes the pic_offset_table_rtx register.
5182 While this is good idea, unfortunately these constructs may
5183 be created by "adds using lea" optimization for incorrect
5192 This code is nonsensical, but results in addressing
5193 GOT table with pic_offset_table_rtx base. We can't
5194 just refuse it easily, since it gets matched by
5195 "addsi3" pattern, that later gets split to lea in the
5196 case output register differs from input. While this
5197 can be handled by separate addsi pattern for this case
5198 that never results in lea, this seems to be easier and
5199 correct fix for crash to disable this test. */
5201 else if (GET_CODE (disp
) != LABEL_REF
5202 && GET_CODE (disp
) != CONST_INT
5203 && (GET_CODE (disp
) != CONST
5204 || !legitimate_constant_p (disp
))
5205 && (GET_CODE (disp
) != SYMBOL_REF
5206 || !legitimate_constant_p (disp
)))
5208 reason
= "displacement is not constant";
5211 else if (TARGET_64BIT
5212 && !x86_64_immediate_operand (disp
, VOIDmode
))
5214 reason
= "displacement is out of range";
5219 /* Everything looks valid. */
5220 if (TARGET_DEBUG_ADDR
)
5221 fprintf (stderr
, "Success.\n");
5225 if (TARGET_DEBUG_ADDR
)
5227 fprintf (stderr
, "Error: %s\n", reason
);
5228 debug_rtx (reason_rtx
);
5233 /* Return an unique alias set for the GOT. */
5235 static HOST_WIDE_INT
5236 ix86_GOT_alias_set (void)
5238 static HOST_WIDE_INT set
= -1;
5240 set
= new_alias_set ();
5244 /* Return a legitimate reference for ORIG (an address) using the
5245 register REG. If REG is 0, a new pseudo is generated.
5247 There are two types of references that must be handled:
5249 1. Global data references must load the address from the GOT, via
5250 the PIC reg. An insn is emitted to do this load, and the reg is
5253 2. Static data references, constant pool addresses, and code labels
5254 compute the address as an offset from the GOT, whose base is in
5255 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
5256 differentiate them from global data objects. The returned
5257 address is the PIC reg + an unspec constant.
5259 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5260 reg also appears in the address. */
5263 legitimize_pic_address (rtx orig
, rtx reg
)
5271 reg
= gen_reg_rtx (Pmode
);
5272 /* Use the generic Mach-O PIC machinery. */
5273 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
5276 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
5278 else if (!TARGET_64BIT
&& local_symbolic_operand (addr
, Pmode
))
5280 /* This symbol may be referenced via a displacement from the PIC
5281 base address (@GOTOFF). */
5283 if (reload_in_progress
)
5284 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5285 if (GET_CODE (addr
) == CONST
)
5286 addr
= XEXP (addr
, 0);
5287 if (GET_CODE (addr
) == PLUS
)
5289 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
5290 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
5293 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
5294 new = gen_rtx_CONST (Pmode
, new);
5295 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5299 emit_move_insn (reg
, new);
5303 else if (GET_CODE (addr
) == SYMBOL_REF
)
5307 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
5308 new = gen_rtx_CONST (Pmode
, new);
5309 new = gen_rtx_MEM (Pmode
, new);
5310 MEM_READONLY_P (new) = 1;
5311 set_mem_alias_set (new, ix86_GOT_alias_set ());
5314 reg
= gen_reg_rtx (Pmode
);
5315 /* Use directly gen_movsi, otherwise the address is loaded
5316 into register for CSE. We don't want to CSE this addresses,
5317 instead we CSE addresses from the GOT table, so skip this. */
5318 emit_insn (gen_movsi (reg
, new));
5323 /* This symbol must be referenced via a load from the
5324 Global Offset Table (@GOT). */
5326 if (reload_in_progress
)
5327 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5328 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
5329 new = gen_rtx_CONST (Pmode
, new);
5330 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5331 new = gen_rtx_MEM (Pmode
, new);
5332 MEM_READONLY_P (new) = 1;
5333 set_mem_alias_set (new, ix86_GOT_alias_set ());
5336 reg
= gen_reg_rtx (Pmode
);
5337 emit_move_insn (reg
, new);
5343 if (GET_CODE (addr
) == CONST
)
5345 addr
= XEXP (addr
, 0);
5347 /* We must match stuff we generate before. Assume the only
5348 unspecs that can get here are ours. Not that we could do
5349 anything with them anyway.... */
5350 if (GET_CODE (addr
) == UNSPEC
5351 || (GET_CODE (addr
) == PLUS
5352 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
5354 if (GET_CODE (addr
) != PLUS
)
5357 if (GET_CODE (addr
) == PLUS
)
5359 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
5361 /* Check first to see if this is a constant offset from a @GOTOFF
5362 symbol reference. */
5363 if (local_symbolic_operand (op0
, Pmode
)
5364 && GET_CODE (op1
) == CONST_INT
)
5368 if (reload_in_progress
)
5369 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5370 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
5372 new = gen_rtx_PLUS (Pmode
, new, op1
);
5373 new = gen_rtx_CONST (Pmode
, new);
5374 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5378 emit_move_insn (reg
, new);
5384 if (INTVAL (op1
) < -16*1024*1024
5385 || INTVAL (op1
) >= 16*1024*1024)
5386 new = gen_rtx_PLUS (Pmode
, op0
, force_reg (Pmode
, op1
));
5391 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
5392 new = legitimize_pic_address (XEXP (addr
, 1),
5393 base
== reg
? NULL_RTX
: reg
);
5395 if (GET_CODE (new) == CONST_INT
)
5396 new = plus_constant (base
, INTVAL (new));
5399 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
5401 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
5402 new = XEXP (new, 1);
5404 new = gen_rtx_PLUS (Pmode
, base
, new);
5412 /* Load the thread pointer. If TO_REG is true, force it into a register. */
5415 get_thread_pointer (int to_reg
)
5419 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
5423 reg
= gen_reg_rtx (Pmode
);
5424 insn
= gen_rtx_SET (VOIDmode
, reg
, tp
);
5425 insn
= emit_insn (insn
);
5430 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
5431 false if we expect this to be used for a memory address and true if
5432 we expect to load the address into a register. */
5435 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
5437 rtx dest
, base
, off
, pic
;
5442 case TLS_MODEL_GLOBAL_DYNAMIC
:
5443 dest
= gen_reg_rtx (Pmode
);
5446 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
5449 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
5450 insns
= get_insns ();
5453 emit_libcall_block (insns
, dest
, rax
, x
);
5456 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
5459 case TLS_MODEL_LOCAL_DYNAMIC
:
5460 base
= gen_reg_rtx (Pmode
);
5463 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
5466 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
5467 insns
= get_insns ();
5470 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
5471 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
5472 emit_libcall_block (insns
, base
, rax
, note
);
5475 emit_insn (gen_tls_local_dynamic_base_32 (base
));
5477 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
5478 off
= gen_rtx_CONST (Pmode
, off
);
5480 return gen_rtx_PLUS (Pmode
, base
, off
);
5482 case TLS_MODEL_INITIAL_EXEC
:
5486 type
= UNSPEC_GOTNTPOFF
;
5490 if (reload_in_progress
)
5491 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5492 pic
= pic_offset_table_rtx
;
5493 type
= TARGET_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
5495 else if (!TARGET_GNU_TLS
)
5497 pic
= gen_reg_rtx (Pmode
);
5498 emit_insn (gen_set_got (pic
));
5499 type
= UNSPEC_GOTTPOFF
;
5504 type
= UNSPEC_INDNTPOFF
;
5507 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
5508 off
= gen_rtx_CONST (Pmode
, off
);
5510 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
5511 off
= gen_rtx_MEM (Pmode
, off
);
5512 MEM_READONLY_P (off
) = 1;
5513 set_mem_alias_set (off
, ix86_GOT_alias_set ());
5515 if (TARGET_64BIT
|| TARGET_GNU_TLS
)
5517 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
5518 off
= force_reg (Pmode
, off
);
5519 return gen_rtx_PLUS (Pmode
, base
, off
);
5523 base
= get_thread_pointer (true);
5524 dest
= gen_reg_rtx (Pmode
);
5525 emit_insn (gen_subsi3 (dest
, base
, off
));
5529 case TLS_MODEL_LOCAL_EXEC
:
5530 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
5531 (TARGET_64BIT
|| TARGET_GNU_TLS
)
5532 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
5533 off
= gen_rtx_CONST (Pmode
, off
);
5535 if (TARGET_64BIT
|| TARGET_GNU_TLS
)
5537 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
5538 return gen_rtx_PLUS (Pmode
, base
, off
);
5542 base
= get_thread_pointer (true);
5543 dest
= gen_reg_rtx (Pmode
);
5544 emit_insn (gen_subsi3 (dest
, base
, off
));
5555 /* Try machine-dependent ways of modifying an illegitimate address
5556 to be legitimate. If we find one, return the new, valid address.
5557 This macro is used in only one place: `memory_address' in explow.c.
5559 OLDX is the address as it was before break_out_memory_refs was called.
5560 In some cases it is useful to look at this to decide what needs to be done.
5562 MODE and WIN are passed so that this macro can use
5563 GO_IF_LEGITIMATE_ADDRESS.
5565 It is always safe for this macro to do nothing. It exists to recognize
5566 opportunities to optimize the output.
5568 For the 80386, we handle X+REG by loading X into a register R and
5569 using R+REG. R will go in a general reg and indexing will be used.
5570 However, if REG is a broken-out memory address or multiplication,
5571 nothing needs to be done because REG can certainly go in a general reg.
5573 When -fpic is used, special handling is needed for symbolic references.
5574 See comments by legitimize_pic_address in i386.c for details. */
5577 legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
, enum machine_mode mode
)
5582 if (TARGET_DEBUG_ADDR
)
5584 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5585 GET_MODE_NAME (mode
));
5589 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
5591 return legitimize_tls_address (x
, log
, false);
5592 if (GET_CODE (x
) == CONST
5593 && GET_CODE (XEXP (x
, 0)) == PLUS
5594 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
5595 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
5597 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0), log
, false);
5598 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
5601 if (flag_pic
&& SYMBOLIC_CONST (x
))
5602 return legitimize_pic_address (x
, 0);
5604 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5605 if (GET_CODE (x
) == ASHIFT
5606 && GET_CODE (XEXP (x
, 1)) == CONST_INT
5607 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (x
, 1)))) < 4)
5610 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
5611 GEN_INT (1 << log
));
5614 if (GET_CODE (x
) == PLUS
)
5616 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5618 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
5619 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
5620 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)))) < 4)
5623 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
5624 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
5625 GEN_INT (1 << log
));
5628 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
5629 && GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
5630 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x
, 1), 1)))) < 4)
5633 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
5634 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
5635 GEN_INT (1 << log
));
5638 /* Put multiply first if it isn't already. */
5639 if (GET_CODE (XEXP (x
, 1)) == MULT
)
5641 rtx tmp
= XEXP (x
, 0);
5642 XEXP (x
, 0) = XEXP (x
, 1);
5647 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5648 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5649 created by virtual register instantiation, register elimination, and
5650 similar optimizations. */
5651 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
5654 x
= gen_rtx_PLUS (Pmode
,
5655 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
5656 XEXP (XEXP (x
, 1), 0)),
5657 XEXP (XEXP (x
, 1), 1));
5661 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5662 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5663 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
5664 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
5665 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
5666 && CONSTANT_P (XEXP (x
, 1)))
5669 rtx other
= NULL_RTX
;
5671 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
5673 constant
= XEXP (x
, 1);
5674 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
5676 else if (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 1), 1)) == CONST_INT
)
5678 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
5679 other
= XEXP (x
, 1);
5687 x
= gen_rtx_PLUS (Pmode
,
5688 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
5689 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
5690 plus_constant (other
, INTVAL (constant
)));
5694 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
5697 if (GET_CODE (XEXP (x
, 0)) == MULT
)
5700 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
5703 if (GET_CODE (XEXP (x
, 1)) == MULT
)
5706 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
5710 && GET_CODE (XEXP (x
, 1)) == REG
5711 && GET_CODE (XEXP (x
, 0)) == REG
)
5714 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
5717 x
= legitimize_pic_address (x
, 0);
5720 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
5723 if (GET_CODE (XEXP (x
, 0)) == REG
)
5725 rtx temp
= gen_reg_rtx (Pmode
);
5726 rtx val
= force_operand (XEXP (x
, 1), temp
);
5728 emit_move_insn (temp
, val
);
5734 else if (GET_CODE (XEXP (x
, 1)) == REG
)
5736 rtx temp
= gen_reg_rtx (Pmode
);
5737 rtx val
= force_operand (XEXP (x
, 0), temp
);
5739 emit_move_insn (temp
, val
);
5749 /* Print an integer constant expression in assembler syntax. Addition
5750 and subtraction are the only arithmetic that may appear in these
5751 expressions. FILE is the stdio stream to write to, X is the rtx, and
5752 CODE is the operand print code from the output string. */
5755 output_pic_addr_const (FILE *file
, rtx x
, int code
)
5759 switch (GET_CODE (x
))
5769 /* Mark the decl as referenced so that cgraph will output the function. */
5770 if (SYMBOL_REF_DECL (x
))
5771 mark_decl_referenced (SYMBOL_REF_DECL (x
));
5773 assemble_name (file
, XSTR (x
, 0));
5774 if (!TARGET_MACHO
&& code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
5775 fputs ("@PLT", file
);
5782 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
5783 assemble_name (asm_out_file
, buf
);
5787 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
5791 /* This used to output parentheses around the expression,
5792 but that does not work on the 386 (either ATT or BSD assembler). */
5793 output_pic_addr_const (file
, XEXP (x
, 0), code
);
5797 if (GET_MODE (x
) == VOIDmode
)
5799 /* We can use %d if the number is <32 bits and positive. */
5800 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
5801 fprintf (file
, "0x%lx%08lx",
5802 (unsigned long) CONST_DOUBLE_HIGH (x
),
5803 (unsigned long) CONST_DOUBLE_LOW (x
));
5805 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
5808 /* We can't handle floating point constants;
5809 PRINT_OPERAND must handle them. */
5810 output_operand_lossage ("floating constant misused");
5814 /* Some assemblers need integer constants to appear first. */
5815 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
)
5817 output_pic_addr_const (file
, XEXP (x
, 0), code
);
5819 output_pic_addr_const (file
, XEXP (x
, 1), code
);
5821 else if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
5823 output_pic_addr_const (file
, XEXP (x
, 1), code
);
5825 output_pic_addr_const (file
, XEXP (x
, 0), code
);
5833 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
5834 output_pic_addr_const (file
, XEXP (x
, 0), code
);
5836 output_pic_addr_const (file
, XEXP (x
, 1), code
);
5838 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
5842 if (XVECLEN (x
, 0) != 1)
5844 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
5845 switch (XINT (x
, 1))
5848 fputs ("@GOT", file
);
5851 fputs ("@GOTOFF", file
);
5853 case UNSPEC_GOTPCREL
:
5854 fputs ("@GOTPCREL(%rip)", file
);
5856 case UNSPEC_GOTTPOFF
:
5857 /* FIXME: This might be @TPOFF in Sun ld too. */
5858 fputs ("@GOTTPOFF", file
);
5861 fputs ("@TPOFF", file
);
5865 fputs ("@TPOFF", file
);
5867 fputs ("@NTPOFF", file
);
5870 fputs ("@DTPOFF", file
);
5872 case UNSPEC_GOTNTPOFF
:
5874 fputs ("@GOTTPOFF(%rip)", file
);
5876 fputs ("@GOTNTPOFF", file
);
5878 case UNSPEC_INDNTPOFF
:
5879 fputs ("@INDNTPOFF", file
);
5882 output_operand_lossage ("invalid UNSPEC as operand");
5888 output_operand_lossage ("invalid expression as operand");
5892 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5893 We need to handle our special PIC relocations. */
5896 i386_dwarf_output_addr_const (FILE *file
, rtx x
)
5899 fprintf (file
, "%s", TARGET_64BIT
? ASM_QUAD
: ASM_LONG
);
5903 fprintf (file
, "%s", ASM_LONG
);
5906 output_pic_addr_const (file
, x
, '\0');
5908 output_addr_const (file
, x
);
5912 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
5913 We need to emit DTP-relative relocations. */
5916 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
5918 fputs (ASM_LONG
, file
);
5919 output_addr_const (file
, x
);
5920 fputs ("@DTPOFF", file
);
5926 fputs (", 0", file
);
5933 /* In the name of slightly smaller debug output, and to cater to
5934 general assembler losage, recognize PIC+GOTOFF and turn it back
5935 into a direct symbol reference. */
5938 ix86_delegitimize_address (rtx orig_x
)
5942 if (GET_CODE (x
) == MEM
)
5947 if (GET_CODE (x
) != CONST
5948 || GET_CODE (XEXP (x
, 0)) != UNSPEC
5949 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
5950 || GET_CODE (orig_x
) != MEM
)
5952 return XVECEXP (XEXP (x
, 0), 0, 0);
5955 if (GET_CODE (x
) != PLUS
5956 || GET_CODE (XEXP (x
, 1)) != CONST
)
5959 if (GET_CODE (XEXP (x
, 0)) == REG
5960 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
5961 /* %ebx + GOT/GOTOFF */
5963 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
5965 /* %ebx + %reg * scale + GOT/GOTOFF */
5967 if (GET_CODE (XEXP (y
, 0)) == REG
5968 && REGNO (XEXP (y
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
5970 else if (GET_CODE (XEXP (y
, 1)) == REG
5971 && REGNO (XEXP (y
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
5975 if (GET_CODE (y
) != REG
5976 && GET_CODE (y
) != MULT
5977 && GET_CODE (y
) != ASHIFT
)
5983 x
= XEXP (XEXP (x
, 1), 0);
5984 if (GET_CODE (x
) == UNSPEC
5985 && ((XINT (x
, 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
5986 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& GET_CODE (orig_x
) != MEM
)))
5989 return gen_rtx_PLUS (Pmode
, y
, XVECEXP (x
, 0, 0));
5990 return XVECEXP (x
, 0, 0);
5993 if (GET_CODE (x
) == PLUS
5994 && GET_CODE (XEXP (x
, 0)) == UNSPEC
5995 && GET_CODE (XEXP (x
, 1)) == CONST_INT
5996 && ((XINT (XEXP (x
, 0), 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
5997 || (XINT (XEXP (x
, 0), 1) == UNSPEC_GOTOFF
5998 && GET_CODE (orig_x
) != MEM
)))
6000 x
= gen_rtx_PLUS (VOIDmode
, XVECEXP (XEXP (x
, 0), 0, 0), XEXP (x
, 1));
6002 return gen_rtx_PLUS (Pmode
, y
, x
);
6010 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
6015 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
6017 enum rtx_code second_code
, bypass_code
;
6018 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
6019 if (bypass_code
!= NIL
|| second_code
!= NIL
)
6021 code
= ix86_fp_compare_code_to_integer (code
);
6025 code
= reverse_condition (code
);
6036 if (mode
!= CCmode
&& mode
!= CCNOmode
&& mode
!= CCGCmode
)
6041 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6042 Those same assemblers have the same but opposite losage on cmov. */
6045 suffix
= fp
? "nbe" : "a";
6048 if (mode
== CCNOmode
|| mode
== CCGOCmode
)
6050 else if (mode
== CCmode
|| mode
== CCGCmode
)
6061 if (mode
== CCNOmode
|| mode
== CCGOCmode
)
6063 else if (mode
== CCmode
|| mode
== CCGCmode
)
6072 suffix
= fp
? "nb" : "ae";
6075 if (mode
!= CCmode
&& mode
!= CCGCmode
&& mode
!= CCNOmode
)
6085 suffix
= fp
? "u" : "p";
6088 suffix
= fp
? "nu" : "np";
6093 fputs (suffix
, file
);
6096 /* Print the name of register X to FILE based on its machine mode and number.
6097 If CODE is 'w', pretend the mode is HImode.
6098 If CODE is 'b', pretend the mode is QImode.
6099 If CODE is 'k', pretend the mode is SImode.
6100 If CODE is 'q', pretend the mode is DImode.
6101 If CODE is 'h', pretend the reg is the `high' byte register.
6102 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
6105 print_reg (rtx x
, int code
, FILE *file
)
6107 if (REGNO (x
) == ARG_POINTER_REGNUM
6108 || REGNO (x
) == FRAME_POINTER_REGNUM
6109 || REGNO (x
) == FLAGS_REG
6110 || REGNO (x
) == FPSR_REG
)
6113 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
6116 if (code
== 'w' || MMX_REG_P (x
))
6118 else if (code
== 'b')
6120 else if (code
== 'k')
6122 else if (code
== 'q')
6124 else if (code
== 'y')
6126 else if (code
== 'h')
6129 code
= GET_MODE_SIZE (GET_MODE (x
));
6131 /* Irritatingly, AMD extended registers use different naming convention
6132 from the normal registers. */
6133 if (REX_INT_REG_P (x
))
6140 error ("extended registers have no high halves");
6143 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6146 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6149 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6152 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6155 error ("unsupported operand size for extended register");
6163 if (STACK_TOP_P (x
))
6165 fputs ("st(0)", file
);
6172 if (! ANY_FP_REG_P (x
))
6173 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
6178 fputs (hi_reg_name
[REGNO (x
)], file
);
6181 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
6183 fputs (qi_reg_name
[REGNO (x
)], file
);
6186 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
6188 fputs (qi_high_reg_name
[REGNO (x
)], file
);
6195 /* Locate some local-dynamic symbol still in use by this function
6196 so that we can print its name in some tls_local_dynamic_base
6200 get_some_local_dynamic_name (void)
6204 if (cfun
->machine
->some_ld_name
)
6205 return cfun
->machine
->some_ld_name
;
6207 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
6209 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
6210 return cfun
->machine
->some_ld_name
;
6216 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
6220 if (GET_CODE (x
) == SYMBOL_REF
6221 && local_dynamic_symbolic_operand (x
, Pmode
))
6223 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
6231 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6232 C -- print opcode suffix for set/cmov insn.
6233 c -- like C, but print reversed condition
6234 F,f -- likewise, but for floating-point.
6235 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
6237 R -- print the prefix for register names.
6238 z -- print the opcode suffix for the size of the current operand.
6239 * -- print a star (in certain assembler syntax)
6240 A -- print an absolute memory reference.
6241 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6242 s -- print a shift double count, followed by the assemblers argument
6244 b -- print the QImode name of the register for the indicated operand.
6245 %b0 would print %al if operands[0] is reg 0.
6246 w -- likewise, print the HImode name of the register.
6247 k -- likewise, print the SImode name of the register.
6248 q -- likewise, print the DImode name of the register.
6249 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6250 y -- print "st(0)" instead of "st" as a register.
6251 D -- print condition for SSE cmp instruction.
6252 P -- if PIC, print an @PLT suffix.
6253 X -- don't print any sort of PIC '@' suffix for a symbol.
6254 & -- print some in-use local-dynamic symbol name.
6258 print_operand (FILE *file
, rtx x
, int code
)
6265 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6270 assemble_name (file
, get_some_local_dynamic_name ());
6274 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6276 else if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6278 /* Intel syntax. For absolute addresses, registers should not
6279 be surrounded by braces. */
6280 if (GET_CODE (x
) != REG
)
6283 PRINT_OPERAND (file
, x
, 0);
6291 PRINT_OPERAND (file
, x
, 0);
6296 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6301 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6306 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6311 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6316 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6321 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6326 /* 387 opcodes don't get size suffixes if the operands are
6328 if (STACK_REG_P (x
))
6331 /* Likewise if using Intel opcodes. */
6332 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6335 /* This is the size of op from size of operand. */
6336 switch (GET_MODE_SIZE (GET_MODE (x
)))
6339 #ifdef HAVE_GAS_FILDS_FISTS
6345 if (GET_MODE (x
) == SFmode
)
6360 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
6362 #ifdef GAS_MNEMONICS
6388 if (GET_CODE (x
) == CONST_INT
|| ! SHIFT_DOUBLE_OMITS_COUNT
)
6390 PRINT_OPERAND (file
, x
, 0);
6396 /* Little bit of braindamage here. The SSE compare instructions
6397 does use completely different names for the comparisons that the
6398 fp conditional moves. */
6399 switch (GET_CODE (x
))
6414 fputs ("unord", file
);
6418 fputs ("neq", file
);
6422 fputs ("nlt", file
);
6426 fputs ("nle", file
);
6429 fputs ("ord", file
);
6437 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6438 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6440 switch (GET_MODE (x
))
6442 case HImode
: putc ('w', file
); break;
6444 case SFmode
: putc ('l', file
); break;
6446 case DFmode
: putc ('q', file
); break;
6454 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
6457 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6458 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6461 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
6464 /* Like above, but reverse condition */
6466 /* Check to see if argument to %c is really a constant
6467 and not a condition code which needs to be reversed. */
6468 if (!COMPARISON_P (x
))
6470 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6473 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
6476 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6477 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6480 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
6486 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
6489 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
6492 int pred_val
= INTVAL (XEXP (x
, 0));
6494 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
6495 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
6497 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
6498 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
6500 /* Emit hints only in the case default branch prediction
6501 heuristics would fail. */
6502 if (taken
!= cputaken
)
6504 /* We use 3e (DS) prefix for taken branches and
6505 2e (CS) prefix for not taken branches. */
6507 fputs ("ds ; ", file
);
6509 fputs ("cs ; ", file
);
6516 output_operand_lossage ("invalid operand code `%c'", code
);
6520 if (GET_CODE (x
) == REG
)
6521 print_reg (x
, code
, file
);
6523 else if (GET_CODE (x
) == MEM
)
6525 /* No `byte ptr' prefix for call instructions. */
6526 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
6529 switch (GET_MODE_SIZE (GET_MODE (x
)))
6531 case 1: size
= "BYTE"; break;
6532 case 2: size
= "WORD"; break;
6533 case 4: size
= "DWORD"; break;
6534 case 8: size
= "QWORD"; break;
6535 case 12: size
= "XWORD"; break;
6536 case 16: size
= "XMMWORD"; break;
6541 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6544 else if (code
== 'w')
6546 else if (code
== 'k')
6550 fputs (" PTR ", file
);
6554 /* Avoid (%rip) for call operands. */
6555 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
6556 && GET_CODE (x
) != CONST_INT
)
6557 output_addr_const (file
, x
);
6558 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
6559 output_operand_lossage ("invalid constraints for operand");
6564 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
6569 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
6570 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
6572 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6574 fprintf (file
, "0x%08lx", l
);
6577 /* These float cases don't actually occur as immediate operands. */
6578 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
6582 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
6583 fprintf (file
, "%s", dstr
);
6586 else if (GET_CODE (x
) == CONST_DOUBLE
6587 && GET_MODE (x
) == XFmode
)
6591 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
6592 fprintf (file
, "%s", dstr
);
6599 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
6601 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6604 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
6605 || GET_CODE (x
) == LABEL_REF
)
6607 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6610 fputs ("OFFSET FLAT:", file
);
6613 if (GET_CODE (x
) == CONST_INT
)
6614 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
6616 output_pic_addr_const (file
, x
, code
);
6618 output_addr_const (file
, x
);
6622 /* Print a memory operand whose address is ADDR. */
6625 print_operand_address (FILE *file
, rtx addr
)
6627 struct ix86_address parts
;
6628 rtx base
, index
, disp
;
6631 if (! ix86_decompose_address (addr
, &parts
))
6635 index
= parts
.index
;
6637 scale
= parts
.scale
;
6645 if (USER_LABEL_PREFIX
[0] == 0)
6647 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
6653 if (!base
&& !index
)
6655 /* Displacement only requires special attention. */
6657 if (GET_CODE (disp
) == CONST_INT
)
6659 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
6661 if (USER_LABEL_PREFIX
[0] == 0)
6663 fputs ("ds:", file
);
6665 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
6668 output_pic_addr_const (file
, disp
, 0);
6670 output_addr_const (file
, disp
);
6672 /* Use one byte shorter RIP relative addressing for 64bit mode. */
6674 && ((GET_CODE (disp
) == SYMBOL_REF
6675 && ! tls_symbolic_operand (disp
, GET_MODE (disp
)))
6676 || GET_CODE (disp
) == LABEL_REF
6677 || (GET_CODE (disp
) == CONST
6678 && GET_CODE (XEXP (disp
, 0)) == PLUS
6679 && (GET_CODE (XEXP (XEXP (disp
, 0), 0)) == SYMBOL_REF
6680 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) == LABEL_REF
)
6681 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)))
6682 fputs ("(%rip)", file
);
6686 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6691 output_pic_addr_const (file
, disp
, 0);
6692 else if (GET_CODE (disp
) == LABEL_REF
)
6693 output_asm_label (disp
);
6695 output_addr_const (file
, disp
);
6700 print_reg (base
, 0, file
);
6704 print_reg (index
, 0, file
);
6706 fprintf (file
, ",%d", scale
);
6712 rtx offset
= NULL_RTX
;
6716 /* Pull out the offset of a symbol; print any symbol itself. */
6717 if (GET_CODE (disp
) == CONST
6718 && GET_CODE (XEXP (disp
, 0)) == PLUS
6719 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
6721 offset
= XEXP (XEXP (disp
, 0), 1);
6722 disp
= gen_rtx_CONST (VOIDmode
,
6723 XEXP (XEXP (disp
, 0), 0));
6727 output_pic_addr_const (file
, disp
, 0);
6728 else if (GET_CODE (disp
) == LABEL_REF
)
6729 output_asm_label (disp
);
6730 else if (GET_CODE (disp
) == CONST_INT
)
6733 output_addr_const (file
, disp
);
6739 print_reg (base
, 0, file
);
6742 if (INTVAL (offset
) >= 0)
6744 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
6748 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
6755 print_reg (index
, 0, file
);
6757 fprintf (file
, "*%d", scale
);
6765 output_addr_const_extra (FILE *file
, rtx x
)
6769 if (GET_CODE (x
) != UNSPEC
)
6772 op
= XVECEXP (x
, 0, 0);
6773 switch (XINT (x
, 1))
6775 case UNSPEC_GOTTPOFF
:
6776 output_addr_const (file
, op
);
6777 /* FIXME: This might be @TPOFF in Sun ld. */
6778 fputs ("@GOTTPOFF", file
);
6781 output_addr_const (file
, op
);
6782 fputs ("@TPOFF", file
);
6785 output_addr_const (file
, op
);
6787 fputs ("@TPOFF", file
);
6789 fputs ("@NTPOFF", file
);
6792 output_addr_const (file
, op
);
6793 fputs ("@DTPOFF", file
);
6795 case UNSPEC_GOTNTPOFF
:
6796 output_addr_const (file
, op
);
6798 fputs ("@GOTTPOFF(%rip)", file
);
6800 fputs ("@GOTNTPOFF", file
);
6802 case UNSPEC_INDNTPOFF
:
6803 output_addr_const (file
, op
);
6804 fputs ("@INDNTPOFF", file
);
6814 /* Split one or more DImode RTL references into pairs of SImode
6815 references. The RTL can be REG, offsettable MEM, integer constant, or
6816 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6817 split and "num" is its length. lo_half and hi_half are output arrays
6818 that parallel "operands". */
6821 split_di (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
6825 rtx op
= operands
[num
];
6827 /* simplify_subreg refuse to split volatile memory addresses,
6828 but we still have to handle it. */
6829 if (GET_CODE (op
) == MEM
)
6831 lo_half
[num
] = adjust_address (op
, SImode
, 0);
6832 hi_half
[num
] = adjust_address (op
, SImode
, 4);
6836 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
6837 GET_MODE (op
) == VOIDmode
6838 ? DImode
: GET_MODE (op
), 0);
6839 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
6840 GET_MODE (op
) == VOIDmode
6841 ? DImode
: GET_MODE (op
), 4);
6845 /* Split one or more TImode RTL references into pairs of SImode
6846 references. The RTL can be REG, offsettable MEM, integer constant, or
6847 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6848 split and "num" is its length. lo_half and hi_half are output arrays
6849 that parallel "operands". */
6852 split_ti (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
6856 rtx op
= operands
[num
];
6858 /* simplify_subreg refuse to split volatile memory addresses, but we
6859 still have to handle it. */
6860 if (GET_CODE (op
) == MEM
)
6862 lo_half
[num
] = adjust_address (op
, DImode
, 0);
6863 hi_half
[num
] = adjust_address (op
, DImode
, 8);
6867 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
6868 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
6873 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6874 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6875 is the expression of the binary operation. The output may either be
6876 emitted here, or returned to the caller, like all output_* functions.
6878 There is no guarantee that the operands are the same mode, as they
6879 might be within FLOAT or FLOAT_EXTEND expressions. */
6881 #ifndef SYSV386_COMPAT
6882 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6883 wants to fix the assemblers because that causes incompatibility
6884 with gcc. No-one wants to fix gcc because that causes
6885 incompatibility with assemblers... You can use the option of
6886 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6887 #define SYSV386_COMPAT 1
6891 output_387_binary_op (rtx insn
, rtx
*operands
)
6893 static char buf
[30];
6896 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]) | SSE_REG_P (operands
[2]);
6898 #ifdef ENABLE_CHECKING
6899 /* Even if we do not want to check the inputs, this documents input
6900 constraints. Which helps in understanding the following code. */
6901 if (STACK_REG_P (operands
[0])
6902 && ((REG_P (operands
[1])
6903 && REGNO (operands
[0]) == REGNO (operands
[1])
6904 && (STACK_REG_P (operands
[2]) || GET_CODE (operands
[2]) == MEM
))
6905 || (REG_P (operands
[2])
6906 && REGNO (operands
[0]) == REGNO (operands
[2])
6907 && (STACK_REG_P (operands
[1]) || GET_CODE (operands
[1]) == MEM
)))
6908 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
6914 switch (GET_CODE (operands
[3]))
6917 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
6918 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
6926 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
6927 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
6935 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
6936 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
6944 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
6945 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
6959 if (GET_MODE (operands
[0]) == SFmode
)
6960 strcat (buf
, "ss\t{%2, %0|%0, %2}");
6962 strcat (buf
, "sd\t{%2, %0|%0, %2}");
6967 switch (GET_CODE (operands
[3]))
6971 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
6973 rtx temp
= operands
[2];
6974 operands
[2] = operands
[1];
6978 /* know operands[0] == operands[1]. */
6980 if (GET_CODE (operands
[2]) == MEM
)
6986 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
6988 if (STACK_TOP_P (operands
[0]))
6989 /* How is it that we are storing to a dead operand[2]?
6990 Well, presumably operands[1] is dead too. We can't
6991 store the result to st(0) as st(0) gets popped on this
6992 instruction. Instead store to operands[2] (which I
6993 think has to be st(1)). st(1) will be popped later.
6994 gcc <= 2.8.1 didn't have this check and generated
6995 assembly code that the Unixware assembler rejected. */
6996 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6998 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7002 if (STACK_TOP_P (operands
[0]))
7003 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7005 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7010 if (GET_CODE (operands
[1]) == MEM
)
7016 if (GET_CODE (operands
[2]) == MEM
)
7022 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
7025 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7026 derived assemblers, confusingly reverse the direction of
7027 the operation for fsub{r} and fdiv{r} when the
7028 destination register is not st(0). The Intel assembler
7029 doesn't have this brain damage. Read !SYSV386_COMPAT to
7030 figure out what the hardware really does. */
7031 if (STACK_TOP_P (operands
[0]))
7032 p
= "{p\t%0, %2|rp\t%2, %0}";
7034 p
= "{rp\t%2, %0|p\t%0, %2}";
7036 if (STACK_TOP_P (operands
[0]))
7037 /* As above for fmul/fadd, we can't store to st(0). */
7038 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7040 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7045 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
7048 if (STACK_TOP_P (operands
[0]))
7049 p
= "{rp\t%0, %1|p\t%1, %0}";
7051 p
= "{p\t%1, %0|rp\t%0, %1}";
7053 if (STACK_TOP_P (operands
[0]))
7054 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7056 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7061 if (STACK_TOP_P (operands
[0]))
7063 if (STACK_TOP_P (operands
[1]))
7064 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7066 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7069 else if (STACK_TOP_P (operands
[1]))
7072 p
= "{\t%1, %0|r\t%0, %1}";
7074 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7080 p
= "{r\t%2, %0|\t%0, %2}";
7082 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7095 /* Output code to initialize control word copies used by
7096 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7097 is set to control word rounding downwards. */
7099 emit_i387_cw_initialization (rtx normal
, rtx round_down
)
7101 rtx reg
= gen_reg_rtx (HImode
);
7103 emit_insn (gen_x86_fnstcw_1 (normal
));
7104 emit_move_insn (reg
, normal
);
7105 if (!TARGET_PARTIAL_REG_STALL
&& !optimize_size
7107 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
7109 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0xc00)));
7110 emit_move_insn (round_down
, reg
);
7113 /* Output code for INSN to convert a float to a signed int. OPERANDS
7114 are the insn operands. The output may be [HSD]Imode and the input
7115 operand may be [SDX]Fmode. */
7118 output_fix_trunc (rtx insn
, rtx
*operands
)
7120 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
7121 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
7123 /* Jump through a hoop or two for DImode, since the hardware has no
7124 non-popping instruction. We used to do this a different way, but
7125 that was somewhat fragile and broke with post-reload splitters. */
7126 if (dimode_p
&& !stack_top_dies
)
7127 output_asm_insn ("fld\t%y1", operands
);
7129 if (!STACK_TOP_P (operands
[1]))
7132 if (GET_CODE (operands
[0]) != MEM
)
7135 output_asm_insn ("fldcw\t%3", operands
);
7136 if (stack_top_dies
|| dimode_p
)
7137 output_asm_insn ("fistp%z0\t%0", operands
);
7139 output_asm_insn ("fist%z0\t%0", operands
);
7140 output_asm_insn ("fldcw\t%2", operands
);
7145 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7146 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7147 when fucom should be used. */
7150 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
7153 rtx cmp_op0
= operands
[0];
7154 rtx cmp_op1
= operands
[1];
7155 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]);
7160 cmp_op1
= operands
[2];
7164 if (GET_MODE (operands
[0]) == SFmode
)
7166 return "ucomiss\t{%1, %0|%0, %1}";
7168 return "comiss\t{%1, %0|%0, %1}";
7171 return "ucomisd\t{%1, %0|%0, %1}";
7173 return "comisd\t{%1, %0|%0, %1}";
7176 if (! STACK_TOP_P (cmp_op0
))
7179 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
7181 if (STACK_REG_P (cmp_op1
)
7183 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
7184 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
7186 /* If both the top of the 387 stack dies, and the other operand
7187 is also a stack register that dies, then this must be a
7188 `fcompp' float compare */
7192 /* There is no double popping fcomi variant. Fortunately,
7193 eflags is immune from the fstp's cc clobbering. */
7195 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
7197 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
7198 return TARGET_USE_FFREEP
? "ffreep\t%y0" : "fstp\t%y0";
7205 return "fucompp\n\tfnstsw\t%0";
7207 return "fcompp\n\tfnstsw\t%0";
7220 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7222 static const char * const alt
[24] =
7234 "fcomi\t{%y1, %0|%0, %y1}",
7235 "fcomip\t{%y1, %0|%0, %y1}",
7236 "fucomi\t{%y1, %0|%0, %y1}",
7237 "fucomip\t{%y1, %0|%0, %y1}",
7244 "fcom%z2\t%y2\n\tfnstsw\t%0",
7245 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7246 "fucom%z2\t%y2\n\tfnstsw\t%0",
7247 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7249 "ficom%z2\t%y2\n\tfnstsw\t%0",
7250 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7258 mask
= eflags_p
<< 3;
7259 mask
|= (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
) << 2;
7260 mask
|= unordered_p
<< 1;
7261 mask
|= stack_top_dies
;
7274 ix86_output_addr_vec_elt (FILE *file
, int value
)
7276 const char *directive
= ASM_LONG
;
7281 directive
= ASM_QUAD
;
7287 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
7291 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
7294 fprintf (file
, "%s%s%d-%s%d\n",
7295 ASM_LONG
, LPREFIX
, value
, LPREFIX
, rel
);
7296 else if (HAVE_AS_GOTOFF_IN_DATA
)
7297 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
7299 else if (TARGET_MACHO
)
7301 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
7302 machopic_output_function_base_name (file
);
7303 fprintf(file
, "\n");
7307 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
7308 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
7311 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7315 ix86_expand_clear (rtx dest
)
7319 /* We play register width games, which are only valid after reload. */
7320 if (!reload_completed
)
7323 /* Avoid HImode and its attendant prefix byte. */
7324 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
7325 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
7327 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
7329 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7330 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
7332 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, 17));
7333 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
7339 /* X is an unchanging MEM. If it is a constant pool reference, return
7340 the constant pool rtx, else NULL. */
7343 maybe_get_pool_constant (rtx x
)
7345 x
= ix86_delegitimize_address (XEXP (x
, 0));
7347 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
7348 return get_pool_constant (x
);
7354 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
7356 int strict
= (reload_in_progress
|| reload_completed
);
7358 enum tls_model model
;
7363 model
= GET_CODE (op1
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (op1
) : 0;
7366 op1
= legitimize_tls_address (op1
, model
, true);
7367 op1
= force_operand (op1
, op0
);
7372 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
7377 rtx temp
= ((reload_in_progress
7378 || ((op0
&& GET_CODE (op0
) == REG
)
7380 ? op0
: gen_reg_rtx (Pmode
));
7381 op1
= machopic_indirect_data_reference (op1
, temp
);
7382 op1
= machopic_legitimize_pic_address (op1
, mode
,
7383 temp
== op1
? 0 : temp
);
7385 else if (MACHOPIC_INDIRECT
)
7386 op1
= machopic_indirect_data_reference (op1
, 0);
7390 if (GET_CODE (op0
) == MEM
)
7391 op1
= force_reg (Pmode
, op1
);
7393 op1
= legitimize_address (op1
, op1
, Pmode
);
7394 #endif /* TARGET_MACHO */
7398 if (GET_CODE (op0
) == MEM
7399 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
7400 || !push_operand (op0
, mode
))
7401 && GET_CODE (op1
) == MEM
)
7402 op1
= force_reg (mode
, op1
);
7404 if (push_operand (op0
, mode
)
7405 && ! general_no_elim_operand (op1
, mode
))
7406 op1
= copy_to_mode_reg (mode
, op1
);
7408 /* Force large constants in 64bit compilation into register
7409 to get them CSEed. */
7410 if (TARGET_64BIT
&& mode
== DImode
7411 && immediate_operand (op1
, mode
)
7412 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
7413 && !register_operand (op0
, mode
)
7414 && optimize
&& !reload_completed
&& !reload_in_progress
)
7415 op1
= copy_to_mode_reg (mode
, op1
);
7417 if (FLOAT_MODE_P (mode
))
7419 /* If we are loading a floating point constant to a register,
7420 force the value to memory now, since we'll get better code
7421 out the back end. */
7425 else if (GET_CODE (op1
) == CONST_DOUBLE
)
7427 op1
= validize_mem (force_const_mem (mode
, op1
));
7428 if (!register_operand (op0
, mode
))
7430 rtx temp
= gen_reg_rtx (mode
);
7431 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
7432 emit_move_insn (op0
, temp
);
7439 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
7443 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
7445 /* Force constants other than zero into memory. We do not know how
7446 the instructions used to build constants modify the upper 64 bits
7447 of the register, once we have that information we may be able
7448 to handle some of them more efficiently. */
7449 if ((reload_in_progress
| reload_completed
) == 0
7450 && register_operand (operands
[0], mode
)
7451 && CONSTANT_P (operands
[1]) && operands
[1] != CONST0_RTX (mode
))
7452 operands
[1] = validize_mem (force_const_mem (mode
, operands
[1]));
7454 /* Make operand1 a register if it isn't already. */
7456 && !register_operand (operands
[0], mode
)
7457 && !register_operand (operands
[1], mode
))
7459 rtx temp
= force_reg (GET_MODE (operands
[1]), operands
[1]);
7460 emit_move_insn (operands
[0], temp
);
7464 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], operands
[1]));
7467 /* Attempt to expand a binary operator. Make the expansion closer to the
7468 actual machine, then just general_operand, which will allow 3 separate
7469 memory references (one output, two input) in a single insn. */
7472 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
7475 int matching_memory
;
7476 rtx src1
, src2
, dst
, op
, clob
;
7482 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7483 if (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
7484 && (rtx_equal_p (dst
, src2
)
7485 || immediate_operand (src1
, mode
)))
7492 /* If the destination is memory, and we do not have matching source
7493 operands, do things in registers. */
7494 matching_memory
= 0;
7495 if (GET_CODE (dst
) == MEM
)
7497 if (rtx_equal_p (dst
, src1
))
7498 matching_memory
= 1;
7499 else if (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
7500 && rtx_equal_p (dst
, src2
))
7501 matching_memory
= 2;
7503 dst
= gen_reg_rtx (mode
);
7506 /* Both source operands cannot be in memory. */
7507 if (GET_CODE (src1
) == MEM
&& GET_CODE (src2
) == MEM
)
7509 if (matching_memory
!= 2)
7510 src2
= force_reg (mode
, src2
);
7512 src1
= force_reg (mode
, src1
);
7515 /* If the operation is not commutable, source 1 cannot be a constant
7516 or non-matching memory. */
7517 if ((CONSTANT_P (src1
)
7518 || (!matching_memory
&& GET_CODE (src1
) == MEM
))
7519 && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
7520 src1
= force_reg (mode
, src1
);
7522 /* If optimizing, copy to regs to improve CSE */
7523 if (optimize
&& ! no_new_pseudos
)
7525 if (GET_CODE (dst
) == MEM
)
7526 dst
= gen_reg_rtx (mode
);
7527 if (GET_CODE (src1
) == MEM
)
7528 src1
= force_reg (mode
, src1
);
7529 if (GET_CODE (src2
) == MEM
)
7530 src2
= force_reg (mode
, src2
);
7533 /* Emit the instruction. */
7535 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
7536 if (reload_in_progress
)
7538 /* Reload doesn't know about the flags register, and doesn't know that
7539 it doesn't want to clobber it. We can only do this with PLUS. */
7546 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
7547 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
7550 /* Fix up the destination if needed. */
7551 if (dst
!= operands
[0])
7552 emit_move_insn (operands
[0], dst
);
7555 /* Return TRUE or FALSE depending on whether the binary operator meets the
7556 appropriate constraints. */
7559 ix86_binary_operator_ok (enum rtx_code code
,
7560 enum machine_mode mode ATTRIBUTE_UNUSED
,
7563 /* Both source operands cannot be in memory. */
7564 if (GET_CODE (operands
[1]) == MEM
&& GET_CODE (operands
[2]) == MEM
)
7566 /* If the operation is not commutable, source 1 cannot be a constant. */
7567 if (CONSTANT_P (operands
[1]) && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
7569 /* If the destination is memory, we must have a matching source operand. */
7570 if (GET_CODE (operands
[0]) == MEM
7571 && ! (rtx_equal_p (operands
[0], operands
[1])
7572 || (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
7573 && rtx_equal_p (operands
[0], operands
[2]))))
7575 /* If the operation is not commutable and the source 1 is memory, we must
7576 have a matching destination. */
7577 if (GET_CODE (operands
[1]) == MEM
7578 && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
7579 && ! rtx_equal_p (operands
[0], operands
[1]))
7584 /* Attempt to expand a unary operator. Make the expansion closer to the
7585 actual machine, then just general_operand, which will allow 2 separate
7586 memory references (one output, one input) in a single insn. */
7589 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
7592 int matching_memory
;
7593 rtx src
, dst
, op
, clob
;
7598 /* If the destination is memory, and we do not have matching source
7599 operands, do things in registers. */
7600 matching_memory
= 0;
7601 if (GET_CODE (dst
) == MEM
)
7603 if (rtx_equal_p (dst
, src
))
7604 matching_memory
= 1;
7606 dst
= gen_reg_rtx (mode
);
7609 /* When source operand is memory, destination must match. */
7610 if (!matching_memory
&& GET_CODE (src
) == MEM
)
7611 src
= force_reg (mode
, src
);
7613 /* If optimizing, copy to regs to improve CSE */
7614 if (optimize
&& ! no_new_pseudos
)
7616 if (GET_CODE (dst
) == MEM
)
7617 dst
= gen_reg_rtx (mode
);
7618 if (GET_CODE (src
) == MEM
)
7619 src
= force_reg (mode
, src
);
7622 /* Emit the instruction. */
7624 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
7625 if (reload_in_progress
|| code
== NOT
)
7627 /* Reload doesn't know about the flags register, and doesn't know that
7628 it doesn't want to clobber it. */
7635 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
7636 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
7639 /* Fix up the destination if needed. */
7640 if (dst
!= operands
[0])
7641 emit_move_insn (operands
[0], dst
);
7644 /* Return TRUE or FALSE depending on whether the unary operator meets the
7645 appropriate constraints. */
7648 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
7649 enum machine_mode mode ATTRIBUTE_UNUSED
,
7650 rtx operands
[2] ATTRIBUTE_UNUSED
)
7652 /* If one of operands is memory, source and destination must match. */
7653 if ((GET_CODE (operands
[0]) == MEM
7654 || GET_CODE (operands
[1]) == MEM
)
7655 && ! rtx_equal_p (operands
[0], operands
[1]))
7660 /* Return TRUE or FALSE depending on whether the first SET in INSN
7661 has source and destination with matching CC modes, and that the
7662 CC mode is at least as constrained as REQ_MODE. */
7665 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
7668 enum machine_mode set_mode
;
7670 set
= PATTERN (insn
);
7671 if (GET_CODE (set
) == PARALLEL
)
7672 set
= XVECEXP (set
, 0, 0);
7673 if (GET_CODE (set
) != SET
)
7675 if (GET_CODE (SET_SRC (set
)) != COMPARE
)
7678 set_mode
= GET_MODE (SET_DEST (set
));
7682 if (req_mode
!= CCNOmode
7683 && (req_mode
!= CCmode
7684 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
7688 if (req_mode
== CCGCmode
)
7692 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
7696 if (req_mode
== CCZmode
)
7706 return (GET_MODE (SET_SRC (set
)) == set_mode
);
7709 /* Generate insn patterns to do an integer compare of OPERANDS. */
7712 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
7714 enum machine_mode cmpmode
;
7717 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
7718 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
7720 /* This is very simple, but making the interface the same as in the
7721 FP case makes the rest of the code easier. */
7722 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
7723 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
7725 /* Return the test that should be put into the flags user, i.e.
7726 the bcc, scc, or cmov instruction. */
7727 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
7730 /* Figure out whether to use ordered or unordered fp comparisons.
7731 Return the appropriate mode to use. */
7734 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
7736 /* ??? In order to make all comparisons reversible, we do all comparisons
7737 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7738 all forms trapping and nontrapping comparisons, we can make inequality
7739 comparisons trapping again, since it results in better code when using
7740 FCOM based compares. */
7741 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
7745 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
7747 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
7748 return ix86_fp_compare_mode (code
);
7751 /* Only zero flag is needed. */
7753 case NE
: /* ZF!=0 */
7755 /* Codes needing carry flag. */
7756 case GEU
: /* CF=0 */
7757 case GTU
: /* CF=0 & ZF=0 */
7758 case LTU
: /* CF=1 */
7759 case LEU
: /* CF=1 | ZF=1 */
7761 /* Codes possibly doable only with sign flag when
7762 comparing against zero. */
7763 case GE
: /* SF=OF or SF=0 */
7764 case LT
: /* SF<>OF or SF=1 */
7765 if (op1
== const0_rtx
)
7768 /* For other cases Carry flag is not required. */
7770 /* Codes doable only with sign flag when comparing
7771 against zero, but we miss jump instruction for it
7772 so we need to use relational tests against overflow
7773 that thus needs to be zero. */
7774 case GT
: /* ZF=0 & SF=OF */
7775 case LE
: /* ZF=1 | SF<>OF */
7776 if (op1
== const0_rtx
)
7780 /* strcmp pattern do (use flags) and combine may ask us for proper
7789 /* Return the fixed registers used for condition codes. */
7792 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
7799 /* If two condition code modes are compatible, return a condition code
7800 mode which is compatible with both. Otherwise, return
7803 static enum machine_mode
7804 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
7809 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
7812 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
7813 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
7841 /* These are only compatible with themselves, which we already
7847 /* Return true if we should use an FCOMI instruction for this fp comparison. */
7850 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED
)
7852 enum rtx_code swapped_code
= swap_condition (code
);
7853 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
7854 || (ix86_fp_comparison_cost (swapped_code
)
7855 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
7858 /* Swap, force into registers, or otherwise massage the two operands
7859 to a fp comparison. The operands are updated in place; the new
7860 comparison code is returned. */
7862 static enum rtx_code
7863 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
7865 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
7866 rtx op0
= *pop0
, op1
= *pop1
;
7867 enum machine_mode op_mode
= GET_MODE (op0
);
7868 int is_sse
= SSE_REG_P (op0
) | SSE_REG_P (op1
);
7870 /* All of the unordered compare instructions only work on registers.
7871 The same is true of the XFmode compare instructions. The same is
7872 true of the fcomi compare instructions. */
7875 && (fpcmp_mode
== CCFPUmode
7876 || op_mode
== XFmode
7877 || ix86_use_fcomi_compare (code
)))
7879 op0
= force_reg (op_mode
, op0
);
7880 op1
= force_reg (op_mode
, op1
);
7884 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7885 things around if they appear profitable, otherwise force op0
7888 if (standard_80387_constant_p (op0
) == 0
7889 || (GET_CODE (op0
) == MEM
7890 && ! (standard_80387_constant_p (op1
) == 0
7891 || GET_CODE (op1
) == MEM
)))
7894 tmp
= op0
, op0
= op1
, op1
= tmp
;
7895 code
= swap_condition (code
);
7898 if (GET_CODE (op0
) != REG
)
7899 op0
= force_reg (op_mode
, op0
);
7901 if (CONSTANT_P (op1
))
7903 if (standard_80387_constant_p (op1
))
7904 op1
= force_reg (op_mode
, op1
);
7906 op1
= validize_mem (force_const_mem (op_mode
, op1
));
7910 /* Try to rearrange the comparison to make it cheaper. */
7911 if (ix86_fp_comparison_cost (code
)
7912 > ix86_fp_comparison_cost (swap_condition (code
))
7913 && (GET_CODE (op1
) == REG
|| !no_new_pseudos
))
7916 tmp
= op0
, op0
= op1
, op1
= tmp
;
7917 code
= swap_condition (code
);
7918 if (GET_CODE (op0
) != REG
)
7919 op0
= force_reg (op_mode
, op0
);
7927 /* Convert comparison codes we use to represent FP comparison to integer
7928 code that will result in proper branch. Return UNKNOWN if no such code
7932 ix86_fp_compare_code_to_integer (enum rtx_code code
)
7961 /* Split comparison code CODE into comparisons we can do using branch
7962 instructions. BYPASS_CODE is comparison code for branch that will
7963 branch around FIRST_CODE and SECOND_CODE. If some of branches
7964 is not required, set value to NIL.
7965 We never require more than two branches. */
7968 ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*bypass_code
,
7969 enum rtx_code
*first_code
,
7970 enum rtx_code
*second_code
)
7976 /* The fcomi comparison sets flags as follows:
7986 case GT
: /* GTU - CF=0 & ZF=0 */
7987 case GE
: /* GEU - CF=0 */
7988 case ORDERED
: /* PF=0 */
7989 case UNORDERED
: /* PF=1 */
7990 case UNEQ
: /* EQ - ZF=1 */
7991 case UNLT
: /* LTU - CF=1 */
7992 case UNLE
: /* LEU - CF=1 | ZF=1 */
7993 case LTGT
: /* EQ - ZF=0 */
7995 case LT
: /* LTU - CF=1 - fails on unordered */
7997 *bypass_code
= UNORDERED
;
7999 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
8001 *bypass_code
= UNORDERED
;
8003 case EQ
: /* EQ - ZF=1 - fails on unordered */
8005 *bypass_code
= UNORDERED
;
8007 case NE
: /* NE - ZF=0 - fails on unordered */
8009 *second_code
= UNORDERED
;
8011 case UNGE
: /* GEU - CF=0 - fails on unordered */
8013 *second_code
= UNORDERED
;
8015 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
8017 *second_code
= UNORDERED
;
8022 if (!TARGET_IEEE_FP
)
8029 /* Return cost of comparison done fcom + arithmetics operations on AX.
8030 All following functions do use number of instructions as a cost metrics.
8031 In future this should be tweaked to compute bytes for optimize_size and
8032 take into account performance of various instructions on various CPUs. */
8034 ix86_fp_comparison_arithmetics_cost (enum rtx_code code
)
8036 if (!TARGET_IEEE_FP
)
8038 /* The cost of code output by ix86_expand_fp_compare. */
8066 /* Return cost of comparison done using fcomi operation.
8067 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8069 ix86_fp_comparison_fcomi_cost (enum rtx_code code
)
8071 enum rtx_code bypass_code
, first_code
, second_code
;
8072 /* Return arbitrarily high cost when instruction is not supported - this
8073 prevents gcc from using it. */
8076 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8077 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 2;
8080 /* Return cost of comparison done using sahf operation.
8081 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8083 ix86_fp_comparison_sahf_cost (enum rtx_code code
)
8085 enum rtx_code bypass_code
, first_code
, second_code
;
8086 /* Return arbitrarily high cost when instruction is not preferred - this
8087 avoids gcc from using it. */
8088 if (!TARGET_USE_SAHF
&& !optimize_size
)
8090 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8091 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 3;
8094 /* Compute cost of the comparison done using any method.
8095 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8097 ix86_fp_comparison_cost (enum rtx_code code
)
8099 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
8102 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
8103 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
8105 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
8106 if (min
> sahf_cost
)
8108 if (min
> fcomi_cost
)
8113 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8116 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
,
8117 rtx
*second_test
, rtx
*bypass_test
)
8119 enum machine_mode fpcmp_mode
, intcmp_mode
;
8121 int cost
= ix86_fp_comparison_cost (code
);
8122 enum rtx_code bypass_code
, first_code
, second_code
;
8124 fpcmp_mode
= ix86_fp_compare_mode (code
);
8125 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
8128 *second_test
= NULL_RTX
;
8130 *bypass_test
= NULL_RTX
;
8132 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8134 /* Do fcomi/sahf based test when profitable. */
8135 if ((bypass_code
== NIL
|| bypass_test
)
8136 && (second_code
== NIL
|| second_test
)
8137 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
8141 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
8142 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
8148 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
8149 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
8151 scratch
= gen_reg_rtx (HImode
);
8152 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
8153 emit_insn (gen_x86_sahf_1 (scratch
));
8156 /* The FP codes work out to act like unsigned. */
8157 intcmp_mode
= fpcmp_mode
;
8159 if (bypass_code
!= NIL
)
8160 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
8161 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
8163 if (second_code
!= NIL
)
8164 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
8165 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
8170 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8171 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
8172 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
8174 scratch
= gen_reg_rtx (HImode
);
8175 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
8177 /* In the unordered case, we have to check C2 for NaN's, which
8178 doesn't happen to work out to anything nice combination-wise.
8179 So do some bit twiddling on the value we've got in AH to come
8180 up with an appropriate set of condition codes. */
8182 intcmp_mode
= CCNOmode
;
8187 if (code
== GT
|| !TARGET_IEEE_FP
)
8189 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
8194 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8195 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
8196 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
8197 intcmp_mode
= CCmode
;
8203 if (code
== LT
&& TARGET_IEEE_FP
)
8205 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8206 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
8207 intcmp_mode
= CCmode
;
8212 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
8218 if (code
== GE
|| !TARGET_IEEE_FP
)
8220 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
8225 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8226 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
8233 if (code
== LE
&& TARGET_IEEE_FP
)
8235 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8236 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
8237 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
8238 intcmp_mode
= CCmode
;
8243 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
8249 if (code
== EQ
&& TARGET_IEEE_FP
)
8251 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8252 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
8253 intcmp_mode
= CCmode
;
8258 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
8265 if (code
== NE
&& TARGET_IEEE_FP
)
8267 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8268 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
8274 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
8280 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
8284 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
8293 /* Return the test that should be put into the flags user, i.e.
8294 the bcc, scc, or cmov instruction. */
8295 return gen_rtx_fmt_ee (code
, VOIDmode
,
8296 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
8301 ix86_expand_compare (enum rtx_code code
, rtx
*second_test
, rtx
*bypass_test
)
8304 op0
= ix86_compare_op0
;
8305 op1
= ix86_compare_op1
;
8308 *second_test
= NULL_RTX
;
8310 *bypass_test
= NULL_RTX
;
8312 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
8313 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
8314 second_test
, bypass_test
);
8316 ret
= ix86_expand_int_compare (code
, op0
, op1
);
8321 /* Return true if the CODE will result in nontrivial jump sequence. */
8323 ix86_fp_jump_nontrivial_p (enum rtx_code code
)
8325 enum rtx_code bypass_code
, first_code
, second_code
;
8328 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8329 return bypass_code
!= NIL
|| second_code
!= NIL
;
8333 ix86_expand_branch (enum rtx_code code
, rtx label
)
8337 switch (GET_MODE (ix86_compare_op0
))
8343 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
8344 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
8345 gen_rtx_LABEL_REF (VOIDmode
, label
),
8347 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
8356 enum rtx_code bypass_code
, first_code
, second_code
;
8358 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
8361 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8363 /* Check whether we will use the natural sequence with one jump. If
8364 so, we can expand jump early. Otherwise delay expansion by
8365 creating compound insn to not confuse optimizers. */
8366 if (bypass_code
== NIL
&& second_code
== NIL
8369 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
8370 gen_rtx_LABEL_REF (VOIDmode
, label
),
8375 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
8376 ix86_compare_op0
, ix86_compare_op1
);
8377 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
8378 gen_rtx_LABEL_REF (VOIDmode
, label
),
8380 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
8382 use_fcomi
= ix86_use_fcomi_compare (code
);
8383 vec
= rtvec_alloc (3 + !use_fcomi
);
8384 RTVEC_ELT (vec
, 0) = tmp
;
8386 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
8388 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
8391 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
8393 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
8401 /* Expand DImode branch into multiple compare+branch. */
8403 rtx lo
[2], hi
[2], label2
;
8404 enum rtx_code code1
, code2
, code3
;
8406 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
8408 tmp
= ix86_compare_op0
;
8409 ix86_compare_op0
= ix86_compare_op1
;
8410 ix86_compare_op1
= tmp
;
8411 code
= swap_condition (code
);
8413 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
8414 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
8416 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8417 avoid two branches. This costs one extra insn, so disable when
8418 optimizing for size. */
8420 if ((code
== EQ
|| code
== NE
)
8422 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
8427 if (hi
[1] != const0_rtx
)
8428 xor1
= expand_binop (SImode
, xor_optab
, xor1
, hi
[1],
8429 NULL_RTX
, 0, OPTAB_WIDEN
);
8432 if (lo
[1] != const0_rtx
)
8433 xor0
= expand_binop (SImode
, xor_optab
, xor0
, lo
[1],
8434 NULL_RTX
, 0, OPTAB_WIDEN
);
8436 tmp
= expand_binop (SImode
, ior_optab
, xor1
, xor0
,
8437 NULL_RTX
, 0, OPTAB_WIDEN
);
8439 ix86_compare_op0
= tmp
;
8440 ix86_compare_op1
= const0_rtx
;
8441 ix86_expand_branch (code
, label
);
8445 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8446 op1 is a constant and the low word is zero, then we can just
8447 examine the high word. */
8449 if (GET_CODE (hi
[1]) == CONST_INT
&& lo
[1] == const0_rtx
)
8452 case LT
: case LTU
: case GE
: case GEU
:
8453 ix86_compare_op0
= hi
[0];
8454 ix86_compare_op1
= hi
[1];
8455 ix86_expand_branch (code
, label
);
8461 /* Otherwise, we need two or three jumps. */
8463 label2
= gen_label_rtx ();
8466 code2
= swap_condition (code
);
8467 code3
= unsigned_condition (code
);
8471 case LT
: case GT
: case LTU
: case GTU
:
8474 case LE
: code1
= LT
; code2
= GT
; break;
8475 case GE
: code1
= GT
; code2
= LT
; break;
8476 case LEU
: code1
= LTU
; code2
= GTU
; break;
8477 case GEU
: code1
= GTU
; code2
= LTU
; break;
8479 case EQ
: code1
= NIL
; code2
= NE
; break;
8480 case NE
: code2
= NIL
; break;
8488 * if (hi(a) < hi(b)) goto true;
8489 * if (hi(a) > hi(b)) goto false;
8490 * if (lo(a) < lo(b)) goto true;
8494 ix86_compare_op0
= hi
[0];
8495 ix86_compare_op1
= hi
[1];
8498 ix86_expand_branch (code1
, label
);
8500 ix86_expand_branch (code2
, label2
);
8502 ix86_compare_op0
= lo
[0];
8503 ix86_compare_op1
= lo
[1];
8504 ix86_expand_branch (code3
, label
);
8507 emit_label (label2
);
8516 /* Split branch based on floating point condition. */
8518 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
8519 rtx target1
, rtx target2
, rtx tmp
)
8522 rtx label
= NULL_RTX
;
8524 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
8527 if (target2
!= pc_rtx
)
8530 code
= reverse_condition_maybe_unordered (code
);
8535 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
8536 tmp
, &second
, &bypass
);
8538 if (split_branch_probability
>= 0)
8540 /* Distribute the probabilities across the jumps.
8541 Assume the BYPASS and SECOND to be always test
8543 probability
= split_branch_probability
;
8545 /* Value of 1 is low enough to make no need for probability
8546 to be updated. Later we may run some experiments and see
8547 if unordered values are more frequent in practice. */
8549 bypass_probability
= 1;
8551 second_probability
= 1;
8553 if (bypass
!= NULL_RTX
)
8555 label
= gen_label_rtx ();
8556 i
= emit_jump_insn (gen_rtx_SET
8558 gen_rtx_IF_THEN_ELSE (VOIDmode
,
8560 gen_rtx_LABEL_REF (VOIDmode
,
8563 if (bypass_probability
>= 0)
8565 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
8566 GEN_INT (bypass_probability
),
8569 i
= emit_jump_insn (gen_rtx_SET
8571 gen_rtx_IF_THEN_ELSE (VOIDmode
,
8572 condition
, target1
, target2
)));
8573 if (probability
>= 0)
8575 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
8576 GEN_INT (probability
),
8578 if (second
!= NULL_RTX
)
8580 i
= emit_jump_insn (gen_rtx_SET
8582 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
8584 if (second_probability
>= 0)
8586 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
8587 GEN_INT (second_probability
),
8590 if (label
!= NULL_RTX
)
8595 ix86_expand_setcc (enum rtx_code code
, rtx dest
)
8597 rtx ret
, tmp
, tmpreg
, equiv
;
8598 rtx second_test
, bypass_test
;
8600 if (GET_MODE (ix86_compare_op0
) == DImode
8602 return 0; /* FAIL */
8604 if (GET_MODE (dest
) != QImode
)
8607 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
8608 PUT_MODE (ret
, QImode
);
8613 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
8614 if (bypass_test
|| second_test
)
8616 rtx test
= second_test
;
8618 rtx tmp2
= gen_reg_rtx (QImode
);
8625 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
8627 PUT_MODE (test
, QImode
);
8628 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
8631 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
8633 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
8636 /* Attach a REG_EQUAL note describing the comparison result. */
8637 equiv
= simplify_gen_relational (code
, QImode
,
8638 GET_MODE (ix86_compare_op0
),
8639 ix86_compare_op0
, ix86_compare_op1
);
8640 set_unique_reg_note (get_last_insn (), REG_EQUAL
, equiv
);
8642 return 1; /* DONE */
8645 /* Expand comparison setting or clearing carry flag. Return true when
8646 successful and set pop for the operation. */
8648 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
8650 enum machine_mode mode
=
8651 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
8653 /* Do not handle DImode compares that go trought special path. Also we can't
8654 deal with FP compares yet. This is possible to add. */
8655 if ((mode
== DImode
&& !TARGET_64BIT
))
8657 if (FLOAT_MODE_P (mode
))
8659 rtx second_test
= NULL
, bypass_test
= NULL
;
8660 rtx compare_op
, compare_seq
;
8662 /* Shortcut: following common codes never translate into carry flag compares. */
8663 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
8664 || code
== ORDERED
|| code
== UNORDERED
)
8667 /* These comparisons require zero flag; swap operands so they won't. */
8668 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
8674 code
= swap_condition (code
);
8677 /* Try to expand the comparison and verify that we end up with carry flag
8678 based comparison. This is fails to be true only when we decide to expand
8679 comparison using arithmetic that is not too common scenario. */
8681 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
8682 &second_test
, &bypass_test
);
8683 compare_seq
= get_insns ();
8686 if (second_test
|| bypass_test
)
8688 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
8689 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
8690 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
8692 code
= GET_CODE (compare_op
);
8693 if (code
!= LTU
&& code
!= GEU
)
8695 emit_insn (compare_seq
);
8699 if (!INTEGRAL_MODE_P (mode
))
8707 /* Convert a==0 into (unsigned)a<1. */
8710 if (op1
!= const0_rtx
)
8713 code
= (code
== EQ
? LTU
: GEU
);
8716 /* Convert a>b into b<a or a>=b-1. */
8719 if (GET_CODE (op1
) == CONST_INT
)
8721 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
8722 /* Bail out on overflow. We still can swap operands but that
8723 would force loading of the constant into register. */
8724 if (op1
== const0_rtx
8725 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
8727 code
= (code
== GTU
? GEU
: LTU
);
8734 code
= (code
== GTU
? LTU
: GEU
);
8738 /* Convert a>=0 into (unsigned)a<0x80000000. */
8741 if (mode
== DImode
|| op1
!= const0_rtx
)
8743 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
8744 code
= (code
== LT
? GEU
: LTU
);
8748 if (mode
== DImode
|| op1
!= constm1_rtx
)
8750 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
8751 code
= (code
== LE
? GEU
: LTU
);
8757 /* Swapping operands may cause constant to appear as first operand. */
8758 if (!nonimmediate_operand (op0
, VOIDmode
))
8762 op0
= force_reg (mode
, op0
);
8764 ix86_compare_op0
= op0
;
8765 ix86_compare_op1
= op1
;
8766 *pop
= ix86_expand_compare (code
, NULL
, NULL
);
8767 if (GET_CODE (*pop
) != LTU
&& GET_CODE (*pop
) != GEU
)
8773 ix86_expand_int_movcc (rtx operands
[])
8775 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
8776 rtx compare_seq
, compare_op
;
8777 rtx second_test
, bypass_test
;
8778 enum machine_mode mode
= GET_MODE (operands
[0]);
8779 bool sign_bit_compare_p
= false;;
8782 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
8783 compare_seq
= get_insns ();
8786 compare_code
= GET_CODE (compare_op
);
8788 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
8789 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
8790 sign_bit_compare_p
= true;
8792 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8793 HImode insns, we'd be swallowed in word prefix ops. */
8795 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
8796 && (mode
!= DImode
|| TARGET_64BIT
)
8797 && GET_CODE (operands
[2]) == CONST_INT
8798 && GET_CODE (operands
[3]) == CONST_INT
)
8800 rtx out
= operands
[0];
8801 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
8802 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
8806 /* Sign bit compares are better done using shifts than we do by using
8808 if (sign_bit_compare_p
8809 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
8810 ix86_compare_op1
, &compare_op
))
8812 /* Detect overlap between destination and compare sources. */
8815 if (!sign_bit_compare_p
)
8819 compare_code
= GET_CODE (compare_op
);
8821 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
8822 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
8825 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
8828 /* To simplify rest of code, restrict to the GEU case. */
8829 if (compare_code
== LTU
)
8831 HOST_WIDE_INT tmp
= ct
;
8834 compare_code
= reverse_condition (compare_code
);
8835 code
= reverse_condition (code
);
8840 PUT_CODE (compare_op
,
8841 reverse_condition_maybe_unordered
8842 (GET_CODE (compare_op
)));
8844 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
8848 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
8849 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
8850 tmp
= gen_reg_rtx (mode
);
8853 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
8855 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
8859 if (code
== GT
|| code
== GE
)
8860 code
= reverse_condition (code
);
8863 HOST_WIDE_INT tmp
= ct
;
8868 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
8869 ix86_compare_op1
, VOIDmode
, 0, -1);
8882 tmp
= expand_simple_binop (mode
, PLUS
,
8884 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
8895 tmp
= expand_simple_binop (mode
, IOR
,
8897 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
8899 else if (diff
== -1 && ct
)
8909 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
8911 tmp
= expand_simple_binop (mode
, PLUS
,
8912 copy_rtx (tmp
), GEN_INT (cf
),
8913 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
8921 * andl cf - ct, dest
8931 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
8934 tmp
= expand_simple_binop (mode
, AND
,
8936 gen_int_mode (cf
- ct
, mode
),
8937 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
8939 tmp
= expand_simple_binop (mode
, PLUS
,
8940 copy_rtx (tmp
), GEN_INT (ct
),
8941 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
8944 if (!rtx_equal_p (tmp
, out
))
8945 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
8947 return 1; /* DONE */
8953 tmp
= ct
, ct
= cf
, cf
= tmp
;
8955 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
8957 /* We may be reversing unordered compare to normal compare, that
8958 is not valid in general (we may convert non-trapping condition
8959 to trapping one), however on i386 we currently emit all
8960 comparisons unordered. */
8961 compare_code
= reverse_condition_maybe_unordered (compare_code
);
8962 code
= reverse_condition_maybe_unordered (code
);
8966 compare_code
= reverse_condition (compare_code
);
8967 code
= reverse_condition (code
);
8972 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
8973 && GET_CODE (ix86_compare_op1
) == CONST_INT
)
8975 if (ix86_compare_op1
== const0_rtx
8976 && (code
== LT
|| code
== GE
))
8977 compare_code
= code
;
8978 else if (ix86_compare_op1
== constm1_rtx
)
8982 else if (code
== GT
)
8987 /* Optimize dest = (op0 < 0) ? -1 : cf. */
8988 if (compare_code
!= NIL
8989 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
8990 && (cf
== -1 || ct
== -1))
8992 /* If lea code below could be used, only optimize
8993 if it results in a 2 insn sequence. */
8995 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
8996 || diff
== 3 || diff
== 5 || diff
== 9)
8997 || (compare_code
== LT
&& ct
== -1)
8998 || (compare_code
== GE
&& cf
== -1))
9001 * notl op1 (if necessary)
9009 code
= reverse_condition (code
);
9012 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9013 ix86_compare_op1
, VOIDmode
, 0, -1);
9015 out
= expand_simple_binop (mode
, IOR
,
9017 out
, 1, OPTAB_DIRECT
);
9018 if (out
!= operands
[0])
9019 emit_move_insn (operands
[0], out
);
9021 return 1; /* DONE */
9026 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
9027 || diff
== 3 || diff
== 5 || diff
== 9)
9028 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
9030 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
9036 * lea cf(dest*(ct-cf)),dest
9040 * This also catches the degenerate setcc-only case.
9046 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9047 ix86_compare_op1
, VOIDmode
, 0, 1);
9050 /* On x86_64 the lea instruction operates on Pmode, so we need
9051 to get arithmetics done in proper mode to match. */
9053 tmp
= copy_rtx (out
);
9057 out1
= copy_rtx (out
);
9058 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
9062 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
9068 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
9071 if (!rtx_equal_p (tmp
, out
))
9074 out
= force_operand (tmp
, copy_rtx (out
));
9076 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
9078 if (!rtx_equal_p (out
, operands
[0]))
9079 emit_move_insn (operands
[0], copy_rtx (out
));
9081 return 1; /* DONE */
9085 * General case: Jumpful:
9086 * xorl dest,dest cmpl op1, op2
9087 * cmpl op1, op2 movl ct, dest
9089 * decl dest movl cf, dest
9090 * andl (cf-ct),dest 1:
9095 * This is reasonably steep, but branch mispredict costs are
9096 * high on modern cpus, so consider failing only if optimizing
9100 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
9101 && BRANCH_COST
>= 2)
9107 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
9108 /* We may be reversing unordered compare to normal compare,
9109 that is not valid in general (we may convert non-trapping
9110 condition to trapping one), however on i386 we currently
9111 emit all comparisons unordered. */
9112 code
= reverse_condition_maybe_unordered (code
);
9115 code
= reverse_condition (code
);
9116 if (compare_code
!= NIL
)
9117 compare_code
= reverse_condition (compare_code
);
9121 if (compare_code
!= NIL
)
9123 /* notl op1 (if needed)
9128 For x < 0 (resp. x <= -1) there will be no notl,
9129 so if possible swap the constants to get rid of the
9131 True/false will be -1/0 while code below (store flag
9132 followed by decrement) is 0/-1, so the constants need
9133 to be exchanged once more. */
9135 if (compare_code
== GE
|| !cf
)
9137 code
= reverse_condition (code
);
9142 HOST_WIDE_INT tmp
= cf
;
9147 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9148 ix86_compare_op1
, VOIDmode
, 0, -1);
9152 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9153 ix86_compare_op1
, VOIDmode
, 0, 1);
9155 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
9156 copy_rtx (out
), 1, OPTAB_DIRECT
);
9159 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
9160 gen_int_mode (cf
- ct
, mode
),
9161 copy_rtx (out
), 1, OPTAB_DIRECT
);
9163 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
9164 copy_rtx (out
), 1, OPTAB_DIRECT
);
9165 if (!rtx_equal_p (out
, operands
[0]))
9166 emit_move_insn (operands
[0], copy_rtx (out
));
9168 return 1; /* DONE */
9172 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
9174 /* Try a few things more with specific constants and a variable. */
9177 rtx var
, orig_out
, out
, tmp
;
9179 if (BRANCH_COST
<= 2)
9180 return 0; /* FAIL */
9182 /* If one of the two operands is an interesting constant, load a
9183 constant with the above and mask it in with a logical operation. */
9185 if (GET_CODE (operands
[2]) == CONST_INT
)
9188 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
9189 operands
[3] = constm1_rtx
, op
= and_optab
;
9190 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
9191 operands
[3] = const0_rtx
, op
= ior_optab
;
9193 return 0; /* FAIL */
9195 else if (GET_CODE (operands
[3]) == CONST_INT
)
9198 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
9199 operands
[2] = constm1_rtx
, op
= and_optab
;
9200 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
9201 operands
[2] = const0_rtx
, op
= ior_optab
;
9203 return 0; /* FAIL */
9206 return 0; /* FAIL */
9208 orig_out
= operands
[0];
9209 tmp
= gen_reg_rtx (mode
);
9212 /* Recurse to get the constant loaded. */
9213 if (ix86_expand_int_movcc (operands
) == 0)
9214 return 0; /* FAIL */
9216 /* Mask in the interesting variable. */
9217 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
9219 if (!rtx_equal_p (out
, orig_out
))
9220 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
9222 return 1; /* DONE */
9226 * For comparison with above,
9236 if (! nonimmediate_operand (operands
[2], mode
))
9237 operands
[2] = force_reg (mode
, operands
[2]);
9238 if (! nonimmediate_operand (operands
[3], mode
))
9239 operands
[3] = force_reg (mode
, operands
[3]);
9241 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
9243 rtx tmp
= gen_reg_rtx (mode
);
9244 emit_move_insn (tmp
, operands
[3]);
9247 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
9249 rtx tmp
= gen_reg_rtx (mode
);
9250 emit_move_insn (tmp
, operands
[2]);
9254 if (! register_operand (operands
[2], VOIDmode
)
9256 || ! register_operand (operands
[3], VOIDmode
)))
9257 operands
[2] = force_reg (mode
, operands
[2]);
9260 && ! register_operand (operands
[3], VOIDmode
))
9261 operands
[3] = force_reg (mode
, operands
[3]);
9263 emit_insn (compare_seq
);
9264 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9265 gen_rtx_IF_THEN_ELSE (mode
,
9266 compare_op
, operands
[2],
9269 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
9270 gen_rtx_IF_THEN_ELSE (mode
,
9272 copy_rtx (operands
[3]),
9273 copy_rtx (operands
[0]))));
9275 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
9276 gen_rtx_IF_THEN_ELSE (mode
,
9278 copy_rtx (operands
[2]),
9279 copy_rtx (operands
[0]))));
9281 return 1; /* DONE */
9285 ix86_expand_fp_movcc (rtx operands
[])
9289 rtx compare_op
, second_test
, bypass_test
;
9291 /* For SF/DFmode conditional moves based on comparisons
9292 in same mode, we may want to use SSE min/max instructions. */
9293 if (((TARGET_SSE_MATH
&& GET_MODE (operands
[0]) == SFmode
)
9294 || (TARGET_SSE2
&& TARGET_SSE_MATH
&& GET_MODE (operands
[0]) == DFmode
))
9295 && GET_MODE (ix86_compare_op0
) == GET_MODE (operands
[0])
9296 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
9298 || (GET_CODE (operands
[1]) != LTGT
&& GET_CODE (operands
[1]) != UNEQ
))
9299 /* We may be called from the post-reload splitter. */
9300 && (!REG_P (operands
[0])
9301 || SSE_REG_P (operands
[0])
9302 || REGNO (operands
[0]) >= FIRST_PSEUDO_REGISTER
))
9304 rtx op0
= ix86_compare_op0
, op1
= ix86_compare_op1
;
9305 code
= GET_CODE (operands
[1]);
9307 /* See if we have (cross) match between comparison operands and
9308 conditional move operands. */
9309 if (rtx_equal_p (operands
[2], op1
))
9314 code
= reverse_condition_maybe_unordered (code
);
9316 if (rtx_equal_p (operands
[2], op0
) && rtx_equal_p (operands
[3], op1
))
9318 /* Check for min operation. */
9319 if (code
== LT
|| code
== UNLE
)
9327 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
9328 if (memory_operand (op0
, VOIDmode
))
9329 op0
= force_reg (GET_MODE (operands
[0]), op0
);
9330 if (GET_MODE (operands
[0]) == SFmode
)
9331 emit_insn (gen_minsf3 (operands
[0], op0
, op1
));
9333 emit_insn (gen_mindf3 (operands
[0], op0
, op1
));
9336 /* Check for max operation. */
9337 if (code
== GT
|| code
== UNGE
)
9345 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
9346 if (memory_operand (op0
, VOIDmode
))
9347 op0
= force_reg (GET_MODE (operands
[0]), op0
);
9348 if (GET_MODE (operands
[0]) == SFmode
)
9349 emit_insn (gen_maxsf3 (operands
[0], op0
, op1
));
9351 emit_insn (gen_maxdf3 (operands
[0], op0
, op1
));
9355 /* Manage condition to be sse_comparison_operator. In case we are
9356 in non-ieee mode, try to canonicalize the destination operand
9357 to be first in the comparison - this helps reload to avoid extra
9359 if (!sse_comparison_operator (operands
[1], VOIDmode
)
9360 || (rtx_equal_p (operands
[0], ix86_compare_op1
) && !TARGET_IEEE_FP
))
9362 rtx tmp
= ix86_compare_op0
;
9363 ix86_compare_op0
= ix86_compare_op1
;
9364 ix86_compare_op1
= tmp
;
9365 operands
[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands
[1])),
9366 VOIDmode
, ix86_compare_op0
,
9369 /* Similarly try to manage result to be first operand of conditional
9370 move. We also don't support the NE comparison on SSE, so try to
9372 if ((rtx_equal_p (operands
[0], operands
[3])
9373 && (!TARGET_IEEE_FP
|| GET_CODE (operands
[1]) != EQ
))
9374 || (GET_CODE (operands
[1]) == NE
&& TARGET_IEEE_FP
))
9376 rtx tmp
= operands
[2];
9377 operands
[2] = operands
[3];
9379 operands
[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9380 (GET_CODE (operands
[1])),
9381 VOIDmode
, ix86_compare_op0
,
9384 if (GET_MODE (operands
[0]) == SFmode
)
9385 emit_insn (gen_sse_movsfcc (operands
[0], operands
[1],
9386 operands
[2], operands
[3],
9387 ix86_compare_op0
, ix86_compare_op1
));
9389 emit_insn (gen_sse_movdfcc (operands
[0], operands
[1],
9390 operands
[2], operands
[3],
9391 ix86_compare_op0
, ix86_compare_op1
));
9395 /* The floating point conditional move instructions don't directly
9396 support conditions resulting from a signed integer comparison. */
9398 code
= GET_CODE (operands
[1]);
9399 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9401 /* The floating point conditional move instructions don't directly
9402 support signed integer comparisons. */
9404 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
9406 if (second_test
!= NULL
|| bypass_test
!= NULL
)
9408 tmp
= gen_reg_rtx (QImode
);
9409 ix86_expand_setcc (code
, tmp
);
9411 ix86_compare_op0
= tmp
;
9412 ix86_compare_op1
= const0_rtx
;
9413 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9415 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
9417 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
9418 emit_move_insn (tmp
, operands
[3]);
9421 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
9423 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
9424 emit_move_insn (tmp
, operands
[2]);
9428 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9429 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
9434 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9435 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
9440 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9441 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
9449 /* Expand conditional increment or decrement using adb/sbb instructions.
9450 The default case using setcc followed by the conditional move can be
9451 done by generic code. */
9453 ix86_expand_int_addcc (rtx operands
[])
9455 enum rtx_code code
= GET_CODE (operands
[1]);
9457 rtx val
= const0_rtx
;
9459 enum machine_mode mode
= GET_MODE (operands
[0]);
9461 if (operands
[3] != const1_rtx
9462 && operands
[3] != constm1_rtx
)
9464 if (!ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
9465 ix86_compare_op1
, &compare_op
))
9467 code
= GET_CODE (compare_op
);
9469 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
9470 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
9473 code
= ix86_fp_compare_code_to_integer (code
);
9480 PUT_CODE (compare_op
,
9481 reverse_condition_maybe_unordered
9482 (GET_CODE (compare_op
)));
9484 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
9486 PUT_MODE (compare_op
, mode
);
9488 /* Construct either adc or sbb insn. */
9489 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
9491 switch (GET_MODE (operands
[0]))
9494 emit_insn (gen_subqi3_carry (operands
[0], operands
[2], val
, compare_op
));
9497 emit_insn (gen_subhi3_carry (operands
[0], operands
[2], val
, compare_op
));
9500 emit_insn (gen_subsi3_carry (operands
[0], operands
[2], val
, compare_op
));
9503 emit_insn (gen_subdi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
9511 switch (GET_MODE (operands
[0]))
9514 emit_insn (gen_addqi3_carry (operands
[0], operands
[2], val
, compare_op
));
9517 emit_insn (gen_addhi3_carry (operands
[0], operands
[2], val
, compare_op
));
9520 emit_insn (gen_addsi3_carry (operands
[0], operands
[2], val
, compare_op
));
9523 emit_insn (gen_adddi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
9529 return 1; /* DONE */
9533 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9534 works for floating pointer parameters and nonoffsetable memories.
9535 For pushes, it returns just stack offsets; the values will be saved
9536 in the right order. Maximally three parts are generated. */
9539 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
9544 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
9546 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
9548 if (GET_CODE (operand
) == REG
&& MMX_REGNO_P (REGNO (operand
)))
9550 if (size
< 2 || size
> 3)
9553 /* Optimize constant pool reference to immediates. This is used by fp
9554 moves, that force all constants to memory to allow combining. */
9555 if (GET_CODE (operand
) == MEM
&& MEM_READONLY_P (operand
))
9557 rtx tmp
= maybe_get_pool_constant (operand
);
9562 if (GET_CODE (operand
) == MEM
&& !offsettable_memref_p (operand
))
9564 /* The only non-offsetable memories we handle are pushes. */
9565 if (! push_operand (operand
, VOIDmode
))
9568 operand
= copy_rtx (operand
);
9569 PUT_MODE (operand
, Pmode
);
9570 parts
[0] = parts
[1] = parts
[2] = operand
;
9572 else if (!TARGET_64BIT
)
9575 split_di (&operand
, 1, &parts
[0], &parts
[1]);
9578 if (REG_P (operand
))
9580 if (!reload_completed
)
9582 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
9583 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
9585 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
9587 else if (offsettable_memref_p (operand
))
9589 operand
= adjust_address (operand
, SImode
, 0);
9591 parts
[1] = adjust_address (operand
, SImode
, 4);
9593 parts
[2] = adjust_address (operand
, SImode
, 8);
9595 else if (GET_CODE (operand
) == CONST_DOUBLE
)
9600 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
9604 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
9605 parts
[2] = gen_int_mode (l
[2], SImode
);
9608 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
9613 parts
[1] = gen_int_mode (l
[1], SImode
);
9614 parts
[0] = gen_int_mode (l
[0], SImode
);
9623 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
9624 if (mode
== XFmode
|| mode
== TFmode
)
9626 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
9627 if (REG_P (operand
))
9629 if (!reload_completed
)
9631 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
9632 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
9634 else if (offsettable_memref_p (operand
))
9636 operand
= adjust_address (operand
, DImode
, 0);
9638 parts
[1] = adjust_address (operand
, upper_mode
, 8);
9640 else if (GET_CODE (operand
) == CONST_DOUBLE
)
9645 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
9646 real_to_target (l
, &r
, mode
);
9647 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9648 if (HOST_BITS_PER_WIDE_INT
>= 64)
9651 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
9652 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
9655 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
9656 if (upper_mode
== SImode
)
9657 parts
[1] = gen_int_mode (l
[2], SImode
);
9658 else if (HOST_BITS_PER_WIDE_INT
>= 64)
9661 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
9662 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
9665 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
9675 /* Emit insns to perform a move or push of DI, DF, and XF values.
9676 Return false when normal moves are needed; true when all required
9677 insns have been emitted. Operands 2-4 contain the input values
9678 int the correct order; operands 5-7 contain the output values. */
9681 ix86_split_long_move (rtx operands
[])
9687 enum machine_mode mode
= GET_MODE (operands
[0]);
9689 /* The DFmode expanders may ask us to move double.
9690 For 64bit target this is single move. By hiding the fact
9691 here we simplify i386.md splitters. */
9692 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
9694 /* Optimize constant pool reference to immediates. This is used by
9695 fp moves, that force all constants to memory to allow combining. */
9697 if (GET_CODE (operands
[1]) == MEM
9698 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
9699 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
9700 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
9701 if (push_operand (operands
[0], VOIDmode
))
9703 operands
[0] = copy_rtx (operands
[0]);
9704 PUT_MODE (operands
[0], Pmode
);
9707 operands
[0] = gen_lowpart (DImode
, operands
[0]);
9708 operands
[1] = gen_lowpart (DImode
, operands
[1]);
9709 emit_move_insn (operands
[0], operands
[1]);
9713 /* The only non-offsettable memory we handle is push. */
9714 if (push_operand (operands
[0], VOIDmode
))
9716 else if (GET_CODE (operands
[0]) == MEM
9717 && ! offsettable_memref_p (operands
[0]))
9720 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
9721 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
9723 /* When emitting push, take care for source operands on the stack. */
9724 if (push
&& GET_CODE (operands
[1]) == MEM
9725 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
9728 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
9729 XEXP (part
[1][2], 0));
9730 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
9731 XEXP (part
[1][1], 0));
9734 /* We need to do copy in the right order in case an address register
9735 of the source overlaps the destination. */
9736 if (REG_P (part
[0][0]) && GET_CODE (part
[1][0]) == MEM
)
9738 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
9740 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
9743 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
9746 /* Collision in the middle part can be handled by reordering. */
9747 if (collisions
== 1 && nparts
== 3
9748 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
9751 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
9752 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
9755 /* If there are more collisions, we can't handle it by reordering.
9756 Do an lea to the last part and use only one colliding move. */
9757 else if (collisions
> 1)
9763 base
= part
[0][nparts
- 1];
9765 /* Handle the case when the last part isn't valid for lea.
9766 Happens in 64-bit mode storing the 12-byte XFmode. */
9767 if (GET_MODE (base
) != Pmode
)
9768 base
= gen_rtx_REG (Pmode
, REGNO (base
));
9770 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
9771 part
[1][0] = replace_equiv_address (part
[1][0], base
);
9772 part
[1][1] = replace_equiv_address (part
[1][1],
9773 plus_constant (base
, UNITS_PER_WORD
));
9775 part
[1][2] = replace_equiv_address (part
[1][2],
9776 plus_constant (base
, 8));
9786 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
9787 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, GEN_INT (-4)));
9788 emit_move_insn (part
[0][2], part
[1][2]);
9793 /* In 64bit mode we don't have 32bit push available. In case this is
9794 register, it is OK - we will just use larger counterpart. We also
9795 retype memory - these comes from attempt to avoid REX prefix on
9796 moving of second half of TFmode value. */
9797 if (GET_MODE (part
[1][1]) == SImode
)
9799 if (GET_CODE (part
[1][1]) == MEM
)
9800 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
9801 else if (REG_P (part
[1][1]))
9802 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
9805 if (GET_MODE (part
[1][0]) == SImode
)
9806 part
[1][0] = part
[1][1];
9809 emit_move_insn (part
[0][1], part
[1][1]);
9810 emit_move_insn (part
[0][0], part
[1][0]);
9814 /* Choose correct order to not overwrite the source before it is copied. */
9815 if ((REG_P (part
[0][0])
9816 && REG_P (part
[1][1])
9817 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
9819 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
9821 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
9825 operands
[2] = part
[0][2];
9826 operands
[3] = part
[0][1];
9827 operands
[4] = part
[0][0];
9828 operands
[5] = part
[1][2];
9829 operands
[6] = part
[1][1];
9830 operands
[7] = part
[1][0];
9834 operands
[2] = part
[0][1];
9835 operands
[3] = part
[0][0];
9836 operands
[5] = part
[1][1];
9837 operands
[6] = part
[1][0];
9844 operands
[2] = part
[0][0];
9845 operands
[3] = part
[0][1];
9846 operands
[4] = part
[0][2];
9847 operands
[5] = part
[1][0];
9848 operands
[6] = part
[1][1];
9849 operands
[7] = part
[1][2];
9853 operands
[2] = part
[0][0];
9854 operands
[3] = part
[0][1];
9855 operands
[5] = part
[1][0];
9856 operands
[6] = part
[1][1];
9859 emit_move_insn (operands
[2], operands
[5]);
9860 emit_move_insn (operands
[3], operands
[6]);
9862 emit_move_insn (operands
[4], operands
[7]);
9868 ix86_split_ashldi (rtx
*operands
, rtx scratch
)
9870 rtx low
[2], high
[2];
9873 if (GET_CODE (operands
[2]) == CONST_INT
)
9875 split_di (operands
, 2, low
, high
);
9876 count
= INTVAL (operands
[2]) & 63;
9880 emit_move_insn (high
[0], low
[1]);
9881 emit_move_insn (low
[0], const0_rtx
);
9884 emit_insn (gen_ashlsi3 (high
[0], high
[0], GEN_INT (count
- 32)));
9888 if (!rtx_equal_p (operands
[0], operands
[1]))
9889 emit_move_insn (operands
[0], operands
[1]);
9890 emit_insn (gen_x86_shld_1 (high
[0], low
[0], GEN_INT (count
)));
9891 emit_insn (gen_ashlsi3 (low
[0], low
[0], GEN_INT (count
)));
9896 if (!rtx_equal_p (operands
[0], operands
[1]))
9897 emit_move_insn (operands
[0], operands
[1]);
9899 split_di (operands
, 1, low
, high
);
9901 emit_insn (gen_x86_shld_1 (high
[0], low
[0], operands
[2]));
9902 emit_insn (gen_ashlsi3 (low
[0], low
[0], operands
[2]));
9904 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
9906 if (! no_new_pseudos
)
9907 scratch
= force_reg (SImode
, const0_rtx
);
9909 emit_move_insn (scratch
, const0_rtx
);
9911 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2],
9915 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
9920 ix86_split_ashrdi (rtx
*operands
, rtx scratch
)
9922 rtx low
[2], high
[2];
9925 if (GET_CODE (operands
[2]) == CONST_INT
)
9927 split_di (operands
, 2, low
, high
);
9928 count
= INTVAL (operands
[2]) & 63;
9932 emit_move_insn (high
[0], high
[1]);
9933 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (31)));
9934 emit_move_insn (low
[0], high
[0]);
9937 else if (count
>= 32)
9939 emit_move_insn (low
[0], high
[1]);
9941 if (! reload_completed
)
9942 emit_insn (gen_ashrsi3 (high
[0], low
[0], GEN_INT (31)));
9945 emit_move_insn (high
[0], low
[0]);
9946 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (31)));
9950 emit_insn (gen_ashrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
9954 if (!rtx_equal_p (operands
[0], operands
[1]))
9955 emit_move_insn (operands
[0], operands
[1]);
9956 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
9957 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (count
)));
9962 if (!rtx_equal_p (operands
[0], operands
[1]))
9963 emit_move_insn (operands
[0], operands
[1]);
9965 split_di (operands
, 1, low
, high
);
9967 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
9968 emit_insn (gen_ashrsi3 (high
[0], high
[0], operands
[2]));
9970 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
9972 if (! no_new_pseudos
)
9973 scratch
= gen_reg_rtx (SImode
);
9974 emit_move_insn (scratch
, high
[0]);
9975 emit_insn (gen_ashrsi3 (scratch
, scratch
, GEN_INT (31)));
9976 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
9980 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
9985 ix86_split_lshrdi (rtx
*operands
, rtx scratch
)
9987 rtx low
[2], high
[2];
9990 if (GET_CODE (operands
[2]) == CONST_INT
)
9992 split_di (operands
, 2, low
, high
);
9993 count
= INTVAL (operands
[2]) & 63;
9997 emit_move_insn (low
[0], high
[1]);
9998 emit_move_insn (high
[0], const0_rtx
);
10001 emit_insn (gen_lshrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
10005 if (!rtx_equal_p (operands
[0], operands
[1]))
10006 emit_move_insn (operands
[0], operands
[1]);
10007 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
10008 emit_insn (gen_lshrsi3 (high
[0], high
[0], GEN_INT (count
)));
10013 if (!rtx_equal_p (operands
[0], operands
[1]))
10014 emit_move_insn (operands
[0], operands
[1]);
10016 split_di (operands
, 1, low
, high
);
10018 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
10019 emit_insn (gen_lshrsi3 (high
[0], high
[0], operands
[2]));
10021 /* Heh. By reversing the arguments, we can reuse this pattern. */
10022 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
10024 if (! no_new_pseudos
)
10025 scratch
= force_reg (SImode
, const0_rtx
);
10027 emit_move_insn (scratch
, const0_rtx
);
10029 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
10033 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
10037 /* Helper function for the string operations below. Dest VARIABLE whether
10038 it is aligned to VALUE bytes. If true, jump to the label. */
10040 ix86_expand_aligntest (rtx variable
, int value
)
10042 rtx label
= gen_label_rtx ();
10043 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
10044 if (GET_MODE (variable
) == DImode
)
10045 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
10047 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
10048 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
10053 /* Adjust COUNTER by the VALUE. */
10055 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
10057 if (GET_MODE (countreg
) == DImode
)
10058 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
10060 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
10063 /* Zero extend possibly SImode EXP to Pmode register. */
10065 ix86_zero_extend_to_Pmode (rtx exp
)
10068 if (GET_MODE (exp
) == VOIDmode
)
10069 return force_reg (Pmode
, exp
);
10070 if (GET_MODE (exp
) == Pmode
)
10071 return copy_to_mode_reg (Pmode
, exp
);
10072 r
= gen_reg_rtx (Pmode
);
10073 emit_insn (gen_zero_extendsidi2 (r
, exp
));
10077 /* Expand string move (memcpy) operation. Use i386 string operations when
10078 profitable. expand_clrmem contains similar code. */
10080 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
)
10082 rtx srcreg
, destreg
, countreg
, srcexp
, destexp
;
10083 enum machine_mode counter_mode
;
10084 HOST_WIDE_INT align
= 0;
10085 unsigned HOST_WIDE_INT count
= 0;
10087 if (GET_CODE (align_exp
) == CONST_INT
)
10088 align
= INTVAL (align_exp
);
10090 /* Can't use any of this if the user has appropriated esi or edi. */
10091 if (global_regs
[4] || global_regs
[5])
10094 /* This simple hack avoids all inlining code and simplifies code below. */
10095 if (!TARGET_ALIGN_STRINGOPS
)
10098 if (GET_CODE (count_exp
) == CONST_INT
)
10100 count
= INTVAL (count_exp
);
10101 if (!TARGET_INLINE_ALL_STRINGOPS
&& count
> 64)
10105 /* Figure out proper mode for counter. For 32bits it is always SImode,
10106 for 64bits use SImode when possible, otherwise DImode.
10107 Set count to number of bytes copied when known at compile time. */
10109 || GET_MODE (count_exp
) == SImode
10110 || x86_64_zext_immediate_operand (count_exp
, VOIDmode
))
10111 counter_mode
= SImode
;
10113 counter_mode
= DImode
;
10115 if (counter_mode
!= SImode
&& counter_mode
!= DImode
)
10118 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
10119 if (destreg
!= XEXP (dst
, 0))
10120 dst
= replace_equiv_address_nv (dst
, destreg
);
10121 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
10122 if (srcreg
!= XEXP (src
, 0))
10123 src
= replace_equiv_address_nv (src
, srcreg
);
10125 /* When optimizing for size emit simple rep ; movsb instruction for
10126 counts not divisible by 4. */
10128 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
10130 emit_insn (gen_cld ());
10131 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
10132 destexp
= gen_rtx_PLUS (Pmode
, destreg
, countreg
);
10133 srcexp
= gen_rtx_PLUS (Pmode
, srcreg
, countreg
);
10134 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
, countreg
,
10138 /* For constant aligned (or small unaligned) copies use rep movsl
10139 followed by code copying the rest. For PentiumPro ensure 8 byte
10140 alignment to allow rep movsl acceleration. */
10142 else if (count
!= 0
10144 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
10145 || optimize_size
|| count
< (unsigned int) 64))
10147 unsigned HOST_WIDE_INT offset
= 0;
10148 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
10149 rtx srcmem
, dstmem
;
10151 emit_insn (gen_cld ());
10152 if (count
& ~(size
- 1))
10154 countreg
= copy_to_mode_reg (counter_mode
,
10155 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
10156 & (TARGET_64BIT
? -1 : 0x3fffffff)));
10157 countreg
= ix86_zero_extend_to_Pmode (countreg
);
10159 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
10160 GEN_INT (size
== 4 ? 2 : 3));
10161 srcexp
= gen_rtx_PLUS (Pmode
, destexp
, srcreg
);
10162 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
10164 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
,
10165 countreg
, destexp
, srcexp
));
10166 offset
= count
& ~(size
- 1);
10168 if (size
== 8 && (count
& 0x04))
10170 srcmem
= adjust_automodify_address_nv (src
, SImode
, srcreg
,
10172 dstmem
= adjust_automodify_address_nv (dst
, SImode
, destreg
,
10174 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10179 srcmem
= adjust_automodify_address_nv (src
, HImode
, srcreg
,
10181 dstmem
= adjust_automodify_address_nv (dst
, HImode
, destreg
,
10183 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10188 srcmem
= adjust_automodify_address_nv (src
, QImode
, srcreg
,
10190 dstmem
= adjust_automodify_address_nv (dst
, QImode
, destreg
,
10192 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10195 /* The generic code based on the glibc implementation:
10196 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10197 allowing accelerated copying there)
10198 - copy the data using rep movsl
10199 - copy the rest. */
10204 rtx srcmem
, dstmem
;
10205 int desired_alignment
= (TARGET_PENTIUMPRO
10206 && (count
== 0 || count
>= (unsigned int) 260)
10207 ? 8 : UNITS_PER_WORD
);
10208 /* Get rid of MEM_OFFSETs, they won't be accurate. */
10209 dst
= change_address (dst
, BLKmode
, destreg
);
10210 src
= change_address (src
, BLKmode
, srcreg
);
10212 /* In case we don't know anything about the alignment, default to
10213 library version, since it is usually equally fast and result in
10216 Also emit call when we know that the count is large and call overhead
10217 will not be important. */
10218 if (!TARGET_INLINE_ALL_STRINGOPS
10219 && (align
< UNITS_PER_WORD
|| !TARGET_REP_MOVL_OPTIMAL
))
10222 if (TARGET_SINGLE_STRINGOP
)
10223 emit_insn (gen_cld ());
10225 countreg2
= gen_reg_rtx (Pmode
);
10226 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
10228 /* We don't use loops to align destination and to copy parts smaller
10229 than 4 bytes, because gcc is able to optimize such code better (in
10230 the case the destination or the count really is aligned, gcc is often
10231 able to predict the branches) and also it is friendlier to the
10232 hardware branch prediction.
10234 Using loops is beneficial for generic case, because we can
10235 handle small counts using the loops. Many CPUs (such as Athlon)
10236 have large REP prefix setup costs.
10238 This is quite costly. Maybe we can revisit this decision later or
10239 add some customizability to this code. */
10241 if (count
== 0 && align
< desired_alignment
)
10243 label
= gen_label_rtx ();
10244 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
10245 LEU
, 0, counter_mode
, 1, label
);
10249 rtx label
= ix86_expand_aligntest (destreg
, 1);
10250 srcmem
= change_address (src
, QImode
, srcreg
);
10251 dstmem
= change_address (dst
, QImode
, destreg
);
10252 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10253 ix86_adjust_counter (countreg
, 1);
10254 emit_label (label
);
10255 LABEL_NUSES (label
) = 1;
10259 rtx label
= ix86_expand_aligntest (destreg
, 2);
10260 srcmem
= change_address (src
, HImode
, srcreg
);
10261 dstmem
= change_address (dst
, HImode
, destreg
);
10262 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10263 ix86_adjust_counter (countreg
, 2);
10264 emit_label (label
);
10265 LABEL_NUSES (label
) = 1;
10267 if (align
<= 4 && desired_alignment
> 4)
10269 rtx label
= ix86_expand_aligntest (destreg
, 4);
10270 srcmem
= change_address (src
, SImode
, srcreg
);
10271 dstmem
= change_address (dst
, SImode
, destreg
);
10272 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10273 ix86_adjust_counter (countreg
, 4);
10274 emit_label (label
);
10275 LABEL_NUSES (label
) = 1;
10278 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
10280 emit_label (label
);
10281 LABEL_NUSES (label
) = 1;
10284 if (!TARGET_SINGLE_STRINGOP
)
10285 emit_insn (gen_cld ());
10288 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
10290 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, GEN_INT (3));
10294 emit_insn (gen_lshrsi3 (countreg2
, countreg
, const2_rtx
));
10295 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, const2_rtx
);
10297 srcexp
= gen_rtx_PLUS (Pmode
, destexp
, srcreg
);
10298 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
10299 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
,
10300 countreg2
, destexp
, srcexp
));
10304 emit_label (label
);
10305 LABEL_NUSES (label
) = 1;
10307 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
10309 srcmem
= change_address (src
, SImode
, srcreg
);
10310 dstmem
= change_address (dst
, SImode
, destreg
);
10311 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10313 if ((align
<= 4 || count
== 0) && TARGET_64BIT
)
10315 rtx label
= ix86_expand_aligntest (countreg
, 4);
10316 srcmem
= change_address (src
, SImode
, srcreg
);
10317 dstmem
= change_address (dst
, SImode
, destreg
);
10318 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10319 emit_label (label
);
10320 LABEL_NUSES (label
) = 1;
10322 if (align
> 2 && count
!= 0 && (count
& 2))
10324 srcmem
= change_address (src
, HImode
, srcreg
);
10325 dstmem
= change_address (dst
, HImode
, destreg
);
10326 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10328 if (align
<= 2 || count
== 0)
10330 rtx label
= ix86_expand_aligntest (countreg
, 2);
10331 srcmem
= change_address (src
, HImode
, srcreg
);
10332 dstmem
= change_address (dst
, HImode
, destreg
);
10333 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10334 emit_label (label
);
10335 LABEL_NUSES (label
) = 1;
10337 if (align
> 1 && count
!= 0 && (count
& 1))
10339 srcmem
= change_address (src
, QImode
, srcreg
);
10340 dstmem
= change_address (dst
, QImode
, destreg
);
10341 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10343 if (align
<= 1 || count
== 0)
10345 rtx label
= ix86_expand_aligntest (countreg
, 1);
10346 srcmem
= change_address (src
, QImode
, srcreg
);
10347 dstmem
= change_address (dst
, QImode
, destreg
);
10348 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10349 emit_label (label
);
10350 LABEL_NUSES (label
) = 1;
10357 /* Expand string clear operation (bzero). Use i386 string operations when
10358 profitable. expand_movmem contains similar code. */
10360 ix86_expand_clrmem (rtx dst
, rtx count_exp
, rtx align_exp
)
10362 rtx destreg
, zeroreg
, countreg
, destexp
;
10363 enum machine_mode counter_mode
;
10364 HOST_WIDE_INT align
= 0;
10365 unsigned HOST_WIDE_INT count
= 0;
10367 if (GET_CODE (align_exp
) == CONST_INT
)
10368 align
= INTVAL (align_exp
);
10370 /* Can't use any of this if the user has appropriated esi. */
10371 if (global_regs
[4])
10374 /* This simple hack avoids all inlining code and simplifies code below. */
10375 if (!TARGET_ALIGN_STRINGOPS
)
10378 if (GET_CODE (count_exp
) == CONST_INT
)
10380 count
= INTVAL (count_exp
);
10381 if (!TARGET_INLINE_ALL_STRINGOPS
&& count
> 64)
10384 /* Figure out proper mode for counter. For 32bits it is always SImode,
10385 for 64bits use SImode when possible, otherwise DImode.
10386 Set count to number of bytes copied when known at compile time. */
10388 || GET_MODE (count_exp
) == SImode
10389 || x86_64_zext_immediate_operand (count_exp
, VOIDmode
))
10390 counter_mode
= SImode
;
10392 counter_mode
= DImode
;
10394 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
10395 if (destreg
!= XEXP (dst
, 0))
10396 dst
= replace_equiv_address_nv (dst
, destreg
);
10399 /* When optimizing for size emit simple rep ; movsb instruction for
10400 counts not divisible by 4. The movl $N, %ecx; rep; stosb
10401 sequence is 7 bytes long, so if optimizing for size and count is
10402 small enough that some stosl, stosw and stosb instructions without
10403 rep are shorter, fall back into the next if. */
10405 if ((!optimize
|| optimize_size
)
10408 && (!optimize_size
|| (count
& 0x03) + (count
>> 2) > 7))))
10410 emit_insn (gen_cld ());
10412 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
10413 zeroreg
= copy_to_mode_reg (QImode
, const0_rtx
);
10414 destexp
= gen_rtx_PLUS (Pmode
, destreg
, countreg
);
10415 emit_insn (gen_rep_stos (destreg
, countreg
, dst
, zeroreg
, destexp
));
10417 else if (count
!= 0
10419 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
10420 || optimize_size
|| count
< (unsigned int) 64))
10422 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
10423 unsigned HOST_WIDE_INT offset
= 0;
10425 emit_insn (gen_cld ());
10427 zeroreg
= copy_to_mode_reg (size
== 4 ? SImode
: DImode
, const0_rtx
);
10428 if (count
& ~(size
- 1))
10430 unsigned HOST_WIDE_INT repcount
;
10431 unsigned int max_nonrep
;
10433 repcount
= count
>> (size
== 4 ? 2 : 3);
10435 repcount
&= 0x3fffffff;
10437 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
10438 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
10439 bytes. In both cases the latter seems to be faster for small
10441 max_nonrep
= size
== 4 ? 7 : 4;
10442 if (!optimize_size
)
10445 case PROCESSOR_PENTIUM4
:
10446 case PROCESSOR_NOCONA
:
10453 if (repcount
<= max_nonrep
)
10454 while (repcount
-- > 0)
10456 rtx mem
= adjust_automodify_address_nv (dst
,
10457 GET_MODE (zeroreg
),
10459 emit_insn (gen_strset (destreg
, mem
, zeroreg
));
10464 countreg
= copy_to_mode_reg (counter_mode
, GEN_INT (repcount
));
10465 countreg
= ix86_zero_extend_to_Pmode (countreg
);
10466 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
10467 GEN_INT (size
== 4 ? 2 : 3));
10468 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
10469 emit_insn (gen_rep_stos (destreg
, countreg
, dst
, zeroreg
,
10471 offset
= count
& ~(size
- 1);
10474 if (size
== 8 && (count
& 0x04))
10476 rtx mem
= adjust_automodify_address_nv (dst
, SImode
, destreg
,
10478 emit_insn (gen_strset (destreg
, mem
,
10479 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
10484 rtx mem
= adjust_automodify_address_nv (dst
, HImode
, destreg
,
10486 emit_insn (gen_strset (destreg
, mem
,
10487 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10492 rtx mem
= adjust_automodify_address_nv (dst
, QImode
, destreg
,
10494 emit_insn (gen_strset (destreg
, mem
,
10495 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10502 /* Compute desired alignment of the string operation. */
10503 int desired_alignment
= (TARGET_PENTIUMPRO
10504 && (count
== 0 || count
>= (unsigned int) 260)
10505 ? 8 : UNITS_PER_WORD
);
10507 /* In case we don't know anything about the alignment, default to
10508 library version, since it is usually equally fast and result in
10511 Also emit call when we know that the count is large and call overhead
10512 will not be important. */
10513 if (!TARGET_INLINE_ALL_STRINGOPS
10514 && (align
< UNITS_PER_WORD
|| !TARGET_REP_MOVL_OPTIMAL
))
10517 if (TARGET_SINGLE_STRINGOP
)
10518 emit_insn (gen_cld ());
10520 countreg2
= gen_reg_rtx (Pmode
);
10521 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
10522 zeroreg
= copy_to_mode_reg (Pmode
, const0_rtx
);
10523 /* Get rid of MEM_OFFSET, it won't be accurate. */
10524 dst
= change_address (dst
, BLKmode
, destreg
);
10526 if (count
== 0 && align
< desired_alignment
)
10528 label
= gen_label_rtx ();
10529 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
10530 LEU
, 0, counter_mode
, 1, label
);
10534 rtx label
= ix86_expand_aligntest (destreg
, 1);
10535 emit_insn (gen_strset (destreg
, dst
,
10536 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10537 ix86_adjust_counter (countreg
, 1);
10538 emit_label (label
);
10539 LABEL_NUSES (label
) = 1;
10543 rtx label
= ix86_expand_aligntest (destreg
, 2);
10544 emit_insn (gen_strset (destreg
, dst
,
10545 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10546 ix86_adjust_counter (countreg
, 2);
10547 emit_label (label
);
10548 LABEL_NUSES (label
) = 1;
10550 if (align
<= 4 && desired_alignment
> 4)
10552 rtx label
= ix86_expand_aligntest (destreg
, 4);
10553 emit_insn (gen_strset (destreg
, dst
,
10555 ? gen_rtx_SUBREG (SImode
, zeroreg
, 0)
10557 ix86_adjust_counter (countreg
, 4);
10558 emit_label (label
);
10559 LABEL_NUSES (label
) = 1;
10562 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
10564 emit_label (label
);
10565 LABEL_NUSES (label
) = 1;
10569 if (!TARGET_SINGLE_STRINGOP
)
10570 emit_insn (gen_cld ());
10573 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
10575 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, GEN_INT (3));
10579 emit_insn (gen_lshrsi3 (countreg2
, countreg
, const2_rtx
));
10580 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, const2_rtx
);
10582 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
10583 emit_insn (gen_rep_stos (destreg
, countreg2
, dst
, zeroreg
, destexp
));
10587 emit_label (label
);
10588 LABEL_NUSES (label
) = 1;
10591 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
10592 emit_insn (gen_strset (destreg
, dst
,
10593 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
10594 if (TARGET_64BIT
&& (align
<= 4 || count
== 0))
10596 rtx label
= ix86_expand_aligntest (countreg
, 4);
10597 emit_insn (gen_strset (destreg
, dst
,
10598 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
10599 emit_label (label
);
10600 LABEL_NUSES (label
) = 1;
10602 if (align
> 2 && count
!= 0 && (count
& 2))
10603 emit_insn (gen_strset (destreg
, dst
,
10604 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10605 if (align
<= 2 || count
== 0)
10607 rtx label
= ix86_expand_aligntest (countreg
, 2);
10608 emit_insn (gen_strset (destreg
, dst
,
10609 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10610 emit_label (label
);
10611 LABEL_NUSES (label
) = 1;
10613 if (align
> 1 && count
!= 0 && (count
& 1))
10614 emit_insn (gen_strset (destreg
, dst
,
10615 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10616 if (align
<= 1 || count
== 0)
10618 rtx label
= ix86_expand_aligntest (countreg
, 1);
10619 emit_insn (gen_strset (destreg
, dst
,
10620 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10621 emit_label (label
);
10622 LABEL_NUSES (label
) = 1;
10628 /* Expand strlen. */
10630 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
10632 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
10634 /* The generic case of strlen expander is long. Avoid it's
10635 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
10637 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
10638 && !TARGET_INLINE_ALL_STRINGOPS
10640 && (GET_CODE (align
) != CONST_INT
|| INTVAL (align
) < 4))
10643 addr
= force_reg (Pmode
, XEXP (src
, 0));
10644 scratch1
= gen_reg_rtx (Pmode
);
10646 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
10649 /* Well it seems that some optimizer does not combine a call like
10650 foo(strlen(bar), strlen(bar));
10651 when the move and the subtraction is done here. It does calculate
10652 the length just once when these instructions are done inside of
10653 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10654 often used and I use one fewer register for the lifetime of
10655 output_strlen_unroll() this is better. */
10657 emit_move_insn (out
, addr
);
10659 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
10661 /* strlensi_unroll_1 returns the address of the zero at the end of
10662 the string, like memchr(), so compute the length by subtracting
10663 the start address. */
10665 emit_insn (gen_subdi3 (out
, out
, addr
));
10667 emit_insn (gen_subsi3 (out
, out
, addr
));
10672 scratch2
= gen_reg_rtx (Pmode
);
10673 scratch3
= gen_reg_rtx (Pmode
);
10674 scratch4
= force_reg (Pmode
, constm1_rtx
);
10676 emit_move_insn (scratch3
, addr
);
10677 eoschar
= force_reg (QImode
, eoschar
);
10679 emit_insn (gen_cld ());
10680 src
= replace_equiv_address_nv (src
, scratch3
);
10682 /* If .md starts supporting :P, this can be done in .md. */
10683 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
10684 scratch4
), UNSPEC_SCAS
);
10685 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
10688 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
10689 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
10693 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
10694 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
10700 /* Expand the appropriate insns for doing strlen if not just doing
10703 out = result, initialized with the start address
10704 align_rtx = alignment of the address.
10705 scratch = scratch register, initialized with the startaddress when
10706 not aligned, otherwise undefined
10708 This is just the body. It needs the initializations mentioned above and
10709 some address computing at the end. These things are done in i386.md. */
10712 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
10716 rtx align_2_label
= NULL_RTX
;
10717 rtx align_3_label
= NULL_RTX
;
10718 rtx align_4_label
= gen_label_rtx ();
10719 rtx end_0_label
= gen_label_rtx ();
10721 rtx tmpreg
= gen_reg_rtx (SImode
);
10722 rtx scratch
= gen_reg_rtx (SImode
);
10726 if (GET_CODE (align_rtx
) == CONST_INT
)
10727 align
= INTVAL (align_rtx
);
10729 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10731 /* Is there a known alignment and is it less than 4? */
10734 rtx scratch1
= gen_reg_rtx (Pmode
);
10735 emit_move_insn (scratch1
, out
);
10736 /* Is there a known alignment and is it not 2? */
10739 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
10740 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
10742 /* Leave just the 3 lower bits. */
10743 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
10744 NULL_RTX
, 0, OPTAB_WIDEN
);
10746 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
10747 Pmode
, 1, align_4_label
);
10748 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
10749 Pmode
, 1, align_2_label
);
10750 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
10751 Pmode
, 1, align_3_label
);
10755 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10756 check if is aligned to 4 - byte. */
10758 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
10759 NULL_RTX
, 0, OPTAB_WIDEN
);
10761 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
10762 Pmode
, 1, align_4_label
);
10765 mem
= change_address (src
, QImode
, out
);
10767 /* Now compare the bytes. */
10769 /* Compare the first n unaligned byte on a byte per byte basis. */
10770 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
10771 QImode
, 1, end_0_label
);
10773 /* Increment the address. */
10775 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
10777 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
10779 /* Not needed with an alignment of 2 */
10782 emit_label (align_2_label
);
10784 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
10788 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
10790 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
10792 emit_label (align_3_label
);
10795 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
10799 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
10801 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
10804 /* Generate loop to check 4 bytes at a time. It is not a good idea to
10805 align this loop. It gives only huge programs, but does not help to
10807 emit_label (align_4_label
);
10809 mem
= change_address (src
, SImode
, out
);
10810 emit_move_insn (scratch
, mem
);
10812 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
10814 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
10816 /* This formula yields a nonzero result iff one of the bytes is zero.
10817 This saves three branches inside loop and many cycles. */
10819 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
10820 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
10821 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
10822 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
10823 gen_int_mode (0x80808080, SImode
)));
10824 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
10829 rtx reg
= gen_reg_rtx (SImode
);
10830 rtx reg2
= gen_reg_rtx (Pmode
);
10831 emit_move_insn (reg
, tmpreg
);
10832 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
10834 /* If zero is not in the first two bytes, move two bytes forward. */
10835 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
10836 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
10837 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
10838 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
10839 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
10842 /* Emit lea manually to avoid clobbering of flags. */
10843 emit_insn (gen_rtx_SET (SImode
, reg2
,
10844 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
10846 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
10847 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
10848 emit_insn (gen_rtx_SET (VOIDmode
, out
,
10849 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
10856 rtx end_2_label
= gen_label_rtx ();
10857 /* Is zero in the first two bytes? */
10859 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
10860 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
10861 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
10862 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
10863 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
10865 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
10866 JUMP_LABEL (tmp
) = end_2_label
;
10868 /* Not in the first two. Move two bytes forward. */
10869 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
10871 emit_insn (gen_adddi3 (out
, out
, const2_rtx
));
10873 emit_insn (gen_addsi3 (out
, out
, const2_rtx
));
10875 emit_label (end_2_label
);
10879 /* Avoid branch in fixing the byte. */
10880 tmpreg
= gen_lowpart (QImode
, tmpreg
);
10881 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
10882 cmp
= gen_rtx_LTU (Pmode
, gen_rtx_REG (CCmode
, 17), const0_rtx
);
10884 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3), cmp
));
10886 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3), cmp
));
10888 emit_label (end_0_label
);
10892 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
10893 rtx callarg2 ATTRIBUTE_UNUSED
,
10894 rtx pop
, int sibcall
)
10896 rtx use
= NULL
, call
;
10898 if (pop
== const0_rtx
)
10900 if (TARGET_64BIT
&& pop
)
10904 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
10905 fnaddr
= machopic_indirect_call_target (fnaddr
);
10907 /* Static functions and indirect calls don't need the pic register. */
10908 if (! TARGET_64BIT
&& flag_pic
10909 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
10910 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
10911 use_reg (&use
, pic_offset_table_rtx
);
10913 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
10915 rtx al
= gen_rtx_REG (QImode
, 0);
10916 emit_move_insn (al
, callarg2
);
10917 use_reg (&use
, al
);
10919 #endif /* TARGET_MACHO */
10921 if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
10923 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
10924 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
10926 if (sibcall
&& TARGET_64BIT
10927 && !constant_call_address_operand (XEXP (fnaddr
, 0), Pmode
))
10930 addr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
10931 fnaddr
= gen_rtx_REG (Pmode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
10932 emit_move_insn (fnaddr
, addr
);
10933 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
10936 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
10938 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
10941 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
10942 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
10943 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
10946 call
= emit_call_insn (call
);
10948 CALL_INSN_FUNCTION_USAGE (call
) = use
;
10952 /* Clear stack slot assignments remembered from previous functions.
10953 This is called from INIT_EXPANDERS once before RTL is emitted for each
10956 static struct machine_function
*
10957 ix86_init_machine_status (void)
10959 struct machine_function
*f
;
10961 f
= ggc_alloc_cleared (sizeof (struct machine_function
));
10962 f
->use_fast_prologue_epilogue_nregs
= -1;
10967 /* Return a MEM corresponding to a stack slot with mode MODE.
10968 Allocate a new slot if necessary.
10970 The RTL for a function can have several slots available: N is
10971 which slot to use. */
10974 assign_386_stack_local (enum machine_mode mode
, int n
)
10976 struct stack_local_entry
*s
;
10978 if (n
< 0 || n
>= MAX_386_STACK_LOCALS
)
10981 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
10982 if (s
->mode
== mode
&& s
->n
== n
)
10985 s
= (struct stack_local_entry
*)
10986 ggc_alloc (sizeof (struct stack_local_entry
));
10989 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
10991 s
->next
= ix86_stack_locals
;
10992 ix86_stack_locals
= s
;
10996 /* Construct the SYMBOL_REF for the tls_get_addr function. */
10998 static GTY(()) rtx ix86_tls_symbol
;
11000 ix86_tls_get_addr (void)
11003 if (!ix86_tls_symbol
)
11005 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
11006 (TARGET_GNU_TLS
&& !TARGET_64BIT
)
11007 ? "___tls_get_addr"
11008 : "__tls_get_addr");
11011 return ix86_tls_symbol
;
11014 /* Calculate the length of the memory address in the instruction
11015 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11018 memory_address_length (rtx addr
)
11020 struct ix86_address parts
;
11021 rtx base
, index
, disp
;
11024 if (GET_CODE (addr
) == PRE_DEC
11025 || GET_CODE (addr
) == POST_INC
11026 || GET_CODE (addr
) == PRE_MODIFY
11027 || GET_CODE (addr
) == POST_MODIFY
)
11030 if (! ix86_decompose_address (addr
, &parts
))
11034 index
= parts
.index
;
11039 - esp as the base always wants an index,
11040 - ebp as the base always wants a displacement. */
11042 /* Register Indirect. */
11043 if (base
&& !index
&& !disp
)
11045 /* esp (for its index) and ebp (for its displacement) need
11046 the two-byte modrm form. */
11047 if (addr
== stack_pointer_rtx
11048 || addr
== arg_pointer_rtx
11049 || addr
== frame_pointer_rtx
11050 || addr
== hard_frame_pointer_rtx
)
11054 /* Direct Addressing. */
11055 else if (disp
&& !base
&& !index
)
11060 /* Find the length of the displacement constant. */
11063 if (GET_CODE (disp
) == CONST_INT
11064 && CONST_OK_FOR_LETTER_P (INTVAL (disp
), 'K')
11070 /* ebp always wants a displacement. */
11071 else if (base
== hard_frame_pointer_rtx
)
11074 /* An index requires the two-byte modrm form.... */
11076 /* ...like esp, which always wants an index. */
11077 || base
== stack_pointer_rtx
11078 || base
== arg_pointer_rtx
11079 || base
== frame_pointer_rtx
)
11086 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11087 is set, expect that insn have 8bit immediate alternative. */
11089 ix86_attr_length_immediate_default (rtx insn
, int shortform
)
11093 extract_insn_cached (insn
);
11094 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
11095 if (CONSTANT_P (recog_data
.operand
[i
]))
11100 && GET_CODE (recog_data
.operand
[i
]) == CONST_INT
11101 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data
.operand
[i
]), 'K'))
11105 switch (get_attr_mode (insn
))
11116 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11121 fatal_insn ("unknown insn mode", insn
);
11127 /* Compute default value for "length_address" attribute. */
11129 ix86_attr_length_address_default (rtx insn
)
11133 if (get_attr_type (insn
) == TYPE_LEA
)
11135 rtx set
= PATTERN (insn
);
11136 if (GET_CODE (set
) == SET
)
11138 else if (GET_CODE (set
) == PARALLEL
11139 && GET_CODE (XVECEXP (set
, 0, 0)) == SET
)
11140 set
= XVECEXP (set
, 0, 0);
11143 #ifdef ENABLE_CHECKING
11149 return memory_address_length (SET_SRC (set
));
11152 extract_insn_cached (insn
);
11153 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
11154 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
11156 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
11162 /* Return the maximum number of instructions a cpu can issue. */
11165 ix86_issue_rate (void)
11169 case PROCESSOR_PENTIUM
:
11173 case PROCESSOR_PENTIUMPRO
:
11174 case PROCESSOR_PENTIUM4
:
11175 case PROCESSOR_ATHLON
:
11177 case PROCESSOR_NOCONA
:
11185 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11186 by DEP_INSN and nothing set by DEP_INSN. */
11189 ix86_flags_dependant (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
11193 /* Simplify the test for uninteresting insns. */
11194 if (insn_type
!= TYPE_SETCC
11195 && insn_type
!= TYPE_ICMOV
11196 && insn_type
!= TYPE_FCMOV
11197 && insn_type
!= TYPE_IBR
)
11200 if ((set
= single_set (dep_insn
)) != 0)
11202 set
= SET_DEST (set
);
11205 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
11206 && XVECLEN (PATTERN (dep_insn
), 0) == 2
11207 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
11208 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
11210 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
11211 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
11216 if (GET_CODE (set
) != REG
|| REGNO (set
) != FLAGS_REG
)
11219 /* This test is true if the dependent insn reads the flags but
11220 not any other potentially set register. */
11221 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
11224 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
11230 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11231 address with operands set by DEP_INSN. */
11234 ix86_agi_dependant (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
11238 if (insn_type
== TYPE_LEA
11241 addr
= PATTERN (insn
);
11242 if (GET_CODE (addr
) == SET
)
11244 else if (GET_CODE (addr
) == PARALLEL
11245 && GET_CODE (XVECEXP (addr
, 0, 0)) == SET
)
11246 addr
= XVECEXP (addr
, 0, 0);
11249 addr
= SET_SRC (addr
);
11254 extract_insn_cached (insn
);
11255 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
11256 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
11258 addr
= XEXP (recog_data
.operand
[i
], 0);
11265 return modified_in_p (addr
, dep_insn
);
11269 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
11271 enum attr_type insn_type
, dep_insn_type
;
11272 enum attr_memory memory
;
11274 int dep_insn_code_number
;
11276 /* Anti and output dependencies have zero cost on all CPUs. */
11277 if (REG_NOTE_KIND (link
) != 0)
11280 dep_insn_code_number
= recog_memoized (dep_insn
);
11282 /* If we can't recognize the insns, we can't really do anything. */
11283 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
11286 insn_type
= get_attr_type (insn
);
11287 dep_insn_type
= get_attr_type (dep_insn
);
11291 case PROCESSOR_PENTIUM
:
11292 /* Address Generation Interlock adds a cycle of latency. */
11293 if (ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11296 /* ??? Compares pair with jump/setcc. */
11297 if (ix86_flags_dependant (insn
, dep_insn
, insn_type
))
11300 /* Floating point stores require value to be ready one cycle earlier. */
11301 if (insn_type
== TYPE_FMOV
11302 && get_attr_memory (insn
) == MEMORY_STORE
11303 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11307 case PROCESSOR_PENTIUMPRO
:
11308 memory
= get_attr_memory (insn
);
11310 /* INT->FP conversion is expensive. */
11311 if (get_attr_fp_int_src (dep_insn
))
11314 /* There is one cycle extra latency between an FP op and a store. */
11315 if (insn_type
== TYPE_FMOV
11316 && (set
= single_set (dep_insn
)) != NULL_RTX
11317 && (set2
= single_set (insn
)) != NULL_RTX
11318 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
11319 && GET_CODE (SET_DEST (set2
)) == MEM
)
11322 /* Show ability of reorder buffer to hide latency of load by executing
11323 in parallel with previous instruction in case
11324 previous instruction is not needed to compute the address. */
11325 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
11326 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11328 /* Claim moves to take one cycle, as core can issue one load
11329 at time and the next load can start cycle later. */
11330 if (dep_insn_type
== TYPE_IMOV
11331 || dep_insn_type
== TYPE_FMOV
)
11339 memory
= get_attr_memory (insn
);
11341 /* The esp dependency is resolved before the instruction is really
11343 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
11344 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
11347 /* INT->FP conversion is expensive. */
11348 if (get_attr_fp_int_src (dep_insn
))
11351 /* Show ability of reorder buffer to hide latency of load by executing
11352 in parallel with previous instruction in case
11353 previous instruction is not needed to compute the address. */
11354 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
11355 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11357 /* Claim moves to take one cycle, as core can issue one load
11358 at time and the next load can start cycle later. */
11359 if (dep_insn_type
== TYPE_IMOV
11360 || dep_insn_type
== TYPE_FMOV
)
11369 case PROCESSOR_ATHLON
:
11371 memory
= get_attr_memory (insn
);
11373 /* Show ability of reorder buffer to hide latency of load by executing
11374 in parallel with previous instruction in case
11375 previous instruction is not needed to compute the address. */
11376 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
11377 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11379 enum attr_unit unit
= get_attr_unit (insn
);
11382 /* Because of the difference between the length of integer and
11383 floating unit pipeline preparation stages, the memory operands
11384 for floating point are cheaper.
11386 ??? For Athlon it the difference is most probably 2. */
11387 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
11390 loadcost
= TARGET_ATHLON
? 2 : 0;
11392 if (cost
>= loadcost
)
11405 /* How many alternative schedules to try. This should be as wide as the
11406 scheduling freedom in the DFA, but no wider. Making this value too
11407 large results extra work for the scheduler. */
11410 ia32_multipass_dfa_lookahead (void)
11412 if (ix86_tune
== PROCESSOR_PENTIUM
)
11415 if (ix86_tune
== PROCESSOR_PENTIUMPRO
11416 || ix86_tune
== PROCESSOR_K6
)
11424 /* Compute the alignment given to a constant that is being placed in memory.
11425 EXP is the constant and ALIGN is the alignment that the object would
11427 The value of this function is used instead of that alignment to align
11431 ix86_constant_alignment (tree exp
, int align
)
11433 if (TREE_CODE (exp
) == REAL_CST
)
11435 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
11437 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
11440 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
11441 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
11442 return BITS_PER_WORD
;
11447 /* Compute the alignment for a static variable.
11448 TYPE is the data type, and ALIGN is the alignment that
11449 the object would ordinarily have. The value of this function is used
11450 instead of that alignment to align the object. */
11453 ix86_data_alignment (tree type
, int align
)
11455 if (AGGREGATE_TYPE_P (type
)
11456 && TYPE_SIZE (type
)
11457 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
11458 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 256
11459 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 256)
11462 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11463 to 16byte boundary. */
11466 if (AGGREGATE_TYPE_P (type
)
11467 && TYPE_SIZE (type
)
11468 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
11469 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
11470 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
11474 if (TREE_CODE (type
) == ARRAY_TYPE
)
11476 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
11478 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
11481 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
11484 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
11486 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
11489 else if ((TREE_CODE (type
) == RECORD_TYPE
11490 || TREE_CODE (type
) == UNION_TYPE
11491 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
11492 && TYPE_FIELDS (type
))
11494 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
11496 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
11499 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
11500 || TREE_CODE (type
) == INTEGER_TYPE
)
11502 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
11504 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
11511 /* Compute the alignment for a local variable.
11512 TYPE is the data type, and ALIGN is the alignment that
11513 the object would ordinarily have. The value of this macro is used
11514 instead of that alignment to align the object. */
11517 ix86_local_alignment (tree type
, int align
)
11519 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11520 to 16byte boundary. */
11523 if (AGGREGATE_TYPE_P (type
)
11524 && TYPE_SIZE (type
)
11525 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
11526 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
11527 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
11530 if (TREE_CODE (type
) == ARRAY_TYPE
)
11532 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
11534 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
11537 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
11539 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
11541 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
11544 else if ((TREE_CODE (type
) == RECORD_TYPE
11545 || TREE_CODE (type
) == UNION_TYPE
11546 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
11547 && TYPE_FIELDS (type
))
11549 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
11551 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
11554 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
11555 || TREE_CODE (type
) == INTEGER_TYPE
)
11558 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
11560 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
11566 /* Emit RTL insns to initialize the variable parts of a trampoline.
11567 FNADDR is an RTX for the address of the function's pure code.
11568 CXT is an RTX for the static chain value for the function. */
11570 x86_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
11574 /* Compute offset from the end of the jmp to the target function. */
11575 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
11576 plus_constant (tramp
, 10),
11577 NULL_RTX
, 1, OPTAB_DIRECT
);
11578 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
11579 gen_int_mode (0xb9, QImode
));
11580 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
11581 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
11582 gen_int_mode (0xe9, QImode
));
11583 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
11588 /* Try to load address using shorter movl instead of movabs.
11589 We may want to support movq for kernel mode, but kernel does not use
11590 trampolines at the moment. */
11591 if (x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
11593 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
11594 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
11595 gen_int_mode (0xbb41, HImode
));
11596 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
11597 gen_lowpart (SImode
, fnaddr
));
11602 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
11603 gen_int_mode (0xbb49, HImode
));
11604 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
11608 /* Load static chain using movabs to r10. */
11609 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
11610 gen_int_mode (0xba49, HImode
));
11611 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
11614 /* Jump to the r11 */
11615 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
11616 gen_int_mode (0xff49, HImode
));
11617 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
11618 gen_int_mode (0xe3, QImode
));
11620 if (offset
> TRAMPOLINE_SIZE
)
11624 #ifdef ENABLE_EXECUTE_STACK
11625 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
11626 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
/* Register the builtin NAME/TYPE/CODE with the language front end, but only
   when every target flag in MASK is active (and only on 64-bit targets when
   MASK includes MASK_64BIT).  */
#define def_builtin(MASK, NAME, TYPE, CODE)				\
do {									\
  if ((MASK) & target_flags						\
      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))			\
    lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
				 NULL, NULL_TREE);			\
} while (0)
11638 struct builtin_description
11640 const unsigned int mask
;
11641 const enum insn_code icode
;
11642 const char *const name
;
11643 const enum ix86_builtins code
;
11644 const enum rtx_code comparison
;
11645 const unsigned int flag
;
11648 static const struct builtin_description bdesc_comi
[] =
11650 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
11651 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
11652 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
11653 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
11654 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
11655 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
11656 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
11657 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
11658 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
11659 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
11660 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
11661 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
11662 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
11663 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
11664 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
11665 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
11666 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
11667 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
11668 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
11669 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
11670 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
11671 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
11672 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
11673 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
11676 static const struct builtin_description bdesc_2arg
[] =
11679 { MASK_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
11680 { MASK_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
11681 { MASK_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
11682 { MASK_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
11683 { MASK_SSE
, CODE_FOR_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
11684 { MASK_SSE
, CODE_FOR_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
11685 { MASK_SSE
, CODE_FOR_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
11686 { MASK_SSE
, CODE_FOR_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
11688 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
11689 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
11690 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
11691 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, 1 },
11692 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, 1 },
11693 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
11694 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, EQ
, 0 },
11695 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, LT
, 0 },
11696 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, LE
, 0 },
11697 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, LT
, 1 },
11698 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, LE
, 1 },
11699 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, UNORDERED
, 0 },
11700 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
11701 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
11702 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
11703 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
11704 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, EQ
, 0 },
11705 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, LT
, 0 },
11706 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, LE
, 0 },
11707 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
11709 { MASK_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
11710 { MASK_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
11711 { MASK_SSE
, CODE_FOR_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
11712 { MASK_SSE
, CODE_FOR_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
11714 { MASK_SSE
, CODE_FOR_sse_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
11715 { MASK_SSE
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
11716 { MASK_SSE
, CODE_FOR_sse_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
11717 { MASK_SSE
, CODE_FOR_sse_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
11719 { MASK_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
11720 { MASK_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
11721 { MASK_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
11722 { MASK_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
11723 { MASK_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
11726 { MASK_MMX
, CODE_FOR_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
11727 { MASK_MMX
, CODE_FOR_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
11728 { MASK_MMX
, CODE_FOR_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
11729 { MASK_MMX
, CODE_FOR_mmx_adddi3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, 0, 0 },
11730 { MASK_MMX
, CODE_FOR_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
11731 { MASK_MMX
, CODE_FOR_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
11732 { MASK_MMX
, CODE_FOR_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
11733 { MASK_MMX
, CODE_FOR_mmx_subdi3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, 0, 0 },
11735 { MASK_MMX
, CODE_FOR_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
11736 { MASK_MMX
, CODE_FOR_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
11737 { MASK_MMX
, CODE_FOR_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
11738 { MASK_MMX
, CODE_FOR_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
11739 { MASK_MMX
, CODE_FOR_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
11740 { MASK_MMX
, CODE_FOR_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
11741 { MASK_MMX
, CODE_FOR_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
11742 { MASK_MMX
, CODE_FOR_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
11744 { MASK_MMX
, CODE_FOR_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
11745 { MASK_MMX
, CODE_FOR_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
11746 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
11748 { MASK_MMX
, CODE_FOR_mmx_anddi3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
11749 { MASK_MMX
, CODE_FOR_mmx_nanddi3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
11750 { MASK_MMX
, CODE_FOR_mmx_iordi3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
11751 { MASK_MMX
, CODE_FOR_mmx_xordi3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
11753 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
11754 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
11756 { MASK_MMX
, CODE_FOR_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
11757 { MASK_MMX
, CODE_FOR_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
11758 { MASK_MMX
, CODE_FOR_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
11759 { MASK_MMX
, CODE_FOR_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
11760 { MASK_MMX
, CODE_FOR_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
11761 { MASK_MMX
, CODE_FOR_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
11763 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
11764 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
11765 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
11766 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
11768 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
11769 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
11770 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
11771 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
11772 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
11773 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
11776 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
11777 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
11778 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
11780 { MASK_SSE
, CODE_FOR_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
11781 { MASK_SSE
, CODE_FOR_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
11782 { MASK_SSE
| MASK_64BIT
, CODE_FOR_cvtsi2ssq
, 0, IX86_BUILTIN_CVTSI642SS
, 0, 0 },
11784 { MASK_MMX
, CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
11785 { MASK_MMX
, CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
11786 { MASK_MMX
, CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
11787 { MASK_MMX
, CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
11788 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
11789 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
11791 { MASK_MMX
, CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
11792 { MASK_MMX
, CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
11793 { MASK_MMX
, CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
11794 { MASK_MMX
, CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
11795 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
11796 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
11798 { MASK_MMX
, CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
11799 { MASK_MMX
, CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
11800 { MASK_MMX
, CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
11801 { MASK_MMX
, CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
11803 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
11804 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
11807 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
11808 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
11809 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
11810 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
11811 { MASK_SSE2
, CODE_FOR_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
11812 { MASK_SSE2
, CODE_FOR_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
11813 { MASK_SSE2
, CODE_FOR_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
11814 { MASK_SSE2
, CODE_FOR_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
11816 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
11817 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
11818 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
11819 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, 1 },
11820 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, 1 },
11821 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
11822 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, EQ
, 0 },
11823 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, LT
, 0 },
11824 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, LE
, 0 },
11825 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, LT
, 1 },
11826 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, LE
, 1 },
11827 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, UNORDERED
, 0 },
11828 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
11829 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
11830 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
11831 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
11832 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, EQ
, 0 },
11833 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, LT
, 0 },
11834 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, LE
, 0 },
11835 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, UNORDERED
, 0 },
11837 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
11838 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
11839 { MASK_SSE2
, CODE_FOR_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
11840 { MASK_SSE2
, CODE_FOR_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
11842 { MASK_SSE2
, CODE_FOR_sse2_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
11843 { MASK_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
11844 { MASK_SSE2
, CODE_FOR_sse2_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
11845 { MASK_SSE2
, CODE_FOR_sse2_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
11847 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
11848 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
11849 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
11852 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
11853 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
11854 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
11855 { MASK_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
11856 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
11857 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
11858 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
11859 { MASK_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
11861 { MASK_MMX
, CODE_FOR_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
11862 { MASK_MMX
, CODE_FOR_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
11863 { MASK_MMX
, CODE_FOR_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
11864 { MASK_MMX
, CODE_FOR_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
11865 { MASK_MMX
, CODE_FOR_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
11866 { MASK_MMX
, CODE_FOR_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
11867 { MASK_MMX
, CODE_FOR_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
11868 { MASK_MMX
, CODE_FOR_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
11870 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
11871 { MASK_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
11873 { MASK_SSE2
, CODE_FOR_sse2_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
11874 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
11875 { MASK_SSE2
, CODE_FOR_sse2_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
11876 { MASK_SSE2
, CODE_FOR_sse2_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
11878 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
11879 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
11881 { MASK_SSE2
, CODE_FOR_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
11882 { MASK_SSE2
, CODE_FOR_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
11883 { MASK_SSE2
, CODE_FOR_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
11884 { MASK_SSE2
, CODE_FOR_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
11885 { MASK_SSE2
, CODE_FOR_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
11886 { MASK_SSE2
, CODE_FOR_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
11888 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
11889 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
11890 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
11891 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
11893 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
11894 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
11895 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
11896 { MASK_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, 0, 0 },
11897 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
11898 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
11899 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
11900 { MASK_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, 0, 0 },
11902 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
11903 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
11904 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
11906 { MASK_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
11907 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
11909 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, 0, IX86_BUILTIN_PMULUDQ
, 0, 0 },
11910 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, 0, IX86_BUILTIN_PMULUDQ128
, 0, 0 },
11912 { MASK_SSE2
, CODE_FOR_ashlv8hi3_ti
, 0, IX86_BUILTIN_PSLLW128
, 0, 0 },
11913 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
11914 { MASK_SSE2
, CODE_FOR_ashlv4si3_ti
, 0, IX86_BUILTIN_PSLLD128
, 0, 0 },
11915 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
11916 { MASK_SSE2
, CODE_FOR_ashlv2di3_ti
, 0, IX86_BUILTIN_PSLLQ128
, 0, 0 },
11917 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
11919 { MASK_SSE2
, CODE_FOR_lshrv8hi3_ti
, 0, IX86_BUILTIN_PSRLW128
, 0, 0 },
11920 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
11921 { MASK_SSE2
, CODE_FOR_lshrv4si3_ti
, 0, IX86_BUILTIN_PSRLD128
, 0, 0 },
11922 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
11923 { MASK_SSE2
, CODE_FOR_lshrv2di3_ti
, 0, IX86_BUILTIN_PSRLQ128
, 0, 0 },
11924 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
11926 { MASK_SSE2
, CODE_FOR_ashrv8hi3_ti
, 0, IX86_BUILTIN_PSRAW128
, 0, 0 },
11927 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
11928 { MASK_SSE2
, CODE_FOR_ashrv4si3_ti
, 0, IX86_BUILTIN_PSRAD128
, 0, 0 },
11929 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
11931 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
11933 { MASK_SSE2
, CODE_FOR_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
11934 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_cvtsi2sdq
, 0, IX86_BUILTIN_CVTSI642SD
, 0, 0 },
11935 { MASK_SSE2
, CODE_FOR_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
11936 { MASK_SSE2
, CODE_FOR_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 },
11939 { MASK_SSE3
, CODE_FOR_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, 0, 0 },
11940 { MASK_SSE3
, CODE_FOR_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, 0, 0 },
11941 { MASK_SSE3
, CODE_FOR_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, 0, 0 },
11942 { MASK_SSE3
, CODE_FOR_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, 0, 0 },
11943 { MASK_SSE3
, CODE_FOR_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, 0, 0 },
11944 { MASK_SSE3
, CODE_FOR_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, 0, 0 }
11947 static const struct builtin_description bdesc_1arg
[] =
11949 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
11950 { MASK_SSE
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
11952 { MASK_SSE
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
11953 { MASK_SSE
, CODE_FOR_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
11954 { MASK_SSE
, CODE_FOR_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
11956 { MASK_SSE
, CODE_FOR_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
11957 { MASK_SSE
, CODE_FOR_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
11958 { MASK_SSE
| MASK_64BIT
, CODE_FOR_cvtss2siq
, 0, IX86_BUILTIN_CVTSS2SI64
, 0, 0 },
11959 { MASK_SSE
, CODE_FOR_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
11960 { MASK_SSE
, CODE_FOR_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 },
11961 { MASK_SSE
| MASK_64BIT
, CODE_FOR_cvttss2siq
, 0, IX86_BUILTIN_CVTTSS2SI64
, 0, 0 },
11963 { MASK_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, 0, 0 },
11964 { MASK_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, 0, 0 },
11965 { MASK_SSE2
, CODE_FOR_sse2_movq2dq
, 0, IX86_BUILTIN_MOVQ2DQ
, 0, 0 },
11966 { MASK_SSE2
, CODE_FOR_sse2_movdq2q
, 0, IX86_BUILTIN_MOVDQ2Q
, 0, 0 },
11968 { MASK_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, 0, 0 },
11970 { MASK_SSE2
, CODE_FOR_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, 0, 0 },
11971 { MASK_SSE2
, CODE_FOR_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, 0, 0 },
11973 { MASK_SSE2
, CODE_FOR_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, 0, 0 },
11974 { MASK_SSE2
, CODE_FOR_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, 0, 0 },
11975 { MASK_SSE2
, CODE_FOR_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, 0, 0 },
11976 { MASK_SSE2
, CODE_FOR_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, 0, 0 },
11977 { MASK_SSE2
, CODE_FOR_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, 0, 0 },
11979 { MASK_SSE2
, CODE_FOR_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, 0, 0 },
11981 { MASK_SSE2
, CODE_FOR_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, 0, 0 },
11982 { MASK_SSE2
, CODE_FOR_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, 0, 0 },
11983 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_cvtsd2siq
, 0, IX86_BUILTIN_CVTSD2SI64
, 0, 0 },
11984 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_cvttsd2siq
, 0, IX86_BUILTIN_CVTTSD2SI64
, 0, 0 },
11986 { MASK_SSE2
, CODE_FOR_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, 0, 0 },
11987 { MASK_SSE2
, CODE_FOR_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, 0, 0 },
11988 { MASK_SSE2
, CODE_FOR_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, 0, 0 },
11990 { MASK_SSE2
, CODE_FOR_sse2_movq
, 0, IX86_BUILTIN_MOVQ
, 0, 0 },
11993 { MASK_SSE3
, CODE_FOR_movshdup
, 0, IX86_BUILTIN_MOVSHDUP
, 0, 0 },
11994 { MASK_SSE3
, CODE_FOR_movsldup
, 0, IX86_BUILTIN_MOVSLDUP
, 0, 0 },
11995 { MASK_SSE3
, CODE_FOR_movddup
, 0, IX86_BUILTIN_MOVDDUP
, 0, 0 }
11999 ix86_init_builtins (void)
12002 ix86_init_mmx_sse_builtins ();
12005 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12006 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12009 ix86_init_mmx_sse_builtins (void)
12011 const struct builtin_description
* d
;
12014 tree V16QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V16QImode
);
12015 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
12016 tree V2SF_type_node
= build_vector_type_for_mode (float_type_node
, V2SFmode
);
12017 tree V2DI_type_node
= build_vector_type_for_mode (intDI_type_node
, V2DImode
);
12018 tree V2DF_type_node
= build_vector_type_for_mode (double_type_node
, V2DFmode
);
12019 tree V4SF_type_node
= build_vector_type_for_mode (float_type_node
, V4SFmode
);
12020 tree V4SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V4SImode
);
12021 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
12022 tree V8QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V8QImode
);
12023 tree V8HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V8HImode
);
12025 tree pchar_type_node
= build_pointer_type (char_type_node
);
12026 tree pcchar_type_node
= build_pointer_type (
12027 build_type_variant (char_type_node
, 1, 0));
12028 tree pfloat_type_node
= build_pointer_type (float_type_node
);
12029 tree pcfloat_type_node
= build_pointer_type (
12030 build_type_variant (float_type_node
, 1, 0));
12031 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
12032 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
12033 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
12036 tree int_ftype_v4sf_v4sf
12037 = build_function_type_list (integer_type_node
,
12038 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12039 tree v4si_ftype_v4sf_v4sf
12040 = build_function_type_list (V4SI_type_node
,
12041 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12042 /* MMX/SSE/integer conversions. */
12043 tree int_ftype_v4sf
12044 = build_function_type_list (integer_type_node
,
12045 V4SF_type_node
, NULL_TREE
);
12046 tree int64_ftype_v4sf
12047 = build_function_type_list (long_long_integer_type_node
,
12048 V4SF_type_node
, NULL_TREE
);
12049 tree int_ftype_v8qi
12050 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
12051 tree v4sf_ftype_v4sf_int
12052 = build_function_type_list (V4SF_type_node
,
12053 V4SF_type_node
, integer_type_node
, NULL_TREE
);
12054 tree v4sf_ftype_v4sf_int64
12055 = build_function_type_list (V4SF_type_node
,
12056 V4SF_type_node
, long_long_integer_type_node
,
12058 tree v4sf_ftype_v4sf_v2si
12059 = build_function_type_list (V4SF_type_node
,
12060 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
12061 tree int_ftype_v4hi_int
12062 = build_function_type_list (integer_type_node
,
12063 V4HI_type_node
, integer_type_node
, NULL_TREE
);
12064 tree v4hi_ftype_v4hi_int_int
12065 = build_function_type_list (V4HI_type_node
, V4HI_type_node
,
12066 integer_type_node
, integer_type_node
,
12068 /* Miscellaneous. */
12069 tree v8qi_ftype_v4hi_v4hi
12070 = build_function_type_list (V8QI_type_node
,
12071 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
12072 tree v4hi_ftype_v2si_v2si
12073 = build_function_type_list (V4HI_type_node
,
12074 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
12075 tree v4sf_ftype_v4sf_v4sf_int
12076 = build_function_type_list (V4SF_type_node
,
12077 V4SF_type_node
, V4SF_type_node
,
12078 integer_type_node
, NULL_TREE
);
12079 tree v2si_ftype_v4hi_v4hi
12080 = build_function_type_list (V2SI_type_node
,
12081 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
12082 tree v4hi_ftype_v4hi_int
12083 = build_function_type_list (V4HI_type_node
,
12084 V4HI_type_node
, integer_type_node
, NULL_TREE
);
12085 tree v4hi_ftype_v4hi_di
12086 = build_function_type_list (V4HI_type_node
,
12087 V4HI_type_node
, long_long_unsigned_type_node
,
12089 tree v2si_ftype_v2si_di
12090 = build_function_type_list (V2SI_type_node
,
12091 V2SI_type_node
, long_long_unsigned_type_node
,
12093 tree void_ftype_void
12094 = build_function_type (void_type_node
, void_list_node
);
12095 tree void_ftype_unsigned
12096 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
12097 tree void_ftype_unsigned_unsigned
12098 = build_function_type_list (void_type_node
, unsigned_type_node
,
12099 unsigned_type_node
, NULL_TREE
);
12100 tree void_ftype_pcvoid_unsigned_unsigned
12101 = build_function_type_list (void_type_node
, const_ptr_type_node
,
12102 unsigned_type_node
, unsigned_type_node
,
12104 tree unsigned_ftype_void
12105 = build_function_type (unsigned_type_node
, void_list_node
);
12107 = build_function_type (long_long_unsigned_type_node
, void_list_node
);
12108 tree v4sf_ftype_void
12109 = build_function_type (V4SF_type_node
, void_list_node
);
12110 tree v2si_ftype_v4sf
12111 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
12112 /* Loads/stores. */
12113 tree void_ftype_v8qi_v8qi_pchar
12114 = build_function_type_list (void_type_node
,
12115 V8QI_type_node
, V8QI_type_node
,
12116 pchar_type_node
, NULL_TREE
);
12117 tree v4sf_ftype_pcfloat
12118 = build_function_type_list (V4SF_type_node
, pcfloat_type_node
, NULL_TREE
);
12119 /* @@@ the type is bogus */
12120 tree v4sf_ftype_v4sf_pv2si
12121 = build_function_type_list (V4SF_type_node
,
12122 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
12123 tree void_ftype_pv2si_v4sf
12124 = build_function_type_list (void_type_node
,
12125 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
12126 tree void_ftype_pfloat_v4sf
12127 = build_function_type_list (void_type_node
,
12128 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
12129 tree void_ftype_pdi_di
12130 = build_function_type_list (void_type_node
,
12131 pdi_type_node
, long_long_unsigned_type_node
,
12133 tree void_ftype_pv2di_v2di
12134 = build_function_type_list (void_type_node
,
12135 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
12136 /* Normal vector unops. */
12137 tree v4sf_ftype_v4sf
12138 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12140 /* Normal vector binops. */
12141 tree v4sf_ftype_v4sf_v4sf
12142 = build_function_type_list (V4SF_type_node
,
12143 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12144 tree v8qi_ftype_v8qi_v8qi
12145 = build_function_type_list (V8QI_type_node
,
12146 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
12147 tree v4hi_ftype_v4hi_v4hi
12148 = build_function_type_list (V4HI_type_node
,
12149 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
12150 tree v2si_ftype_v2si_v2si
12151 = build_function_type_list (V2SI_type_node
,
12152 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
12153 tree di_ftype_di_di
12154 = build_function_type_list (long_long_unsigned_type_node
,
12155 long_long_unsigned_type_node
,
12156 long_long_unsigned_type_node
, NULL_TREE
);
12158 tree v2si_ftype_v2sf
12159 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
12160 tree v2sf_ftype_v2si
12161 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
12162 tree v2si_ftype_v2si
12163 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
12164 tree v2sf_ftype_v2sf
12165 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
12166 tree v2sf_ftype_v2sf_v2sf
12167 = build_function_type_list (V2SF_type_node
,
12168 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
12169 tree v2si_ftype_v2sf_v2sf
12170 = build_function_type_list (V2SI_type_node
,
12171 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
12172 tree pint_type_node
= build_pointer_type (integer_type_node
);
12173 tree pcint_type_node
= build_pointer_type (
12174 build_type_variant (integer_type_node
, 1, 0));
12175 tree pdouble_type_node
= build_pointer_type (double_type_node
);
12176 tree pcdouble_type_node
= build_pointer_type (
12177 build_type_variant (double_type_node
, 1, 0));
12178 tree int_ftype_v2df_v2df
12179 = build_function_type_list (integer_type_node
,
12180 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
12183 = build_function_type (intTI_type_node
, void_list_node
);
12184 tree v2di_ftype_void
12185 = build_function_type (V2DI_type_node
, void_list_node
);
12186 tree ti_ftype_ti_ti
12187 = build_function_type_list (intTI_type_node
,
12188 intTI_type_node
, intTI_type_node
, NULL_TREE
);
12189 tree void_ftype_pcvoid
12190 = build_function_type_list (void_type_node
, const_ptr_type_node
, NULL_TREE
);
12192 = build_function_type_list (V2DI_type_node
,
12193 long_long_unsigned_type_node
, NULL_TREE
);
12195 = build_function_type_list (long_long_unsigned_type_node
,
12196 V2DI_type_node
, NULL_TREE
);
12197 tree v4sf_ftype_v4si
12198 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
12199 tree v4si_ftype_v4sf
12200 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
12201 tree v2df_ftype_v4si
12202 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
12203 tree v4si_ftype_v2df
12204 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
12205 tree v2si_ftype_v2df
12206 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
12207 tree v4sf_ftype_v2df
12208 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
12209 tree v2df_ftype_v2si
12210 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
12211 tree v2df_ftype_v4sf
12212 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
12213 tree int_ftype_v2df
12214 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
12215 tree int64_ftype_v2df
12216 = build_function_type_list (long_long_integer_type_node
,
12217 V2DF_type_node
, NULL_TREE
);
12218 tree v2df_ftype_v2df_int
12219 = build_function_type_list (V2DF_type_node
,
12220 V2DF_type_node
, integer_type_node
, NULL_TREE
);
12221 tree v2df_ftype_v2df_int64
12222 = build_function_type_list (V2DF_type_node
,
12223 V2DF_type_node
, long_long_integer_type_node
,
12225 tree v4sf_ftype_v4sf_v2df
12226 = build_function_type_list (V4SF_type_node
,
12227 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
12228 tree v2df_ftype_v2df_v4sf
12229 = build_function_type_list (V2DF_type_node
,
12230 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
12231 tree v2df_ftype_v2df_v2df_int
12232 = build_function_type_list (V2DF_type_node
,
12233 V2DF_type_node
, V2DF_type_node
,
12236 tree v2df_ftype_v2df_pv2si
12237 = build_function_type_list (V2DF_type_node
,
12238 V2DF_type_node
, pv2si_type_node
, NULL_TREE
);
12239 tree void_ftype_pv2si_v2df
12240 = build_function_type_list (void_type_node
,
12241 pv2si_type_node
, V2DF_type_node
, NULL_TREE
);
12242 tree void_ftype_pdouble_v2df
12243 = build_function_type_list (void_type_node
,
12244 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
12245 tree void_ftype_pint_int
12246 = build_function_type_list (void_type_node
,
12247 pint_type_node
, integer_type_node
, NULL_TREE
);
12248 tree void_ftype_v16qi_v16qi_pchar
12249 = build_function_type_list (void_type_node
,
12250 V16QI_type_node
, V16QI_type_node
,
12251 pchar_type_node
, NULL_TREE
);
12252 tree v2df_ftype_pcdouble
12253 = build_function_type_list (V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
12254 tree v2df_ftype_v2df_v2df
12255 = build_function_type_list (V2DF_type_node
,
12256 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
12257 tree v16qi_ftype_v16qi_v16qi
12258 = build_function_type_list (V16QI_type_node
,
12259 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
12260 tree v8hi_ftype_v8hi_v8hi
12261 = build_function_type_list (V8HI_type_node
,
12262 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
12263 tree v4si_ftype_v4si_v4si
12264 = build_function_type_list (V4SI_type_node
,
12265 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
12266 tree v2di_ftype_v2di_v2di
12267 = build_function_type_list (V2DI_type_node
,
12268 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
12269 tree v2di_ftype_v2df_v2df
12270 = build_function_type_list (V2DI_type_node
,
12271 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
12272 tree v2df_ftype_v2df
12273 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
12274 tree v2df_ftype_double
12275 = build_function_type_list (V2DF_type_node
, double_type_node
, NULL_TREE
);
12276 tree v2df_ftype_double_double
12277 = build_function_type_list (V2DF_type_node
,
12278 double_type_node
, double_type_node
, NULL_TREE
);
12279 tree int_ftype_v8hi_int
12280 = build_function_type_list (integer_type_node
,
12281 V8HI_type_node
, integer_type_node
, NULL_TREE
);
12282 tree v8hi_ftype_v8hi_int_int
12283 = build_function_type_list (V8HI_type_node
,
12284 V8HI_type_node
, integer_type_node
,
12285 integer_type_node
, NULL_TREE
);
12286 tree v2di_ftype_v2di_int
12287 = build_function_type_list (V2DI_type_node
,
12288 V2DI_type_node
, integer_type_node
, NULL_TREE
);
12289 tree v4si_ftype_v4si_int
12290 = build_function_type_list (V4SI_type_node
,
12291 V4SI_type_node
, integer_type_node
, NULL_TREE
);
12292 tree v8hi_ftype_v8hi_int
12293 = build_function_type_list (V8HI_type_node
,
12294 V8HI_type_node
, integer_type_node
, NULL_TREE
);
12295 tree v8hi_ftype_v8hi_v2di
12296 = build_function_type_list (V8HI_type_node
,
12297 V8HI_type_node
, V2DI_type_node
, NULL_TREE
);
12298 tree v4si_ftype_v4si_v2di
12299 = build_function_type_list (V4SI_type_node
,
12300 V4SI_type_node
, V2DI_type_node
, NULL_TREE
);
12301 tree v4si_ftype_v8hi_v8hi
12302 = build_function_type_list (V4SI_type_node
,
12303 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
12304 tree di_ftype_v8qi_v8qi
12305 = build_function_type_list (long_long_unsigned_type_node
,
12306 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
12307 tree di_ftype_v2si_v2si
12308 = build_function_type_list (long_long_unsigned_type_node
,
12309 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
12310 tree v2di_ftype_v16qi_v16qi
12311 = build_function_type_list (V2DI_type_node
,
12312 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
12313 tree v2di_ftype_v4si_v4si
12314 = build_function_type_list (V2DI_type_node
,
12315 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
12316 tree int_ftype_v16qi
12317 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
12318 tree v16qi_ftype_pcchar
12319 = build_function_type_list (V16QI_type_node
, pcchar_type_node
, NULL_TREE
);
12320 tree void_ftype_pchar_v16qi
12321 = build_function_type_list (void_type_node
,
12322 pchar_type_node
, V16QI_type_node
, NULL_TREE
);
12323 tree v4si_ftype_pcint
12324 = build_function_type_list (V4SI_type_node
, pcint_type_node
, NULL_TREE
);
12325 tree void_ftype_pcint_v4si
12326 = build_function_type_list (void_type_node
,
12327 pcint_type_node
, V4SI_type_node
, NULL_TREE
);
12328 tree v2di_ftype_v2di
12329 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
12332 tree float128_type
;
12334 /* The __float80 type. */
12335 if (TYPE_MODE (long_double_type_node
) == XFmode
)
12336 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
12340 /* The __float80 type. */
12341 float80_type
= make_node (REAL_TYPE
);
12342 TYPE_PRECISION (float80_type
) = 80;
12343 layout_type (float80_type
);
12344 (*lang_hooks
.types
.register_builtin_type
) (float80_type
, "__float80");
12347 float128_type
= make_node (REAL_TYPE
);
12348 TYPE_PRECISION (float128_type
) = 128;
12349 layout_type (float128_type
);
12350 (*lang_hooks
.types
.register_builtin_type
) (float128_type
, "__float128");
12352 /* Add all builtins that are more or less simple operations on two
12354 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
12356 /* Use one of the operands; the target can have a different mode for
12357 mask-generating compares. */
12358 enum machine_mode mode
;
12363 mode
= insn_data
[d
->icode
].operand
[1].mode
;
12368 type
= v16qi_ftype_v16qi_v16qi
;
12371 type
= v8hi_ftype_v8hi_v8hi
;
12374 type
= v4si_ftype_v4si_v4si
;
12377 type
= v2di_ftype_v2di_v2di
;
12380 type
= v2df_ftype_v2df_v2df
;
12383 type
= ti_ftype_ti_ti
;
12386 type
= v4sf_ftype_v4sf_v4sf
;
12389 type
= v8qi_ftype_v8qi_v8qi
;
12392 type
= v4hi_ftype_v4hi_v4hi
;
12395 type
= v2si_ftype_v2si_v2si
;
12398 type
= di_ftype_di_di
;
12405 /* Override for comparisons. */
12406 if (d
->icode
== CODE_FOR_maskcmpv4sf3
12407 || d
->icode
== CODE_FOR_maskncmpv4sf3
12408 || d
->icode
== CODE_FOR_vmmaskcmpv4sf3
12409 || d
->icode
== CODE_FOR_vmmaskncmpv4sf3
)
12410 type
= v4si_ftype_v4sf_v4sf
;
12412 if (d
->icode
== CODE_FOR_maskcmpv2df3
12413 || d
->icode
== CODE_FOR_maskncmpv2df3
12414 || d
->icode
== CODE_FOR_vmmaskcmpv2df3
12415 || d
->icode
== CODE_FOR_vmmaskncmpv2df3
)
12416 type
= v2di_ftype_v2df_v2df
;
12418 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
12421 /* Add the remaining MMX insns with somewhat more complicated types. */
12422 def_builtin (MASK_MMX
, "__builtin_ia32_mmx_zero", di_ftype_void
, IX86_BUILTIN_MMX_ZERO
);
12423 def_builtin (MASK_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
12424 def_builtin (MASK_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
12425 def_builtin (MASK_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
12426 def_builtin (MASK_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
12428 def_builtin (MASK_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
12429 def_builtin (MASK_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
12430 def_builtin (MASK_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
12432 def_builtin (MASK_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
12433 def_builtin (MASK_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
12435 def_builtin (MASK_MMX
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
12436 def_builtin (MASK_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
12438 /* comi/ucomi insns. */
12439 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
12440 if (d
->mask
== MASK_SSE2
)
12441 def_builtin (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
12443 def_builtin (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
12445 def_builtin (MASK_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
12446 def_builtin (MASK_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
12447 def_builtin (MASK_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
12449 def_builtin (MASK_SSE
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
12450 def_builtin (MASK_SSE
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
12451 def_builtin (MASK_SSE
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
12452 def_builtin (MASK_SSE
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
12453 def_builtin (MASK_SSE
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
12454 def_builtin (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64
, IX86_BUILTIN_CVTSI642SS
);
12455 def_builtin (MASK_SSE
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
12456 def_builtin (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI64
);
12457 def_builtin (MASK_SSE
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
12458 def_builtin (MASK_SSE
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
12459 def_builtin (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI64
);
12461 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pextrw", int_ftype_v4hi_int
, IX86_BUILTIN_PEXTRW
);
12462 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int
, IX86_BUILTIN_PINSRW
);
12464 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
12466 def_builtin (MASK_SSE
, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADAPS
);
12467 def_builtin (MASK_SSE
, "__builtin_ia32_loadups", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADUPS
);
12468 def_builtin (MASK_SSE
, "__builtin_ia32_loadss", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADSS
);
12469 def_builtin (MASK_SSE
, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREAPS
);
12470 def_builtin (MASK_SSE
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
12471 def_builtin (MASK_SSE
, "__builtin_ia32_storess", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STORESS
);
12473 def_builtin (MASK_SSE
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
12474 def_builtin (MASK_SSE
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
12475 def_builtin (MASK_SSE
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
12476 def_builtin (MASK_SSE
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
12478 def_builtin (MASK_SSE
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
12479 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
12480 def_builtin (MASK_SSE
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
12481 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
12483 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
12485 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
12487 def_builtin (MASK_SSE
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
12488 def_builtin (MASK_SSE
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
12489 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
12490 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
12491 def_builtin (MASK_SSE
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
12492 def_builtin (MASK_SSE
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
12494 def_builtin (MASK_SSE
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
12496 /* Original 3DNow! */
12497 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
12498 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
12499 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
12500 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
12501 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
12502 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
12503 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
12504 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
12505 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
12506 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
12507 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
12508 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
12509 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
12510 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
12511 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
12512 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
12513 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
12514 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
12515 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
12516 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
12518 /* 3DNow! extension as used in the Athlon CPU. */
12519 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
12520 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
12521 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
12522 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
12523 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
12524 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
12526 def_builtin (MASK_SSE
, "__builtin_ia32_setzerops", v4sf_ftype_void
, IX86_BUILTIN_SSE_ZERO
);
12529 def_builtin (MASK_SSE2
, "__builtin_ia32_pextrw128", int_ftype_v8hi_int
, IX86_BUILTIN_PEXTRW128
);
12530 def_builtin (MASK_SSE2
, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int
, IX86_BUILTIN_PINSRW128
);
12532 def_builtin (MASK_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
12533 def_builtin (MASK_SSE2
, "__builtin_ia32_movq2dq", v2di_ftype_di
, IX86_BUILTIN_MOVQ2DQ
);
12534 def_builtin (MASK_SSE2
, "__builtin_ia32_movdq2q", di_ftype_v2di
, IX86_BUILTIN_MOVDQ2Q
);
12536 def_builtin (MASK_SSE2
, "__builtin_ia32_loadapd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADAPD
);
12537 def_builtin (MASK_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADUPD
);
12538 def_builtin (MASK_SSE2
, "__builtin_ia32_loadsd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADSD
);
12539 def_builtin (MASK_SSE2
, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREAPD
);
12540 def_builtin (MASK_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
12541 def_builtin (MASK_SSE2
, "__builtin_ia32_storesd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STORESD
);
12543 def_builtin (MASK_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si
, IX86_BUILTIN_LOADHPD
);
12544 def_builtin (MASK_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si
, IX86_BUILTIN_LOADLPD
);
12545 def_builtin (MASK_SSE2
, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df
, IX86_BUILTIN_STOREHPD
);
12546 def_builtin (MASK_SSE2
, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df
, IX86_BUILTIN_STORELPD
);
12548 def_builtin (MASK_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
12549 def_builtin (MASK_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
12550 def_builtin (MASK_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
12551 def_builtin (MASK_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
12552 def_builtin (MASK_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
12554 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
12555 def_builtin (MASK_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
12556 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
12557 def_builtin (MASK_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
12559 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
12560 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
12562 def_builtin (MASK_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
12564 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
12565 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
12567 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
12568 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
12569 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
12570 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
12571 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
12573 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
12575 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
12576 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
12577 def_builtin (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTSD2SI64
);
12578 def_builtin (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI64
);
12580 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
12581 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
12582 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
12584 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
12585 def_builtin (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64
, IX86_BUILTIN_CVTSI642SD
);
12586 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
12587 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
12589 def_builtin (MASK_SSE2
, "__builtin_ia32_setpd1", v2df_ftype_double
, IX86_BUILTIN_SETPD1
);
12590 def_builtin (MASK_SSE2
, "__builtin_ia32_setpd", v2df_ftype_double_double
, IX86_BUILTIN_SETPD
);
12591 def_builtin (MASK_SSE2
, "__builtin_ia32_setzeropd", ti_ftype_void
, IX86_BUILTIN_CLRPD
);
12592 def_builtin (MASK_SSE2
, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADPD1
);
12593 def_builtin (MASK_SSE2
, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADRPD
);
12594 def_builtin (MASK_SSE2
, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREPD1
);
12595 def_builtin (MASK_SSE2
, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STORERPD
);
12597 def_builtin (MASK_SSE2
, "__builtin_ia32_clflush", void_ftype_pcvoid
, IX86_BUILTIN_CLFLUSH
);
12598 def_builtin (MASK_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
12599 def_builtin (MASK_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
12601 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQA
);
12602 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQU
);
12603 def_builtin (MASK_SSE2
, "__builtin_ia32_loadd", v4si_ftype_pcint
, IX86_BUILTIN_LOADD
);
12604 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQA
);
12605 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQU
);
12606 def_builtin (MASK_SSE2
, "__builtin_ia32_stored", void_ftype_pcint_v4si
, IX86_BUILTIN_STORED
);
12607 def_builtin (MASK_SSE2
, "__builtin_ia32_movq", v2di_ftype_v2di
, IX86_BUILTIN_MOVQ
);
12609 def_builtin (MASK_SSE
, "__builtin_ia32_setzero128", v2di_ftype_void
, IX86_BUILTIN_CLRTI
);
12611 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si
, IX86_BUILTIN_PMULUDQ
);
12612 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si
, IX86_BUILTIN_PMULUDQ128
);
12614 def_builtin (MASK_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSLLW128
);
12615 def_builtin (MASK_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSLLD128
);
12616 def_builtin (MASK_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
12618 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRLW128
);
12619 def_builtin (MASK_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRLD128
);
12620 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
12622 def_builtin (MASK_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRAW128
);
12623 def_builtin (MASK_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRAD128
);
12625 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLDQI128
);
12626 def_builtin (MASK_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
12627 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
12628 def_builtin (MASK_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
12630 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
12631 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
12632 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
12633 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
12635 def_builtin (MASK_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
12636 def_builtin (MASK_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
12638 def_builtin (MASK_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
12640 /* Prescott New Instructions. */
12641 def_builtin (MASK_SSE3
, "__builtin_ia32_monitor",
12642 void_ftype_pcvoid_unsigned_unsigned
,
12643 IX86_BUILTIN_MONITOR
);
12644 def_builtin (MASK_SSE3
, "__builtin_ia32_mwait",
12645 void_ftype_unsigned_unsigned
,
12646 IX86_BUILTIN_MWAIT
);
12647 def_builtin (MASK_SSE3
, "__builtin_ia32_movshdup",
12649 IX86_BUILTIN_MOVSHDUP
);
12650 def_builtin (MASK_SSE3
, "__builtin_ia32_movsldup",
12652 IX86_BUILTIN_MOVSLDUP
);
12653 def_builtin (MASK_SSE3
, "__builtin_ia32_lddqu",
12654 v16qi_ftype_pcchar
, IX86_BUILTIN_LDDQU
);
12655 def_builtin (MASK_SSE3
, "__builtin_ia32_loadddup",
12656 v2df_ftype_pcdouble
, IX86_BUILTIN_LOADDDUP
);
12657 def_builtin (MASK_SSE3
, "__builtin_ia32_movddup",
12658 v2df_ftype_v2df
, IX86_BUILTIN_MOVDDUP
);
12661 /* Errors in the source file can cause expand_expr to return const0_rtx
12662 where we expect a vector. To avoid crashing, use one of the vector
12663 clear instructions. */
12665 safe_vector_operand (rtx x
, enum machine_mode mode
)
12667 if (x
!= const0_rtx
)
12669 x
= gen_reg_rtx (mode
);
12671 if (VALID_MMX_REG_MODE (mode
) || VALID_MMX_REG_MODE_3DNOW (mode
))
12672 emit_insn (gen_mmx_clrdi (mode
== DImode
? x
12673 : gen_rtx_SUBREG (DImode
, x
, 0)));
12675 emit_insn (gen_sse_clrv4sf (mode
== V4SFmode
? x
12676 : gen_rtx_SUBREG (V4SFmode
, x
, 0),
12677 CONST0_RTX (V4SFmode
)));
12681 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
12684 ix86_expand_binop_builtin (enum insn_code icode
, tree arglist
, rtx target
)
12687 tree arg0
= TREE_VALUE (arglist
);
12688 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12689 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12690 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12691 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
12692 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
12693 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
12695 if (VECTOR_MODE_P (mode0
))
12696 op0
= safe_vector_operand (op0
, mode0
);
12697 if (VECTOR_MODE_P (mode1
))
12698 op1
= safe_vector_operand (op1
, mode1
);
12701 || GET_MODE (target
) != tmode
12702 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
12703 target
= gen_reg_rtx (tmode
);
12705 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
12707 rtx x
= gen_reg_rtx (V4SImode
);
12708 emit_insn (gen_sse2_loadd (x
, op1
));
12709 op1
= gen_lowpart (TImode
, x
);
12712 /* In case the insn wants input operands in modes different from
12713 the result, abort. */
12714 if ((GET_MODE (op0
) != mode0
&& GET_MODE (op0
) != VOIDmode
)
12715 || (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
))
12718 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
12719 op0
= copy_to_mode_reg (mode0
, op0
);
12720 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
12721 op1
= copy_to_mode_reg (mode1
, op1
);
12723 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12724 yet one of the two must not be a memory. This is normally enforced
12725 by expanders, but we didn't bother to create one here. */
12726 if (GET_CODE (op0
) == MEM
&& GET_CODE (op1
) == MEM
)
12727 op0
= copy_to_mode_reg (mode0
, op0
);
12729 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
12736 /* Subroutine of ix86_expand_builtin to take care of stores. */
12739 ix86_expand_store_builtin (enum insn_code icode
, tree arglist
)
12742 tree arg0
= TREE_VALUE (arglist
);
12743 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12744 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12745 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12746 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
12747 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
12749 if (VECTOR_MODE_P (mode1
))
12750 op1
= safe_vector_operand (op1
, mode1
);
12752 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
12753 op1
= copy_to_mode_reg (mode1
, op1
);
12755 pat
= GEN_FCN (icode
) (op0
, op1
);
12761 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
12764 ix86_expand_unop_builtin (enum insn_code icode
, tree arglist
,
12765 rtx target
, int do_load
)
12768 tree arg0
= TREE_VALUE (arglist
);
12769 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12770 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
12771 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
12774 || GET_MODE (target
) != tmode
12775 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
12776 target
= gen_reg_rtx (tmode
);
12778 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
12781 if (VECTOR_MODE_P (mode0
))
12782 op0
= safe_vector_operand (op0
, mode0
);
12784 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
12785 op0
= copy_to_mode_reg (mode0
, op0
);
12788 pat
= GEN_FCN (icode
) (target
, op0
);
12795 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12796 sqrtss, rsqrtss, rcpss. */
12799 ix86_expand_unop1_builtin (enum insn_code icode
, tree arglist
, rtx target
)
12802 tree arg0
= TREE_VALUE (arglist
);
12803 rtx op1
, op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12804 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
12805 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
12808 || GET_MODE (target
) != tmode
12809 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
12810 target
= gen_reg_rtx (tmode
);
12812 if (VECTOR_MODE_P (mode0
))
12813 op0
= safe_vector_operand (op0
, mode0
);
12815 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
12816 op0
= copy_to_mode_reg (mode0
, op0
);
12819 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
12820 op1
= copy_to_mode_reg (mode0
, op1
);
12822 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
12829 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
12832 ix86_expand_sse_compare (const struct builtin_description
*d
, tree arglist
,
12836 tree arg0
= TREE_VALUE (arglist
);
12837 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12838 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12839 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12841 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
12842 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
12843 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
12844 enum rtx_code comparison
= d
->comparison
;
12846 if (VECTOR_MODE_P (mode0
))
12847 op0
= safe_vector_operand (op0
, mode0
);
12848 if (VECTOR_MODE_P (mode1
))
12849 op1
= safe_vector_operand (op1
, mode1
);
12851 /* Swap operands if we have a comparison that isn't available in
12855 rtx tmp
= gen_reg_rtx (mode1
);
12856 emit_move_insn (tmp
, op1
);
12862 || GET_MODE (target
) != tmode
12863 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
12864 target
= gen_reg_rtx (tmode
);
12866 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
12867 op0
= copy_to_mode_reg (mode0
, op0
);
12868 if (! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
12869 op1
= copy_to_mode_reg (mode1
, op1
);
12871 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
12872 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
12879 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
12882 ix86_expand_sse_comi (const struct builtin_description
*d
, tree arglist
,
12886 tree arg0
= TREE_VALUE (arglist
);
12887 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12888 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12889 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12891 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
12892 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
12893 enum rtx_code comparison
= d
->comparison
;
12895 if (VECTOR_MODE_P (mode0
))
12896 op0
= safe_vector_operand (op0
, mode0
);
12897 if (VECTOR_MODE_P (mode1
))
12898 op1
= safe_vector_operand (op1
, mode1
);
12900 /* Swap operands if we have a comparison that isn't available in
12909 target
= gen_reg_rtx (SImode
);
12910 emit_move_insn (target
, const0_rtx
);
12911 target
= gen_rtx_SUBREG (QImode
, target
, 0);
12913 if (! (*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
12914 op0
= copy_to_mode_reg (mode0
, op0
);
12915 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
12916 op1
= copy_to_mode_reg (mode1
, op1
);
12918 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
12919 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
12923 emit_insn (gen_rtx_SET (VOIDmode
,
12924 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
12925 gen_rtx_fmt_ee (comparison
, QImode
,
12929 return SUBREG_REG (target
);
12932 /* Expand an expression EXP that calls a built-in function,
12933 with result going to TARGET if that's convenient
12934 (and in mode MODE if that's convenient).
12935 SUBTARGET may be used as the target for computing one of EXP's operands.
12936 IGNORE is nonzero if the value is to be ignored. */
12939 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
12940 enum machine_mode mode ATTRIBUTE_UNUSED
,
12941 int ignore ATTRIBUTE_UNUSED
)
12943 const struct builtin_description
*d
;
12945 enum insn_code icode
;
12946 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
12947 tree arglist
= TREE_OPERAND (exp
, 1);
12948 tree arg0
, arg1
, arg2
;
12949 rtx op0
, op1
, op2
, pat
;
12950 enum machine_mode tmode
, mode0
, mode1
, mode2
;
12951 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
12955 case IX86_BUILTIN_EMMS
:
12956 emit_insn (gen_emms ());
12959 case IX86_BUILTIN_SFENCE
:
12960 emit_insn (gen_sfence ());
12963 case IX86_BUILTIN_PEXTRW
:
12964 case IX86_BUILTIN_PEXTRW128
:
12965 icode
= (fcode
== IX86_BUILTIN_PEXTRW
12966 ? CODE_FOR_mmx_pextrw
12967 : CODE_FOR_sse2_pextrw
);
12968 arg0
= TREE_VALUE (arglist
);
12969 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12970 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12971 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12972 tmode
= insn_data
[icode
].operand
[0].mode
;
12973 mode0
= insn_data
[icode
].operand
[1].mode
;
12974 mode1
= insn_data
[icode
].operand
[2].mode
;
12976 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
12977 op0
= copy_to_mode_reg (mode0
, op0
);
12978 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
12980 error ("selector must be an integer constant in the range 0..%i",
12981 fcode
== IX86_BUILTIN_PEXTRW
? 3:7);
12982 return gen_reg_rtx (tmode
);
12985 || GET_MODE (target
) != tmode
12986 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
12987 target
= gen_reg_rtx (tmode
);
12988 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
12994 case IX86_BUILTIN_PINSRW
:
12995 case IX86_BUILTIN_PINSRW128
:
12996 icode
= (fcode
== IX86_BUILTIN_PINSRW
12997 ? CODE_FOR_mmx_pinsrw
12998 : CODE_FOR_sse2_pinsrw
);
12999 arg0
= TREE_VALUE (arglist
);
13000 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13001 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
13002 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13003 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13004 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
13005 tmode
= insn_data
[icode
].operand
[0].mode
;
13006 mode0
= insn_data
[icode
].operand
[1].mode
;
13007 mode1
= insn_data
[icode
].operand
[2].mode
;
13008 mode2
= insn_data
[icode
].operand
[3].mode
;
13010 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13011 op0
= copy_to_mode_reg (mode0
, op0
);
13012 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13013 op1
= copy_to_mode_reg (mode1
, op1
);
13014 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
13016 error ("selector must be an integer constant in the range 0..%i",
13017 fcode
== IX86_BUILTIN_PINSRW
? 15:255);
13021 || GET_MODE (target
) != tmode
13022 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13023 target
= gen_reg_rtx (tmode
);
13024 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
13030 case IX86_BUILTIN_MASKMOVQ
:
13031 case IX86_BUILTIN_MASKMOVDQU
:
13032 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
13033 ? (TARGET_64BIT
? CODE_FOR_mmx_maskmovq_rex
: CODE_FOR_mmx_maskmovq
)
13034 : (TARGET_64BIT
? CODE_FOR_sse2_maskmovdqu_rex64
13035 : CODE_FOR_sse2_maskmovdqu
));
13036 /* Note the arg order is different from the operand order. */
13037 arg1
= TREE_VALUE (arglist
);
13038 arg2
= TREE_VALUE (TREE_CHAIN (arglist
));
13039 arg0
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
13040 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13041 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13042 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
13043 mode0
= insn_data
[icode
].operand
[0].mode
;
13044 mode1
= insn_data
[icode
].operand
[1].mode
;
13045 mode2
= insn_data
[icode
].operand
[2].mode
;
13047 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
13048 op0
= copy_to_mode_reg (mode0
, op0
);
13049 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
13050 op1
= copy_to_mode_reg (mode1
, op1
);
13051 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
13052 op2
= copy_to_mode_reg (mode2
, op2
);
13053 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
13059 case IX86_BUILTIN_SQRTSS
:
13060 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2
, arglist
, target
);
13061 case IX86_BUILTIN_RSQRTSS
:
13062 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2
, arglist
, target
);
13063 case IX86_BUILTIN_RCPSS
:
13064 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2
, arglist
, target
);
13066 case IX86_BUILTIN_LOADAPS
:
13067 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps
, arglist
, target
, 1);
13069 case IX86_BUILTIN_LOADUPS
:
13070 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, arglist
, target
, 1);
13072 case IX86_BUILTIN_STOREAPS
:
13073 return ix86_expand_store_builtin (CODE_FOR_sse_movaps
, arglist
);
13075 case IX86_BUILTIN_STOREUPS
:
13076 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, arglist
);
13078 case IX86_BUILTIN_LOADSS
:
13079 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss
, arglist
, target
, 1);
13081 case IX86_BUILTIN_STORESS
:
13082 return ix86_expand_store_builtin (CODE_FOR_sse_storess
, arglist
);
13084 case IX86_BUILTIN_LOADHPS
:
13085 case IX86_BUILTIN_LOADLPS
:
13086 case IX86_BUILTIN_LOADHPD
:
13087 case IX86_BUILTIN_LOADLPD
:
13088 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_movhps
13089 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_movlps
13090 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_movhpd
13091 : CODE_FOR_sse2_movsd
);
13092 arg0
= TREE_VALUE (arglist
);
13093 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13094 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13095 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13096 tmode
= insn_data
[icode
].operand
[0].mode
;
13097 mode0
= insn_data
[icode
].operand
[1].mode
;
13098 mode1
= insn_data
[icode
].operand
[2].mode
;
13100 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13101 op0
= copy_to_mode_reg (mode0
, op0
);
13102 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
13104 || GET_MODE (target
) != tmode
13105 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13106 target
= gen_reg_rtx (tmode
);
13107 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13113 case IX86_BUILTIN_STOREHPS
:
13114 case IX86_BUILTIN_STORELPS
:
13115 case IX86_BUILTIN_STOREHPD
:
13116 case IX86_BUILTIN_STORELPD
:
13117 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_movhps
13118 : fcode
== IX86_BUILTIN_STORELPS
? CODE_FOR_sse_movlps
13119 : fcode
== IX86_BUILTIN_STOREHPD
? CODE_FOR_sse2_movhpd
13120 : CODE_FOR_sse2_movsd
);
13121 arg0
= TREE_VALUE (arglist
);
13122 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13123 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13124 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13125 mode0
= insn_data
[icode
].operand
[1].mode
;
13126 mode1
= insn_data
[icode
].operand
[2].mode
;
13128 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
13129 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13130 op1
= copy_to_mode_reg (mode1
, op1
);
13132 pat
= GEN_FCN (icode
) (op0
, op0
, op1
);
13138 case IX86_BUILTIN_MOVNTPS
:
13139 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, arglist
);
13140 case IX86_BUILTIN_MOVNTQ
:
13141 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, arglist
);
13143 case IX86_BUILTIN_LDMXCSR
:
13144 op0
= expand_expr (TREE_VALUE (arglist
), NULL_RTX
, VOIDmode
, 0);
13145 target
= assign_386_stack_local (SImode
, 0);
13146 emit_move_insn (target
, op0
);
13147 emit_insn (gen_ldmxcsr (target
));
13150 case IX86_BUILTIN_STMXCSR
:
13151 target
= assign_386_stack_local (SImode
, 0);
13152 emit_insn (gen_stmxcsr (target
));
13153 return copy_to_mode_reg (SImode
, target
);
13155 case IX86_BUILTIN_SHUFPS
:
13156 case IX86_BUILTIN_SHUFPD
:
13157 icode
= (fcode
== IX86_BUILTIN_SHUFPS
13158 ? CODE_FOR_sse_shufps
13159 : CODE_FOR_sse2_shufpd
);
13160 arg0
= TREE_VALUE (arglist
);
13161 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13162 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
13163 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13164 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13165 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
13166 tmode
= insn_data
[icode
].operand
[0].mode
;
13167 mode0
= insn_data
[icode
].operand
[1].mode
;
13168 mode1
= insn_data
[icode
].operand
[2].mode
;
13169 mode2
= insn_data
[icode
].operand
[3].mode
;
13171 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13172 op0
= copy_to_mode_reg (mode0
, op0
);
13173 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13174 op1
= copy_to_mode_reg (mode1
, op1
);
13175 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
13177 /* @@@ better error message */
13178 error ("mask must be an immediate");
13179 return gen_reg_rtx (tmode
);
13182 || GET_MODE (target
) != tmode
13183 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13184 target
= gen_reg_rtx (tmode
);
13185 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
13191 case IX86_BUILTIN_PSHUFW
:
13192 case IX86_BUILTIN_PSHUFD
:
13193 case IX86_BUILTIN_PSHUFHW
:
13194 case IX86_BUILTIN_PSHUFLW
:
13195 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
13196 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
13197 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
13198 : CODE_FOR_mmx_pshufw
);
13199 arg0
= TREE_VALUE (arglist
);
13200 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13201 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13202 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13203 tmode
= insn_data
[icode
].operand
[0].mode
;
13204 mode1
= insn_data
[icode
].operand
[1].mode
;
13205 mode2
= insn_data
[icode
].operand
[2].mode
;
13207 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
13208 op0
= copy_to_mode_reg (mode1
, op0
);
13209 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
13211 /* @@@ better error message */
13212 error ("mask must be an immediate");
13216 || GET_MODE (target
) != tmode
13217 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13218 target
= gen_reg_rtx (tmode
);
13219 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13225 case IX86_BUILTIN_PSLLDQI128
:
13226 case IX86_BUILTIN_PSRLDQI128
:
13227 icode
= ( fcode
== IX86_BUILTIN_PSLLDQI128
? CODE_FOR_sse2_ashlti3
13228 : CODE_FOR_sse2_lshrti3
);
13229 arg0
= TREE_VALUE (arglist
);
13230 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13231 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13232 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13233 tmode
= insn_data
[icode
].operand
[0].mode
;
13234 mode1
= insn_data
[icode
].operand
[1].mode
;
13235 mode2
= insn_data
[icode
].operand
[2].mode
;
13237 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
13239 op0
= copy_to_reg (op0
);
13240 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
13242 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
13244 error ("shift must be an immediate");
13247 target
= gen_reg_rtx (V2DImode
);
13248 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, V2DImode
, 0), op0
, op1
);
13254 case IX86_BUILTIN_FEMMS
:
13255 emit_insn (gen_femms ());
13258 case IX86_BUILTIN_PAVGUSB
:
13259 return ix86_expand_binop_builtin (CODE_FOR_pavgusb
, arglist
, target
);
13261 case IX86_BUILTIN_PF2ID
:
13262 return ix86_expand_unop_builtin (CODE_FOR_pf2id
, arglist
, target
, 0);
13264 case IX86_BUILTIN_PFACC
:
13265 return ix86_expand_binop_builtin (CODE_FOR_pfacc
, arglist
, target
);
13267 case IX86_BUILTIN_PFADD
:
13268 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3
, arglist
, target
);
13270 case IX86_BUILTIN_PFCMPEQ
:
13271 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3
, arglist
, target
);
13273 case IX86_BUILTIN_PFCMPGE
:
13274 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3
, arglist
, target
);
13276 case IX86_BUILTIN_PFCMPGT
:
13277 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3
, arglist
, target
);
13279 case IX86_BUILTIN_PFMAX
:
13280 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3
, arglist
, target
);
13282 case IX86_BUILTIN_PFMIN
:
13283 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3
, arglist
, target
);
13285 case IX86_BUILTIN_PFMUL
:
13286 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3
, arglist
, target
);
13288 case IX86_BUILTIN_PFRCP
:
13289 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2
, arglist
, target
, 0);
13291 case IX86_BUILTIN_PFRCPIT1
:
13292 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3
, arglist
, target
);
13294 case IX86_BUILTIN_PFRCPIT2
:
13295 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3
, arglist
, target
);
13297 case IX86_BUILTIN_PFRSQIT1
:
13298 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3
, arglist
, target
);
13300 case IX86_BUILTIN_PFRSQRT
:
13301 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2
, arglist
, target
, 0);
13303 case IX86_BUILTIN_PFSUB
:
13304 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3
, arglist
, target
);
13306 case IX86_BUILTIN_PFSUBR
:
13307 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3
, arglist
, target
);
13309 case IX86_BUILTIN_PI2FD
:
13310 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2
, arglist
, target
, 0);
13312 case IX86_BUILTIN_PMULHRW
:
13313 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3
, arglist
, target
);
13315 case IX86_BUILTIN_PF2IW
:
13316 return ix86_expand_unop_builtin (CODE_FOR_pf2iw
, arglist
, target
, 0);
13318 case IX86_BUILTIN_PFNACC
:
13319 return ix86_expand_binop_builtin (CODE_FOR_pfnacc
, arglist
, target
);
13321 case IX86_BUILTIN_PFPNACC
:
13322 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc
, arglist
, target
);
13324 case IX86_BUILTIN_PI2FW
:
13325 return ix86_expand_unop_builtin (CODE_FOR_pi2fw
, arglist
, target
, 0);
13327 case IX86_BUILTIN_PSWAPDSI
:
13328 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2
, arglist
, target
, 0);
13330 case IX86_BUILTIN_PSWAPDSF
:
13331 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2
, arglist
, target
, 0);
13333 case IX86_BUILTIN_SSE_ZERO
:
13334 target
= gen_reg_rtx (V4SFmode
);
13335 emit_insn (gen_sse_clrv4sf (target
, CONST0_RTX (V4SFmode
)));
13338 case IX86_BUILTIN_MMX_ZERO
:
13339 target
= gen_reg_rtx (DImode
);
13340 emit_insn (gen_mmx_clrdi (target
));
13343 case IX86_BUILTIN_CLRTI
:
13344 target
= gen_reg_rtx (V2DImode
);
13345 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode
, target
, V2DImode
, 0)));
13349 case IX86_BUILTIN_SQRTSD
:
13350 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2
, arglist
, target
);
13351 case IX86_BUILTIN_LOADAPD
:
13352 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd
, arglist
, target
, 1);
13353 case IX86_BUILTIN_LOADUPD
:
13354 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, arglist
, target
, 1);
13356 case IX86_BUILTIN_STOREAPD
:
13357 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
13358 case IX86_BUILTIN_STOREUPD
:
13359 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, arglist
);
13361 case IX86_BUILTIN_LOADSD
:
13362 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd
, arglist
, target
, 1);
13364 case IX86_BUILTIN_STORESD
:
13365 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd
, arglist
);
13367 case IX86_BUILTIN_SETPD1
:
13368 target
= assign_386_stack_local (DFmode
, 0);
13369 arg0
= TREE_VALUE (arglist
);
13370 emit_move_insn (adjust_address (target
, DFmode
, 0),
13371 expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0));
13372 op0
= gen_reg_rtx (V2DFmode
);
13373 emit_insn (gen_sse2_loadsd (op0
, adjust_address (target
, V2DFmode
, 0)));
13374 emit_insn (gen_sse2_shufpd (op0
, op0
, op0
, const0_rtx
));
13377 case IX86_BUILTIN_SETPD
:
13378 target
= assign_386_stack_local (V2DFmode
, 0);
13379 arg0
= TREE_VALUE (arglist
);
13380 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13381 emit_move_insn (adjust_address (target
, DFmode
, 0),
13382 expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0));
13383 emit_move_insn (adjust_address (target
, DFmode
, 8),
13384 expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0));
13385 op0
= gen_reg_rtx (V2DFmode
);
13386 emit_insn (gen_sse2_movapd (op0
, target
));
13389 case IX86_BUILTIN_LOADRPD
:
13390 target
= ix86_expand_unop_builtin (CODE_FOR_sse2_movapd
, arglist
,
13391 gen_reg_rtx (V2DFmode
), 1);
13392 emit_insn (gen_sse2_shufpd (target
, target
, target
, const1_rtx
));
13395 case IX86_BUILTIN_LOADPD1
:
13396 target
= ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd
, arglist
,
13397 gen_reg_rtx (V2DFmode
), 1);
13398 emit_insn (gen_sse2_shufpd (target
, target
, target
, const0_rtx
));
13401 case IX86_BUILTIN_STOREPD1
:
13402 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
13403 case IX86_BUILTIN_STORERPD
:
13404 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
13406 case IX86_BUILTIN_CLRPD
:
13407 target
= gen_reg_rtx (V2DFmode
);
13408 emit_insn (gen_sse_clrv2df (target
));
13411 case IX86_BUILTIN_MFENCE
:
13412 emit_insn (gen_sse2_mfence ());
13414 case IX86_BUILTIN_LFENCE
:
13415 emit_insn (gen_sse2_lfence ());
13418 case IX86_BUILTIN_CLFLUSH
:
13419 arg0
= TREE_VALUE (arglist
);
13420 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13421 icode
= CODE_FOR_sse2_clflush
;
13422 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, Pmode
))
13423 op0
= copy_to_mode_reg (Pmode
, op0
);
13425 emit_insn (gen_sse2_clflush (op0
));
13428 case IX86_BUILTIN_MOVNTPD
:
13429 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, arglist
);
13430 case IX86_BUILTIN_MOVNTDQ
:
13431 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, arglist
);
13432 case IX86_BUILTIN_MOVNTI
:
13433 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, arglist
);
13435 case IX86_BUILTIN_LOADDQA
:
13436 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa
, arglist
, target
, 1);
13437 case IX86_BUILTIN_LOADDQU
:
13438 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu
, arglist
, target
, 1);
13439 case IX86_BUILTIN_LOADD
:
13440 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd
, arglist
, target
, 1);
13442 case IX86_BUILTIN_STOREDQA
:
13443 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa
, arglist
);
13444 case IX86_BUILTIN_STOREDQU
:
13445 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu
, arglist
);
13446 case IX86_BUILTIN_STORED
:
13447 return ix86_expand_store_builtin (CODE_FOR_sse2_stored
, arglist
);
13449 case IX86_BUILTIN_MONITOR
:
13450 arg0
= TREE_VALUE (arglist
);
13451 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13452 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
13453 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13454 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13455 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
13457 op0
= copy_to_mode_reg (SImode
, op0
);
13459 op1
= copy_to_mode_reg (SImode
, op1
);
13461 op2
= copy_to_mode_reg (SImode
, op2
);
13462 emit_insn (gen_monitor (op0
, op1
, op2
));
13465 case IX86_BUILTIN_MWAIT
:
13466 arg0
= TREE_VALUE (arglist
);
13467 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13468 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13469 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13471 op0
= copy_to_mode_reg (SImode
, op0
);
13473 op1
= copy_to_mode_reg (SImode
, op1
);
13474 emit_insn (gen_mwait (op0
, op1
));
13477 case IX86_BUILTIN_LOADDDUP
:
13478 return ix86_expand_unop_builtin (CODE_FOR_loadddup
, arglist
, target
, 1);
13480 case IX86_BUILTIN_LDDQU
:
13481 return ix86_expand_unop_builtin (CODE_FOR_lddqu
, arglist
, target
,
13488 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
13489 if (d
->code
== fcode
)
13491 /* Compares are treated specially. */
13492 if (d
->icode
== CODE_FOR_maskcmpv4sf3
13493 || d
->icode
== CODE_FOR_vmmaskcmpv4sf3
13494 || d
->icode
== CODE_FOR_maskncmpv4sf3
13495 || d
->icode
== CODE_FOR_vmmaskncmpv4sf3
13496 || d
->icode
== CODE_FOR_maskcmpv2df3
13497 || d
->icode
== CODE_FOR_vmmaskcmpv2df3
13498 || d
->icode
== CODE_FOR_maskncmpv2df3
13499 || d
->icode
== CODE_FOR_vmmaskncmpv2df3
)
13500 return ix86_expand_sse_compare (d
, arglist
, target
);
13502 return ix86_expand_binop_builtin (d
->icode
, arglist
, target
);
13505 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
13506 if (d
->code
== fcode
)
13507 return ix86_expand_unop_builtin (d
->icode
, arglist
, target
, 0);
13509 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
13510 if (d
->code
== fcode
)
13511 return ix86_expand_sse_comi (d
, arglist
, target
);
13513 /* @@@ Should really do something sensible here. */
13517 /* Store OPERAND to the memory after reload is completed. This means
13518 that we can't easily use assign_stack_local. */
13520 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
13523 if (!reload_completed
)
13525 if (TARGET_RED_ZONE
)
13527 result
= gen_rtx_MEM (mode
,
13528 gen_rtx_PLUS (Pmode
,
13530 GEN_INT (-RED_ZONE_SIZE
)));
13531 emit_move_insn (result
, operand
);
13533 else if (!TARGET_RED_ZONE
&& TARGET_64BIT
)
13539 operand
= gen_lowpart (DImode
, operand
);
13543 gen_rtx_SET (VOIDmode
,
13544 gen_rtx_MEM (DImode
,
13545 gen_rtx_PRE_DEC (DImode
,
13546 stack_pointer_rtx
)),
13552 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
13561 split_di (&operand
, 1, operands
, operands
+ 1);
13563 gen_rtx_SET (VOIDmode
,
13564 gen_rtx_MEM (SImode
,
13565 gen_rtx_PRE_DEC (Pmode
,
13566 stack_pointer_rtx
)),
13569 gen_rtx_SET (VOIDmode
,
13570 gen_rtx_MEM (SImode
,
13571 gen_rtx_PRE_DEC (Pmode
,
13572 stack_pointer_rtx
)),
13577 /* It is better to store HImodes as SImodes. */
13578 if (!TARGET_PARTIAL_REG_STALL
)
13579 operand
= gen_lowpart (SImode
, operand
);
13583 gen_rtx_SET (VOIDmode
,
13584 gen_rtx_MEM (GET_MODE (operand
),
13585 gen_rtx_PRE_DEC (SImode
,
13586 stack_pointer_rtx
)),
13592 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
/* NOTE(review): extraction-damaged fragment -- lines are split and some are
   missing (gaps at 13598, 13600, 13602-13604, 13606, 13608-13610, after
   13614: the size computation and closing braces).  Code kept byte-identical;
   comments only.  */
/* Purpose: release the stack slot obtained by ix86_force_to_memory.  The
   visible branches select a deallocation size based on mode/target (the
   actual size constants are in the missing lines).  */
13597 /* Free operand from the memory. */
13599 ix86_free_from_memory (enum machine_mode mode
)
/* Only needed when no red zone -- with a red zone nothing was pushed.  */
13601 if (!TARGET_RED_ZONE
)
13605 if (mode
== DImode
|| TARGET_64BIT
)
13607 else if (mode
== HImode
&& TARGET_PARTIAL_REG_STALL
)
13611 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13612 to pop or add instruction if registers are available. */
13613 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
13614 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
/* NOTE(review): extraction-damaged -- lines split mid-statement, and the
   return statements for several branches (e.g. after 13626, 13631, 13634,
   13647, 13650, 13652) are among the missing lines.  Code kept
   byte-identical; comments only.  */
/* Purpose (PREFERRED_RELOAD_CLASS implementation): narrow CLASS for
   reloading constant X -- constants that FP/SSE units cannot load directly
   are steered toward GENERAL_REGS or the constant pool.  */
13619 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13620 QImode must go into class Q_REGS.
13621 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
13622 movdf to do mem-to-mem moves through integer regs. */
13624 ix86_preferred_reload_class (rtx x
, enum reg_class
class)
/* Non-zero vector constants cannot be loaded directly.  */
13626 if (GET_CODE (x
) == CONST_VECTOR
&& x
!= CONST0_RTX (GET_MODE (x
)))
/* Floating-point CONST_DOUBLE (VOIDmode would mean an integer pair).  */
13628 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
13630 /* SSE can't load any constant directly yet. */
13631 if (SSE_CLASS_P (class))
13633 /* Floats can load 0 and 1. */
13634 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x
))
13636 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13637 if (MAYBE_SSE_CLASS_P (class))
13638 return (reg_class_subset_p (class, GENERAL_REGS
)
13639 ? GENERAL_REGS
: FLOAT_REGS
);
13643 /* General regs can load everything. */
13644 if (reg_class_subset_p (class, GENERAL_REGS
))
13645 return GENERAL_REGS
;
13646 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13647 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
/* MMX cannot load constants; QImode must be reloadable into Q_REGS.  */
13650 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x
))
13652 if (GET_MODE (x
) == QImode
&& ! reg_class_subset_p (class, Q_REGS
))
/* NOTE(review): extraction-damaged -- the function opening brace, the abort
   path after the sanity check (lines 13677-13682) and the closing brace are
   missing.  Code kept byte-identical; comments only.  */
/* Purpose (SECONDARY_MEMORY_NEEDED implementation): return nonzero when a
   copy between CLASS1 and CLASS2 in MODE must go through memory (integer
   vs x87/SSE/MMX unit crossings).  */
13657 /* If we are copying between general and FP registers, we need a memory
13658 location. The same is true for SSE and MMX registers.
13660 The macro can't work reliably when one of the CLASSES is class containing
13661 registers from multiple units (SSE, MMX, integer). We avoid this by never
13662 combining those units in single alternative in the machine description.
13663 Ensure that this constraint holds to avoid unexpected surprises.
13665 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13666 enforce these sanity checks. */
13668 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
13669 enum machine_mode mode
, int strict
)
/* Sanity check: neither class may mix units (the action taken when the
   check fires is in missing lines 13677-13682; presumably an abort guarded
   by STRICT -- confirm against full source).  */
13671 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
13672 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
13673 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
13674 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
13675 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
13676 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
/* Memory is needed for x87<->other moves always, and for SSE/MMX<->integer
   moves unless the mode fits an integer register and inter-unit moves are
   enabled (or we optimize for size).  */
13683 return (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
)
13684 || ((SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
)
13685 || MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
13686 && ((mode
!= SImode
&& (mode
!= DImode
|| !TARGET_64BIT
))
13687 || (!TARGET_INTER_UNIT_MOVES
&& !optimize_size
))));
/* NOTE(review): extraction-damaged -- declaration of `cost', several return
   statements (after 13703, 13715, 13721) and the closing brace are among
   the missing lines.  Code kept byte-identical; comments only.  */
/* Purpose (REGISTER_MOVE_COST implementation): estimate the cost of a
   register-to-register copy between CLASS1 and CLASS2 in MODE, using the
   per-processor ix86_cost tables.  */
13689 /* Return the cost of moving data from a register in class CLASS1 to
13690 one in class CLASS2.
13692 It is not required that the cost always equal 2 when FROM is the same as TO;
13693 on some machines it is expensive to move between registers if they are not
13694 general registers. */
13696 ix86_register_move_cost (enum machine_mode mode
, enum reg_class class1
,
13697 enum reg_class class2
)
13699 /* In case we require secondary memory, compute cost of the store followed
13700 by load. In order to avoid bad register allocation choices, we need
13701 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
13703 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
/* Symmetric store+load cost, taking the max of in/out directions for
   each side.  */
13707 cost
+= MAX (MEMORY_MOVE_COST (mode
, class1
, 0),
13708 MEMORY_MOVE_COST (mode
, class1
, 1));
13709 cost
+= MAX (MEMORY_MOVE_COST (mode
, class2
, 0),
13710 MEMORY_MOVE_COST (mode
, class2
, 1));
13712 /* In case of copying from general_purpose_register we may emit multiple
13713 stores followed by single load causing memory size mismatch stall.
13714 Count this as arbitrarily high cost of 20. */
13715 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
13718 /* In the case of FP/MMX moves, the registers actually overlap, and we
13719 have to switch modes in order to treat them differently. */
13720 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
13721 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
13727 /* Moves between SSE/MMX and integer unit are expensive. */
13728 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
13729 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
13730 return ix86_cost
->mmxsse_to_integer
;
/* Same-unit moves: use the per-unit move cost from the cost table.  */
13731 if (MAYBE_FLOAT_CLASS_P (class1
))
13732 return ix86_cost
->fp_move
;
13733 if (MAYBE_SSE_CLASS_P (class1
))
13734 return ix86_cost
->sse_move
;
13735 if (MAYBE_MMX_CLASS_P (class1
))
13736 return ix86_cost
->mmx_move
;
/* NOTE(review): extraction-damaged -- the `return 0' / `return 1' lines for
   several branches (13750, 13756, 13761, 13765) and the closing brace are
   missing.  Code kept byte-identical; comments only.  */
/* Purpose (HARD_REGNO_MODE_OK implementation): return nonzero if hard
   register REGNO may hold a value of MODE.  */
13740 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
13742 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
13744 /* Flags and only flags can only hold CCmode values. */
13745 if (CC_REGNO_P (regno
))
13746 return GET_MODE_CLASS (mode
) == MODE_CC
;
/* Non-flag registers reject CC/partial-int/random-class modes (the return
   for this test is a missing line).  */
13747 if (GET_MODE_CLASS (mode
) == MODE_CC
13748 || GET_MODE_CLASS (mode
) == MODE_RANDOM
13749 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
/* Per-unit validity: x87, SSE (gated on TARGET_SSE), MMX (gate line for
   MMX, presumably TARGET_MMX, is missing -- confirm against full source).  */
13751 if (FP_REGNO_P (regno
))
13752 return VALID_FP_MODE_P (mode
);
13753 if (SSE_REGNO_P (regno
))
13754 return (TARGET_SSE
? VALID_SSE_REG_MODE (mode
) : 0);
13755 if (MMX_REGNO_P (regno
))
13757 ? VALID_MMX_REG_MODE (mode
) || VALID_MMX_REG_MODE_3DNOW (mode
) : 0);
13758 /* We handle both integer and floats in the general purpose registers.
13759 In future we should be able to handle vector modes as well. */
13760 if (!VALID_INT_MODE_P (mode
) && !VALID_FP_MODE_P (mode
))
13762 /* Take care for QImode values - they can be in non-QI regs, but then
13763 they do cause partial register stalls. */
13764 if (regno
< 4 || mode
!= QImode
|| TARGET_64BIT
)
/* QImode in a non-Q register: only allow when partial-reg stalls are not
   a concern, or when reload needs it.  */
13766 return reload_in_progress
|| reload_completed
|| !TARGET_PARTIAL_REG_STALL
;
/* NOTE(review): extraction-damaged -- the `index' computations, all `case'
   labels of the size switches, and the 64-bit TFmode adjustment (line 13852)
   are among the missing lines.  Code kept byte-identical; comments only.  */
/* Purpose (MEMORY_MOVE_COST implementation): cost of moving MODE between a
   register of CLASS and memory; IN nonzero means a load, zero a store.  */
13769 /* Return the cost of moving data of mode M between a
13770 register and memory. A value of 2 is the default; this cost is
13771 relative to those in `REGISTER_MOVE_COST'.
13773 If moving between registers and memory is more expensive than
13774 between two registers, you should define this macro to express the
13777 Model also increased moving costs of QImode registers in non
13781 ix86_memory_move_cost (enum machine_mode mode
, enum reg_class
class, int in
)
/* x87: index into fp_load/fp_store by mode (index computation missing).  */
13783 if (FLOAT_CLASS_P (class))
13800 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
/* SSE: index by GET_MODE_SIZE (case labels missing).  */
13802 if (SSE_CLASS_P (class))
13805 switch (GET_MODE_SIZE (mode
))
13819 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
/* MMX: likewise.  */
13821 if (MMX_CLASS_P (class))
13824 switch (GET_MODE_SIZE (mode
))
13835 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
/* Integer registers: special-case byte-sized values, which need movzbl
   (load) or suffer a partial-store penalty outside Q_REGS.  */
13837 switch (GET_MODE_SIZE (mode
))
13841 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
13842 : ix86_cost
->movzbl_load
);
13844 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
13845 : ix86_cost
->int_store
[0] + 4);
13848 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
13850 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
13851 if (mode
== TFmode
)
13853 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
13854 * (((int) GET_MODE_SIZE (mode
)
13855 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
/* NOTE(review): extraction-damaged -- this large switch has lost all of its
   `case' labels, most `return'/`break' statements and braces (see the many
   gaps in the embedded line numbers).  Code kept byte-identical; comments
   only annotate what is visible, keyed to the surviving expressions.  */
/* Purpose (TARGET_RTX_COSTS hook): estimate the cost of rtx X, writing the
   result through *TOTAL and returning whether subexpressions still need to
   be scanned.  */
13859 /* Compute a (partial) cost for rtx X. Return true if the complete
13860 cost has been computed, and false if subexpressions should be
13861 scanned. In either case, *TOTAL contains the cost result. */
13864 ix86_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
13866 enum machine_mode mode
= GET_MODE (x
);
/* Constant / symbolic operand costs.  */
13874 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
13876 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
/* NOTE(review): `!GET_CODE (x) != LABEL_REF' below looks like a bug -- the
   leading `!' makes the left side 0/1 before the comparison, so this is
   almost always true.  Presumably `GET_CODE (x) != LABEL_REF' was intended
   (this was fixed in later GCC revisions) -- confirm before relying on it.
   Not changed here because this is a comment-only pass over damaged text.  */
13878 else if (flag_pic
&& SYMBOLIC_CONST (x
)
13880 || (!GET_CODE (x
) != LABEL_REF
13881 && (GET_CODE (x
) != SYMBOL_REF
13882 || !SYMBOL_REF_LOCAL_P (x
)))))
/* FP constant costs: VOIDmode means an integer CONST_DOUBLE; otherwise
   consult the 80387 special-constant table (case bodies missing).  */
13889 if (mode
== VOIDmode
)
13892 switch (standard_80387_constant_p (x
))
13897 default: /* Other constants */
13902 /* Start with (MEM (SYMBOL_REF)), since that's where
13903 it'll probably end up. Add a penalty for size. */
13904 *total
= (COSTS_N_INSNS (1)
13905 + (flag_pic
!= 0 && !TARGET_64BIT
)
13906 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
/* ZERO_EXTEND case.  */
13912 /* The zero extensions is often completely free on x86_64, so make
13913 it as cheap as possible. */
13914 if (TARGET_64BIT
&& mode
== DImode
13915 && GET_MODE (XEXP (x
, 0)) == SImode
)
13917 else if (TARGET_ZERO_EXTEND_WITH_AND
)
13918 *total
= COSTS_N_INSNS (ix86_cost
->add
);
13920 *total
= COSTS_N_INSNS (ix86_cost
->movzx
);
/* SIGN_EXTEND case.  */
13924 *total
= COSTS_N_INSNS (ix86_cost
->movsx
);
/* ASHIFT by a constant: shift-by-1 may be as cheap as an add, and small
   shifts may be done with lea.  */
13928 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
13929 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
13931 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
13934 *total
= COSTS_N_INSNS (ix86_cost
->add
);
13937 if ((value
== 2 || value
== 3)
13938 && ix86_cost
->lea
<= ix86_cost
->shift_const
)
13940 *total
= COSTS_N_INSNS (ix86_cost
->lea
);
/* General shift/rotate costs; 32-bit DImode shifts are double-width.  */
13950 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
13952 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
13954 if (INTVAL (XEXP (x
, 1)) > 32)
13955 *total
= COSTS_N_INSNS(ix86_cost
->shift_const
+ 2);
13957 *total
= COSTS_N_INSNS(ix86_cost
->shift_const
* 2);
13961 if (GET_CODE (XEXP (x
, 1)) == AND
)
13962 *total
= COSTS_N_INSNS(ix86_cost
->shift_var
* 2);
13964 *total
= COSTS_N_INSNS(ix86_cost
->shift_var
* 6 + 2);
13969 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
13970 *total
= COSTS_N_INSNS (ix86_cost
->shift_const
);
13972 *total
= COSTS_N_INSNS (ix86_cost
->shift_var
);
/* MULT case: FP multiply, else integer multiply with per-bit cost and a
   widening-multiply adjustment.  */
13977 if (FLOAT_MODE_P (mode
))
13979 *total
= COSTS_N_INSNS (ix86_cost
->fmul
);
13984 rtx op0
= XEXP (x
, 0);
13985 rtx op1
= XEXP (x
, 1);
/* nbits = population count of the constant multiplier (declaration of
   nbits is among the missing lines).  */
13987 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
13989 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
13990 for (nbits
= 0; value
!= 0; value
&= value
- 1)
13994 /* This is arbitrary. */
13997 /* Compute costs correctly for widening multiplication. */
13998 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op1
) == ZERO_EXTEND
)
13999 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
14000 == GET_MODE_SIZE (mode
))
14002 int is_mulwiden
= 0;
14003 enum machine_mode inner_mode
= GET_MODE (op0
);
14005 if (GET_CODE (op0
) == GET_CODE (op1
))
14006 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
14007 else if (GET_CODE (op1
) == CONST_INT
)
14009 if (GET_CODE (op0
) == SIGN_EXTEND
)
14010 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
14013 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
/* When widening, cost the multiply in the narrower inner mode.  */
14017 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
14020 *total
= COSTS_N_INSNS (ix86_cost
->mult_init
[MODE_INDEX (mode
)]
14021 + nbits
* ix86_cost
->mult_bit
)
14022 + rtx_cost (op0
, outer_code
) + rtx_cost (op1
, outer_code
);
/* DIV/MOD case.  */
14031 if (FLOAT_MODE_P (mode
))
14032 *total
= COSTS_N_INSNS (ix86_cost
->fdiv
);
14034 *total
= COSTS_N_INSNS (ix86_cost
->divide
[MODE_INDEX (mode
)]);
/* PLUS case: recognize lea-able address forms (base + index*scale + disp)
   and cost them as a single lea.  */
14038 if (FLOAT_MODE_P (mode
))
14039 *total
= COSTS_N_INSNS (ix86_cost
->fadd
);
14040 else if (GET_MODE_CLASS (mode
) == MODE_INT
14041 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
14043 if (GET_CODE (XEXP (x
, 0)) == PLUS
14044 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
14045 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == CONST_INT
14046 && CONSTANT_P (XEXP (x
, 1)))
14048 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
14049 if (val
== 2 || val
== 4 || val
== 8)
14051 *total
= COSTS_N_INSNS (ix86_cost
->lea
);
14052 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
14053 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
14055 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
14059 else if (GET_CODE (XEXP (x
, 0)) == MULT
14060 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)
14062 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
14063 if (val
== 2 || val
== 4 || val
== 8)
14065 *total
= COSTS_N_INSNS (ix86_cost
->lea
);
14066 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
14067 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
14071 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
14073 *total
= COSTS_N_INSNS (ix86_cost
->lea
);
14074 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
14075 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
14076 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
/* MINUS case (falls into PLUS handling in the full source).  */
14083 if (FLOAT_MODE_P (mode
))
14085 *total
= COSTS_N_INSNS (ix86_cost
->fadd
);
/* AND/IOR/XOR on 32-bit DImode: two single-word ops, doubling the cost of
   any operand that is itself DImode.  */
14093 if (!TARGET_64BIT
&& mode
== DImode
)
14095 *total
= (COSTS_N_INSNS (ix86_cost
->add
) * 2
14096 + (rtx_cost (XEXP (x
, 0), outer_code
)
14097 << (GET_MODE (XEXP (x
, 0)) != DImode
))
14098 + (rtx_cost (XEXP (x
, 1), outer_code
)
14099 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
/* NEG case.  */
14105 if (FLOAT_MODE_P (mode
))
14107 *total
= COSTS_N_INSNS (ix86_cost
->fchs
);
14113 if (!TARGET_64BIT
&& mode
== DImode
)
14114 *total
= COSTS_N_INSNS (ix86_cost
->add
* 2);
14116 *total
= COSTS_N_INSNS (ix86_cost
->add
);
/* FLOAT_EXTEND (presumably): free only under SSE math.  */
14120 if (!TARGET_SSE_MATH
|| !VALID_SSE_REG_MODE (mode
))
/* ABS case.  */
14125 if (FLOAT_MODE_P (mode
))
14126 *total
= COSTS_N_INSNS (ix86_cost
->fabs
);
/* SQRT case.  */
14130 if (FLOAT_MODE_P (mode
))
14131 *total
= COSTS_N_INSNS (ix86_cost
->fsqrt
);
/* UNSPEC case: thread-pointer access.  */
14135 if (XINT (x
, 1) == UNSPEC_TP
)
/* NOTE(review): extraction-damaged fragment -- the function's return type,
   braces and the matching #endif are missing.  Code kept byte-identical;
   comments only.  */
/* Purpose: SVR3 constructor output -- emit `pushl $<symbol>' into the init
   section so DO_GLOBAL_CTORS_BODY can pop and call each constructor.  */
14144 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
14146 ix86_svr3_asm_out_constructor (rtx symbol
, int priority ATTRIBUTE_UNUSED
)
14149 fputs ("\tpushl $", asm_out_file
);
14150 assemble_name (asm_out_file
, XSTR (symbol
, 0));
14151 fputc ('\n', asm_out_file
);
/* NOTE(review): extraction-damaged -- the opening #if TARGET_MACHO guard,
   braces, and the MACHOPIC_PURE/indirect conditionals that select between
   the PIC and non-PIC instruction sequences are among the missing lines
   (only both branches' fprintf bodies survive).  Code kept byte-identical;
   comments only.  */
/* Counter used to generate unique local labels (L<n>$lz, LPC$<n>) for each
   emitted stub.  */
14157 static int current_machopic_label_num
;
/* Purpose: emit a Darwin/Mach-O lazy symbol stub for SYMB into FILE: the
   stub code, its binder entry that jumps to dyld_stub_binding_helper, and
   the lazy pointer that initially points at the binder.  */
14159 /* Given a symbol name and its associated stub, write out the
14160 definition of the stub. */
14163 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
14165 unsigned int length
;
14166 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
14167 int label
= ++current_machopic_label_num
;
14169 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
14170 symb
= (*targetm
.strip_name_encoding
) (symb
);
/* Build the binder and symbol names in stack buffers (alloca'd with 32
   bytes of slack for the decoration added by the GEN_* macros).  */
14172 length
= strlen (stub
);
14173 binder_name
= alloca (length
+ 32);
14174 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
14176 length
= strlen (symb
);
14177 symbol_name
= alloca (length
+ 32);
14178 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
14180 sprintf (lazy_ptr_name
, "L%d$lz", label
);
/* Pick the stub section (PIC vs non-PIC; the selecting conditional is a
   missing line).  */
14183 machopic_picsymbol_stub_section ();
14185 machopic_symbol_stub_section ();
14187 fprintf (file
, "%s:\n", stub
);
14188 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
/* PIC stub body: compute the pic base via call/pop, load the lazy pointer
   pc-relatively, jump through it.  */
14192 fprintf (file
, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label
, label
);
14193 fprintf (file
, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name
, label
);
14194 fprintf (file
, "\tjmp %%edx\n");
/* Non-PIC stub body: indirect jump through the lazy pointer.  */
14197 fprintf (file
, "\tjmp *%s\n", lazy_ptr_name
);
/* Binder entry: push the lazy pointer's address and enter dyld.  */
14199 fprintf (file
, "%s:\n", binder_name
);
14203 fprintf (file
, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name
, label
);
14204 fprintf (file
, "\tpushl %%eax\n");
14207 fprintf (file
, "\t pushl $%s\n", lazy_ptr_name
);
14209 fprintf (file
, "\tjmp dyld_stub_binding_helper\n");
/* Lazy pointer: starts out pointing at the binder; dyld rewrites it to the
   resolved symbol on first use.  */
14211 machopic_lazy_symbol_ptr_section ();
14212 fprintf (file
, "%s:\n", lazy_ptr_name
);
14213 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
14214 fprintf (file
, "\t.long %s\n", binder_name
);
14216 #endif /* TARGET_MACHO */
14218 /* Order the registers for register allocator. */
14221 x86_order_regs_for_local_alloc (void)
14226 /* First allocate the local general purpose registers. */
14227 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
14228 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
14229 reg_alloc_order
[pos
++] = i
;
14231 /* Global general purpose registers. */
14232 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
14233 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
14234 reg_alloc_order
[pos
++] = i
;
14236 /* x87 registers come first in case we are doing FP math
14238 if (!TARGET_SSE_MATH
)
14239 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
14240 reg_alloc_order
[pos
++] = i
;
14242 /* SSE registers. */
14243 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
14244 reg_alloc_order
[pos
++] = i
;
14245 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
14246 reg_alloc_order
[pos
++] = i
;
14248 /* x87 registers. */
14249 if (TARGET_SSE_MATH
)
14250 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
14251 reg_alloc_order
[pos
++] = i
;
14253 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
14254 reg_alloc_order
[pos
++] = i
;
14256 /* Initialize the rest of array as we do not allocate some registers
14258 while (pos
< FIRST_PSEUDO_REGISTER
)
14259 reg_alloc_order
[pos
++] = 0;
14262 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
14263 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
14266 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
14267 struct attribute_spec.handler. */
14269 ix86_handle_struct_attribute (tree
*node
, tree name
,
14270 tree args ATTRIBUTE_UNUSED
,
14271 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
14274 if (DECL_P (*node
))
14276 if (TREE_CODE (*node
) == TYPE_DECL
)
14277 type
= &TREE_TYPE (*node
);
14282 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
14283 || TREE_CODE (*type
) == UNION_TYPE
)))
14285 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name
));
14286 *no_add_attrs
= true;
14289 else if ((is_attribute_p ("ms_struct", name
)
14290 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
14291 || ((is_attribute_p ("gcc_struct", name
)
14292 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
14294 warning ("`%s' incompatible attribute ignored",
14295 IDENTIFIER_POINTER (name
));
14296 *no_add_attrs
= true;
14303 ix86_ms_bitfield_layout_p (tree record_type
)
14305 return (TARGET_USE_MS_BITFIELD_LAYOUT
&&
14306 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
14307 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
14310 /* Returns an expression indicating where the this parameter is
14311 located on entry to the FUNCTION. */
14314 x86_this_parameter (tree function
)
14316 tree type
= TREE_TYPE (function
);
14320 int n
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
14321 return gen_rtx_REG (DImode
, x86_64_int_parameter_registers
[n
]);
14324 if (ix86_function_regparm (type
, function
) > 0)
14328 parm
= TYPE_ARG_TYPES (type
);
14329 /* Figure out whether or not the function has a variable number of
14331 for (; parm
; parm
= TREE_CHAIN (parm
))
14332 if (TREE_VALUE (parm
) == void_type_node
)
14334 /* If not, the this parameter is in the first argument. */
14338 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
14340 return gen_rtx_REG (SImode
, regno
);
14344 if (aggregate_value_p (TREE_TYPE (type
), type
))
14345 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 8));
14347 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 4));
14350 /* Determine whether x86_output_mi_thunk can succeed. */
14353 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED
,
14354 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
14355 HOST_WIDE_INT vcall_offset
, tree function
)
14357 /* 64-bit can handle anything. */
14361 /* For 32-bit, everything's fine if we have one free register. */
14362 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
14365 /* Need a free register for vcall_offset. */
14369 /* Need a free register for GOT references. */
14370 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
14373 /* Otherwise ok. */
14377 /* Output the assembler code for a thunk function. THUNK_DECL is the
14378 declaration for the thunk function itself, FUNCTION is the decl for
14379 the target function. DELTA is an immediate constant offset to be
14380 added to THIS. If VCALL_OFFSET is nonzero, the word at
14381 *(*this + vcall_offset) should be added to THIS. */
14384 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED
,
14385 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
14386 HOST_WIDE_INT vcall_offset
, tree function
)
14389 rtx
this = x86_this_parameter (function
);
14392 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
14393 pull it in now and let DELTA benefit. */
14396 else if (vcall_offset
)
14398 /* Put the this parameter into %eax. */
14400 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, 0);
14401 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
14404 this_reg
= NULL_RTX
;
14406 /* Adjust the this parameter by a fixed constant. */
14409 xops
[0] = GEN_INT (delta
);
14410 xops
[1] = this_reg
? this_reg
: this;
14413 if (!x86_64_general_operand (xops
[0], DImode
))
14415 tmp
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 2 /* R10 */);
14417 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
14421 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
14424 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
14427 /* Adjust the this parameter by a value stored in the vtable. */
14431 tmp
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 2 /* R10 */);
14434 int tmp_regno
= 2 /* ECX */;
14435 if (lookup_attribute ("fastcall",
14436 TYPE_ATTRIBUTES (TREE_TYPE (function
))))
14437 tmp_regno
= 0 /* EAX */;
14438 tmp
= gen_rtx_REG (SImode
, tmp_regno
);
14441 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
14444 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
14446 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
14448 /* Adjust the this parameter. */
14449 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
14450 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
14452 rtx tmp2
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
14453 xops
[0] = GEN_INT (vcall_offset
);
14455 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
14456 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
14458 xops
[1] = this_reg
;
14460 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
14462 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
14465 /* If necessary, drop THIS back to its stack slot. */
14466 if (this_reg
&& this_reg
!= this)
14468 xops
[0] = this_reg
;
14470 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
14473 xops
[0] = XEXP (DECL_RTL (function
), 0);
14476 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
14477 output_asm_insn ("jmp\t%P0", xops
);
14480 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, xops
[0]), UNSPEC_GOTPCREL
);
14481 tmp
= gen_rtx_CONST (Pmode
, tmp
);
14482 tmp
= gen_rtx_MEM (QImode
, tmp
);
14484 output_asm_insn ("jmp\t%A0", xops
);
14489 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
14490 output_asm_insn ("jmp\t%P0", xops
);
14495 rtx sym_ref
= XEXP (DECL_RTL (function
), 0);
14496 tmp
= (gen_rtx_SYMBOL_REF
14498 machopic_indirection_name (sym_ref
, /*stub_p=*/true)));
14499 tmp
= gen_rtx_MEM (QImode
, tmp
);
14501 output_asm_insn ("jmp\t%0", xops
);
14504 #endif /* TARGET_MACHO */
14506 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
14507 output_set_got (tmp
);
14510 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
14511 output_asm_insn ("jmp\t{*}%1", xops
);
14517 x86_file_start (void)
14519 default_file_start ();
14520 if (X86_FILE_START_VERSION_DIRECTIVE
)
14521 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
14522 if (X86_FILE_START_FLTUSED
)
14523 fputs ("\t.global\t__fltused\n", asm_out_file
);
14524 if (ix86_asm_dialect
== ASM_INTEL
)
14525 fputs ("\t.intel_syntax\n", asm_out_file
);
14529 x86_field_alignment (tree field
, int computed
)
14531 enum machine_mode mode
;
14532 tree type
= TREE_TYPE (field
);
14534 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
14536 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
14537 ? get_inner_array_type (type
) : type
);
14538 if (mode
== DFmode
|| mode
== DCmode
14539 || GET_MODE_CLASS (mode
) == MODE_INT
14540 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
14541 return MIN (32, computed
);
14545 /* Output assembler code to FILE to increment profiler label # LABELNO
14546 for profiling a function entry. */
14548 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
14553 #ifndef NO_PROFILE_COUNTERS
14554 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
14556 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
14560 #ifndef NO_PROFILE_COUNTERS
14561 fprintf (file
, "\tmovq\t$%sP%d,%%r11\n", LPREFIX
, labelno
);
14563 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
14567 #ifndef NO_PROFILE_COUNTERS
14568 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
14569 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
14571 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
14575 #ifndef NO_PROFILE_COUNTERS
14576 fprintf (file
, "\tmovl\t$%sP%d,%%%s\n", LPREFIX
, labelno
,
14577 PROFILE_COUNT_REGISTER
);
14579 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
14583 /* We don't have exact information about the insn sizes, but we may assume
14584 quite safely that we are informed about all 1 byte insns and memory
14585 address sizes. This is enough to eliminate unnecessary padding in
14589 min_insn_size (rtx insn
)
14593 if (!INSN_P (insn
) || !active_insn_p (insn
))
14596 /* Discard alignments we've emit and jump instructions. */
14597 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
14598 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
14600 if (GET_CODE (insn
) == JUMP_INSN
14601 && (GET_CODE (PATTERN (insn
)) == ADDR_VEC
14602 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
))
14605 /* Important case - calls are always 5 bytes.
14606 It is common to have many calls in the row. */
14607 if (GET_CODE (insn
) == CALL_INSN
14608 && symbolic_reference_mentioned_p (PATTERN (insn
))
14609 && !SIBLING_CALL_P (insn
))
14611 if (get_attr_length (insn
) <= 1)
14614 /* For normal instructions we may rely on the sizes of addresses
14615 and the presence of symbol to require 4 bytes of encoding.
14616 This is not the case for jumps where references are PC relative. */
14617 if (GET_CODE (insn
) != JUMP_INSN
)
14619 l
= get_attr_length_address (insn
);
14620 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
14629 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
14633 ix86_avoid_jump_misspredicts (void)
14635 rtx insn
, start
= get_insns ();
14636 int nbytes
= 0, njumps
= 0;
14639 /* Look for all minimal intervals of instructions containing 4 jumps.
14640 The intervals are bounded by START and INSN. NBYTES is the total
14641 size of instructions in the interval including INSN and not including
14642 START. When the NBYTES is smaller than 16 bytes, it is possible
14643 that the end of START and INSN ends up in the same 16byte page.
14645 The smallest offset in the page INSN can start is the case where START
14646 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
14647 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
14649 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
14652 nbytes
+= min_insn_size (insn
);
14654 fprintf(dump_file
, "Insn %i estimated to %i bytes\n",
14655 INSN_UID (insn
), min_insn_size (insn
));
14656 if ((GET_CODE (insn
) == JUMP_INSN
14657 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
14658 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
14659 || GET_CODE (insn
) == CALL_INSN
)
14666 start
= NEXT_INSN (start
);
14667 if ((GET_CODE (start
) == JUMP_INSN
14668 && GET_CODE (PATTERN (start
)) != ADDR_VEC
14669 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
14670 || GET_CODE (start
) == CALL_INSN
)
14671 njumps
--, isjump
= 1;
14674 nbytes
-= min_insn_size (start
);
14679 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
14680 INSN_UID (start
), INSN_UID (insn
), nbytes
);
14682 if (njumps
== 3 && isjump
&& nbytes
< 16)
14684 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
14687 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
14688 INSN_UID (insn
), padsize
);
14689 emit_insn_before (gen_align (GEN_INT (padsize
)), insn
);
14694 /* AMD Athlon works faster
14695 when RET is not destination of conditional jump or directly preceded
14696 by other jump instruction. We avoid the penalty by inserting NOP just
14697 before the RET instructions in such cases. */
14699 ix86_pad_returns (void)
14703 for (e
= EXIT_BLOCK_PTR
->pred
; e
; e
= e
->pred_next
)
14705 basic_block bb
= e
->src
;
14706 rtx ret
= BB_END (bb
);
14708 bool replace
= false;
14710 if (GET_CODE (ret
) != JUMP_INSN
|| GET_CODE (PATTERN (ret
)) != RETURN
14711 || !maybe_hot_bb_p (bb
))
14713 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
14714 if (active_insn_p (prev
) || GET_CODE (prev
) == CODE_LABEL
)
14716 if (prev
&& GET_CODE (prev
) == CODE_LABEL
)
14719 for (e
= bb
->pred
; e
; e
= e
->pred_next
)
14720 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
14721 && !(e
->flags
& EDGE_FALLTHRU
))
14726 prev
= prev_active_insn (ret
);
14728 && ((GET_CODE (prev
) == JUMP_INSN
&& any_condjump_p (prev
))
14729 || GET_CODE (prev
) == CALL_INSN
))
14731 /* Empty functions get branch mispredict even when the jump destination
14732 is not visible to us. */
14733 if (!prev
&& cfun
->function_frequency
> FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
14738 emit_insn_before (gen_return_internal_long (), ret
);
14744 /* Implement machine specific optimizations. We implement padding of returns
14745 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
14749 if (TARGET_ATHLON_K8
&& optimize
&& !optimize_size
)
14750 ix86_pad_returns ();
14751 if (TARGET_FOUR_JUMP_LIMIT
&& optimize
&& !optimize_size
)
14752 ix86_avoid_jump_misspredicts ();
14755 /* Return nonzero when QImode register that must be represented via REX prefix
14758 x86_extended_QIreg_mentioned_p (rtx insn
)
14761 extract_insn_cached (insn
);
14762 for (i
= 0; i
< recog_data
.n_operands
; i
++)
14763 if (REG_P (recog_data
.operand
[i
])
14764 && REGNO (recog_data
.operand
[i
]) >= 4)
14769 /* Return nonzero when P points to register encoded via REX prefix.
14770 Called via for_each_rtx. */
14772 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
14774 unsigned int regno
;
14777 regno
= REGNO (*p
);
14778 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
14781 /* Return true when INSN mentions register that must be encoded using REX
14784 x86_extended_reg_mentioned_p (rtx insn
)
14786 return for_each_rtx (&PATTERN (insn
), extended_reg_mentioned_1
, NULL
);
14789 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
14790 optabs would emit if we didn't have TFmode patterns. */
14793 x86_emit_floatuns (rtx operands
[2])
14795 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
14796 enum machine_mode mode
, inmode
;
14798 inmode
= GET_MODE (operands
[1]);
14799 if (inmode
!= SImode
14800 && inmode
!= DImode
)
14804 in
= force_reg (inmode
, operands
[1]);
14805 mode
= GET_MODE (out
);
14806 neglab
= gen_label_rtx ();
14807 donelab
= gen_label_rtx ();
14808 i1
= gen_reg_rtx (Pmode
);
14809 f0
= gen_reg_rtx (mode
);
14811 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, Pmode
, 0, neglab
);
14813 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_FLOAT (mode
, in
)));
14814 emit_jump_insn (gen_jump (donelab
));
14817 emit_label (neglab
);
14819 i0
= expand_simple_binop (Pmode
, LSHIFTRT
, in
, const1_rtx
, NULL
, 1, OPTAB_DIRECT
);
14820 i1
= expand_simple_binop (Pmode
, AND
, in
, const1_rtx
, NULL
, 1, OPTAB_DIRECT
);
14821 i0
= expand_simple_binop (Pmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
14822 expand_float (f0
, i0
, 0);
14823 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
14825 emit_label (donelab
);
14828 /* Initialize vector TARGET via VALS. */
14830 ix86_expand_vector_init (rtx target
, rtx vals
)
14832 enum machine_mode mode
= GET_MODE (target
);
14833 int elt_size
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
14834 int n_elts
= (GET_MODE_SIZE (mode
) / elt_size
);
14837 for (i
= n_elts
- 1; i
>= 0; i
--)
14838 if (GET_CODE (XVECEXP (vals
, 0, i
)) != CONST_INT
14839 && GET_CODE (XVECEXP (vals
, 0, i
)) != CONST_DOUBLE
)
14842 /* Few special cases first...
14843 ... constants are best loaded from constant pool. */
14846 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
14850 /* ... values where only first field is non-constant are best loaded
14851 from the pool and overwritten via move later. */
14854 rtx op
= simplify_gen_subreg (mode
, XVECEXP (vals
, 0, 0),
14855 GET_MODE_INNER (mode
), 0);
14857 op
= force_reg (mode
, op
);
14858 XVECEXP (vals
, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode
));
14859 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
14860 switch (GET_MODE (target
))
14863 emit_insn (gen_sse2_movsd (target
, target
, op
));
14866 emit_insn (gen_sse_movss (target
, target
, op
));
14874 /* And the busy sequence doing rotations. */
14875 switch (GET_MODE (target
))
14880 simplify_gen_subreg (V2DFmode
, XVECEXP (vals
, 0, 0), DFmode
, 0);
14882 simplify_gen_subreg (V2DFmode
, XVECEXP (vals
, 0, 1), DFmode
, 0);
14884 vecop0
= force_reg (V2DFmode
, vecop0
);
14885 vecop1
= force_reg (V2DFmode
, vecop1
);
14886 emit_insn (gen_sse2_unpcklpd (target
, vecop0
, vecop1
));
14892 simplify_gen_subreg (V4SFmode
, XVECEXP (vals
, 0, 0), SFmode
, 0);
14894 simplify_gen_subreg (V4SFmode
, XVECEXP (vals
, 0, 1), SFmode
, 0);
14896 simplify_gen_subreg (V4SFmode
, XVECEXP (vals
, 0, 2), SFmode
, 0);
14898 simplify_gen_subreg (V4SFmode
, XVECEXP (vals
, 0, 3), SFmode
, 0);
14899 rtx tmp1
= gen_reg_rtx (V4SFmode
);
14900 rtx tmp2
= gen_reg_rtx (V4SFmode
);
14902 vecop0
= force_reg (V4SFmode
, vecop0
);
14903 vecop1
= force_reg (V4SFmode
, vecop1
);
14904 vecop2
= force_reg (V4SFmode
, vecop2
);
14905 vecop3
= force_reg (V4SFmode
, vecop3
);
14906 emit_insn (gen_sse_unpcklps (tmp1
, vecop1
, vecop3
));
14907 emit_insn (gen_sse_unpcklps (tmp2
, vecop0
, vecop2
));
14908 emit_insn (gen_sse_unpcklps (target
, tmp2
, tmp1
));
14916 /* Worker function for TARGET_MD_ASM_CLOBBERS.
14918 We do this in the new i386 backend to maintain source compatibility
14919 with the old cc0-based compiler. */
14922 ix86_md_asm_clobbers (tree clobbers
)
14924 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
14926 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
14928 clobbers
= tree_cons (NULL_TREE
, build_string (7, "dirflag"),
14933 /* Worker function for REVERSE_CONDITION. */
14936 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
14938 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
14939 ? reverse_condition (code
)
14940 : reverse_condition_maybe_unordered (code
));
14943 /* Output code to perform an x87 FP register move, from OPERANDS[1]
14947 output_387_reg_move (rtx insn
, rtx
*operands
)
14949 if (REG_P (operands
[1])
14950 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
14952 if (REGNO (operands
[0]) == FIRST_STACK_REG
14953 && TARGET_USE_FFREEP
)
14954 return "ffreep\t%y0";
14955 return "fstp\t%y0";
14957 if (STACK_TOP_P (operands
[0]))
14958 return "fld%z1\t%y1";
14962 /* Output code to perform a conditional jump to LABEL, if C2 flag in
14963 FP status register is set. */
14966 ix86_emit_fp_unordered_jump (rtx label
)
14968 rtx reg
= gen_reg_rtx (HImode
);
14971 emit_insn (gen_x86_fnstsw_1 (reg
));
14973 if (TARGET_USE_SAHF
)
14975 emit_insn (gen_x86_sahf_1 (reg
));
14977 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
14978 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
14982 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
14984 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
14985 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
14988 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
14989 gen_rtx_LABEL_REF (VOIDmode
, label
),
14991 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
14992 emit_jump_insn (temp
);
14995 /* Output code to perform a log1p XFmode calculation. */
14997 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
14999 rtx label1
= gen_label_rtx ();
15000 rtx label2
= gen_label_rtx ();
15002 rtx tmp
= gen_reg_rtx (XFmode
);
15003 rtx tmp2
= gen_reg_rtx (XFmode
);
15005 emit_insn (gen_absxf2 (tmp
, op1
));
15006 emit_insn (gen_cmpxf (tmp
,
15007 CONST_DOUBLE_FROM_REAL_VALUE (
15008 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
15010 emit_jump_insn (gen_bge (label1
));
15012 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
15013 emit_insn (gen_fyl2xp1_xf3 (op0
, tmp2
, op1
));
15014 emit_jump (label2
);
15016 emit_label (label1
);
15017 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
15018 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
15019 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
15020 emit_insn (gen_fyl2x_xf3 (op0
, tmp2
, tmp
));
15022 emit_label (label2
);
15025 #include "gt-i386.h"