1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #ifndef CHECK_STACK_LIMIT
53 #define CHECK_STACK_LIMIT (-1)
56 /* Return index of given mode in mult and division cost tables. */
57 #define MODE_INDEX(mode) \
58 ((mode) == QImode ? 0 \
59 : (mode) == HImode ? 1 \
60 : (mode) == SImode ? 2 \
61 : (mode) == DImode ? 3 \
64 /* Processor costs (relative to an add) */
66 struct processor_costs size_cost
= { /* costs for tuning for size */
67 2, /* cost of an add instruction */
68 3, /* cost of a lea instruction */
69 2, /* variable shift costs */
70 3, /* constant shift costs */
71 {3, 3, 3, 3, 5}, /* cost of starting a multiply (indexed by MODE_INDEX) */
72 0, /* cost of multiply per each bit set */
73 {3, 3, 3, 3, 5}, /* cost of a divide/mod (indexed by MODE_INDEX) */
74 3, /* cost of movsx */
75 3, /* cost of movzx */
78 2, /* cost for loading QImode using movzbl */
79 {2, 2, 2}, /* cost of loading integer registers
80 in QImode, HImode and SImode.
81 Relative to reg-reg move (2). */
82 {2, 2, 2}, /* cost of storing integer registers */
83 2, /* cost of reg,reg fld/fst */
84 {2, 2, 2}, /* cost of loading fp registers
85 in SFmode, DFmode and XFmode */
86 {2, 2, 2}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
87 3, /* cost of moving MMX register */
88 {3, 3}, /* cost of loading MMX registers
89 in SImode and DImode */
90 {3, 3}, /* cost of storing MMX registers
91 in SImode and DImode */
92 3, /* cost of moving SSE register */
93 {3, 3, 3}, /* cost of loading SSE registers
94 in SImode, DImode and TImode */
95 {3, 3, 3}, /* cost of storing SSE registers
96 in SImode, DImode and TImode */
97 3, /* MMX or SSE register to integer */
98 0, /* size of prefetch block */
99 0, /* number of parallel prefetches */
101 2, /* cost of FADD and FSUB insns. */
102 2, /* cost of FMUL instruction. */
103 2, /* cost of FDIV instruction. */
104 2, /* cost of FABS instruction. */
105 2, /* cost of FCHS instruction. */
106 2, /* cost of FSQRT instruction. */
109 /* Processor costs (relative to an add) */
111 struct processor_costs i386_cost
= { /* 386 specific costs */
112 1, /* cost of an add instruction */
113 1, /* cost of a lea instruction */
114 3, /* variable shift costs */
115 2, /* constant shift costs */
116 {6, 6, 6, 6, 6}, /* cost of starting a multiply (indexed by MODE_INDEX) */
117 1, /* cost of multiply per each bit set */
118 {23, 23, 23, 23, 23}, /* cost of a divide/mod (indexed by MODE_INDEX) */
119 3, /* cost of movsx */
120 2, /* cost of movzx */
121 15, /* "large" insn */
123 4, /* cost for loading QImode using movzbl */
124 {2, 4, 2}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {8, 8, 8}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
131 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
132 2, /* cost of moving MMX register */
133 {4, 8}, /* cost of loading MMX registers
134 in SImode and DImode */
135 {4, 8}, /* cost of storing MMX registers
136 in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
139 in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
141 in SImode, DImode and TImode */
142 3, /* MMX or SSE register to integer */
143 0, /* size of prefetch block */
144 0, /* number of parallel prefetches */
146 23, /* cost of FADD and FSUB insns. */
147 27, /* cost of FMUL instruction. */
148 88, /* cost of FDIV instruction. */
149 22, /* cost of FABS instruction. */
150 24, /* cost of FCHS instruction. */
151 122, /* cost of FSQRT instruction. */
155 struct processor_costs i486_cost
= { /* 486 specific costs */
156 1, /* cost of an add instruction */
157 1, /* cost of a lea instruction */
158 3, /* variable shift costs */
159 2, /* constant shift costs */
160 {12, 12, 12, 12, 12}, /* cost of starting a multiply (indexed by MODE_INDEX) */
161 1, /* cost of multiply per each bit set */
162 {40, 40, 40, 40, 40}, /* cost of a divide/mod (indexed by MODE_INDEX) */
163 3, /* cost of movsx */
164 2, /* cost of movzx */
165 15, /* "large" insn */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
176 2, /* cost of moving MMX register */
177 {4, 8}, /* cost of loading MMX registers
178 in SImode and DImode */
179 {4, 8}, /* cost of storing MMX registers
180 in SImode and DImode */
181 2, /* cost of moving SSE register */
182 {4, 8, 16}, /* cost of loading SSE registers
183 in SImode, DImode and TImode */
184 {4, 8, 16}, /* cost of storing SSE registers
185 in SImode, DImode and TImode */
186 3, /* MMX or SSE register to integer */
187 0, /* size of prefetch block */
188 0, /* number of parallel prefetches */
190 8, /* cost of FADD and FSUB insns. */
191 16, /* cost of FMUL instruction. */
192 73, /* cost of FDIV instruction. */
193 3, /* cost of FABS instruction. */
194 3, /* cost of FCHS instruction. */
195 83, /* cost of FSQRT instruction. */
199 struct processor_costs pentium_cost
= { /* Pentium specific costs */
200 1, /* cost of an add instruction */
201 1, /* cost of a lea instruction */
202 4, /* variable shift costs */
203 1, /* constant shift costs */
204 {11, 11, 11, 11, 11}, /* cost of starting a multiply (indexed by MODE_INDEX) */
205 0, /* cost of multiply per each bit set */
206 {25, 25, 25, 25, 25}, /* cost of a divide/mod (indexed by MODE_INDEX) */
207 3, /* cost of movsx */
208 2, /* cost of movzx */
209 8, /* "large" insn */
211 6, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {2, 2, 6}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
219 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
220 8, /* cost of moving MMX register */
221 {8, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {8, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
234 3, /* cost of FADD and FSUB insns. */
235 3, /* cost of FMUL instruction. */
236 39, /* cost of FDIV instruction. */
237 1, /* cost of FABS instruction. */
238 1, /* cost of FCHS instruction. */
239 70, /* cost of FSQRT instruction. */
243 struct processor_costs pentiumpro_cost
= { /* PentiumPro specific costs */
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
246 1, /* variable shift costs */
247 1, /* constant shift costs */
248 {4, 4, 4, 4, 4}, /* cost of starting a multiply (indexed by MODE_INDEX) */
249 0, /* cost of multiply per each bit set */
250 {17, 17, 17, 17, 17}, /* cost of a divide/mod (indexed by MODE_INDEX) */
251 1, /* cost of movsx */
252 1, /* cost of movzx */
253 8, /* "large" insn */
255 2, /* cost for loading QImode using movzbl */
256 {4, 4, 4}, /* cost of loading integer registers
257 in QImode, HImode and SImode.
258 Relative to reg-reg move (2). */
259 {2, 2, 2}, /* cost of storing integer registers */
260 2, /* cost of reg,reg fld/fst */
261 {2, 2, 6}, /* cost of loading fp registers
262 in SFmode, DFmode and XFmode */
263 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
264 2, /* cost of moving MMX register */
265 {2, 2}, /* cost of loading MMX registers
266 in SImode and DImode */
267 {2, 2}, /* cost of storing MMX registers
268 in SImode and DImode */
269 2, /* cost of moving SSE register */
270 {2, 2, 8}, /* cost of loading SSE registers
271 in SImode, DImode and TImode */
272 {2, 2, 8}, /* cost of storing SSE registers
273 in SImode, DImode and TImode */
274 3, /* MMX or SSE register to integer */
275 32, /* size of prefetch block */
276 6, /* number of parallel prefetches */
278 3, /* cost of FADD and FSUB insns. */
279 5, /* cost of FMUL instruction. */
280 56, /* cost of FDIV instruction. */
281 2, /* cost of FABS instruction. */
282 2, /* cost of FCHS instruction. */
283 56, /* cost of FSQRT instruction. */
287 struct processor_costs k6_cost
= { /* K6 specific costs */
288 1, /* cost of an add instruction */
289 2, /* cost of a lea instruction */
290 1, /* variable shift costs */
291 1, /* constant shift costs */
292 {3, 3, 3, 3, 3}, /* cost of starting a multiply (indexed by MODE_INDEX) */
293 0, /* cost of multiply per each bit set */
294 {18, 18, 18, 18, 18}, /* cost of a divide/mod (indexed by MODE_INDEX) */
295 2, /* cost of movsx */
296 2, /* cost of movzx */
297 8, /* "large" insn */
299 3, /* cost for loading QImode using movzbl */
300 {4, 5, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 3, 2}, /* cost of storing integer registers */
304 4, /* cost of reg,reg fld/fst */
305 {6, 6, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
307 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 6, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 1, /* number of parallel prefetches */
322 2, /* cost of FADD and FSUB insns. */
323 2, /* cost of FMUL instruction. */
324 56, /* cost of FDIV instruction. */
325 2, /* cost of FABS instruction. */
326 2, /* cost of FCHS instruction. */
327 56, /* cost of FSQRT instruction. */
331 struct processor_costs athlon_cost
= { /* Athlon specific costs */
332 1, /* cost of an add instruction */
333 2, /* cost of a lea instruction */
334 1, /* variable shift costs */
335 1, /* constant shift costs */
336 {5, 5, 5, 5, 5}, /* cost of starting a multiply (indexed by MODE_INDEX) */
337 0, /* cost of multiply per each bit set */
338 {18, 26, 42, 74, 74}, /* cost of a divide/mod (indexed by MODE_INDEX) */
339 1, /* cost of movsx */
340 1, /* cost of movzx */
341 8, /* "large" insn */
343 4, /* cost for loading QImode using movzbl */
344 {3, 4, 3}, /* cost of loading integer registers
345 in QImode, HImode and SImode.
346 Relative to reg-reg move (2). */
347 {3, 4, 3}, /* cost of storing integer registers */
348 4, /* cost of reg,reg fld/fst */
349 {4, 4, 12}, /* cost of loading fp registers
350 in SFmode, DFmode and XFmode */
351 {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
352 2, /* cost of moving MMX register */
353 {4, 4}, /* cost of loading MMX registers
354 in SImode and DImode */
355 {4, 4}, /* cost of storing MMX registers
356 in SImode and DImode */
357 2, /* cost of moving SSE register */
358 {4, 4, 6}, /* cost of loading SSE registers
359 in SImode, DImode and TImode */
360 {4, 4, 5}, /* cost of storing SSE registers
361 in SImode, DImode and TImode */
362 5, /* MMX or SSE register to integer */
363 64, /* size of prefetch block */
364 6, /* number of parallel prefetches */
366 4, /* cost of FADD and FSUB insns. */
367 4, /* cost of FMUL instruction. */
368 24, /* cost of FDIV instruction. */
369 2, /* cost of FABS instruction. */
370 2, /* cost of FCHS instruction. */
371 35, /* cost of FSQRT instruction. */
375 struct processor_costs k8_cost
= { /* K8 specific costs */
376 1, /* cost of an add instruction */
377 2, /* cost of a lea instruction */
378 1, /* variable shift costs */
379 1, /* constant shift costs */
380 {3, 4, 3, 4, 5}, /* cost of starting a multiply (indexed by MODE_INDEX) */
381 0, /* cost of multiply per each bit set */
382 {18, 26, 42, 74, 74}, /* cost of a divide/mod (indexed by MODE_INDEX) */
383 1, /* cost of movsx */
384 1, /* cost of movzx */
385 8, /* "large" insn */
387 4, /* cost for loading QImode using movzbl */
388 {3, 4, 3}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {3, 4, 3}, /* cost of storing integer registers */
392 4, /* cost of reg,reg fld/fst */
393 {4, 4, 12}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
396 2, /* cost of moving MMX register */
397 {3, 3}, /* cost of loading MMX registers
398 in SImode and DImode */
399 {4, 4}, /* cost of storing MMX registers
400 in SImode and DImode */
401 2, /* cost of moving SSE register */
402 {4, 3, 6}, /* cost of loading SSE registers
403 in SImode, DImode and TImode */
404 {4, 4, 5}, /* cost of storing SSE registers
405 in SImode, DImode and TImode */
406 5, /* MMX or SSE register to integer */
407 64, /* size of prefetch block */
408 6, /* number of parallel prefetches */
410 4, /* cost of FADD and FSUB insns. */
411 4, /* cost of FMUL instruction. */
412 19, /* cost of FDIV instruction. */
413 2, /* cost of FABS instruction. */
414 2, /* cost of FCHS instruction. */
415 35, /* cost of FSQRT instruction. */
419 struct processor_costs pentium4_cost
= { /* Pentium 4 specific costs */
420 1, /* cost of an add instruction */
421 3, /* cost of a lea instruction */
422 4, /* variable shift costs */
423 4, /* constant shift costs */
424 {15, 15, 15, 15, 15}, /* cost of starting a multiply (indexed by MODE_INDEX) */
425 0, /* cost of multiply per each bit set */
426 {56, 56, 56, 56, 56}, /* cost of a divide/mod (indexed by MODE_INDEX) */
427 1, /* cost of movsx */
428 1, /* cost of movzx */
429 16, /* "large" insn */
431 2, /* cost for loading QImode using movzbl */
432 {4, 5, 4}, /* cost of loading integer registers
433 in QImode, HImode and SImode.
434 Relative to reg-reg move (2). */
435 {2, 3, 2}, /* cost of storing integer registers */
436 2, /* cost of reg,reg fld/fst */
437 {2, 2, 6}, /* cost of loading fp registers
438 in SFmode, DFmode and XFmode */
439 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
440 2, /* cost of moving MMX register */
441 {2, 2}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {2, 2}, /* cost of storing MMX registers
444 in SImode and DImode */
445 12, /* cost of moving SSE register */
446 {12, 12, 12}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {2, 2, 8}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 10, /* MMX or SSE register to integer */
451 64, /* size of prefetch block */
452 6, /* number of parallel prefetches */
454 5, /* cost of FADD and FSUB insns. */
455 7, /* cost of FMUL instruction. */
456 43, /* cost of FDIV instruction. */
457 2, /* cost of FABS instruction. */
458 2, /* cost of FCHS instruction. */
459 43, /* cost of FSQRT instruction. */
463 struct processor_costs nocona_cost
= { /* Nocona specific costs */
464 1, /* cost of an add instruction */
465 1, /* cost of a lea instruction */
466 1, /* variable shift costs */
467 1, /* constant shift costs */
468 {10, 10, 10, 10, 10}, /* cost of starting a multiply (indexed by MODE_INDEX) */
469 0, /* cost of multiply per each bit set */
470 {66, 66, 66, 66, 66}, /* cost of a divide/mod (indexed by MODE_INDEX) */
471 1, /* cost of movsx */
472 1, /* cost of movzx */
473 16, /* "large" insn */
475 4, /* cost for loading QImode using movzbl */
476 {4, 4, 4}, /* cost of loading integer registers
477 in QImode, HImode and SImode.
478 Relative to reg-reg move (2). */
479 {4, 4, 4}, /* cost of storing integer registers */
480 3, /* cost of reg,reg fld/fst */
481 {12, 12, 12}, /* cost of loading fp registers
482 in SFmode, DFmode and XFmode */
483 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
484 6, /* cost of moving MMX register */
485 {12, 12}, /* cost of loading MMX registers
486 in SImode and DImode */
487 {12, 12}, /* cost of storing MMX registers
488 in SImode and DImode */
489 6, /* cost of moving SSE register */
490 {12, 12, 12}, /* cost of loading SSE registers
491 in SImode, DImode and TImode */
492 {12, 12, 12}, /* cost of storing SSE registers
493 in SImode, DImode and TImode */
494 8, /* MMX or SSE register to integer */
495 128, /* size of prefetch block */
496 8, /* number of parallel prefetches */
498 6, /* cost of FADD and FSUB insns. */
499 8, /* cost of FMUL instruction. */
500 40, /* cost of FDIV instruction. */
501 3, /* cost of FABS instruction. */
502 3, /* cost of FCHS instruction. */
503 44, /* cost of FSQRT instruction. */
/* Pointer to the processor cost table currently selected; it is
   initialized to the Pentium table (pentium_cost).  */
506 const struct processor_costs
*ix86_cost
= &pentium_cost
;
508 /* Processor feature/optimization bitmasks. */
509 #define m_386 (1<<PROCESSOR_I386)
510 #define m_486 (1<<PROCESSOR_I486)
511 #define m_PENT (1<<PROCESSOR_PENTIUM)
512 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
513 #define m_K6 (1<<PROCESSOR_K6)
514 #define m_ATHLON (1<<PROCESSOR_ATHLON)
515 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
516 #define m_K8 (1<<PROCESSOR_K8)
517 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
518 #define m_NOCONA (1<<PROCESSOR_NOCONA)
520 const int x86_use_leave
= m_386
| m_K6
| m_ATHLON_K8
;
521 const int x86_push_memory
= m_386
| m_K6
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
522 const int x86_zero_extend_with_and
= m_486
| m_PENT
;
523 const int x86_movx
= m_ATHLON_K8
| m_PPRO
| m_PENT4
| m_NOCONA
/* m_386 | m_K6 */;
524 const int x86_double_with_add
= ~m_386
;
525 const int x86_use_bit_test
= m_386
;
526 const int x86_unroll_strlen
= m_486
| m_PENT
| m_PPRO
| m_ATHLON_K8
| m_K6
;
527 const int x86_cmove
= m_PPRO
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
528 const int x86_3dnow_a
= m_ATHLON_K8
;
529 const int x86_deep_branch
= m_PPRO
| m_K6
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
530 const int x86_branch_hints
= m_PENT4
| m_NOCONA
;
531 const int x86_use_sahf
= m_PPRO
| m_K6
| m_PENT4
| m_NOCONA
;
532 const int x86_partial_reg_stall
= m_PPRO
;
533 const int x86_use_loop
= m_K6
;
534 const int x86_use_fiop
= ~(m_PPRO
| m_ATHLON_K8
| m_PENT
);
535 const int x86_use_mov0
= m_K6
;
536 const int x86_use_cltd
= ~(m_PENT
| m_K6
);
537 const int x86_read_modify_write
= ~m_PENT
;
538 const int x86_read_modify
= ~(m_PENT
| m_PPRO
);
539 const int x86_split_long_moves
= m_PPRO
;
540 const int x86_promote_QImode
= m_K6
| m_PENT
| m_386
| m_486
| m_ATHLON_K8
;
541 const int x86_fast_prefix
= ~(m_PENT
| m_486
| m_386
);
542 const int x86_single_stringop
= m_386
| m_PENT4
| m_NOCONA
;
543 const int x86_qimode_math
= ~(0);
544 const int x86_promote_qi_regs
= 0;
545 const int x86_himode_math
= ~(m_PPRO
);
546 const int x86_promote_hi_regs
= m_PPRO
;
547 const int x86_sub_esp_4
= m_ATHLON_K8
| m_PPRO
| m_PENT4
| m_NOCONA
;
548 const int x86_sub_esp_8
= m_ATHLON_K8
| m_PPRO
| m_386
| m_486
| m_PENT4
| m_NOCONA
;
549 const int x86_add_esp_4
= m_ATHLON_K8
| m_K6
| m_PENT4
| m_NOCONA
;
550 const int x86_add_esp_8
= m_ATHLON_K8
| m_PPRO
| m_K6
| m_386
| m_486
| m_PENT4
| m_NOCONA
;
551 const int x86_integer_DFmode_moves
= ~(m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_PPRO
);
552 const int x86_partial_reg_dependency
= m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
553 const int x86_memory_mismatch_stall
= m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
554 const int x86_accumulate_outgoing_args
= m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_PPRO
;
555 const int x86_prologue_using_move
= m_ATHLON_K8
| m_PPRO
;
556 const int x86_epilogue_using_move
= m_ATHLON_K8
| m_PPRO
;
557 const int x86_decompose_lea
= m_PENT4
| m_NOCONA
;
558 const int x86_shift1
= ~m_486
;
559 const int x86_arch_always_fancy_math_387
= m_PENT
| m_PPRO
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
560 const int x86_sse_partial_reg_dependency
= m_PENT4
| m_NOCONA
| m_PPRO
;
561 /* Set for machines where the type and dependencies are resolved on SSE register
562 parts instead of whole registers, so we may maintain just lower part of
563 scalar values in proper format leaving the upper part undefined. */
564 const int x86_sse_partial_regs
= m_ATHLON_K8
;
565 /* Athlon optimizes partial-register FPS special case, thus avoiding the
566 need for extra instructions beforehand */
567 const int x86_sse_partial_regs_for_cvtsd2ss
= 0;
568 const int x86_sse_typeless_stores
= m_ATHLON_K8
;
569 const int x86_sse_load0_by_pxor
= m_PPRO
| m_PENT4
| m_NOCONA
;
570 const int x86_use_ffreep
= m_ATHLON_K8
;
571 const int x86_rep_movl_optimal
= m_386
| m_PENT
| m_PPRO
| m_K6
;
572 const int x86_inter_unit_moves
= ~(m_ATHLON_K8
);
573 const int x86_ext_80387_constants
= m_K6
| m_ATHLON
| m_PENT4
| m_NOCONA
| m_PPRO
;
574 /* Some CPU cores are not able to predict more than 4 branch instructions in
575 the 16 byte window. */
576 const int x86_four_jump_limit
= m_PPRO
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
578 /* In case the average insn count for single function invocation is
579 lower than this constant, emit fast (but longer) prologue and
580 epilogue code. */
581 #define FAST_PROLOGUE_INSN_COUNT 20
583 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
584 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
585 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
586 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
588 /* Array of the smallest class containing reg number REGNO, indexed by
589 REGNO. Used by REGNO_REG_CLASS in i386.h. */
591 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
594 AREG
, DREG
, CREG
, BREG
,
596 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
598 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
599 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
602 /* flags, fpsr, dirflag, frame */
603 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
604 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
606 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
608 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
609 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
610 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
614 /* The "default" register map used in 32bit mode. */
616 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
618 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
619 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
620 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
621 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
622 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
623 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
624 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
627 static int const x86_64_int_parameter_registers
[6] =
629 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
630 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
/* Register numbers listed for returning integer values in 64bit mode.
   Register number 1 is RDX (it is annotated the same way in
   x86_64_int_parameter_registers); register number 5 is RDI.  */
633 static int const x86_64_int_return_registers
[4] =
635 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
638 /* The "default" register map used in 64bit mode. */
639 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
641 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
642 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
643 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
644 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
645 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
646 8,9,10,11,12,13,14,15, /* extended integer registers */
647 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
650 /* Define the register numbers to be used in Dwarf debugging information.
651 The SVR4 reference port C compiler uses the following register numbers
652 in its Dwarf output code:
653 0 for %eax (gcc regno = 0)
654 1 for %ecx (gcc regno = 2)
655 2 for %edx (gcc regno = 1)
656 3 for %ebx (gcc regno = 3)
657 4 for %esp (gcc regno = 7)
658 5 for %ebp (gcc regno = 6)
659 6 for %esi (gcc regno = 4)
660 7 for %edi (gcc regno = 5)
661 The following three DWARF register numbers are never generated by
662 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
663 believes these numbers have these meanings.
664 8 for %eip (no gcc equivalent)
665 9 for %eflags (gcc regno = 17)
666 10 for %trapno (no gcc equivalent)
667 It is not at all clear how we should number the FP stack registers
668 for the x86 architecture. If the version of SDB on x86/svr4 were
669 a bit less brain dead with respect to floating-point then we would
670 have a precedent to follow with respect to DWARF register numbers
671 for x86 FP registers, but the SDB on x86/svr4 is so completely
672 broken with respect to FP registers that it is hardly worth thinking
673 of it as something to strive for compatibility with.
674 The version of x86/svr4 SDB I have at the moment does (partially)
675 seem to believe that DWARF register number 11 is associated with
676 the x86 register %st(0), but that's about all. Higher DWARF
677 register numbers don't seem to be associated with anything in
678 particular, and even for DWARF regno 11, SDB only seems to under-
679 stand that it should say that a variable lives in %st(0) (when
680 asked via an `=' command) if we said it was in DWARF regno 11,
681 but SDB still prints garbage when asked for the value of the
682 variable in question (via a `/' command).
683 (Also note that the labels SDB prints for various FP stack regs
684 when doing an `x' command are all wrong.)
685 Note that these problems generally don't affect the native SVR4
686 C compiler because it doesn't allow the use of -O with -g and
687 because when it is *not* optimizing, it allocates a memory
688 location for each floating-point variable, and the memory
689 location is what gets described in the DWARF AT_location
690 attribute for the variable in question.
691 Regardless of the severe mental illness of the x86/svr4 SDB, we
692 do something sensible here and we use the following DWARF
693 register numbers. Note that these are all stack-top-relative
694 numbers.
695 11 for %st(0) (gcc regno = 8)
696 12 for %st(1) (gcc regno = 9)
697 13 for %st(2) (gcc regno = 10)
698 14 for %st(3) (gcc regno = 11)
699 15 for %st(4) (gcc regno = 12)
700 16 for %st(5) (gcc regno = 13)
701 17 for %st(6) (gcc regno = 14)
702 18 for %st(7) (gcc regno = 15)
703 */
704 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
706 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
707 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
708 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
709 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
710 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
711 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
712 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
715 /* Test and compare insns in i386.md store the information needed to
716 generate branch and scc insns here. */
718 rtx ix86_compare_op0
= NULL_RTX
;
719 rtx ix86_compare_op1
= NULL_RTX
;
721 #define MAX_386_STACK_LOCALS 3
722 /* Size of the register save area. */
723 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
725 /* Define the structure for the machine field in struct function. */
727 struct stack_local_entry
GTY(())
732 struct stack_local_entry
*next
;
735 /* Structure describing stack frame layout.
736 Stack grows downward:
742 saved frame pointer if frame_pointer_needed
743 <- HARD_FRAME_POINTER
749 > to_allocate <- FRAME_POINTER
761 int outgoing_arguments_size
;
764 HOST_WIDE_INT to_allocate
;
765 /* The offsets relative to ARG_POINTER. */
766 HOST_WIDE_INT frame_pointer_offset
;
767 HOST_WIDE_INT hard_frame_pointer_offset
;
768 HOST_WIDE_INT stack_pointer_offset
;
770 /* When save_regs_using_mov is set, emit prologue using
771 move instead of push instructions. */
772 bool save_regs_using_mov
;
775 /* Used to enable/disable debugging features. */
776 const char *ix86_debug_arg_string
, *ix86_debug_addr_string
;
777 /* Code model option as passed by user. */
778 const char *ix86_cmodel_string
;
780 enum cmodel ix86_cmodel
;
782 const char *ix86_asm_string
;
783 enum asm_dialect ix86_asm_dialect
= ASM_ATT
;
785 const char *ix86_tls_dialect_string
;
786 enum tls_dialect ix86_tls_dialect
= TLS_DIALECT_GNU
;
788 /* Which unit we are generating floating point math for. */
789 enum fpmath_unit ix86_fpmath
;
791 /* Which cpu are we scheduling for. */
792 enum processor_type ix86_tune
;
793 /* Which instruction set architecture to use. */
794 enum processor_type ix86_arch
;
796 /* Strings to hold which cpu and instruction set architecture to use. */
797 const char *ix86_tune_string
; /* for -mtune=<xxx> */
798 const char *ix86_arch_string
; /* for -march=<xxx> */
799 const char *ix86_fpmath_string
; /* for -mfpmath=<xxx> */
801 /* # of registers to use to pass arguments. */
802 const char *ix86_regparm_string
;
804 /* true if sse prefetch instruction is not NOOP. */
805 int x86_prefetch_sse
;
807 /* ix86_regparm_string as a number */
810 /* Alignment to use for loops and jumps: */
812 /* Power of two alignment for loops. */
813 const char *ix86_align_loops_string
;
815 /* Power of two alignment for non-loop jumps. */
816 const char *ix86_align_jumps_string
;
818 /* Power of two alignment for stack boundary in bytes. */
819 const char *ix86_preferred_stack_boundary_string
;
821 /* Preferred alignment for stack boundary in bits. */
822 unsigned int ix86_preferred_stack_boundary
;
824 /* Values 1-5: see jump.c */
825 int ix86_branch_cost
;
826 const char *ix86_branch_cost_string
;
828 /* Power of two alignment for functions. */
829 const char *ix86_align_funcs_string
;
831 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
832 static char internal_label_prefix
[16];
833 static int internal_label_prefix_len
;
835 static int local_symbolic_operand (rtx
, enum machine_mode
);
836 static int tls_symbolic_operand_1 (rtx
, enum tls_model
);
837 static void output_pic_addr_const (FILE *, rtx
, int);
838 static void put_condition_code (enum rtx_code
, enum machine_mode
,
840 static const char *get_some_local_dynamic_name (void);
841 static int get_some_local_dynamic_name_1 (rtx
*, void *);
842 static rtx
maybe_get_pool_constant (rtx
);
843 static rtx
ix86_expand_int_compare (enum rtx_code
, rtx
, rtx
);
844 static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code
, rtx
*,
846 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
847 static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode
,
849 static rtx
get_thread_pointer (int);
850 static rtx
legitimize_tls_address (rtx
, enum tls_model
, int);
851 static void get_pc_thunk_name (char [32], unsigned int);
852 static rtx
gen_push (rtx
);
853 static int memory_address_length (rtx addr
);
854 static int ix86_flags_dependant (rtx
, rtx
, enum attr_type
);
855 static int ix86_agi_dependant (rtx
, rtx
, enum attr_type
);
856 static struct machine_function
* ix86_init_machine_status (void);
857 static int ix86_split_to_parts (rtx
, rtx
*, enum machine_mode
);
858 static int ix86_nsaved_regs (void);
859 static void ix86_emit_save_regs (void);
860 static void ix86_emit_save_regs_using_mov (rtx
, HOST_WIDE_INT
);
861 static void ix86_emit_restore_regs_using_mov (rtx
, HOST_WIDE_INT
, int);
862 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT
);
863 static HOST_WIDE_INT
ix86_GOT_alias_set (void);
864 static void ix86_adjust_counter (rtx
, HOST_WIDE_INT
);
865 static rtx
ix86_expand_aligntest (rtx
, int);
866 static void ix86_expand_strlensi_unroll_1 (rtx
, rtx
, rtx
);
867 static int ix86_issue_rate (void);
868 static int ix86_adjust_cost (rtx
, rtx
, rtx
, int);
869 static int ia32_multipass_dfa_lookahead (void);
870 static void ix86_init_mmx_sse_builtins (void);
871 static rtx
x86_this_parameter (tree
);
872 static void x86_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
,
873 HOST_WIDE_INT
, tree
);
874 static bool x86_can_output_mi_thunk (tree
, HOST_WIDE_INT
, HOST_WIDE_INT
, tree
);
875 static void x86_file_start (void);
876 static void ix86_reorg (void);
877 static bool ix86_expand_carry_flag_compare (enum rtx_code
, rtx
, rtx
, rtx
*);
878 static tree
ix86_build_builtin_va_list (void);
879 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*, enum machine_mode
,
881 static tree
ix86_gimplify_va_arg (tree
, tree
, tree
*, tree
*);
885 rtx base
, index
, disp
;
887 enum ix86_address_seg
{ SEG_DEFAULT
, SEG_FS
, SEG_GS
} seg
;
890 static int ix86_decompose_address (rtx
, struct ix86_address
*);
891 static int ix86_address_cost (rtx
);
892 static bool ix86_cannot_force_const_mem (rtx
);
893 static rtx
ix86_delegitimize_address (rtx
);
895 struct builtin_description
;
896 static rtx
ix86_expand_sse_comi (const struct builtin_description
*,
898 static rtx
ix86_expand_sse_compare (const struct builtin_description
*,
900 static rtx
ix86_expand_unop1_builtin (enum insn_code
, tree
, rtx
);
901 static rtx
ix86_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
902 static rtx
ix86_expand_binop_builtin (enum insn_code
, tree
, rtx
);
903 static rtx
ix86_expand_store_builtin (enum insn_code
, tree
);
904 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
905 static enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code
);
906 static void ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*,
907 enum rtx_code
*, enum rtx_code
*);
908 static rtx
ix86_expand_fp_compare (enum rtx_code
, rtx
, rtx
, rtx
, rtx
*, rtx
*);
909 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code
);
910 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code
);
911 static int ix86_fp_comparison_sahf_cost (enum rtx_code code
);
912 static int ix86_fp_comparison_cost (enum rtx_code code
);
913 static unsigned int ix86_select_alt_pic_regnum (void);
914 static int ix86_save_reg (unsigned int, int);
915 static void ix86_compute_frame_layout (struct ix86_frame
*);
916 static int ix86_comp_type_attributes (tree
, tree
);
917 static int ix86_function_regparm (tree
, tree
);
918 const struct attribute_spec ix86_attribute_table
[];
919 static bool ix86_function_ok_for_sibcall (tree
, tree
);
920 static tree
ix86_handle_cdecl_attribute (tree
*, tree
, tree
, int, bool *);
921 static tree
ix86_handle_regparm_attribute (tree
*, tree
, tree
, int, bool *);
922 static int ix86_value_regno (enum machine_mode
);
923 static bool contains_128bit_aligned_vector_p (tree
);
924 static rtx
ix86_struct_value_rtx (tree
, int);
925 static bool ix86_ms_bitfield_layout_p (tree
);
926 static tree
ix86_handle_struct_attribute (tree
*, tree
, tree
, int, bool *);
927 static int extended_reg_mentioned_1 (rtx
*, void *);
928 static bool ix86_rtx_costs (rtx
, int, int, int *);
929 static int min_insn_size (rtx
);
930 static tree
ix86_md_asm_clobbers (tree clobbers
);
931 static bool ix86_must_pass_in_stack (enum machine_mode mode
, tree type
);
932 static bool ix86_pass_by_reference (CUMULATIVE_ARGS
*, enum machine_mode
,
935 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
936 static void ix86_svr3_asm_out_constructor (rtx
, int);
/* Register class used for passing a given 64-bit part of the argument.
   These represent classes as documented by the psABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class; gcc
   just uses SFmode or DFmode moves instead of DImode to avoid
   reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
947 enum x86_64_reg_class
950 X86_64_INTEGER_CLASS
,
951 X86_64_INTEGERSI_CLASS
,
960 static const char * const x86_64_reg_class_name
[] =
961 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
963 #define MAX_CLASSES 4
964 static int classify_argument (enum machine_mode
, tree
,
965 enum x86_64_reg_class
[MAX_CLASSES
], int);
966 static int examine_argument (enum machine_mode
, tree
, int, int *, int *);
967 static rtx
construct_container (enum machine_mode
, tree
, int, int, int,
969 static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class
,
970 enum x86_64_reg_class
);
972 /* Table of constants used by fldpi, fldln2, etc.... */
973 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
974 static bool ext_80387_constants_init
= 0;
975 static void init_ext_80387_constants (void);
977 /* Initialize the GCC target structure. */
978 #undef TARGET_ATTRIBUTE_TABLE
979 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
980 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
981 # undef TARGET_MERGE_DECL_ATTRIBUTES
982 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
985 #undef TARGET_COMP_TYPE_ATTRIBUTES
986 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
988 #undef TARGET_INIT_BUILTINS
989 #define TARGET_INIT_BUILTINS ix86_init_builtins
991 #undef TARGET_EXPAND_BUILTIN
992 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
994 #undef TARGET_ASM_FUNCTION_EPILOGUE
995 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
997 #undef TARGET_ASM_OPEN_PAREN
998 #define TARGET_ASM_OPEN_PAREN ""
999 #undef TARGET_ASM_CLOSE_PAREN
1000 #define TARGET_ASM_CLOSE_PAREN ""
1002 #undef TARGET_ASM_ALIGNED_HI_OP
1003 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1004 #undef TARGET_ASM_ALIGNED_SI_OP
1005 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1007 #undef TARGET_ASM_ALIGNED_DI_OP
1008 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1011 #undef TARGET_ASM_UNALIGNED_HI_OP
1012 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1013 #undef TARGET_ASM_UNALIGNED_SI_OP
1014 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1015 #undef TARGET_ASM_UNALIGNED_DI_OP
1016 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1018 #undef TARGET_SCHED_ADJUST_COST
1019 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1020 #undef TARGET_SCHED_ISSUE_RATE
1021 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1022 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1023 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1024 ia32_multipass_dfa_lookahead
1026 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1027 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1030 #undef TARGET_HAVE_TLS
1031 #define TARGET_HAVE_TLS true
1033 #undef TARGET_CANNOT_FORCE_CONST_MEM
1034 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1036 #undef TARGET_DELEGITIMIZE_ADDRESS
1037 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1039 #undef TARGET_MS_BITFIELD_LAYOUT_P
1040 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1042 #undef TARGET_ASM_OUTPUT_MI_THUNK
1043 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1044 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1045 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1047 #undef TARGET_ASM_FILE_START
1048 #define TARGET_ASM_FILE_START x86_file_start
1050 #undef TARGET_RTX_COSTS
1051 #define TARGET_RTX_COSTS ix86_rtx_costs
1052 #undef TARGET_ADDRESS_COST
1053 #define TARGET_ADDRESS_COST ix86_address_cost
1055 #undef TARGET_FIXED_CONDITION_CODE_REGS
1056 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1057 #undef TARGET_CC_MODES_COMPATIBLE
1058 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1060 #undef TARGET_MACHINE_DEPENDENT_REORG
1061 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1063 #undef TARGET_BUILD_BUILTIN_VA_LIST
1064 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1066 #undef TARGET_MD_ASM_CLOBBERS
1067 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1069 #undef TARGET_PROMOTE_PROTOTYPES
1070 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1071 #undef TARGET_STRUCT_VALUE_RTX
1072 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1073 #undef TARGET_SETUP_INCOMING_VARARGS
1074 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1075 #undef TARGET_MUST_PASS_IN_STACK
1076 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1077 #undef TARGET_PASS_BY_REFERENCE
1078 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1080 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1081 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1083 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1084 #undef TARGET_INSERT_ATTRIBUTES
1085 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1088 struct gcc_target targetm
= TARGET_INITIALIZER
;
/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
1093 #ifndef DEFAULT_PCC_STRUCT_RETURN
1094 #define DEFAULT_PCC_STRUCT_RETURN 1
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */
1107 override_options (void)
1110 int ix86_tune_defaulted
= 0;
1112 /* Comes from final.c -- no real reason to change it. */
1113 #define MAX_CODE_ALIGN 16
1117 const struct processor_costs
*cost
; /* Processor costs */
1118 const int target_enable
; /* Target flags to enable. */
1119 const int target_disable
; /* Target flags to disable. */
1120 const int align_loop
; /* Default alignments. */
1121 const int align_loop_max_skip
;
1122 const int align_jump
;
1123 const int align_jump_max_skip
;
1124 const int align_func
;
1126 const processor_target_table
[PROCESSOR_max
] =
1128 {&i386_cost
, 0, 0, 4, 3, 4, 3, 4},
1129 {&i486_cost
, 0, 0, 16, 15, 16, 15, 16},
1130 {&pentium_cost
, 0, 0, 16, 7, 16, 7, 16},
1131 {&pentiumpro_cost
, 0, 0, 16, 15, 16, 7, 16},
1132 {&k6_cost
, 0, 0, 32, 7, 32, 7, 32},
1133 {&athlon_cost
, 0, 0, 16, 7, 16, 7, 16},
1134 {&pentium4_cost
, 0, 0, 0, 0, 0, 0, 0},
1135 {&k8_cost
, 0, 0, 16, 7, 16, 7, 16},
1136 {&nocona_cost
, 0, 0, 0, 0, 0, 0, 0}
1139 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
1142 const char *const name
; /* processor name or nickname. */
1143 const enum processor_type processor
;
1144 const enum pta_flags
1150 PTA_PREFETCH_SSE
= 16,
1156 const processor_alias_table
[] =
1158 {"i386", PROCESSOR_I386
, 0},
1159 {"i486", PROCESSOR_I486
, 0},
1160 {"i586", PROCESSOR_PENTIUM
, 0},
1161 {"pentium", PROCESSOR_PENTIUM
, 0},
1162 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
1163 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
1164 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1165 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1166 {"c3-2", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_SSE
},
1167 {"i686", PROCESSOR_PENTIUMPRO
, 0},
1168 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
1169 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
1170 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1171 {"pentium3m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1172 {"pentium-m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
| PTA_SSE2
},
1173 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1174 | PTA_MMX
| PTA_PREFETCH_SSE
},
1175 {"pentium4m", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1176 | PTA_MMX
| PTA_PREFETCH_SSE
},
1177 {"prescott", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
1178 | PTA_MMX
| PTA_PREFETCH_SSE
},
1179 {"nocona", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_64BIT
1180 | PTA_MMX
| PTA_PREFETCH_SSE
},
1181 {"k6", PROCESSOR_K6
, PTA_MMX
},
1182 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1183 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1184 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1186 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
1187 | PTA_3DNOW
| PTA_3DNOW_A
},
1188 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1189 | PTA_3DNOW_A
| PTA_SSE
},
1190 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1191 | PTA_3DNOW_A
| PTA_SSE
},
1192 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1193 | PTA_3DNOW_A
| PTA_SSE
},
1194 {"x86-64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_64BIT
1195 | PTA_SSE
| PTA_SSE2
},
1196 {"k8", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1197 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1198 {"opteron", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1199 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1200 {"athlon64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1201 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1202 {"athlon-fx", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1203 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1206 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
1208 /* Set the default values for switches whose default depends on TARGET_64BIT
1209 in case they weren't overwritten by command line options. */
1212 if (flag_omit_frame_pointer
== 2)
1213 flag_omit_frame_pointer
= 1;
1214 if (flag_asynchronous_unwind_tables
== 2)
1215 flag_asynchronous_unwind_tables
= 1;
1216 if (flag_pcc_struct_return
== 2)
1217 flag_pcc_struct_return
= 0;
1221 if (flag_omit_frame_pointer
== 2)
1222 flag_omit_frame_pointer
= 0;
1223 if (flag_asynchronous_unwind_tables
== 2)
1224 flag_asynchronous_unwind_tables
= 0;
1225 if (flag_pcc_struct_return
== 2)
1226 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
1229 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1230 SUBTARGET_OVERRIDE_OPTIONS
;
1233 if (!ix86_tune_string
&& ix86_arch_string
)
1234 ix86_tune_string
= ix86_arch_string
;
1235 if (!ix86_tune_string
)
1237 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
1238 ix86_tune_defaulted
= 1;
1240 if (!ix86_arch_string
)
1241 ix86_arch_string
= TARGET_64BIT
? "x86-64" : "i386";
1243 if (ix86_cmodel_string
!= 0)
1245 if (!strcmp (ix86_cmodel_string
, "small"))
1246 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1248 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string
);
1249 else if (!strcmp (ix86_cmodel_string
, "32"))
1250 ix86_cmodel
= CM_32
;
1251 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
1252 ix86_cmodel
= CM_KERNEL
;
1253 else if (!strcmp (ix86_cmodel_string
, "medium") && !flag_pic
)
1254 ix86_cmodel
= CM_MEDIUM
;
1255 else if (!strcmp (ix86_cmodel_string
, "large") && !flag_pic
)
1256 ix86_cmodel
= CM_LARGE
;
1258 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
1262 ix86_cmodel
= CM_32
;
1264 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1266 if (ix86_asm_string
!= 0)
1268 if (!strcmp (ix86_asm_string
, "intel"))
1269 ix86_asm_dialect
= ASM_INTEL
;
1270 else if (!strcmp (ix86_asm_string
, "att"))
1271 ix86_asm_dialect
= ASM_ATT
;
1273 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
1275 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
1276 error ("code model `%s' not supported in the %s bit mode",
1277 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
1278 if (ix86_cmodel
== CM_LARGE
)
1279 sorry ("code model `large' not supported yet");
1280 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
1281 sorry ("%i-bit mode not compiled in",
1282 (target_flags
& MASK_64BIT
) ? 64 : 32);
1284 for (i
= 0; i
< pta_size
; i
++)
1285 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
1287 ix86_arch
= processor_alias_table
[i
].processor
;
1288 /* Default cpu tuning to the architecture. */
1289 ix86_tune
= ix86_arch
;
1290 if (processor_alias_table
[i
].flags
& PTA_MMX
1291 && !(target_flags_explicit
& MASK_MMX
))
1292 target_flags
|= MASK_MMX
;
1293 if (processor_alias_table
[i
].flags
& PTA_3DNOW
1294 && !(target_flags_explicit
& MASK_3DNOW
))
1295 target_flags
|= MASK_3DNOW
;
1296 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
1297 && !(target_flags_explicit
& MASK_3DNOW_A
))
1298 target_flags
|= MASK_3DNOW_A
;
1299 if (processor_alias_table
[i
].flags
& PTA_SSE
1300 && !(target_flags_explicit
& MASK_SSE
))
1301 target_flags
|= MASK_SSE
;
1302 if (processor_alias_table
[i
].flags
& PTA_SSE2
1303 && !(target_flags_explicit
& MASK_SSE2
))
1304 target_flags
|= MASK_SSE2
;
1305 if (processor_alias_table
[i
].flags
& PTA_SSE3
1306 && !(target_flags_explicit
& MASK_SSE3
))
1307 target_flags
|= MASK_SSE3
;
1308 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1309 x86_prefetch_sse
= true;
1310 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1312 if (ix86_tune_defaulted
)
1314 ix86_tune_string
= "x86-64";
1315 for (i
= 0; i
< pta_size
; i
++)
1316 if (! strcmp (ix86_tune_string
,
1317 processor_alias_table
[i
].name
))
1319 ix86_tune
= processor_alias_table
[i
].processor
;
1322 error ("CPU you selected does not support x86-64 "
1329 error ("bad value (%s) for -march= switch", ix86_arch_string
);
1331 for (i
= 0; i
< pta_size
; i
++)
1332 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
1334 ix86_tune
= processor_alias_table
[i
].processor
;
1335 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1336 error ("CPU you selected does not support x86-64 instruction set");
1338 /* Intel CPUs have always interpreted SSE prefetch instructions as
1339 NOPs; so, we can enable SSE prefetch instructions even when
1340 -mtune (rather than -march) points us to a processor that has them.
1341 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1342 higher processors. */
1343 if (TARGET_CMOVE
&& (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
))
1344 x86_prefetch_sse
= true;
1348 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
1351 ix86_cost
= &size_cost
;
1353 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
1354 target_flags
|= processor_target_table
[ix86_tune
].target_enable
;
1355 target_flags
&= ~processor_target_table
[ix86_tune
].target_disable
;
1357 /* Arrange to set up i386_stack_locals for all functions. */
1358 init_machine_status
= ix86_init_machine_status
;
1360 /* Validate -mregparm= value. */
1361 if (ix86_regparm_string
)
1363 i
= atoi (ix86_regparm_string
);
1364 if (i
< 0 || i
> REGPARM_MAX
)
1365 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
1371 ix86_regparm
= REGPARM_MAX
;
1373 /* If the user has provided any of the -malign-* options,
1374 warn and use that value only if -falign-* is not set.
1375 Remove this code in GCC 3.2 or later. */
1376 if (ix86_align_loops_string
)
1378 warning ("-malign-loops is obsolete, use -falign-loops");
1379 if (align_loops
== 0)
1381 i
= atoi (ix86_align_loops_string
);
1382 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1383 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1385 align_loops
= 1 << i
;
1389 if (ix86_align_jumps_string
)
1391 warning ("-malign-jumps is obsolete, use -falign-jumps");
1392 if (align_jumps
== 0)
1394 i
= atoi (ix86_align_jumps_string
);
1395 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1396 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1398 align_jumps
= 1 << i
;
1402 if (ix86_align_funcs_string
)
1404 warning ("-malign-functions is obsolete, use -falign-functions");
1405 if (align_functions
== 0)
1407 i
= atoi (ix86_align_funcs_string
);
1408 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1409 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1411 align_functions
= 1 << i
;
1415 /* Default align_* from the processor table. */
1416 if (align_loops
== 0)
1418 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
1419 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
1421 if (align_jumps
== 0)
1423 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
1424 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
1426 if (align_functions
== 0)
1428 align_functions
= processor_target_table
[ix86_tune
].align_func
;
1431 /* Validate -mpreferred-stack-boundary= value, or provide default.
1432 The default of 128 bits is for Pentium III's SSE __m128, but we
1433 don't want additional code to keep the stack aligned when
1434 optimizing for code size. */
1435 ix86_preferred_stack_boundary
= (optimize_size
1436 ? TARGET_64BIT
? 128 : 32
1438 if (ix86_preferred_stack_boundary_string
)
1440 i
= atoi (ix86_preferred_stack_boundary_string
);
1441 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
1442 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
1443 TARGET_64BIT
? 4 : 2);
1445 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
1448 /* Validate -mbranch-cost= value, or provide default. */
1449 ix86_branch_cost
= processor_target_table
[ix86_tune
].cost
->branch_cost
;
1450 if (ix86_branch_cost_string
)
1452 i
= atoi (ix86_branch_cost_string
);
1454 error ("-mbranch-cost=%d is not between 0 and 5", i
);
1456 ix86_branch_cost
= i
;
1459 if (ix86_tls_dialect_string
)
1461 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
1462 ix86_tls_dialect
= TLS_DIALECT_GNU
;
1463 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
1464 ix86_tls_dialect
= TLS_DIALECT_SUN
;
1466 error ("bad value (%s) for -mtls-dialect= switch",
1467 ix86_tls_dialect_string
);
1470 /* Keep nonleaf frame pointers. */
1471 if (TARGET_OMIT_LEAF_FRAME_POINTER
)
1472 flag_omit_frame_pointer
= 1;
1474 /* If we're doing fast math, we don't care about comparison order
1475 wrt NaNs. This lets us use a shorter comparison sequence. */
1476 if (flag_unsafe_math_optimizations
)
1477 target_flags
&= ~MASK_IEEE_FP
;
1479 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1480 since the insns won't need emulation. */
1481 if (x86_arch_always_fancy_math_387
& (1 << ix86_arch
))
1482 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
1484 /* Turn on SSE2 builtins for -msse3. */
1486 target_flags
|= MASK_SSE2
;
1488 /* Turn on SSE builtins for -msse2. */
1490 target_flags
|= MASK_SSE
;
1494 if (TARGET_ALIGN_DOUBLE
)
1495 error ("-malign-double makes no sense in the 64bit mode");
1497 error ("-mrtd calling convention not supported in the 64bit mode");
1498 /* Enable by default the SSE and MMX builtins. */
1499 target_flags
|= (MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
);
1500 ix86_fpmath
= FPMATH_SSE
;
1504 ix86_fpmath
= FPMATH_387
;
/* The i386 ABI does not specify a red zone.  It still makes sense to use
   it when the programmer takes care to keep the stack from being
   destroyed.  */
1507 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
1508 target_flags
|= MASK_NO_RED_ZONE
;
1511 if (ix86_fpmath_string
!= 0)
1513 if (! strcmp (ix86_fpmath_string
, "387"))
1514 ix86_fpmath
= FPMATH_387
;
1515 else if (! strcmp (ix86_fpmath_string
, "sse"))
1519 warning ("SSE instruction set disabled, using 387 arithmetics");
1520 ix86_fpmath
= FPMATH_387
;
1523 ix86_fpmath
= FPMATH_SSE
;
1525 else if (! strcmp (ix86_fpmath_string
, "387,sse")
1526 || ! strcmp (ix86_fpmath_string
, "sse,387"))
1530 warning ("SSE instruction set disabled, using 387 arithmetics");
1531 ix86_fpmath
= FPMATH_387
;
1533 else if (!TARGET_80387
)
1535 warning ("387 instruction set disabled, using SSE arithmetics");
1536 ix86_fpmath
= FPMATH_SSE
;
1539 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
1542 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
/* It makes no sense to ask for just SSE builtins, so MMX is also turned
   on by -msse.  */
1549 target_flags
|= MASK_MMX
;
1550 x86_prefetch_sse
= true;
1553 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1556 target_flags
|= MASK_MMX
;
1557 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1558 extensions it adds. */
1559 if (x86_3dnow_a
& (1 << ix86_arch
))
1560 target_flags
|= MASK_3DNOW_A
;
1562 if ((x86_accumulate_outgoing_args
& TUNEMASK
)
1563 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
1565 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
1567 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1570 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
1571 p
= strchr (internal_label_prefix
, 'X');
1572 internal_label_prefix_len
= p
- internal_label_prefix
;
1578 optimization_options (int level
, int size ATTRIBUTE_UNUSED
)
1580 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1581 make the problem with not enough registers even worse. */
1582 #ifdef INSN_SCHEDULING
1584 flag_schedule_insns
= 0;
/* The default values of these switches depend on TARGET_64BIT, which
   is not known at this moment.  Mark these values with 2 and let the
   user override them.  In case there is no command line option
   specifying them, we will set the defaults in override_options.  */
1592 flag_omit_frame_pointer
= 2;
1593 flag_pcc_struct_return
= 2;
1594 flag_asynchronous_unwind_tables
= 2;
1597 /* Table of valid machine attributes. */
1598 const struct attribute_spec ix86_attribute_table
[] =
1600 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1601 /* Stdcall attribute says callee is responsible for popping arguments
1602 if they are not variable. */
1603 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1604 /* Fastcall attribute says callee is responsible for popping arguments
1605 if they are not variable. */
1606 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1607 /* Cdecl attribute says the callee is a normal C declaration */
1608 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1609 /* Regparm attribute specifies how many integer arguments are to be
1610 passed in registers. */
1611 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute
},
1612 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1613 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
},
1614 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
},
1615 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
1617 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
1618 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
1619 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1620 SUBTARGET_ATTRIBUTE_TABLE
,
1622 { NULL
, 0, 0, false, false, false, NULL
}
1625 /* Decide whether we can make a sibling call to a function. DECL is the
1626 declaration of the function being targeted by the call and EXP is the
1627 CALL_EXPR representing the call. */
1630 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
1632 /* If we are generating position-independent code, we cannot sibcall
1633 optimize any indirect call, or a direct call to a global function,
1634 as the PLT requires %ebx be live. */
1635 if (!TARGET_64BIT
&& flag_pic
&& (!decl
|| TREE_PUBLIC (decl
)))
1638 /* If we are returning floats on the 80387 register stack, we cannot
1639 make a sibcall from a function that doesn't return a float to a
1640 function that does or, conversely, from a function that does return
1641 a float to a function that doesn't; the necessary stack adjustment
1642 would not be executed. */
1643 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp
)))
1644 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)))))
1647 /* If this call is indirect, we'll need to be able to use a call-clobbered
1648 register for the address of the target function. Make sure that all
1649 such registers are not used for passing parameters. */
1650 if (!decl
&& !TARGET_64BIT
)
1654 /* We're looking at the CALL_EXPR, we need the type of the function. */
1655 type
= TREE_OPERAND (exp
, 0); /* pointer expression */
1656 type
= TREE_TYPE (type
); /* pointer type */
1657 type
= TREE_TYPE (type
); /* function type */
1659 if (ix86_function_regparm (type
, NULL
) >= 3)
1661 /* ??? Need to count the actual number of registers to be used,
1662 not the possible number of registers. Fix later. */
1667 /* Otherwise okay. That also includes certain types of indirect calls. */
1671 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1672 arguments as in struct attribute_spec.handler. */
1674 ix86_handle_cdecl_attribute (tree
*node
, tree name
,
1675 tree args ATTRIBUTE_UNUSED
,
1676 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
1678 if (TREE_CODE (*node
) != FUNCTION_TYPE
1679 && TREE_CODE (*node
) != METHOD_TYPE
1680 && TREE_CODE (*node
) != FIELD_DECL
1681 && TREE_CODE (*node
) != TYPE_DECL
)
1683 warning ("`%s' attribute only applies to functions",
1684 IDENTIFIER_POINTER (name
));
1685 *no_add_attrs
= true;
1689 if (is_attribute_p ("fastcall", name
))
1691 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
1693 error ("fastcall and stdcall attributes are not compatible");
1695 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
1697 error ("fastcall and regparm attributes are not compatible");
1700 else if (is_attribute_p ("stdcall", name
))
1702 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
1704 error ("fastcall and stdcall attributes are not compatible");
1711 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name
));
1712 *no_add_attrs
= true;
1718 /* Handle a "regparm" attribute;
1719 arguments as in struct attribute_spec.handler. */
1721 ix86_handle_regparm_attribute (tree
*node
, tree name
, tree args
,
1722 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
1724 if (TREE_CODE (*node
) != FUNCTION_TYPE
1725 && TREE_CODE (*node
) != METHOD_TYPE
1726 && TREE_CODE (*node
) != FIELD_DECL
1727 && TREE_CODE (*node
) != TYPE_DECL
)
1729 warning ("`%s' attribute only applies to functions",
1730 IDENTIFIER_POINTER (name
));
1731 *no_add_attrs
= true;
1737 cst
= TREE_VALUE (args
);
1738 if (TREE_CODE (cst
) != INTEGER_CST
)
1740 warning ("`%s' attribute requires an integer constant argument",
1741 IDENTIFIER_POINTER (name
));
1742 *no_add_attrs
= true;
1744 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
1746 warning ("argument to `%s' attribute larger than %d",
1747 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
1748 *no_add_attrs
= true;
1751 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
1753 error ("fastcall and regparm attributes are not compatible");
1760 /* Return 0 if the attributes for two types are incompatible, 1 if they
1761 are compatible, and 2 if they are nearly compatible (which causes a
1762 warning to be generated). */
1765 ix86_comp_type_attributes (tree type1
, tree type2
)
1767 /* Check for mismatch of non-default calling convention. */
1768 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
1770 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
1773 /* Check for mismatched fastcall types */
1774 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1
))
1775 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2
)))
1778 /* Check for mismatched return types (cdecl vs stdcall). */
1779 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
1780 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
1782 if (ix86_function_regparm (type1
, NULL
)
1783 != ix86_function_regparm (type2
, NULL
))
/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling a function indirectly
   or considering a libcall.  */
1793 ix86_function_regparm (tree type
, tree decl
)
1796 int regparm
= ix86_regparm
;
1797 bool user_convention
= false;
1801 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
1804 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
1805 user_convention
= true;
1808 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
1811 user_convention
= true;
1814 /* Use register calling convention for local functions when possible. */
1815 if (!TARGET_64BIT
&& !user_convention
&& decl
1816 && flag_unit_at_a_time
&& !profile_flag
)
1818 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
1821 /* We can't use regparm(3) for nested functions as these use
1822 static chain pointer in third argument. */
1823 if (DECL_CONTEXT (decl
) && !DECL_NO_STATIC_CHAIN (decl
))
1833 /* Return true if EAX is live at the start of the function. Used by
1834 ix86_expand_prologue to determine if we need special help before
1835 calling allocate_stack_worker. */
1838 ix86_eax_live_at_start_p (void)
1840 /* Cheat. Don't bother working forward from ix86_function_regparm
1841 to the function type to whether an actual argument is located in
1842 eax. Instead just look at cfg info, which is still close enough
1843 to correct at this point. This gives false positives for broken
1844 functions that might use uninitialized data that happens to be
1845 allocated in eax, but who cares? */
/* The answer is read from the register set stored in the entry block's
   global_live_at_end field; the register tested is hard register
   number 0, which is the register the comments above call EAX. */
1846 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR
->global_live_at_end
, 0);
1849 /* Value is the number of bytes of arguments automatically
1850 popped when returning from a subroutine call.
1851 FUNDECL is the declaration node of the function (as a tree),
1852 FUNTYPE is the data type of the function (as a tree),
1853 or for a library call it is an identifier node for the subroutine name.
1854 SIZE is the number of bytes of arguments passed on the stack.
1856 On the 80386, the RTD insn may be used to pop them if the number
1857 of args is fixed, but if the number is variable then the caller
1858 must pop them all. RTD can't be used for library calls now
1859 because the library is compiled with the Unix compiler.
1860 Use of RTD is a selectable option, since it is incompatible with
1861 standard Unix calling sequences. If the option is not selected,
1862 the caller must always pop the args.
1864 The attribute stdcall is equivalent to RTD on a per module basis. */
1867 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
1869 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
1871 /* Cdecl functions override -mrtd, and never pop the stack. */
1872 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
1874 /* Stdcall and fastcall functions will pop the stack if not
1876 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
))
1877 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype
)))
1881 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
1882 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
1883 == void_type_node
)))
1887 /* Lose any fake structure return argument if it is passed on the stack. */
1888 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
1891 int nregs
= ix86_function_regparm (funtype
, fundecl
);
1894 return GET_MODE_SIZE (Pmode
);
1900 /* Argument support functions. */
1902 /* Return true when register may be used to pass function parameters. */
1904 ix86_function_arg_regno_p (int regno
)
1908 return (regno
< REGPARM_MAX
1909 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
1910 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
1912 /* RAX is used as hidden argument to va_arg functions. */
1915 for (i
= 0; i
< REGPARM_MAX
; i
++)
1916 if (regno
== x86_64_int_parameter_registers
[i
])
1921 /* Return true if we do not know how to pass an argument of mode MODE
and type TYPE solely in registers. */
1924 ix86_must_pass_in_stack (enum machine_mode mode
, tree type
)
/* First consult the shared helper must_pass_in_stack_var_size_or_pad
   (by its name, the variable-size / padding test -- confirm against its
   definition). */
1926 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
/* Beyond that, the result is true only when not targeting 64-bit, TYPE
   is non-null and MODE is TImode. */
1928 return (!TARGET_64BIT
&& type
&& mode
== TImode
);
1931 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1932 for a call to a function whose data type is FNTYPE.
1933 For a library call, FNTYPE is 0. */
1936 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
1937 tree fntype
, /* tree ptr for function decl */
1938 rtx libname
, /* SYMBOL_REF of library name or 0 */
1941 static CUMULATIVE_ARGS zero_cum
;
1942 tree param
, next_param
;
1944 if (TARGET_DEBUG_ARG
)
1946 fprintf (stderr
, "\ninit_cumulative_args (");
1948 fprintf (stderr
, "fntype code = %s, ret code = %s",
1949 tree_code_name
[(int) TREE_CODE (fntype
)],
1950 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
1952 fprintf (stderr
, "no fntype");
1955 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
1960 /* Set up the number of registers to use for passing arguments. */
1962 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
1964 cum
->nregs
= ix86_regparm
;
1966 cum
->sse_nregs
= SSE_REGPARM_MAX
;
1968 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
1969 cum
->warn_sse
= true;
1970 cum
->warn_mmx
= true;
1971 cum
->maybe_vaarg
= false;
1973 /* Use ecx and edx registers if function has fastcall attribute */
1974 if (fntype
&& !TARGET_64BIT
)
1976 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
1983 /* Determine if this function has variable arguments. This is
1984 indicated by the last argument being 'void_type_node' if there
1985 are no variable arguments. If there are variable arguments, then
1986 we won't pass anything in registers in 32-bit mode. */
1988 if (cum
->nregs
|| cum
->mmx_nregs
|| cum
->sse_nregs
)
1990 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
1991 param
!= 0; param
= next_param
)
1993 next_param
= TREE_CHAIN (param
);
1994 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
2005 cum
->maybe_vaarg
= true;
2009 if ((!fntype
&& !libname
)
2010 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
2011 cum
->maybe_vaarg
= 1;
2013 if (TARGET_DEBUG_ARG
)
2014 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
2019 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2020 of this code is to classify each 8bytes of incoming argument by the register
2021 class and assign registers accordingly. */
2023 /* Return the union class of CLASS1 and CLASS2.
2024 See the x86-64 PS ABI for details. The rules below are tested in the
order written, so an earlier rule takes precedence over a later one. */
2026 static enum x86_64_reg_class
2027 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
2029 /* Rule #1: If both classes are equal, this is the resulting class. */
2030 if (class1
== class2
)
2033 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2035 if (class1
== X86_64_NO_CLASS
)
2037 if (class2
== X86_64_NO_CLASS
)
2040 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2041 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
2042 return X86_64_MEMORY_CLASS
;
2044 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* Special case handled first: INTEGERSI merged with SSESF (in either
   order) stays INTEGERSI instead of widening to INTEGER. Every other
   pairing that involves INTEGER or INTEGERSI yields INTEGER. */
2045 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
2046 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
2047 return X86_64_INTEGERSI_CLASS
;
2048 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
2049 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
2050 return X86_64_INTEGER_CLASS
;
2052 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
2053 if (class1
== X86_64_X87_CLASS
|| class1
== X86_64_X87UP_CLASS
2054 || class2
== X86_64_X87_CLASS
|| class2
== X86_64_X87UP_CLASS
)
2055 return X86_64_MEMORY_CLASS
;
2057 /* Rule #6: Otherwise class SSE is used. */
2058 return X86_64_SSE_CLASS
;
2061 /* Classify the argument of type TYPE and mode MODE.
2062 CLASSES will be filled by the register class used to pass each word
2063 of the operand. The number of words is returned. In case the parameter
2064 should be passed in memory, 0 is returned. As a special case for zero
2065 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2067 BIT_OFFSET is used internally for handling records and specifies the
2068 offset in bits, taken modulo 256 to avoid overflow cases.
2070 See the x86-64 PS ABI for details.
2074 classify_argument (enum machine_mode mode
, tree type
,
2075 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
2077 HOST_WIDE_INT bytes
=
2078 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2079 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2081 /* Variable sized entities are always passed/returned in memory. */
2085 if (mode
!= VOIDmode
2086 && targetm
.calls
.must_pass_in_stack (mode
, type
))
2089 if (type
&& AGGREGATE_TYPE_P (type
))
2093 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
2095 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2099 for (i
= 0; i
< words
; i
++)
2100 classes
[i
] = X86_64_NO_CLASS
;
2102 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2103 signalize memory class, so handle it as special case. */
2106 classes
[0] = X86_64_NO_CLASS
;
2110 /* Classify each field of record and merge classes. */
2111 if (TREE_CODE (type
) == RECORD_TYPE
)
2113 /* For classes first merge in the field of the subclasses. */
2114 if (TYPE_BINFO (type
))
2116 tree binfo
, base_binfo
;
2119 for (binfo
= TYPE_BINFO (type
), i
= 0;
2120 BINFO_BASE_ITERATE (binfo
, i
, base_binfo
); i
++)
2123 int offset
= tree_low_cst (BINFO_OFFSET (base_binfo
), 0) * 8;
2124 tree type
= BINFO_TYPE (base_binfo
);
2126 num
= classify_argument (TYPE_MODE (type
),
2128 (offset
+ bit_offset
) % 256);
2131 for (i
= 0; i
< num
; i
++)
2133 int pos
= (offset
+ (bit_offset
% 64)) / 8 / 8;
2135 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
2139 /* And now merge the fields of structure. */
2140 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
2142 if (TREE_CODE (field
) == FIELD_DECL
)
2146 /* Bitfields are always classified as integer. Handle them
2147 early, since later code would consider them to be
2148 misaligned integers. */
2149 if (DECL_BIT_FIELD (field
))
2151 for (i
= int_bit_position (field
) / 8 / 8;
2152 i
< (int_bit_position (field
)
2153 + tree_low_cst (DECL_SIZE (field
), 0)
2156 merge_classes (X86_64_INTEGER_CLASS
,
2161 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
2162 TREE_TYPE (field
), subclasses
,
2163 (int_bit_position (field
)
2164 + bit_offset
) % 256);
2167 for (i
= 0; i
< num
; i
++)
2170 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
2172 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
2178 /* Arrays are handled as small records. */
2179 else if (TREE_CODE (type
) == ARRAY_TYPE
)
2182 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
2183 TREE_TYPE (type
), subclasses
, bit_offset
);
2187 /* The partial classes are now full classes. */
2188 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
2189 subclasses
[0] = X86_64_SSE_CLASS
;
2190 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
2191 subclasses
[0] = X86_64_INTEGER_CLASS
;
2193 for (i
= 0; i
< words
; i
++)
2194 classes
[i
] = subclasses
[i
% num
];
2196 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2197 else if (TREE_CODE (type
) == UNION_TYPE
2198 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
2200 /* For classes first merge in the field of the subclasses. */
2201 if (TYPE_BINFO (type
))
2203 tree binfo
, base_binfo
;
2206 for (binfo
= TYPE_BINFO (type
), i
= 0;
2207 BINFO_BASE_ITERATE (binfo
, i
, base_binfo
); i
++)
2210 int offset
= tree_low_cst (BINFO_OFFSET (base_binfo
), 0) * 8;
2211 tree type
= BINFO_TYPE (base_binfo
);
2213 num
= classify_argument (TYPE_MODE (type
),
2215 (offset
+ (bit_offset
% 64)) % 256);
2218 for (i
= 0; i
< num
; i
++)
2220 int pos
= (offset
+ (bit_offset
% 64)) / 8 / 8;
2222 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
2226 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
2228 if (TREE_CODE (field
) == FIELD_DECL
)
2231 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
2232 TREE_TYPE (field
), subclasses
,
2236 for (i
= 0; i
< num
; i
++)
2237 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
2241 else if (TREE_CODE (type
) == SET_TYPE
)
2245 classes
[0] = X86_64_INTEGERSI_CLASS
;
2248 else if (bytes
<= 8)
2250 classes
[0] = X86_64_INTEGER_CLASS
;
2253 else if (bytes
<= 12)
2255 classes
[0] = X86_64_INTEGER_CLASS
;
2256 classes
[1] = X86_64_INTEGERSI_CLASS
;
2261 classes
[0] = X86_64_INTEGER_CLASS
;
2262 classes
[1] = X86_64_INTEGER_CLASS
;
2269 /* Final merger cleanup. */
2270 for (i
= 0; i
< words
; i
++)
2272 /* If one class is MEMORY, everything should be passed in
2274 if (classes
[i
] == X86_64_MEMORY_CLASS
)
2277 /* The X86_64_SSEUP_CLASS should be always preceded by
2278 X86_64_SSE_CLASS. */
2279 if (classes
[i
] == X86_64_SSEUP_CLASS
2280 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
2281 classes
[i
] = X86_64_SSE_CLASS
;
2283 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2284 if (classes
[i
] == X86_64_X87UP_CLASS
2285 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
2286 classes
[i
] = X86_64_SSE_CLASS
;
2291 /* Compute alignment needed. We align all types to natural boundaries with
2292 exception of XFmode that is aligned to 64bits. */
2293 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
2295 int mode_alignment
= GET_MODE_BITSIZE (mode
);
2298 mode_alignment
= 128;
2299 else if (mode
== XCmode
)
2300 mode_alignment
= 256;
2301 if (COMPLEX_MODE_P (mode
))
2302 mode_alignment
/= 2;
2303 /* Misaligned fields are always returned in memory. */
2304 if (bit_offset
% mode_alignment
)
2308 /* for V1xx modes, just use the base mode */
2309 if (VECTOR_MODE_P (mode
)
2310 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
2311 mode
= GET_MODE_INNER (mode
);
2313 /* Classification of atomic types. */
2323 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
2324 classes
[0] = X86_64_INTEGERSI_CLASS
;
2326 classes
[0] = X86_64_INTEGER_CLASS
;
2330 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
2335 if (!(bit_offset
% 64))
2336 classes
[0] = X86_64_SSESF_CLASS
;
2338 classes
[0] = X86_64_SSE_CLASS
;
2341 classes
[0] = X86_64_SSEDF_CLASS
;
2344 classes
[0] = X86_64_X87_CLASS
;
2345 classes
[1] = X86_64_X87UP_CLASS
;
2348 classes
[0] = X86_64_SSE_CLASS
;
2349 classes
[1] = X86_64_SSEUP_CLASS
;
2352 classes
[0] = X86_64_SSE_CLASS
;
2355 classes
[0] = X86_64_SSEDF_CLASS
;
2356 classes
[1] = X86_64_SSEDF_CLASS
;
2360 /* These modes are larger than 16 bytes. */
2368 classes
[0] = X86_64_SSE_CLASS
;
2369 classes
[1] = X86_64_SSEUP_CLASS
;
2375 classes
[0] = X86_64_SSE_CLASS
;
2381 if (VECTOR_MODE_P (mode
))
2385 if (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
)
2387 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
2388 classes
[0] = X86_64_INTEGERSI_CLASS
;
2390 classes
[0] = X86_64_INTEGER_CLASS
;
2391 classes
[1] = X86_64_INTEGER_CLASS
;
2392 return 1 + (bytes
> 8);
2399 /* Examine the argument and store the number of registers required in each
2400 class in *INT_NREGS and *SSE_NREGS. Return 0 iff parameter should be passed in memory. */
2402 examine_argument (enum machine_mode mode
, tree type
, int in_return
,
2403 int *int_nregs
, int *sse_nregs
)
2405 enum x86_64_reg_class
class[MAX_CLASSES
];
2406 int n
= classify_argument (mode
, type
, class, 0);
2412 for (n
--; n
>= 0; n
--)
2415 case X86_64_INTEGER_CLASS
:
2416 case X86_64_INTEGERSI_CLASS
:
2419 case X86_64_SSE_CLASS
:
2420 case X86_64_SSESF_CLASS
:
2421 case X86_64_SSEDF_CLASS
:
2424 case X86_64_NO_CLASS
:
2425 case X86_64_SSEUP_CLASS
:
2427 case X86_64_X87_CLASS
:
2428 case X86_64_X87UP_CLASS
:
2432 case X86_64_MEMORY_CLASS
:
2437 /* Construct container for the argument used by GCC interface. See
2438 FUNCTION_ARG for the detailed description. */
2440 construct_container (enum machine_mode mode
, tree type
, int in_return
,
2441 int nintregs
, int nsseregs
, const int * intreg
,
2444 enum machine_mode tmpmode
;
2446 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2447 enum x86_64_reg_class
class[MAX_CLASSES
];
2451 int needed_sseregs
, needed_intregs
;
2452 rtx exp
[MAX_CLASSES
];
2455 n
= classify_argument (mode
, type
, class, 0);
2456 if (TARGET_DEBUG_ARG
)
2459 fprintf (stderr
, "Memory class\n");
2462 fprintf (stderr
, "Classes:");
2463 for (i
= 0; i
< n
; i
++)
2465 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
2467 fprintf (stderr
, "\n");
2472 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
, &needed_sseregs
))
2474 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
2477 /* First construct simple cases. Avoid SCmode, since we want to use
2478 single register to pass this type. */
2479 if (n
== 1 && mode
!= SCmode
)
2482 case X86_64_INTEGER_CLASS
:
2483 case X86_64_INTEGERSI_CLASS
:
2484 return gen_rtx_REG (mode
, intreg
[0]);
2485 case X86_64_SSE_CLASS
:
2486 case X86_64_SSESF_CLASS
:
2487 case X86_64_SSEDF_CLASS
:
2488 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
2489 case X86_64_X87_CLASS
:
2490 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
2491 case X86_64_NO_CLASS
:
2492 /* Zero sized array, struct or class. */
2497 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
2499 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
2501 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
2502 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
2503 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
2504 && class[1] == X86_64_INTEGER_CLASS
2505 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
2506 && intreg
[0] + 1 == intreg
[1])
2507 return gen_rtx_REG (mode
, intreg
[0]);
2509 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
2510 && class[2] == X86_64_X87_CLASS
&& class[3] == X86_64_X87UP_CLASS
2512 return gen_rtx_REG (XCmode
, FIRST_STACK_REG
);
2514 /* Otherwise figure out the entries of the PARALLEL. */
2515 for (i
= 0; i
< n
; i
++)
2519 case X86_64_NO_CLASS
:
2521 case X86_64_INTEGER_CLASS
:
2522 case X86_64_INTEGERSI_CLASS
:
2523 /* Merge TImodes on aligned occasions here too. */
2524 if (i
* 8 + 8 > bytes
)
2525 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
2526 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
2530 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2531 if (tmpmode
== BLKmode
)
2533 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2534 gen_rtx_REG (tmpmode
, *intreg
),
2538 case X86_64_SSESF_CLASS
:
2539 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2540 gen_rtx_REG (SFmode
,
2541 SSE_REGNO (sse_regno
)),
2545 case X86_64_SSEDF_CLASS
:
2546 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2547 gen_rtx_REG (DFmode
,
2548 SSE_REGNO (sse_regno
)),
2552 case X86_64_SSE_CLASS
:
2553 if (i
< n
- 1 && class[i
+ 1] == X86_64_SSEUP_CLASS
)
2557 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2558 gen_rtx_REG (tmpmode
,
2559 SSE_REGNO (sse_regno
)),
2561 if (tmpmode
== TImode
)
2569 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
2570 for (i
= 0; i
< nexps
; i
++)
2571 XVECEXP (ret
, 0, i
) = exp
[i
];
2575 /* Update the data in CUM to advance over an argument
2576 of mode MODE and data type TYPE.
2577 (TYPE is null for libcalls where that information may not be available.) */
2580 function_arg_advance (CUMULATIVE_ARGS
*cum
, /* current arg information */
2581 enum machine_mode mode
, /* current arg mode */
2582 tree type
, /* type of the argument or 0 if lib support */
2583 int named
) /* whether or not the argument was named */
2586 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2587 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2589 if (TARGET_DEBUG_ARG
)
2591 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2592 words
, cum
->words
, cum
->nregs
, cum
->sse_nregs
, GET_MODE_NAME (mode
), named
);
2595 int int_nregs
, sse_nregs
;
2596 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
2597 cum
->words
+= words
;
2598 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
2600 cum
->nregs
-= int_nregs
;
2601 cum
->sse_nregs
-= sse_nregs
;
2602 cum
->regno
+= int_nregs
;
2603 cum
->sse_regno
+= sse_nregs
;
2606 cum
->words
+= words
;
2610 if (TARGET_SSE
&& SSE_REG_MODE_P (mode
)
2611 && (!type
|| !AGGREGATE_TYPE_P (type
)))
2613 cum
->sse_words
+= words
;
2614 cum
->sse_nregs
-= 1;
2615 cum
->sse_regno
+= 1;
2616 if (cum
->sse_nregs
<= 0)
2622 else if (TARGET_MMX
&& MMX_REG_MODE_P (mode
)
2623 && (!type
|| !AGGREGATE_TYPE_P (type
)))
2625 cum
->mmx_words
+= words
;
2626 cum
->mmx_nregs
-= 1;
2627 cum
->mmx_regno
+= 1;
2628 if (cum
->mmx_nregs
<= 0)
2636 cum
->words
+= words
;
2637 cum
->nregs
-= words
;
2638 cum
->regno
+= words
;
2640 if (cum
->nregs
<= 0)
2650 /* Define where to put the arguments to a function.
2651 Value is zero to push the argument on the stack,
2652 or a hard register in which to store the argument.
2654 MODE is the argument's machine mode.
2655 TYPE is the data type of the argument (as a tree).
2656 This is null for libcalls where that information may
2658 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2659 the preceding args and about the function being called.
2660 NAMED is nonzero if this argument is a named parameter
2661 (otherwise it is an extra parameter matching an ellipsis). */
2664 function_arg (CUMULATIVE_ARGS
*cum
, /* current arg information */
2665 enum machine_mode mode
, /* current arg mode */
2666 tree type
, /* type of the argument or 0 if lib support */
2667 int named
) /* != 0 for normal args, == 0 for ... args */
2671 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2672 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2673 static bool warnedsse
, warnedmmx
;
2675 /* To simplify the code below, represent vector types with a vector mode
2676 even if MMX/SSE are not active. */
2678 && TREE_CODE (type
) == VECTOR_TYPE
2679 && (bytes
== 8 || bytes
== 16)
2680 && GET_MODE_CLASS (TYPE_MODE (type
)) != MODE_VECTOR_INT
2681 && GET_MODE_CLASS (TYPE_MODE (type
)) != MODE_VECTOR_FLOAT
)
2683 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
2684 mode
= TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
2685 ? MIN_MODE_VECTOR_FLOAT
: MIN_MODE_VECTOR_INT
;
2687 /* Get the mode which has this inner mode and number of units. */
2688 while (GET_MODE_NUNITS (mode
) != TYPE_VECTOR_SUBPARTS (type
)
2689 || GET_MODE_INNER (mode
) != innermode
)
2691 mode
= GET_MODE_WIDER_MODE (mode
);
2692 if (mode
== VOIDmode
)
2697 /* Handle a hidden AL argument containing number of registers for varargs
2698 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2700 if (mode
== VOIDmode
)
2703 return GEN_INT (cum
->maybe_vaarg
2704 ? (cum
->sse_nregs
< 0
2712 ret
= construct_container (mode
, type
, 0, cum
->nregs
, cum
->sse_nregs
,
2713 &x86_64_int_parameter_registers
[cum
->regno
],
2718 /* For now, pass fp/complex values on the stack. */
2730 if (words
<= cum
->nregs
)
2732 int regno
= cum
->regno
;
2734 /* Fastcall allocates the first two DWORD (SImode) or
2735 smaller arguments to ECX and EDX. */
2738 if (mode
== BLKmode
|| mode
== DImode
)
2741 /* ECX not EAX is the first allocated register. */
2745 ret
= gen_rtx_REG (mode
, regno
);
2755 if (!type
|| !AGGREGATE_TYPE_P (type
))
2757 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
2760 warning ("SSE vector argument without SSE enabled "
2764 ret
= gen_rtx_REG (mode
, cum
->sse_regno
+ FIRST_SSE_REG
);
2771 if (!type
|| !AGGREGATE_TYPE_P (type
))
2773 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
2776 warning ("MMX vector argument without MMX enabled "
2780 ret
= gen_rtx_REG (mode
, cum
->mmx_regno
+ FIRST_MMX_REG
);
2785 if (TARGET_DEBUG_ARG
)
2788 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2789 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
2792 print_simple_rtl (stderr
, ret
);
2794 fprintf (stderr
, ", stack");
2796 fprintf (stderr
, " )\n");
2802 /* A C expression that indicates when an argument must be passed by
2803 reference. If nonzero for an argument, a copy of that argument is
2804 made in memory and a pointer to the argument is passed instead of
2805 the argument itself. The pointer is passed in whatever way is
2806 appropriate for passing a pointer to that type. */
2809 ix86_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
2810 enum machine_mode mode ATTRIBUTE_UNUSED
,
2811 tree type
, bool named ATTRIBUTE_UNUSED
)
2816 if (type
&& int_size_in_bytes (type
) == -1)
2818 if (TARGET_DEBUG_ARG
)
2819 fprintf (stderr
, "function_arg_pass_by_reference\n");
2826 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2827 ABI. Only called if TARGET_SSE. */
2829 contains_128bit_aligned_vector_p (tree type
)
2831 enum machine_mode mode
= TYPE_MODE (type
);
2832 if (SSE_REG_MODE_P (mode
)
2833 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
2835 if (TYPE_ALIGN (type
) < 128)
2838 if (AGGREGATE_TYPE_P (type
))
2840 /* Walk the aggregates recursively. */
2841 if (TREE_CODE (type
) == RECORD_TYPE
2842 || TREE_CODE (type
) == UNION_TYPE
2843 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
2847 if (TYPE_BINFO (type
))
2849 tree binfo
, base_binfo
;
2852 for (binfo
= TYPE_BINFO (type
), i
= 0;
2853 BINFO_BASE_ITERATE (binfo
, i
, base_binfo
); i
++)
2854 if (contains_128bit_aligned_vector_p (BINFO_TYPE (base_binfo
)))
2857 /* And now merge the fields of structure. */
2858 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
2860 if (TREE_CODE (field
) == FIELD_DECL
2861 && contains_128bit_aligned_vector_p (TREE_TYPE (field
)))
2865 /* Just for use in case some language passes arrays by value. */
2866 else if (TREE_CODE (type
) == ARRAY_TYPE
)
2868 if (contains_128bit_aligned_vector_p (TREE_TYPE (type
)))
2877 /* Gives the alignment boundary, in bits, of an argument with the
2878 specified mode and type. */
2881 ix86_function_arg_boundary (enum machine_mode mode
, tree type
)
2885 align
= TYPE_ALIGN (type
);
2887 align
= GET_MODE_ALIGNMENT (mode
);
2888 if (align
< PARM_BOUNDARY
)
2889 align
= PARM_BOUNDARY
;
2892 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2893 make an exception for SSE modes since these require 128bit
2896 The handling here differs from field_alignment. ICC aligns MMX
2897 arguments to 4 byte boundaries, while structure fields are aligned
2898 to 8 byte boundaries. */
2900 align
= PARM_BOUNDARY
;
2903 if (!SSE_REG_MODE_P (mode
))
2904 align
= PARM_BOUNDARY
;
2908 if (!contains_128bit_aligned_vector_p (type
))
2909 align
= PARM_BOUNDARY
;
2917 /* Return true if REGNO is a possible register number of function value. */
2919 ix86_function_value_regno_p (int regno
)
/* Two alternative answers follow. NOTE(review): the condition choosing
   between them is not part of the lines shown; confirm which target
   each one serves. This first form accepts register 0, FIRST_FLOAT_REG
   when TARGET_FLOAT_RETURNS_IN_80387 is set, and FIRST_SSE_REG when
   TARGET_SSE is set. */
2923 return ((regno
) == 0
2924 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
2925 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
));
/* The second form accepts FIRST_FLOAT_REG unconditionally, which makes
   its final FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387 clause
   redundant. */
2927 return ((regno
) == 0 || (regno
) == FIRST_FLOAT_REG
2928 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
)
2929 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
));
2932 /* Define how to find the value returned by a function.
2933 VALTYPE is the data type of the value (as a tree).
2934 The FUNCTION_DECL of the function being called (FUNC) is not a
2935 parameter of this function; only VALTYPE is passed. */
2937 ix86_function_value (tree valtype
)
2941 rtx ret
= construct_container (TYPE_MODE (valtype
), valtype
, 1,
2942 REGPARM_MAX
, SSE_REGPARM_MAX
,
2943 x86_64_int_return_registers
, 0);
2944 /* For zero sized structures, construct_container return NULL, but we need
2945 to keep rest of compiler happy by returning meaningful value. */
2947 ret
= gen_rtx_REG (TYPE_MODE (valtype
), 0);
2951 return gen_rtx_REG (TYPE_MODE (valtype
),
2952 ix86_value_regno (TYPE_MODE (valtype
)));
2955 /* Return true iff type is returned in memory. */
2957 ix86_return_in_memory (tree type
)
2959 int needed_intregs
, needed_sseregs
, size
;
2960 enum machine_mode mode
= TYPE_MODE (type
);
2963 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
2965 if (mode
== BLKmode
)
2968 size
= int_size_in_bytes (type
);
2970 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
2973 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
2975 /* User-created vectors small enough to fit in EAX. */
2979 /* MMX/3dNow values are returned on the stack, since we've
2980 got to EMMS/FEMMS before returning. */
2984 /* SSE values are returned in XMM0, except when it doesn't exist. */
2986 return (TARGET_SSE
? 0 : 1);
2997 /* When returning SSE vector types, we have a choice of either
2998 (1) being abi incompatible with a -march switch, or
2999 (2) generating an error.
3000 Given no good solution, I think the safest thing is one warning.
3001 The user won't be able to use -Werror, but....
3003 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3004 called in response to actually generating a caller or callee that
3005 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3006 via aggregate_value_p for general type probing from tree-ssa. */
3009 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
3013 if (!TARGET_SSE
&& type
&& !warned
)
3015 /* Look at the return type of the function, not the function type. */
3016 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
3019 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
3022 warning ("SSE vector return without SSE enabled changes the ABI");
3029 /* Define how to find the value returned by a library function
3030 assuming the value has mode MODE. */
3032 ix86_libcall_value (enum machine_mode mode
)
3043 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
3045 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
3050 return gen_rtx_REG (mode
, 0);
3054 return gen_rtx_REG (mode
, ix86_value_regno (mode
));
3057 /* Given a mode, return the register to use for a return value.
MODE is the machine mode of the value. The tests are made in the order
written: MODE_FLOAT modes first (only when TARGET_FLOAT_RETURNS_IN_80387
is set), then TImode and 16-byte vector modes. */
3060 ix86_value_regno (enum machine_mode mode
)
3062 /* Floating point return values in %st(0). */
3063 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
&& TARGET_FLOAT_RETURNS_IN_80387
)
3064 return FIRST_FLOAT_REG
;
3065 /* TImode and 16-byte vector modes in %xmm0. See ix86_return_in_memory
3066 for where we prevent this case when sse is not available. */
3067 if (mode
== TImode
|| (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
3068 return FIRST_SSE_REG
;
3069 /* Everything else in %eax. */
3073 /* Create the va_list data type. Returns the tree for that type: either
a pointer to char_type_node or a one-element array of the __va_list_tag
record built below. */
3076 ix86_build_builtin_va_list (void)
3078 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
3080 /* For i386 we use plain pointer to argument area. */
3082 return build_pointer_type (char_type_node
);
/* The remaining code builds a record type, obtained through the
   language hook, together with a TYPE_DECL named __va_list_tag. */
3084 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
3085 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
/* Four FIELD_DECLs are created: gp_offset, fp_offset, overflow_arg_area
   and reg_save_area. The first two are given unsigned_type_node. */
3087 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
3088 unsigned_type_node
);
3089 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
3090 unsigned_type_node
);
3091 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
3093 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
/* Record the owning type on each field. */
3096 DECL_FIELD_CONTEXT (f_gpr
) = record
;
3097 DECL_FIELD_CONTEXT (f_fpr
) = record
;
3098 DECL_FIELD_CONTEXT (f_ovf
) = record
;
3099 DECL_FIELD_CONTEXT (f_sav
) = record
;
/* Name the record through type_decl, then chain the fields in the
   order gp_offset, fp_offset, overflow_arg_area, reg_save_area. */
3101 TREE_CHAIN (record
) = type_decl
;
3102 TYPE_NAME (record
) = type_decl
;
3103 TYPE_FIELDS (record
) = f_gpr
;
3104 TREE_CHAIN (f_gpr
) = f_fpr
;
3105 TREE_CHAIN (f_fpr
) = f_ovf
;
3106 TREE_CHAIN (f_ovf
) = f_sav
;
/* Lay the record out now that its field list is complete. */
3108 layout_type (record
);
3110 /* The correct type is an array type of one element. */
3111 return build_array_type (record
, build_index_type (size_zero_node
));
3114 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3117 ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3118 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
3121 CUMULATIVE_ARGS next_cum
;
3122 rtx save_area
= NULL_RTX
, mem
;
3135 /* Indicate to allocate space on the stack for varargs save area. */
3136 ix86_save_varrargs_registers
= 1;
3138 cfun
->stack_alignment_needed
= 128;
3140 fntype
= TREE_TYPE (current_function_decl
);
3141 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
3142 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
3143 != void_type_node
));
3145 /* For varargs, we do not want to skip the dummy va_dcl argument.
3146 For stdargs, we do want to skip the last named argument. */
3149 function_arg_advance (&next_cum
, mode
, type
, 1);
3152 save_area
= frame_pointer_rtx
;
3154 set
= get_varargs_alias_set ();
3156 for (i
= next_cum
.regno
; i
< ix86_regparm
; i
++)
3158 mem
= gen_rtx_MEM (Pmode
,
3159 plus_constant (save_area
, i
* UNITS_PER_WORD
));
3160 set_mem_alias_set (mem
, set
);
3161 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
3162 x86_64_int_parameter_registers
[i
]));
3165 if (next_cum
.sse_nregs
)
3167 /* Now emit code to save SSE registers. The AX parameter contains number
3168 of SSE parameter registers used to call this function. We use
3169 sse_prologue_save insn template that produces computed jump across
3170 SSE saves. We need some preparation work to get this working. */
3172 label
= gen_label_rtx ();
3173 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
3175 /* Compute address to jump to :
3176 label - 4*eax + nnamed_sse_arguments*4 */
3177 tmp_reg
= gen_reg_rtx (Pmode
);
3178 nsse_reg
= gen_reg_rtx (Pmode
);
3179 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
3180 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
3181 gen_rtx_MULT (Pmode
, nsse_reg
,
3183 if (next_cum
.sse_regno
)
3186 gen_rtx_CONST (DImode
,
3187 gen_rtx_PLUS (DImode
,
3189 GEN_INT (next_cum
.sse_regno
* 4))));
3191 emit_move_insn (nsse_reg
, label_ref
);
3192 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
3194 /* Compute address of memory block we save into. We always use pointer
3195 pointing 127 bytes after first byte to store - this is needed to keep
3196 instruction size limited by 4 bytes. */
3197 tmp_reg
= gen_reg_rtx (Pmode
);
3198 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
3199 plus_constant (save_area
,
3200 8 * REGPARM_MAX
+ 127)));
3201 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
3202 set_mem_alias_set (mem
, set
);
3203 set_mem_align (mem
, BITS_PER_WORD
);
3205 /* And finally do the dirty job! */
3206 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
3207 GEN_INT (next_cum
.sse_regno
), label
));
3212 /* Implement va_start. */
3215 ix86_va_start (tree valist
, rtx nextarg
)
3217 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
3218 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
3219 tree gpr
, fpr
, ovf
, sav
, t
;
3221 /* Only 64bit target needs something special. */
3224 std_expand_builtin_va_start (valist
, nextarg
);
3228 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
3229 f_fpr
= TREE_CHAIN (f_gpr
);
3230 f_ovf
= TREE_CHAIN (f_fpr
);
3231 f_sav
= TREE_CHAIN (f_ovf
);
3233 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
3234 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
3235 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
3236 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
3237 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
3239 /* Count number of gp and fp argument registers used. */
3240 words
= current_function_args_info
.words
;
3241 n_gpr
= current_function_args_info
.regno
;
3242 n_fpr
= current_function_args_info
.sse_regno
;
3244 if (TARGET_DEBUG_ARG
)
3245 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3246 (int) words
, (int) n_gpr
, (int) n_fpr
);
3248 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
,
3249 build_int_2 (n_gpr
* 8, 0));
3250 TREE_SIDE_EFFECTS (t
) = 1;
3251 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3253 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
,
3254 build_int_2 (n_fpr
* 16 + 8*REGPARM_MAX
, 0));
3255 TREE_SIDE_EFFECTS (t
) = 1;
3256 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3258 /* Find the overflow area. */
3259 t
= make_tree (TREE_TYPE (ovf
), virtual_incoming_args_rtx
);
3261 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), t
,
3262 build_int_2 (words
* UNITS_PER_WORD
, 0));
3263 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
3264 TREE_SIDE_EFFECTS (t
) = 1;
3265 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3267 /* Find the register save area.
3268 The function prologue saves it right above the stack frame. */
3269 t
= make_tree (TREE_TYPE (sav
), frame_pointer_rtx
);
3270 t
= build (MODIFY_EXPR
, TREE_TYPE (sav
), sav
, t
);
3271 TREE_SIDE_EFFECTS (t
) = 1;
3272 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3275 /* Implement va_arg. */
3278 ix86_gimplify_va_arg (tree valist
, tree type
, tree
*pre_p
, tree
*post_p
)
3280 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
3281 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
3282 tree gpr
, fpr
, ovf
, sav
, t
;
3284 tree lab_false
, lab_over
= NULL_TREE
;
3290 /* Only 64bit target needs something special. */
3292 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
3294 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
3295 f_fpr
= TREE_CHAIN (f_gpr
);
3296 f_ovf
= TREE_CHAIN (f_fpr
);
3297 f_sav
= TREE_CHAIN (f_ovf
);
3299 valist
= build_fold_indirect_ref (valist
);
3300 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
3301 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
3302 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
3303 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
3305 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
3307 type
= build_pointer_type (type
);
3308 size
= int_size_in_bytes (type
);
3309 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3311 container
= construct_container (TYPE_MODE (type
), type
, 0,
3312 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
3314 * Pull the value out of the saved registers ...
3317 addr
= create_tmp_var (ptr_type_node
, "addr");
3318 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
3322 int needed_intregs
, needed_sseregs
;
3324 tree int_addr
, sse_addr
;
3326 lab_false
= create_artificial_label ();
3327 lab_over
= create_artificial_label ();
3329 examine_argument (TYPE_MODE (type
), type
, 0,
3330 &needed_intregs
, &needed_sseregs
);
3332 need_temp
= (!REG_P (container
)
3333 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
3334 || TYPE_ALIGN (type
) > 128));
3336 /* In case we are passing structure, verify that it is consecutive block
3337 on the register save area. If not we need to do moves. */
3338 if (!need_temp
&& !REG_P (container
))
3340 /* Verify that all registers are strictly consecutive */
3341 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
3345 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
3347 rtx slot
= XVECEXP (container
, 0, i
);
3348 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
3349 || INTVAL (XEXP (slot
, 1)) != i
* 16)
3357 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
3359 rtx slot
= XVECEXP (container
, 0, i
);
3360 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
3361 || INTVAL (XEXP (slot
, 1)) != i
* 8)
3373 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
3374 DECL_POINTER_ALIAS_SET (int_addr
) = get_varargs_alias_set ();
3375 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
3376 DECL_POINTER_ALIAS_SET (sse_addr
) = get_varargs_alias_set ();
3378 /* First ensure that we fit completely in registers. */
3381 t
= build_int_2 ((REGPARM_MAX
- needed_intregs
+ 1) * 8, 0);
3382 TREE_TYPE (t
) = TREE_TYPE (gpr
);
3383 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
3384 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
3385 t
= build (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
3386 gimplify_and_add (t
, pre_p
);
3390 t
= build_int_2 ((SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
3391 + REGPARM_MAX
* 8, 0);
3392 TREE_TYPE (t
) = TREE_TYPE (fpr
);
3393 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
3394 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
3395 t
= build (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
3396 gimplify_and_add (t
, pre_p
);
3399 /* Compute index to start of area used for integer regs. */
3402 /* int_addr = gpr + sav; */
3403 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, gpr
);
3404 t
= build2 (MODIFY_EXPR
, void_type_node
, int_addr
, t
);
3405 gimplify_and_add (t
, pre_p
);
3409 /* sse_addr = fpr + sav; */
3410 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, fpr
);
3411 t
= build2 (MODIFY_EXPR
, void_type_node
, sse_addr
, t
);
3412 gimplify_and_add (t
, pre_p
);
3417 tree temp
= create_tmp_var (type
, "va_arg_tmp");
3420 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
3421 t
= build2 (MODIFY_EXPR
, void_type_node
, addr
, t
);
3422 gimplify_and_add (t
, pre_p
);
3424 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
3426 rtx slot
= XVECEXP (container
, 0, i
);
3427 rtx reg
= XEXP (slot
, 0);
3428 enum machine_mode mode
= GET_MODE (reg
);
3429 tree piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
3430 tree addr_type
= build_pointer_type (piece_type
);
3433 tree dest_addr
, dest
;
3435 if (SSE_REGNO_P (REGNO (reg
)))
3437 src_addr
= sse_addr
;
3438 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
3442 src_addr
= int_addr
;
3443 src_offset
= REGNO (reg
) * 8;
3445 src_addr
= fold_convert (addr_type
, src_addr
);
3446 src_addr
= fold (build2 (PLUS_EXPR
, addr_type
, src_addr
,
3447 size_int (src_offset
)));
3448 src
= build_fold_indirect_ref (src_addr
);
3450 dest_addr
= fold_convert (addr_type
, addr
);
3451 dest_addr
= fold (build2 (PLUS_EXPR
, addr_type
, dest_addr
,
3452 size_int (INTVAL (XEXP (slot
, 1)))));
3453 dest
= build_fold_indirect_ref (dest_addr
);
3455 t
= build2 (MODIFY_EXPR
, void_type_node
, dest
, src
);
3456 gimplify_and_add (t
, pre_p
);
3462 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
3463 build_int_2 (needed_intregs
* 8, 0));
3464 t
= build2 (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
, t
);
3465 gimplify_and_add (t
, pre_p
);
3470 build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
3471 build_int_2 (needed_sseregs
* 16, 0));
3472 t
= build2 (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
, t
);
3473 gimplify_and_add (t
, pre_p
);
3476 t
= build1 (GOTO_EXPR
, void_type_node
, lab_over
);
3477 gimplify_and_add (t
, pre_p
);
3479 t
= build1 (LABEL_EXPR
, void_type_node
, lab_false
);
3480 append_to_statement_list (t
, pre_p
);
3483 /* ... otherwise out of the overflow area. */
3485 /* Care for on-stack alignment if needed. */
3486 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64)
3490 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
3491 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
, build_int_2 (align
- 1, 0));
3492 t
= build (BIT_AND_EXPR
, TREE_TYPE (t
), t
, build_int_2 (-align
, -1));
3494 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
3496 t2
= build2 (MODIFY_EXPR
, void_type_node
, addr
, t
);
3497 gimplify_and_add (t2
, pre_p
);
3499 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
3500 build_int_2 (rsize
* UNITS_PER_WORD
, 0));
3501 t
= build2 (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
3502 gimplify_and_add (t
, pre_p
);
3506 t
= build1 (LABEL_EXPR
, void_type_node
, lab_over
);
3507 append_to_statement_list (t
, pre_p
);
3510 ptrtype
= build_pointer_type (type
);
3511 addr
= fold_convert (ptrtype
, addr
);
3514 addr
= build_fold_indirect_ref (addr
);
3515 return build_fold_indirect_ref (addr
);
3518 /* Return nonzero if OP is either a i387 or SSE fp register. */
3520 any_fp_register_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3522 return ANY_FP_REG_P (op
);
3525 /* Return nonzero if OP is an i387 fp register. */
3527 fp_register_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3529 return FP_REG_P (op
);
3532 /* Return nonzero if OP is a non-fp register_operand. */
3534 register_and_not_any_fp_reg_operand (rtx op
, enum machine_mode mode
)
3536 return register_operand (op
, mode
) && !ANY_FP_REG_P (op
);
3539 /* Return nonzero if OP is a register operand other than an
3540 i387 fp register. */
3542 register_and_not_fp_reg_operand (rtx op
, enum machine_mode mode
)
3544 return register_operand (op
, mode
) && !FP_REG_P (op
);
3547 /* Return nonzero if OP is general operand representable on x86_64. */
3550 x86_64_general_operand (rtx op
, enum machine_mode mode
)
3553 return general_operand (op
, mode
);
3554 if (nonimmediate_operand (op
, mode
))
3556 return x86_64_sign_extended_value (op
);
3559 /* Return nonzero if OP is general operand representable on x86_64
3560 as either sign extended or zero extended constant. */
3563 x86_64_szext_general_operand (rtx op
, enum machine_mode mode
)
3566 return general_operand (op
, mode
);
3567 if (nonimmediate_operand (op
, mode
))
3569 return x86_64_sign_extended_value (op
) || x86_64_zero_extended_value (op
);
3572 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3575 x86_64_nonmemory_operand (rtx op
, enum machine_mode mode
)
3578 return nonmemory_operand (op
, mode
);
3579 if (register_operand (op
, mode
))
3581 return x86_64_sign_extended_value (op
);
3584 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3587 x86_64_movabs_operand (rtx op
, enum machine_mode mode
)
3589 if (!TARGET_64BIT
|| !flag_pic
)
3590 return nonmemory_operand (op
, mode
);
3591 if (register_operand (op
, mode
) || x86_64_sign_extended_value (op
))
3593 if (CONSTANT_P (op
) && !symbolic_reference_mentioned_p (op
))
3598 /* Return nonzero if OPNUM's MEM should be matched
3599 in movabs* patterns. */
3602 ix86_check_movabs (rtx insn
, int opnum
)
3606 set
= PATTERN (insn
);
3607 if (GET_CODE (set
) == PARALLEL
)
3608 set
= XVECEXP (set
, 0, 0);
3609 if (GET_CODE (set
) != SET
)
3611 mem
= XEXP (set
, opnum
);
3612 while (GET_CODE (mem
) == SUBREG
)
3613 mem
= SUBREG_REG (mem
);
3614 if (GET_CODE (mem
) != MEM
)
3616 return (volatile_ok
|| !MEM_VOLATILE_P (mem
));
3619 /* Return nonzero if OP is nonmemory operand representable on x86_64
   as either sign extended or zero extended constant. */
3622 x86_64_szext_nonmemory_operand (rtx op
, enum machine_mode mode
)
3625 return nonmemory_operand (op
, mode
);
3626 if (register_operand (op
, mode
))
3628 return x86_64_sign_extended_value (op
) || x86_64_zero_extended_value (op
);
3631 /* Return nonzero if OP is immediate operand representable on x86_64. */
3634 x86_64_immediate_operand (rtx op
, enum machine_mode mode
)
3637 return immediate_operand (op
, mode
);
3638 return x86_64_sign_extended_value (op
);
3641 /* Return nonzero if OP is immediate operand representable on x86_64. */
3644 x86_64_zext_immediate_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3646 return x86_64_zero_extended_value (op
);
3649 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3650 for shift & compare patterns, as shifting by 0 does not change flags),
3651 else return zero. */
3654 const_int_1_31_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3656 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 1 && INTVAL (op
) <= 31);
3659 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3660 reference and a constant. */
3663 symbolic_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3665 switch (GET_CODE (op
))
3673 if (GET_CODE (op
) == SYMBOL_REF
3674 || GET_CODE (op
) == LABEL_REF
3675 || (GET_CODE (op
) == UNSPEC
3676 && (XINT (op
, 1) == UNSPEC_GOT
3677 || XINT (op
, 1) == UNSPEC_GOTOFF
3678 || XINT (op
, 1) == UNSPEC_GOTPCREL
)))
3680 if (GET_CODE (op
) != PLUS
3681 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
3685 if (GET_CODE (op
) == SYMBOL_REF
3686 || GET_CODE (op
) == LABEL_REF
)
3688 /* Only @GOTOFF gets offsets. */
3689 if (GET_CODE (op
) != UNSPEC
3690 || XINT (op
, 1) != UNSPEC_GOTOFF
)
3693 op
= XVECEXP (op
, 0, 0);
3694 if (GET_CODE (op
) == SYMBOL_REF
3695 || GET_CODE (op
) == LABEL_REF
)
3704 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3707 pic_symbolic_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3709 if (GET_CODE (op
) != CONST
)
3714 if (GET_CODE (op
) == UNSPEC
3715 && XINT (op
, 1) == UNSPEC_GOTPCREL
)
3717 if (GET_CODE (op
) == PLUS
3718 && GET_CODE (XEXP (op
, 0)) == UNSPEC
3719 && XINT (XEXP (op
, 0), 1) == UNSPEC_GOTPCREL
)
3724 if (GET_CODE (op
) == UNSPEC
)
3726 if (GET_CODE (op
) != PLUS
3727 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
3730 if (GET_CODE (op
) == UNSPEC
)
3736 /* Return true if OP is a symbolic operand that resolves locally. */
3739 local_symbolic_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3741 if (GET_CODE (op
) == CONST
3742 && GET_CODE (XEXP (op
, 0)) == PLUS
3743 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
3744 op
= XEXP (XEXP (op
, 0), 0);
3746 if (GET_CODE (op
) == LABEL_REF
)
3749 if (GET_CODE (op
) != SYMBOL_REF
)
3752 if (SYMBOL_REF_LOCAL_P (op
))
3755 /* There is, however, a not insubstantial body of code in the rest of
3756 the compiler that assumes it can just stick the results of
3757 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3758 /* ??? This is a hack. Should update the body of the compiler to
3759 always create a DECL and invoke targetm.encode_section_info. */
3760 if (strncmp (XSTR (op
, 0), internal_label_prefix
,
3761 internal_label_prefix_len
) == 0)
3767 /* Test for various thread-local symbols. */
3770 tls_symbolic_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3772 if (GET_CODE (op
) != SYMBOL_REF
)
3774 return SYMBOL_REF_TLS_MODEL (op
);
3778 tls_symbolic_operand_1 (rtx op
, enum tls_model kind
)
3780 if (GET_CODE (op
) != SYMBOL_REF
)
3782 return SYMBOL_REF_TLS_MODEL (op
) == kind
;
3786 global_dynamic_symbolic_operand (rtx op
,
3787 enum machine_mode mode ATTRIBUTE_UNUSED
)
3789 return tls_symbolic_operand_1 (op
, TLS_MODEL_GLOBAL_DYNAMIC
);
3793 local_dynamic_symbolic_operand (rtx op
,
3794 enum machine_mode mode ATTRIBUTE_UNUSED
)
3796 return tls_symbolic_operand_1 (op
, TLS_MODEL_LOCAL_DYNAMIC
);
3800 initial_exec_symbolic_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3802 return tls_symbolic_operand_1 (op
, TLS_MODEL_INITIAL_EXEC
);
3806 local_exec_symbolic_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3808 return tls_symbolic_operand_1 (op
, TLS_MODEL_LOCAL_EXEC
);
3811 /* Test for a valid operand for a call instruction. Don't allow the
3812 arg pointer register or virtual regs since they may decay into
3813 reg + const, which the patterns can't handle. */
3816 call_insn_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3818 /* Disallow indirect through a virtual register. This leads to
3819 compiler aborts when trying to eliminate them. */
3820 if (GET_CODE (op
) == REG
3821 && (op
== arg_pointer_rtx
3822 || op
== frame_pointer_rtx
3823 || (REGNO (op
) >= FIRST_PSEUDO_REGISTER
3824 && REGNO (op
) <= LAST_VIRTUAL_REGISTER
)))
3827 /* Disallow `call 1234'. Due to varying assembler lameness this
3828 gets either rejected or translated to `call .+1234'. */
3829 if (GET_CODE (op
) == CONST_INT
)
3832 /* Explicitly allow SYMBOL_REF even if pic. */
3833 if (GET_CODE (op
) == SYMBOL_REF
)
3836 /* Otherwise we can allow any general_operand in the address. */
3837 return general_operand (op
, Pmode
);
3840 /* Test for a valid operand for a sibling call instruction. Don't allow the
3841 arg pointer register or virtual regs since they may decay into
3842 reg + const, which the patterns can't handle. */
3845 sibcall_insn_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3847 /* Disallow indirect through a virtual register. This leads to
3848 compiler aborts when trying to eliminate them. */
3849 if (GET_CODE (op
) == REG
3850 && (op
== arg_pointer_rtx
3851 || op
== frame_pointer_rtx
3852 || (REGNO (op
) >= FIRST_PSEUDO_REGISTER
3853 && REGNO (op
) <= LAST_VIRTUAL_REGISTER
)))
3856 /* Explicitly allow SYMBOL_REF even if pic. */
3857 if (GET_CODE (op
) == SYMBOL_REF
)
3860 /* Otherwise we can only allow register operands. */
3861 return register_operand (op
, Pmode
);
3865 constant_call_address_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3867 if (GET_CODE (op
) == CONST
3868 && GET_CODE (XEXP (op
, 0)) == PLUS
3869 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
3870 op
= XEXP (XEXP (op
, 0), 0);
3871 return GET_CODE (op
) == SYMBOL_REF
;
3874 /* Match exactly zero and one. */
3877 const0_operand (rtx op
, enum machine_mode mode
)
3879 return op
== CONST0_RTX (mode
);
3883 const1_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3885 return op
== const1_rtx
;
3888 /* Match 2, 4, or 8. Used for leal multiplicands. */
3891 const248_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3893 return (GET_CODE (op
) == CONST_INT
3894 && (INTVAL (op
) == 2 || INTVAL (op
) == 4 || INTVAL (op
) == 8));
3898 const_0_to_3_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3900 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 0 && INTVAL (op
) < 4);
3904 const_0_to_7_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3906 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 0 && INTVAL (op
) < 8);
3910 const_0_to_15_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3912 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 0 && INTVAL (op
) < 16);
3916 const_0_to_255_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3918 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 0 && INTVAL (op
) < 256);
3922 /* True if this is a constant appropriate for an increment or decrement. */
3925 incdec_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3927 /* On Pentium4, the inc and dec operations cause an extra dependency on
3928 the flags register, since the carry flag is not set. */
3929 if ((TARGET_PENTIUM4
|| TARGET_NOCONA
) && !optimize_size
)
3931 return op
== const1_rtx
|| op
== constm1_rtx
;
3934 /* Return nonzero if OP is acceptable as operand of DImode shift
3938 shiftdi_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3941 return nonimmediate_operand (op
, mode
);
3943 return register_operand (op
, mode
);
3946 /* Return false if this is the stack pointer, or any other fake
3947 register eliminable to the stack pointer. Otherwise, this is
3950 This is used to prevent esp from being used as an index reg.
3951 Which would only happen in pathological cases. */
3954 reg_no_sp_operand (rtx op
, enum machine_mode mode
)
3957 if (GET_CODE (t
) == SUBREG
)
3959 if (t
== stack_pointer_rtx
|| t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
)
3962 return register_operand (op
, mode
);
3966 mmx_reg_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3968 return MMX_REG_P (op
);
3971 /* Return false if this is any eliminable register. Otherwise
3975 general_no_elim_operand (rtx op
, enum machine_mode mode
)
3978 if (GET_CODE (t
) == SUBREG
)
3980 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
3981 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
3982 || t
== virtual_stack_dynamic_rtx
)
3985 && REGNO (t
) >= FIRST_VIRTUAL_REGISTER
3986 && REGNO (t
) <= LAST_VIRTUAL_REGISTER
)
3989 return general_operand (op
, mode
);
3992 /* Return false if this is any eliminable register. Otherwise
3993 register_operand or const_int. */
3996 nonmemory_no_elim_operand (rtx op
, enum machine_mode mode
)
3999 if (GET_CODE (t
) == SUBREG
)
4001 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
4002 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
4003 || t
== virtual_stack_dynamic_rtx
)
4006 return GET_CODE (op
) == CONST_INT
|| register_operand (op
, mode
);
4009 /* Return false if this is any eliminable register or stack register,
4010 otherwise work like register_operand. */
4013 index_register_operand (rtx op
, enum machine_mode mode
)
4016 if (GET_CODE (t
) == SUBREG
)
4020 if (t
== arg_pointer_rtx
4021 || t
== frame_pointer_rtx
4022 || t
== virtual_incoming_args_rtx
4023 || t
== virtual_stack_vars_rtx
4024 || t
== virtual_stack_dynamic_rtx
4025 || REGNO (t
) == STACK_POINTER_REGNUM
)
4028 return general_operand (op
, mode
);
4031 /* Return true if op is a Q_REGS class register. */
4034 q_regs_operand (rtx op
, enum machine_mode mode
)
4036 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
4038 if (GET_CODE (op
) == SUBREG
)
4039 op
= SUBREG_REG (op
);
4040 return ANY_QI_REG_P (op
);
4043 /* Return true if op is a flags register. */
4046 flags_reg_operand (rtx op
, enum machine_mode mode
)
4048 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
4050 return REG_P (op
) && REGNO (op
) == FLAGS_REG
&& GET_MODE (op
) != VOIDmode
;
4053 /* Return true if op is a NON_Q_REGS class register. */
4056 non_q_regs_operand (rtx op
, enum machine_mode mode
)
4058 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
4060 if (GET_CODE (op
) == SUBREG
)
4061 op
= SUBREG_REG (op
);
4062 return NON_QI_REG_P (op
);
4066 zero_extended_scalar_load_operand (rtx op
,
4067 enum machine_mode mode ATTRIBUTE_UNUSED
)
4070 if (GET_CODE (op
) != MEM
)
4072 op
= maybe_get_pool_constant (op
);
4075 if (GET_CODE (op
) != CONST_VECTOR
)
4078 (GET_MODE_SIZE (GET_MODE (op
)) /
4079 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op
))));
4080 for (n_elts
--; n_elts
> 0; n_elts
--)
4082 rtx elt
= CONST_VECTOR_ELT (op
, n_elts
);
4083 if (elt
!= CONST0_RTX (GET_MODE_INNER (GET_MODE (op
))))
4089 /* Return 1 when OP is operand acceptable for standard SSE move. */
4091 vector_move_operand (rtx op
, enum machine_mode mode
)
4093 if (nonimmediate_operand (op
, mode
))
4095 if (GET_MODE (op
) != mode
&& mode
!= VOIDmode
)
4097 return (op
== CONST0_RTX (GET_MODE (op
)));
4100 /* Return true if op is a valid address, and does not contain
4101 a segment override. */
4104 no_seg_address_operand (rtx op
, enum machine_mode mode
)
4106 struct ix86_address parts
;
4108 if (! address_operand (op
, mode
))
4111 if (! ix86_decompose_address (op
, &parts
))
4114 return parts
.seg
== SEG_DEFAULT
;
4117 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
4120 sse_comparison_operator (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
4122 enum rtx_code code
= GET_CODE (op
);
4125 /* Operations supported directly. */
4135 /* These are equivalent to ones above in non-IEEE comparisons. */
4142 return !TARGET_IEEE_FP
;
4147 /* Return 1 if OP is a valid comparison operator in valid mode. */
4149 ix86_comparison_operator (rtx op
, enum machine_mode mode
)
4151 enum machine_mode inmode
;
4152 enum rtx_code code
= GET_CODE (op
);
4153 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
4155 if (!COMPARISON_P (op
))
4157 inmode
= GET_MODE (XEXP (op
, 0));
4159 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
4161 enum rtx_code second_code
, bypass_code
;
4162 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
4163 return (bypass_code
== NIL
&& second_code
== NIL
);
4170 if (inmode
== CCmode
|| inmode
== CCGCmode
4171 || inmode
== CCGOCmode
|| inmode
== CCNOmode
)
4174 case LTU
: case GTU
: case LEU
: case ORDERED
: case UNORDERED
: case GEU
:
4175 if (inmode
== CCmode
)
4179 if (inmode
== CCmode
|| inmode
== CCGCmode
|| inmode
== CCNOmode
)
4187 /* Return 1 if OP is a valid comparison operator testing carry flag
4190 ix86_carry_flag_operator (rtx op
, enum machine_mode mode
)
4192 enum machine_mode inmode
;
4193 enum rtx_code code
= GET_CODE (op
);
4195 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
4197 if (!COMPARISON_P (op
))
4199 inmode
= GET_MODE (XEXP (op
, 0));
4200 if (GET_CODE (XEXP (op
, 0)) != REG
4201 || REGNO (XEXP (op
, 0)) != 17
4202 || XEXP (op
, 1) != const0_rtx
)
4205 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
4207 enum rtx_code second_code
, bypass_code
;
4209 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
4210 if (bypass_code
!= NIL
|| second_code
!= NIL
)
4212 code
= ix86_fp_compare_code_to_integer (code
);
4214 else if (inmode
!= CCmode
)
4219 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
4222 fcmov_comparison_operator (rtx op
, enum machine_mode mode
)
4224 enum machine_mode inmode
;
4225 enum rtx_code code
= GET_CODE (op
);
4227 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
4229 if (!COMPARISON_P (op
))
4231 inmode
= GET_MODE (XEXP (op
, 0));
4232 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
4234 enum rtx_code second_code
, bypass_code
;
4236 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
4237 if (bypass_code
!= NIL
|| second_code
!= NIL
)
4239 code
= ix86_fp_compare_code_to_integer (code
);
4241 /* The i387 supports only a limited set of condition codes. */
4244 case LTU
: case GTU
: case LEU
: case GEU
:
4245 if (inmode
== CCmode
|| inmode
== CCFPmode
|| inmode
== CCFPUmode
)
4248 case ORDERED
: case UNORDERED
:
4256 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4259 promotable_binary_operator (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
4261 switch (GET_CODE (op
))
4264 /* Modern CPUs have same latency for HImode and SImode multiply,
4265 but 386 and 486 do HImode multiply faster. */
4266 return ix86_tune
> PROCESSOR_I486
;
4278 /* Nearly general operand, but accept any const_double, since we wish
4279 to be able to drop them into memory rather than have them get pulled
4283 cmp_fp_expander_operand (rtx op
, enum machine_mode mode
)
4285 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
4287 if (GET_CODE (op
) == CONST_DOUBLE
)
4289 return general_operand (op
, mode
);
4292 /* Match an SI or HImode register for a zero_extract. */
4295 ext_register_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
4298 if ((!TARGET_64BIT
|| GET_MODE (op
) != DImode
)
4299 && GET_MODE (op
) != SImode
&& GET_MODE (op
) != HImode
)
4302 if (!register_operand (op
, VOIDmode
))
4305 /* Be careful to accept only registers having upper parts. */
4306 regno
= REG_P (op
) ? REGNO (op
) : REGNO (SUBREG_REG (op
));
4307 return (regno
> LAST_VIRTUAL_REGISTER
|| regno
< 4);
4310 /* Return 1 if this is a valid binary floating-point operation.
4311 OP is the expression matched, and MODE is its mode. */
4314 binary_fp_operator (rtx op
, enum machine_mode mode
)
4316 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
4319 switch (GET_CODE (op
))
4325 return GET_MODE_CLASS (GET_MODE (op
)) == MODE_FLOAT
;
4333 mult_operator (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
4335 return GET_CODE (op
) == MULT
;
4339 div_operator (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
4341 return GET_CODE (op
) == DIV
;
4345 arith_or_logical_operator (rtx op
, enum machine_mode mode
)
4347 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
4348 && ARITHMETIC_P (op
));
4351 /* Returns 1 if OP is memory operand with a displacement. */
4354 memory_displacement_operand (rtx op
, enum machine_mode mode
)
4356 struct ix86_address parts
;
4358 if (! memory_operand (op
, mode
))
4361 if (! ix86_decompose_address (XEXP (op
, 0), &parts
))
4364 return parts
.disp
!= NULL_RTX
;
4367 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4368 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4370 ??? It seems likely that this will only work because cmpsi is an
4371 expander, and no actual insns use this. */
4374 cmpsi_operand (rtx op
, enum machine_mode mode
)
4376 if (nonimmediate_operand (op
, mode
))
4379 if (GET_CODE (op
) == AND
4380 && GET_MODE (op
) == SImode
4381 && GET_CODE (XEXP (op
, 0)) == ZERO_EXTRACT
4382 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
4383 && GET_CODE (XEXP (XEXP (op
, 0), 2)) == CONST_INT
4384 && INTVAL (XEXP (XEXP (op
, 0), 1)) == 8
4385 && INTVAL (XEXP (XEXP (op
, 0), 2)) == 8
4386 && GET_CODE (XEXP (op
, 1)) == CONST_INT
)
4392 /* Returns 1 if OP is memory operand that can not be represented by the
4396 long_memory_operand (rtx op
, enum machine_mode mode
)
4398 if (! memory_operand (op
, mode
))
4401 return memory_address_length (op
) != 0;
4404 /* Return nonzero if the rtx is known aligned. */
4407 aligned_operand (rtx op
, enum machine_mode mode
)
4409 struct ix86_address parts
;
4411 if (!general_operand (op
, mode
))
4414 /* Registers and immediate operands are always "aligned". */
4415 if (GET_CODE (op
) != MEM
)
4418 /* Don't even try to do any aligned optimizations with volatiles. */
4419 if (MEM_VOLATILE_P (op
))
4424 /* Pushes and pops are only valid on the stack pointer. */
4425 if (GET_CODE (op
) == PRE_DEC
4426 || GET_CODE (op
) == POST_INC
)
4429 /* Decode the address. */
4430 if (! ix86_decompose_address (op
, &parts
))
4433 /* Look for some component that isn't known to be aligned. */
4437 && REGNO_POINTER_ALIGN (REGNO (parts
.index
)) < 32)
4442 if (REGNO_POINTER_ALIGN (REGNO (parts
.base
)) < 32)
4447 if (GET_CODE (parts
.disp
) != CONST_INT
4448 || (INTVAL (parts
.disp
) & 3) != 0)
4452 /* Didn't find one -- this must be an aligned address. */
4456 /* Initialize the table of extra 80387 mathematical constants. */
4459 init_ext_80387_constants (void)
4461 static const char * cst
[5] =
4463 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4464 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4465 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4466 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4467 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4471 for (i
= 0; i
< 5; i
++)
4473 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
4474 /* Ensure each constant is rounded to XFmode precision. */
4475 real_convert (&ext_80387_constants_table
[i
],
4476 XFmode
, &ext_80387_constants_table
[i
]);
4479 ext_80387_constants_init
= 1;
4482 /* Return true if the constant is something that can be loaded with
4483 a special instruction. */
4486 standard_80387_constant_p (rtx x
)
4488 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
4491 if (x
== CONST0_RTX (GET_MODE (x
)))
4493 if (x
== CONST1_RTX (GET_MODE (x
)))
4496 /* For XFmode constants, try to find a special 80387 instruction when
4497 optimizing for size or on those CPUs that benefit from them. */
4498 if (GET_MODE (x
) == XFmode
4499 && (optimize_size
|| x86_ext_80387_constants
& TUNEMASK
))
4504 if (! ext_80387_constants_init
)
4505 init_ext_80387_constants ();
4507 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
4508 for (i
= 0; i
< 5; i
++)
4509 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
4516 /* Return the opcode of the special instruction to be used to load
4520 standard_80387_constant_opcode (rtx x
)
4522 switch (standard_80387_constant_p (x
))
4542 /* Return the CONST_DOUBLE representing the 80387 constant that is
4543 loaded by the specified special instruction. The argument IDX
4544 matches the return value from standard_80387_constant_p. */
4547 standard_80387_constant_rtx (int idx
)
4551 if (! ext_80387_constants_init
)
4552 init_ext_80387_constants ();
4568 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
4572 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4575 standard_sse_constant_p (rtx x
)
4577 if (x
== const0_rtx
)
4579 return (x
== CONST0_RTX (GET_MODE (x
)));
4582 /* Returns 1 if OP contains a symbol reference */
4585 symbolic_reference_mentioned_p (rtx op
)
4590 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
4593 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
4594 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
4600 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
4601 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
4605 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
4612 /* Return 1 if it is appropriate to emit `ret' instructions in the
4613 body of a function. Do this only if the epilogue is simple, needing a
4614 couple of insns. Prior to reloading, we can't tell how many registers
4615 must be saved, so return 0 then. Return 0 if there is no frame
4616 marker to de-allocate.
4618 If NON_SAVING_SETJMP is defined and true, then it is not possible
4619 for the epilogue to be simple, so return 0. This is a special case
4620 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4621 until final, but jump_optimize may need to know sooner if a
4625 ix86_can_use_return_insn_p (void)
4627 struct ix86_frame frame
;
4629 #ifdef NON_SAVING_SETJMP
4630 if (NON_SAVING_SETJMP
&& current_function_calls_setjmp
)
4634 if (! reload_completed
|| frame_pointer_needed
)
4637 /* Don't allow more than 32K of pop, since that's all we can do
4638 with one instruction. */
4639 if (current_function_pops_args
4640 && current_function_args_size
>= 32768)
4643 ix86_compute_frame_layout (&frame
);
4644 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
4647 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4649 x86_64_sign_extended_value (rtx value
)
4651 switch (GET_CODE (value
))
4653 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4654 to be at least 32 and thus all acceptable constants are
4655 represented as CONST_INT. */
4657 if (HOST_BITS_PER_WIDE_INT
== 32)
4661 HOST_WIDE_INT val
= trunc_int_for_mode (INTVAL (value
), DImode
);
4662 return trunc_int_for_mode (val
, SImode
) == val
;
4666 /* For certain code models, the symbolic references are known to fit.
4667 In the CM_SMALL_PIC model we know it fits if it is local to the shared
4668 library. Don't count TLS SYMBOL_REFs here, since they should fit
4669 only if inside of UNSPEC handled below. */
4671 /* TLS symbols are not constant. */
4672 if (tls_symbolic_operand (value
, Pmode
))
4674 return (ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_KERNEL
);
4676 /* For certain code models, the code is near as well. */
4678 return (ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
4679 || ix86_cmodel
== CM_KERNEL
);
4681 /* We also may accept the offsetted memory references in certain special
4684 if (GET_CODE (XEXP (value
, 0)) == UNSPEC
)
4685 switch (XINT (XEXP (value
, 0), 1))
4687 case UNSPEC_GOTPCREL
:
4689 case UNSPEC_GOTNTPOFF
:
4695 if (GET_CODE (XEXP (value
, 0)) == PLUS
)
4697 rtx op1
= XEXP (XEXP (value
, 0), 0);
4698 rtx op2
= XEXP (XEXP (value
, 0), 1);
4699 HOST_WIDE_INT offset
;
4701 if (ix86_cmodel
== CM_LARGE
)
4703 if (GET_CODE (op2
) != CONST_INT
)
4705 offset
= trunc_int_for_mode (INTVAL (op2
), DImode
);
4706 switch (GET_CODE (op1
))
4709 /* For CM_SMALL assume that the last object is 16MB before
4710 the end of the 31-bit boundary. We may also accept pretty
4711 large negative constants knowing that all objects are
4712 in the positive half of the address space. */
4713 if (ix86_cmodel
== CM_SMALL
4714 && offset
< 16*1024*1024
4715 && trunc_int_for_mode (offset
, SImode
) == offset
)
4717 /* For CM_KERNEL we know that all objects reside in the
4718 negative half of the 32-bit address space. We may not
4719 accept negative offsets, since they may be just off,
4720 and we may accept pretty large positive ones. */
4721 if (ix86_cmodel
== CM_KERNEL
4723 && trunc_int_for_mode (offset
, SImode
) == offset
)
4727 /* These conditions are similar to SYMBOL_REF ones, just the
4728 constraints for code models differ. */
4729 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
4730 && offset
< 16*1024*1024
4731 && trunc_int_for_mode (offset
, SImode
) == offset
)
4733 if (ix86_cmodel
== CM_KERNEL
4735 && trunc_int_for_mode (offset
, SImode
) == offset
)
4739 switch (XINT (op1
, 1))
4744 && trunc_int_for_mode (offset
, SImode
) == offset
)
4758 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4760 x86_64_zero_extended_value (rtx value
)
4762 switch (GET_CODE (value
))
4765 if (HOST_BITS_PER_WIDE_INT
== 32)
4766 return (GET_MODE (value
) == VOIDmode
4767 && !CONST_DOUBLE_HIGH (value
));
4771 if (HOST_BITS_PER_WIDE_INT
== 32)
4772 return INTVAL (value
) >= 0;
4774 return !(INTVAL (value
) & ~(HOST_WIDE_INT
) 0xffffffff);
4777 /* For certain code models, the symbolic references are known to fit. */
4779 /* TLS symbols are not constant. */
4780 if (tls_symbolic_operand (value
, Pmode
))
4782 return ix86_cmodel
== CM_SMALL
;
4784 /* For certain code models, the code is near as well. */
4786 return ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
;
4788 /* We also may accept the offsetted memory references in certain special
4791 if (GET_CODE (XEXP (value
, 0)) == PLUS
)
4793 rtx op1
= XEXP (XEXP (value
, 0), 0);
4794 rtx op2
= XEXP (XEXP (value
, 0), 1);
4796 if (ix86_cmodel
== CM_LARGE
)
4798 switch (GET_CODE (op1
))
4802 /* For small code model we may accept pretty large positive
4803 offsets, since one bit is available for free. Negative
4804 offsets are limited by the size of NULL pointer area
4805 specified by the ABI. */
4806 if (ix86_cmodel
== CM_SMALL
4807 && GET_CODE (op2
) == CONST_INT
4808 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
4809 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
4812 /* ??? For the kernel, we may accept adjustment of
4813 -0x10000000, since we know that it will just convert
4814 negative address space to positive, but perhaps this
4815 is not worthwhile. */
4818 /* These conditions are similar to SYMBOL_REF ones, just the
4819 constraints for code models differ. */
4820 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
4821 && GET_CODE (op2
) == CONST_INT
4822 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
4823 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
4837 /* Value should be nonzero if functions must have frame pointers.
4838 Zero means the frame pointer need not be set up (and parms may
4839 be accessed via the stack pointer) in functions that seem suitable. */
4842 ix86_frame_pointer_required (void)
4844 /* If we accessed previous frames, then the generated code expects
4845 to be able to access the saved ebp value in our frame. */
4846 if (cfun
->machine
->accesses_prev_frame
)
4849 /* Several x86 os'es need a frame pointer for other reasons,
4850 usually pertaining to setjmp. */
4851 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
4854 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4855 the frame pointer by default. Turn it back on now if we've not
4856 got a leaf function. */
4857 if (TARGET_OMIT_LEAF_FRAME_POINTER
4858 && (!current_function_is_leaf
))
4861 if (current_function_profile
)
4867 /* Record that the current function accesses previous call frames.
   This sets the per-function flag cfun->machine->accesses_prev_frame,
   which ix86_frame_pointer_required checks first when deciding whether
   a frame pointer is needed.  Takes no arguments.  */
4870 ix86_setup_frame_addresses (void)
4872 cfun
->machine
->accesses_prev_frame
= 1;
4875 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4876 # define USE_HIDDEN_LINKONCE 1
4878 # define USE_HIDDEN_LINKONCE 0
4881 static int pic_labels_used
;
4883 /* Fills in the label name that should be used for a pc thunk for
4884 the given register.

   NAME is the caller-provided output buffer (declared as char[32]) and
   REGNO is the hard register number the thunk loads.  When
   USE_HIDDEN_LINKONCE is nonzero the name is "__i686.get_pc_thunk."
   followed by reg_names[REGNO].  The ASM_GENERATE_INTERNAL_LABEL call
   builds an internal label from the "LPR" prefix and REGNO instead.
   NOTE(review): that call is presumably the `else' arm of the test
   above; the `else' itself is not visible in this excerpt -- confirm.  */
4887 get_pc_thunk_name (char name
[32], unsigned int regno
)
4889 if (USE_HIDDEN_LINKONCE
)
4890 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
4892 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
4896 /* This function generates code for -fpic that loads %ebx with
4897 the return address of the caller and then returns. */
4900 ix86_file_end (void)
4905 for (regno
= 0; regno
< 8; ++regno
)
4909 if (! ((pic_labels_used
>> regno
) & 1))
4912 get_pc_thunk_name (name
, regno
);
4914 if (USE_HIDDEN_LINKONCE
)
4918 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
4920 TREE_PUBLIC (decl
) = 1;
4921 TREE_STATIC (decl
) = 1;
4922 DECL_ONE_ONLY (decl
) = 1;
4924 (*targetm
.asm_out
.unique_section
) (decl
, 0);
4925 named_section (decl
, NULL
, 0);
4927 (*targetm
.asm_out
.globalize_label
) (asm_out_file
, name
);
4928 fputs ("\t.hidden\t", asm_out_file
);
4929 assemble_name (asm_out_file
, name
);
4930 fputc ('\n', asm_out_file
);
4931 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
4936 ASM_OUTPUT_LABEL (asm_out_file
, name
);
4939 xops
[0] = gen_rtx_REG (SImode
, regno
);
4940 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
4941 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
4942 output_asm_insn ("ret", xops
);
4945 if (NEED_INDICATE_EXEC_STACK
)
4946 file_end_indicate_exec_stack ();
4949 /* Emit code for the SET_GOT patterns. */
4952 output_set_got (rtx dest
)
4957 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
4959 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
4961 xops
[2] = gen_rtx_LABEL_REF (Pmode
, gen_label_rtx ());
4964 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
4966 output_asm_insn ("call\t%a2", xops
);
4969 /* Output the "canonical" label name ("Lxx$pb") here too. This
4970 is what will be referred to by the Mach-O PIC subsystem. */
4971 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
4973 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
4974 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
4977 output_asm_insn ("pop{l}\t%0", xops
);
4982 get_pc_thunk_name (name
, REGNO (dest
));
4983 pic_labels_used
|= 1 << REGNO (dest
);
4985 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
4986 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
4987 output_asm_insn ("call\t%X2", xops
);
4990 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
4991 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
4992 else if (!TARGET_MACHO
)
4993 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops
);
4998 /* Generate a "push" pattern for input ARG. */
5003 return gen_rtx_SET (VOIDmode
,
5005 gen_rtx_PRE_DEC (Pmode
,
5006 stack_pointer_rtx
)),
5010 /* Return >= 0 if there is an unused call-clobbered register available
5011 for the entire function. */
5014 ix86_select_alt_pic_regnum (void)
5016 if (current_function_is_leaf
&& !current_function_profile
)
5019 for (i
= 2; i
>= 0; --i
)
5020 if (!regs_ever_live
[i
])
5024 return INVALID_REGNUM
;
5027 /* Return 1 if we need to save REGNO. */
5029 ix86_save_reg (unsigned int regno
, int maybe_eh_return
)
5031 if (pic_offset_table_rtx
5032 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
5033 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5034 || current_function_profile
5035 || current_function_calls_eh_return
5036 || current_function_uses_const_pool
))
5038 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
5043 if (current_function_calls_eh_return
&& maybe_eh_return
)
5048 unsigned test
= EH_RETURN_DATA_REGNO (i
);
5049 if (test
== INVALID_REGNUM
)
5056 return (regs_ever_live
[regno
]
5057 && !call_used_regs
[regno
]
5058 && !fixed_regs
[regno
]
5059 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
5062 /* Return number of registers to be saved on the stack. */
5065 ix86_nsaved_regs (void)
5070 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
5071 if (ix86_save_reg (regno
, true))
5076 /* Return the offset between two registers, one to be eliminated, and the other
5077 its replacement, at the start of a routine.

   FROM is the register being eliminated and TO its replacement.  All
   results are derived from the frame layout computed for the current
   function by ix86_compute_frame_layout.  */
5080 ix86_initial_elimination_offset (int from
, int to
)
5082 struct ix86_frame frame
;
5083 ix86_compute_frame_layout (&frame
);
/* Eliminations whose target is the hard frame pointer.  */
5085 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
5086 return frame
.hard_frame_pointer_offset
;
5087 else if (from
== FRAME_POINTER_REGNUM
5088 && to
== HARD_FRAME_POINTER_REGNUM
)
5089 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
/* Remaining cases: the target is checked against the stack pointer.
   NOTE(review): the statements taken when the two sanity tests below
   (`to != STACK_POINTER_REGNUM' and `from != FRAME_POINTER_REGNUM')
   succeed are not visible in this excerpt -- confirm how unsupported
   pairs are handled.  */
5092 if (to
!= STACK_POINTER_REGNUM
)
5094 else if (from
== ARG_POINTER_REGNUM
)
5095 return frame
.stack_pointer_offset
;
5096 else if (from
!= FRAME_POINTER_REGNUM
)
/* Frame pointer -> stack pointer: distance between the two offsets.  */
5099 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
5103 /* Fill structure ix86_frame about frame of currently computed function. */
5106 ix86_compute_frame_layout (struct ix86_frame
*frame
)
5108 HOST_WIDE_INT total_size
;
5109 unsigned int stack_alignment_needed
;
5110 HOST_WIDE_INT offset
;
5111 unsigned int preferred_alignment
;
5112 HOST_WIDE_INT size
= get_frame_size ();
5114 frame
->nregs
= ix86_nsaved_regs ();
5117 stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
5118 preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
5120 /* During reload iteration the number of registers saved can change.
5121 Recompute the value as needed. Do not recompute when the number of registers
5122 didn't change, as reload makes multiple calls to the function and does not
5123 expect the decision to change within a single iteration. */
5125 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
5127 int count
= frame
->nregs
;
5129 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
5130 /* The fast prologue uses move instead of push to save registers. This
5131 is significantly longer, but also executes faster as modern hardware
5132 can execute the moves in parallel, but can't do that for push/pop.
5134 Be careful about choosing what prologue to emit: When function takes
5135 many instructions to execute we may use slow version as well as in
5136 case function is known to be outside hot spot (this is known with
5137 feedback only). Weight the size of function by number of registers
5138 to save as it is cheap to use one or two push instructions but very
5139 slow to use many of them. */
5141 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
5142 if (cfun
->function_frequency
< FUNCTION_FREQUENCY_NORMAL
5143 || (flag_branch_probabilities
5144 && cfun
->function_frequency
< FUNCTION_FREQUENCY_HOT
))
5145 cfun
->machine
->use_fast_prologue_epilogue
= false;
5147 cfun
->machine
->use_fast_prologue_epilogue
5148 = !expensive_function_p (count
);
5150 if (TARGET_PROLOGUE_USING_MOVE
5151 && cfun
->machine
->use_fast_prologue_epilogue
)
5152 frame
->save_regs_using_mov
= true;
5154 frame
->save_regs_using_mov
= false;
5157 /* Skip return address and saved base pointer. */
5158 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
5160 frame
->hard_frame_pointer_offset
= offset
;
5162 /* Do some sanity checking of stack_alignment_needed and
5163 preferred_alignment, since the i386 port is the only one using those
5164 features, and they may break easily. */
5166 if (size
&& !stack_alignment_needed
)
5168 if (preferred_alignment
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
5170 if (preferred_alignment
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
5172 if (stack_alignment_needed
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
5175 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
5176 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
5178 /* Register save area */
5179 offset
+= frame
->nregs
* UNITS_PER_WORD
;
5182 if (ix86_save_varrargs_registers
)
5184 offset
+= X86_64_VARARGS_SIZE
;
5185 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
5188 frame
->va_arg_size
= 0;
5190 /* Align start of frame for local function. */
5191 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
5192 & -stack_alignment_needed
) - offset
;
5194 offset
+= frame
->padding1
;
5196 /* Frame pointer points here. */
5197 frame
->frame_pointer_offset
= offset
;
5201 /* Add outgoing arguments area. Can be skipped if we eliminated
5202 all the function calls as dead code.
5203 Skipping is however impossible when function calls alloca. Alloca
5204 expander assumes that last current_function_outgoing_args_size
5205 of stack frame are unused. */
5206 if (ACCUMULATE_OUTGOING_ARGS
5207 && (!current_function_is_leaf
|| current_function_calls_alloca
))
5209 offset
+= current_function_outgoing_args_size
;
5210 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
5213 frame
->outgoing_arguments_size
= 0;
5215 /* Align stack boundary. Only needed if we're calling another function
5217 if (!current_function_is_leaf
|| current_function_calls_alloca
)
5218 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
5219 & -preferred_alignment
) - offset
;
5221 frame
->padding2
= 0;
5223 offset
+= frame
->padding2
;
5225 /* We've reached end of stack frame. */
5226 frame
->stack_pointer_offset
= offset
;
5228 /* Size prologue needs to allocate. */
5229 frame
->to_allocate
=
5230 (size
+ frame
->padding1
+ frame
->padding2
5231 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
5233 if ((!frame
->to_allocate
&& frame
->nregs
<= 1)
5234 || (TARGET_64BIT
&& frame
->to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
5235 frame
->save_regs_using_mov
= false;
5237 if (TARGET_RED_ZONE
&& current_function_sp_is_unchanging
5238 && current_function_is_leaf
)
5240 frame
->red_zone_size
= frame
->to_allocate
;
5241 if (frame
->save_regs_using_mov
)
5242 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
5243 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
5244 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
5247 frame
->red_zone_size
= 0;
5248 frame
->to_allocate
-= frame
->red_zone_size
;
5249 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
5251 fprintf (stderr
, "nregs: %i\n", frame
->nregs
);
5252 fprintf (stderr
, "size: %i\n", size
);
5253 fprintf (stderr
, "alignment1: %i\n", stack_alignment_needed
);
5254 fprintf (stderr
, "padding1: %i\n", frame
->padding1
);
5255 fprintf (stderr
, "va_arg: %i\n", frame
->va_arg_size
);
5256 fprintf (stderr
, "padding2: %i\n", frame
->padding2
);
5257 fprintf (stderr
, "to_allocate: %i\n", frame
->to_allocate
);
5258 fprintf (stderr
, "red_zone_size: %i\n", frame
->red_zone_size
);
5259 fprintf (stderr
, "frame_pointer_offset: %i\n", frame
->frame_pointer_offset
);
5260 fprintf (stderr
, "hard_frame_pointer_offset: %i\n",
5261 frame
->hard_frame_pointer_offset
);
5262 fprintf (stderr
, "stack_pointer_offset: %i\n", frame
->stack_pointer_offset
);
5266 /* Emit code to save registers in the prologue. */
5269 ix86_emit_save_regs (void)
5274 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
5275 if (ix86_save_reg (regno
, true))
5277 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
5278 RTX_FRAME_RELATED_P (insn
) = 1;
5282 /* Emit code to save registers using MOV insns. First register
5283 is stored at POINTER + OFFSET. */
5285 ix86_emit_save_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
)
5290 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5291 if (ix86_save_reg (regno
, true))
5293 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
5295 gen_rtx_REG (Pmode
, regno
));
5296 RTX_FRAME_RELATED_P (insn
) = 1;
5297 offset
+= UNITS_PER_WORD
;
5301 /* Expand prologue or epilogue stack adjustment.
5302 The pattern exists to put a dependency on all ebp-based memory accesses.
5303 STYLE should be negative if instructions should be marked as frame related,
5304 zero if %r11 register is live and cannot be freely used and positive
5308 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
, int style
)
5313 insn
= emit_insn (gen_pro_epilogue_adjust_stack_1 (dest
, src
, offset
));
5314 else if (x86_64_immediate_operand (offset
, DImode
))
5315 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest
, src
, offset
));
5319 /* r11 is used by indirect sibcall return as well, set before the
5320 epilogue and used after the epilogue. ATM indirect sibcall
5321 shouldn't be used together with huge frame sizes in one
5322 function because of the frame_size check in sibcall.c. */
5325 r11
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
5326 insn
= emit_insn (gen_rtx_SET (DImode
, r11
, offset
));
5328 RTX_FRAME_RELATED_P (insn
) = 1;
5329 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest
, src
, r11
,
5333 RTX_FRAME_RELATED_P (insn
) = 1;
5336 /* Expand the prologue into a bunch of separate insns. */
5339 ix86_expand_prologue (void)
5343 struct ix86_frame frame
;
5344 HOST_WIDE_INT allocate
;
5346 ix86_compute_frame_layout (&frame
);
5348 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5349 slower on all targets. Also sdb doesn't like it. */
5351 if (frame_pointer_needed
)
5353 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
5354 RTX_FRAME_RELATED_P (insn
) = 1;
5356 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
5357 RTX_FRAME_RELATED_P (insn
) = 1;
5360 allocate
= frame
.to_allocate
;
5362 if (!frame
.save_regs_using_mov
)
5363 ix86_emit_save_regs ();
5365 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
5367 /* When using red zone we may start register saving before allocating
5368 the stack frame saving one cycle of the prologue. */
5369 if (TARGET_RED_ZONE
&& frame
.save_regs_using_mov
)
5370 ix86_emit_save_regs_using_mov (frame_pointer_needed
? hard_frame_pointer_rtx
5371 : stack_pointer_rtx
,
5372 -frame
.nregs
* UNITS_PER_WORD
);
5376 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
5377 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5378 GEN_INT (-allocate
), -1);
5381 /* Only valid for Win32. */
5382 rtx eax
= gen_rtx_REG (SImode
, 0);
5383 bool eax_live
= ix86_eax_live_at_start_p ();
5390 emit_insn (gen_push (eax
));
5394 insn
= emit_move_insn (eax
, GEN_INT (allocate
));
5395 RTX_FRAME_RELATED_P (insn
) = 1;
5397 insn
= emit_insn (gen_allocate_stack_worker (eax
));
5398 RTX_FRAME_RELATED_P (insn
) = 1;
5403 if (frame_pointer_needed
)
5404 t
= plus_constant (hard_frame_pointer_rtx
,
5407 - frame
.nregs
* UNITS_PER_WORD
);
5409 t
= plus_constant (stack_pointer_rtx
, allocate
);
5410 emit_move_insn (eax
, gen_rtx_MEM (SImode
, t
));
5414 if (frame
.save_regs_using_mov
&& !TARGET_RED_ZONE
)
5416 if (!frame_pointer_needed
|| !frame
.to_allocate
)
5417 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
5419 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
5420 -frame
.nregs
* UNITS_PER_WORD
);
5423 pic_reg_used
= false;
5424 if (pic_offset_table_rtx
5425 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5426 || current_function_profile
))
5428 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
5430 if (alt_pic_reg_used
!= INVALID_REGNUM
)
5431 REGNO (pic_offset_table_rtx
) = alt_pic_reg_used
;
5433 pic_reg_used
= true;
5438 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
5440 /* Even with accurate pre-reload life analysis, we can wind up
5441 deleting all references to the pic register after reload.
5442 Consider if cross-jumping unifies two sides of a branch
5443 controlled by a comparison vs the only read from a global.
5444 In which case, allow the set_got to be deleted, though we're
5445 too late to do anything about the ebx save in the prologue. */
5446 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
5449 /* Prevent function calls from being scheduled before the call to mcount.
5450 In the pic_reg_used case, make sure that the got load isn't deleted. */
5451 if (current_function_profile
)
5452 emit_insn (gen_blockage (pic_reg_used
? pic_offset_table_rtx
: const0_rtx
));
5455 /* Emit code to restore saved registers using MOV insns. First register
5456 is restored from POINTER + OFFSET. */
5458 ix86_emit_restore_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
,
5459 int maybe_eh_return
)
5462 rtx base_address
= gen_rtx_MEM (Pmode
, pointer
);
5464 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5465 if (ix86_save_reg (regno
, maybe_eh_return
))
5467 /* Ensure that adjust_address won't be forced to produce pointer
5468 out of range allowed by x86-64 instruction set. */
5469 if (TARGET_64BIT
&& offset
!= trunc_int_for_mode (offset
, SImode
))
5473 r11
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
5474 emit_move_insn (r11
, GEN_INT (offset
));
5475 emit_insn (gen_adddi3 (r11
, r11
, pointer
));
5476 base_address
= gen_rtx_MEM (Pmode
, r11
);
5479 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
5480 adjust_address (base_address
, Pmode
, offset
));
5481 offset
+= UNITS_PER_WORD
;
5485 /* Restore function stack, frame, and registers. */
5488 ix86_expand_epilogue (int style
)
5491 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
5492 struct ix86_frame frame
;
5493 HOST_WIDE_INT offset
;
5495 ix86_compute_frame_layout (&frame
);
5497 /* Calculate start of saved registers relative to ebp. Special care
5498 must be taken for the normal return case of a function using
5499 eh_return: the eax and edx registers are marked as saved, but not
5500 restored along this path. */
5501 offset
= frame
.nregs
;
5502 if (current_function_calls_eh_return
&& style
!= 2)
5504 offset
*= -UNITS_PER_WORD
;
5506 /* If we're only restoring one register and sp is not valid, then
5507 use a move instruction to restore the register, since it's
5508 less work than reloading sp and popping the register.
5510 The default code results in a stack adjustment using an add/lea instruction,
5511 while this code results in a LEAVE instruction (or discrete equivalent),
5512 so it is profitable in some other cases as well. Especially when there
5513 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5514 and there is exactly one register to pop. This heuristic may need some
5515 tuning in future. */
5516 if ((!sp_valid
&& frame
.nregs
<= 1)
5517 || (TARGET_EPILOGUE_USING_MOVE
5518 && cfun
->machine
->use_fast_prologue_epilogue
5519 && (frame
.nregs
> 1 || frame
.to_allocate
))
5520 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
5521 || (frame_pointer_needed
&& TARGET_USE_LEAVE
5522 && cfun
->machine
->use_fast_prologue_epilogue
5523 && frame
.nregs
== 1)
5524 || current_function_calls_eh_return
)
5526 /* Restore registers. We can use ebp or esp to address the memory
5527 locations. If both are available, default to ebp, since offsets
5528 are known to be small. Only exception is esp pointing directly to the
5529 end of block of saved registers, where we may simplify addressing
5532 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
5533 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
5534 frame
.to_allocate
, style
== 2);
5536 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
5537 offset
, style
== 2);
5539 /* eh_return epilogues need %ecx added to the stack pointer. */
5542 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
5544 if (frame_pointer_needed
)
5546 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
5547 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
5548 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
5550 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
5551 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
5553 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
5558 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
5559 tmp
= plus_constant (tmp
, (frame
.to_allocate
5560 + frame
.nregs
* UNITS_PER_WORD
));
5561 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
5564 else if (!frame_pointer_needed
)
5565 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5566 GEN_INT (frame
.to_allocate
5567 + frame
.nregs
* UNITS_PER_WORD
),
5569 /* If not an i386, mov & pop is faster than "leave". */
5570 else if (TARGET_USE_LEAVE
|| optimize_size
5571 || !cfun
->machine
->use_fast_prologue_epilogue
)
5572 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
5575 pro_epilogue_adjust_stack (stack_pointer_rtx
,
5576 hard_frame_pointer_rtx
,
5579 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
5581 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
5586 /* First step is to deallocate the stack frame so that we can
5587 pop the registers. */
5590 if (!frame_pointer_needed
)
5592 pro_epilogue_adjust_stack (stack_pointer_rtx
,
5593 hard_frame_pointer_rtx
,
5594 GEN_INT (offset
), style
);
5596 else if (frame
.to_allocate
)
5597 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5598 GEN_INT (frame
.to_allocate
), style
);
5600 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5601 if (ix86_save_reg (regno
, false))
5604 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
5606 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
5608 if (frame_pointer_needed
)
5610 /* Leave results in shorter dependency chains on CPUs that are
5611 able to grok it fast. */
5612 if (TARGET_USE_LEAVE
)
5613 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
5614 else if (TARGET_64BIT
)
5615 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
5617 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
5621 /* Sibcall epilogues don't want a return instruction. */
5625 if (current_function_pops_args
&& current_function_args_size
)
5627 rtx popc
= GEN_INT (current_function_pops_args
);
5629 /* i386 can only pop 64K bytes. If asked to pop more, pop
5630 return address, do explicit add, and jump indirectly to the
5633 if (current_function_pops_args
>= 65536)
5635 rtx ecx
= gen_rtx_REG (SImode
, 2);
5637 /* There is no "pascal" calling convention in 64bit ABI. */
5641 emit_insn (gen_popsi1 (ecx
));
5642 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
5643 emit_jump_insn (gen_return_indirect_internal (ecx
));
5646 emit_jump_insn (gen_return_pop_internal (popc
));
5649 emit_jump_insn (gen_return_internal ());
5652 /* Reset from the function's potential modifications. */
5655 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
5656 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
5658 if (pic_offset_table_rtx
)
5659 REGNO (pic_offset_table_rtx
) = REAL_PIC_OFFSET_TABLE_REGNUM
;
5662 /* Extract the parts of an RTL expression that is a valid memory address
5663 for an instruction. Return 0 if the structure of the address is
5664 grossly off. Return -1 if the address contains ASHIFT, so it is not
5665 strictly valid, but still used for computing length of lea instruction. */
5668 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
5670 rtx base
= NULL_RTX
;
5671 rtx index
= NULL_RTX
;
5672 rtx disp
= NULL_RTX
;
5673 HOST_WIDE_INT scale
= 1;
5674 rtx scale_rtx
= NULL_RTX
;
5676 enum ix86_address_seg seg
= SEG_DEFAULT
;
5678 if (GET_CODE (addr
) == REG
|| GET_CODE (addr
) == SUBREG
)
5680 else if (GET_CODE (addr
) == PLUS
)
5690 addends
[n
++] = XEXP (op
, 1);
5693 while (GET_CODE (op
) == PLUS
);
5698 for (i
= n
; i
>= 0; --i
)
5701 switch (GET_CODE (op
))
5706 index
= XEXP (op
, 0);
5707 scale_rtx
= XEXP (op
, 1);
5711 if (XINT (op
, 1) == UNSPEC_TP
5712 && TARGET_TLS_DIRECT_SEG_REFS
5713 && seg
== SEG_DEFAULT
)
5714 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
5743 else if (GET_CODE (addr
) == MULT
)
5745 index
= XEXP (addr
, 0); /* index*scale */
5746 scale_rtx
= XEXP (addr
, 1);
5748 else if (GET_CODE (addr
) == ASHIFT
)
5752 /* We're called for lea too, which implements ashift on occasion. */
5753 index
= XEXP (addr
, 0);
5754 tmp
= XEXP (addr
, 1);
5755 if (GET_CODE (tmp
) != CONST_INT
)
5757 scale
= INTVAL (tmp
);
5758 if ((unsigned HOST_WIDE_INT
) scale
> 3)
5764 disp
= addr
; /* displacement */
5766 /* Extract the integral value of scale. */
5769 if (GET_CODE (scale_rtx
) != CONST_INT
)
5771 scale
= INTVAL (scale_rtx
);
5774 /* Allow arg pointer and stack pointer as index if there is no scaling. */
5775 if (base
&& index
&& scale
== 1
5776 && (index
== arg_pointer_rtx
5777 || index
== frame_pointer_rtx
5778 || (REG_P (index
) && REGNO (index
) == STACK_POINTER_REGNUM
)))
5785 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5786 if ((base
== hard_frame_pointer_rtx
5787 || base
== frame_pointer_rtx
5788 || base
== arg_pointer_rtx
) && !disp
)
5791 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5792 Avoid this by transforming to [%esi+0]. */
5793 if (ix86_tune
== PROCESSOR_K6
&& !optimize_size
5794 && base
&& !index
&& !disp
5796 && REGNO_REG_CLASS (REGNO (base
)) == SIREG
)
5799 /* Special case: encode reg+reg instead of reg*2. */
5800 if (!base
&& index
&& scale
&& scale
== 2)
5801 base
= index
, scale
= 1;
5803 /* Special case: scaling cannot be encoded without base or displacement. */
5804 if (!base
&& !disp
&& index
&& scale
!= 1)
5816 /* Return cost of the memory address x.
5817 For i386, it is better to use a complex address than let gcc copy
5818 the address into a reg and make a new pseudo. But not if the address
5819 requires two regs - that would mean more pseudos with longer
5822 ix86_address_cost (rtx x
)
5824 struct ix86_address parts
;
5827 if (!ix86_decompose_address (x
, &parts
))
5830 /* More complex memory references are better. */
5831 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
5833 if (parts
.seg
!= SEG_DEFAULT
)
5836 /* Attempt to minimize number of registers in the address. */
5838 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
5840 && (!REG_P (parts
.index
)
5841 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
5845 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
5847 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
5848 && parts
.base
!= parts
.index
)
5851 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5852 since its predecode logic can't detect the length of instructions
5853 and it degenerates to vector decoded. Increase cost of such
5854 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5855 to split such addresses or even refuse such addresses at all.
5857 Following addressing modes are affected:
5862 The first and last cases may be avoidable by explicitly coding the zero in
5863 the memory address, but I don't have an AMD-K6 machine handy to check this
5867 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
5868 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
5869 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
5875 /* If X is a machine specific address (i.e. a symbol or label being
5876 referenced as a displacement from the GOT implemented using an
5877 UNSPEC), then return the base term. Otherwise return X. */
5880 ix86_find_base_term (rtx x
)
5886 if (GET_CODE (x
) != CONST
)
5889 if (GET_CODE (term
) == PLUS
5890 && (GET_CODE (XEXP (term
, 1)) == CONST_INT
5891 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
5892 term
= XEXP (term
, 0);
5893 if (GET_CODE (term
) != UNSPEC
5894 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
5897 term
= XVECEXP (term
, 0, 0);
5899 if (GET_CODE (term
) != SYMBOL_REF
5900 && GET_CODE (term
) != LABEL_REF
)
5906 term
= ix86_delegitimize_address (x
);
5908 if (GET_CODE (term
) != SYMBOL_REF
5909 && GET_CODE (term
) != LABEL_REF
)
5915 /* Determine if a given RTX is a valid constant. We already know this
5916 satisfies CONSTANT_P. */
5919 legitimate_constant_p (rtx x
)
5923 switch (GET_CODE (x
))
5926 /* TLS symbols are not constant. */
5927 if (tls_symbolic_operand (x
, Pmode
))
5932 inner
= XEXP (x
, 0);
5934 /* Offsets of TLS symbols are never valid.
5935 Discourage CSE from creating them. */
5936 if (GET_CODE (inner
) == PLUS
5937 && tls_symbolic_operand (XEXP (inner
, 0), Pmode
))
5940 if (GET_CODE (inner
) == PLUS
5941 || GET_CODE (inner
) == MINUS
)
5943 if (GET_CODE (XEXP (inner
, 1)) != CONST_INT
)
5945 inner
= XEXP (inner
, 0);
5948 /* Only some unspecs are valid as "constants". */
5949 if (GET_CODE (inner
) == UNSPEC
)
5950 switch (XINT (inner
, 1))
5954 return local_exec_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
5956 return local_dynamic_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
5966 /* Otherwise we handle everything else in the move patterns. */
5970 /* Determine if it's legal to put X into the constant pool. This
5971 is not possible for the address of thread-local symbols, which
5972 is checked above. */
5975 ix86_cannot_force_const_mem (rtx x
)
5977 return !legitimate_constant_p (x
);
5980 /* Determine if a given RTX is a valid constant address. */
5983 constant_address_p (rtx x
)
5985 return CONSTANT_P (x
) && legitimate_address_p (Pmode
, x
, 1);
5988 /* Nonzero if the constant value X is a legitimate general operand
5989 when generating PIC code. It is given that flag_pic is on and
5990 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5993 legitimate_pic_operand_p (rtx x
)
5997 switch (GET_CODE (x
))
6000 inner
= XEXP (x
, 0);
6002 /* Only some unspecs are valid as "constants". */
6003 if (GET_CODE (inner
) == UNSPEC
)
6004 switch (XINT (inner
, 1))
6007 return local_exec_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
6015 return legitimate_pic_address_disp_p (x
);
6022 /* Determine if a given CONST RTX is a valid memory displacement
6026 legitimate_pic_address_disp_p (rtx disp
)
6030 /* In 64bit mode we can allow direct addresses of symbols and labels
6031 when they are not dynamic symbols. */
6034 /* TLS references should always be enclosed in UNSPEC. */
6035 if (tls_symbolic_operand (disp
, GET_MODE (disp
)))
6037 if (GET_CODE (disp
) == SYMBOL_REF
6038 && ix86_cmodel
== CM_SMALL_PIC
6039 && SYMBOL_REF_LOCAL_P (disp
))
6041 if (GET_CODE (disp
) == LABEL_REF
)
6043 if (GET_CODE (disp
) == CONST
6044 && GET_CODE (XEXP (disp
, 0)) == PLUS
)
6046 rtx op0
= XEXP (XEXP (disp
, 0), 0);
6047 rtx op1
= XEXP (XEXP (disp
, 0), 1);
6049 /* TLS references should always be enclosed in UNSPEC. */
6050 if (tls_symbolic_operand (op0
, GET_MODE (op0
)))
6052 if (((GET_CODE (op0
) == SYMBOL_REF
6053 && ix86_cmodel
== CM_SMALL_PIC
6054 && SYMBOL_REF_LOCAL_P (op0
))
6055 || GET_CODE (op0
) == LABEL_REF
)
6056 && GET_CODE (op1
) == CONST_INT
6057 && INTVAL (op1
) < 16*1024*1024
6058 && INTVAL (op1
) >= -16*1024*1024)
6062 if (GET_CODE (disp
) != CONST
)
6064 disp
= XEXP (disp
, 0);
6068 /* It is unsafe to allow PLUS expressions. This limits the allowed distance
6069 of GOT tables. We should not need these anyway. */
6070 if (GET_CODE (disp
) != UNSPEC
6071 || XINT (disp
, 1) != UNSPEC_GOTPCREL
)
6074 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
6075 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
6081 if (GET_CODE (disp
) == PLUS
)
6083 if (GET_CODE (XEXP (disp
, 1)) != CONST_INT
)
6085 disp
= XEXP (disp
, 0);
6089 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
6090 if (TARGET_MACHO
&& GET_CODE (disp
) == MINUS
)
6092 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
6093 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
6094 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
6096 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
6097 if (! strcmp (sym_name
, "<pic base>"))
6102 if (GET_CODE (disp
) != UNSPEC
)
6105 switch (XINT (disp
, 1))
6110 return GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
;
6112 if (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
6113 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
6114 return local_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
6116 case UNSPEC_GOTTPOFF
:
6117 case UNSPEC_GOTNTPOFF
:
6118 case UNSPEC_INDNTPOFF
:
6121 return initial_exec_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
6123 return local_exec_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
6125 return local_dynamic_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
6131 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6132 memory address for an instruction. The MODE argument is the machine mode
6133 for the MEM expression that wants to use this address.
6135 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6136 convert common non-canonical forms to canonical form so that they will
6140 legitimate_address_p (enum machine_mode mode
, rtx addr
, int strict
)
6142 struct ix86_address parts
;
6143 rtx base
, index
, disp
;
6144 HOST_WIDE_INT scale
;
6145 const char *reason
= NULL
;
6146 rtx reason_rtx
= NULL_RTX
;
6148 if (TARGET_DEBUG_ADDR
)
6151 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6152 GET_MODE_NAME (mode
), strict
);
6156 if (ix86_decompose_address (addr
, &parts
) <= 0)
6158 reason
= "decomposition failed";
6163 index
= parts
.index
;
6165 scale
= parts
.scale
;
6167 /* Validate base register.
6169 Don't allow SUBREG's here, it can lead to spill failures when the base
6170 is one word out of a two word structure, which is represented internally
6177 if (GET_CODE (base
) != REG
)
6179 reason
= "base is not a register";
6183 if (GET_MODE (base
) != Pmode
)
6185 reason
= "base is not in Pmode";
6189 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (base
))
6190 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (base
)))
6192 reason
= "base is not valid";
6197 /* Validate index register.
6199 Don't allow SUBREG's here, it can lead to spill failures when the index
6200 is one word out of a two word structure, which is represented internally
6207 if (GET_CODE (index
) != REG
)
6209 reason
= "index is not a register";
6213 if (GET_MODE (index
) != Pmode
)
6215 reason
= "index is not in Pmode";
6219 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (index
))
6220 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (index
)))
6222 reason
= "index is not valid";
6227 /* Validate scale factor. */
6230 reason_rtx
= GEN_INT (scale
);
6233 reason
= "scale without index";
6237 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
6239 reason
= "scale is not a valid multiplier";
6244 /* Validate displacement. */
6249 if (GET_CODE (disp
) == CONST
6250 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
6251 switch (XINT (XEXP (disp
, 0), 1))
6255 case UNSPEC_GOTPCREL
:
6258 goto is_legitimate_pic
;
6260 case UNSPEC_GOTTPOFF
:
6261 case UNSPEC_GOTNTPOFF
:
6262 case UNSPEC_INDNTPOFF
:
6268 reason
= "invalid address unspec";
6272 else if (flag_pic
&& (SYMBOLIC_CONST (disp
)
6274 && !machopic_operand_p (disp
)
6279 if (TARGET_64BIT
&& (index
|| base
))
6281 /* foo@dtpoff(%rX) is ok. */
6282 if (GET_CODE (disp
) != CONST
6283 || GET_CODE (XEXP (disp
, 0)) != PLUS
6284 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
6285 || GET_CODE (XEXP (XEXP (disp
, 0), 1)) != CONST_INT
6286 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
6287 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
6289 reason
= "non-constant pic memory reference";
6293 else if (! legitimate_pic_address_disp_p (disp
))
6295 reason
= "displacement is an invalid pic construct";
6299 /* This code used to verify that a symbolic pic displacement
6300 includes the pic_offset_table_rtx register.
6302 While this is a good idea, unfortunately these constructs may
6303 be created by "adds using lea" optimization for incorrect
6312 This code is nonsensical, but results in addressing
6313 GOT table with pic_offset_table_rtx base. We can't
6314 just refuse it easily, since it gets matched by
6315 "addsi3" pattern, that later gets split to lea in the
6316 case output register differs from input. While this
6317 can be handled by separate addsi pattern for this case
6318 that never results in lea, this seems to be easier and
6319 correct fix for crash to disable this test. */
6321 else if (GET_CODE (disp
) != LABEL_REF
6322 && GET_CODE (disp
) != CONST_INT
6323 && (GET_CODE (disp
) != CONST
6324 || !legitimate_constant_p (disp
))
6325 && (GET_CODE (disp
) != SYMBOL_REF
6326 || !legitimate_constant_p (disp
)))
6328 reason
= "displacement is not constant";
6331 else if (TARGET_64BIT
&& !x86_64_sign_extended_value (disp
))
6333 reason
= "displacement is out of range";
6338 /* Everything looks valid. */
6339 if (TARGET_DEBUG_ADDR
)
6340 fprintf (stderr
, "Success.\n");
6344 if (TARGET_DEBUG_ADDR
)
6346 fprintf (stderr
, "Error: %s\n", reason
);
6347 debug_rtx (reason_rtx
);
6352 /* Return a unique alias set for the GOT. */
6354 static HOST_WIDE_INT
6355 ix86_GOT_alias_set (void)
6357 static HOST_WIDE_INT set
= -1;
6359 set
= new_alias_set ();
6363 /* Return a legitimate reference for ORIG (an address) using the
6364 register REG. If REG is 0, a new pseudo is generated.
6366 There are two types of references that must be handled:
6368 1. Global data references must load the address from the GOT, via
6369 the PIC reg. An insn is emitted to do this load, and the reg is
6372 2. Static data references, constant pool addresses, and code labels
6373 compute the address as an offset from the GOT, whose base is in
6374 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6375 differentiate them from global data objects. The returned
6376 address is the PIC reg + an unspec constant.
6378 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6379 reg also appears in the address. */
6382 legitimize_pic_address (rtx orig
, rtx reg
)
6390 reg
= gen_reg_rtx (Pmode
);
6391 /* Use the generic Mach-O PIC machinery. */
6392 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
6395 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
6397 else if (!TARGET_64BIT
&& local_symbolic_operand (addr
, Pmode
))
6399 /* This symbol may be referenced via a displacement from the PIC
6400 base address (@GOTOFF). */
6402 if (reload_in_progress
)
6403 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6404 if (GET_CODE (addr
) == CONST
)
6405 addr
= XEXP (addr
, 0);
6406 if (GET_CODE (addr
) == PLUS
)
6408 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
6409 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
6412 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
6413 new = gen_rtx_CONST (Pmode
, new);
6414 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6418 emit_move_insn (reg
, new);
6422 else if (GET_CODE (addr
) == SYMBOL_REF
)
6426 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
6427 new = gen_rtx_CONST (Pmode
, new);
6428 new = gen_rtx_MEM (Pmode
, new);
6429 RTX_UNCHANGING_P (new) = 1;
6430 set_mem_alias_set (new, ix86_GOT_alias_set ());
6433 reg
= gen_reg_rtx (Pmode
);
6434 /* Use gen_movsi directly, otherwise the address is loaded
6435 into a register for CSE. We don't want to CSE these addresses,
6436 instead we CSE addresses from the GOT table, so skip this. */
6437 emit_insn (gen_movsi (reg
, new));
6442 /* This symbol must be referenced via a load from the
6443 Global Offset Table (@GOT). */
6445 if (reload_in_progress
)
6446 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6447 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
6448 new = gen_rtx_CONST (Pmode
, new);
6449 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6450 new = gen_rtx_MEM (Pmode
, new);
6451 RTX_UNCHANGING_P (new) = 1;
6452 set_mem_alias_set (new, ix86_GOT_alias_set ());
6455 reg
= gen_reg_rtx (Pmode
);
6456 emit_move_insn (reg
, new);
6462 if (GET_CODE (addr
) == CONST
)
6464 addr
= XEXP (addr
, 0);
6466 /* We must match stuff we generate before. Assume the only
6467 unspecs that can get here are ours. Not that we could do
6468 anything with them anyway.... */
6469 if (GET_CODE (addr
) == UNSPEC
6470 || (GET_CODE (addr
) == PLUS
6471 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
6473 if (GET_CODE (addr
) != PLUS
)
6476 if (GET_CODE (addr
) == PLUS
)
6478 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
6480 /* Check first to see if this is a constant offset from a @GOTOFF
6481 symbol reference. */
6482 if (local_symbolic_operand (op0
, Pmode
)
6483 && GET_CODE (op1
) == CONST_INT
)
6487 if (reload_in_progress
)
6488 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6489 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
6491 new = gen_rtx_PLUS (Pmode
, new, op1
);
6492 new = gen_rtx_CONST (Pmode
, new);
6493 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6497 emit_move_insn (reg
, new);
6503 if (INTVAL (op1
) < -16*1024*1024
6504 || INTVAL (op1
) >= 16*1024*1024)
6505 new = gen_rtx_PLUS (Pmode
, op0
, force_reg (Pmode
, op1
));
6510 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
6511 new = legitimize_pic_address (XEXP (addr
, 1),
6512 base
== reg
? NULL_RTX
: reg
);
6514 if (GET_CODE (new) == CONST_INT
)
6515 new = plus_constant (base
, INTVAL (new));
6518 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
6520 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
6521 new = XEXP (new, 1);
6523 new = gen_rtx_PLUS (Pmode
, base
, new);
6531 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6534 get_thread_pointer (int to_reg
)
6538 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
6542 reg
= gen_reg_rtx (Pmode
);
6543 insn
= gen_rtx_SET (VOIDmode
, reg
, tp
);
6544 insn
= emit_insn (insn
);
6549 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6550 false if we expect this to be used for a memory address and true if
6551 we expect to load the address into a register. */
6554 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
6556 rtx dest
, base
, off
, pic
;
6561 case TLS_MODEL_GLOBAL_DYNAMIC
:
6562 dest
= gen_reg_rtx (Pmode
);
6565 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
6568 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
6569 insns
= get_insns ();
6572 emit_libcall_block (insns
, dest
, rax
, x
);
6575 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
6578 case TLS_MODEL_LOCAL_DYNAMIC
:
6579 base
= gen_reg_rtx (Pmode
);
6582 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
6585 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
6586 insns
= get_insns ();
6589 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
6590 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
6591 emit_libcall_block (insns
, base
, rax
, note
);
6594 emit_insn (gen_tls_local_dynamic_base_32 (base
));
6596 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
6597 off
= gen_rtx_CONST (Pmode
, off
);
6599 return gen_rtx_PLUS (Pmode
, base
, off
);
6601 case TLS_MODEL_INITIAL_EXEC
:
6605 type
= UNSPEC_GOTNTPOFF
;
6609 if (reload_in_progress
)
6610 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6611 pic
= pic_offset_table_rtx
;
6612 type
= TARGET_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
6614 else if (!TARGET_GNU_TLS
)
6616 pic
= gen_reg_rtx (Pmode
);
6617 emit_insn (gen_set_got (pic
));
6618 type
= UNSPEC_GOTTPOFF
;
6623 type
= UNSPEC_INDNTPOFF
;
6626 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
6627 off
= gen_rtx_CONST (Pmode
, off
);
6629 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
6630 off
= gen_rtx_MEM (Pmode
, off
);
6631 RTX_UNCHANGING_P (off
) = 1;
6632 set_mem_alias_set (off
, ix86_GOT_alias_set ());
6634 if (TARGET_64BIT
|| TARGET_GNU_TLS
)
6636 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
6637 off
= force_reg (Pmode
, off
);
6638 return gen_rtx_PLUS (Pmode
, base
, off
);
6642 base
= get_thread_pointer (true);
6643 dest
= gen_reg_rtx (Pmode
);
6644 emit_insn (gen_subsi3 (dest
, base
, off
));
6648 case TLS_MODEL_LOCAL_EXEC
:
6649 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
6650 (TARGET_64BIT
|| TARGET_GNU_TLS
)
6651 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
6652 off
= gen_rtx_CONST (Pmode
, off
);
6654 if (TARGET_64BIT
|| TARGET_GNU_TLS
)
6656 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
6657 return gen_rtx_PLUS (Pmode
, base
, off
);
6661 base
= get_thread_pointer (true);
6662 dest
= gen_reg_rtx (Pmode
);
6663 emit_insn (gen_subsi3 (dest
, base
, off
));
6674 /* Try machine-dependent ways of modifying an illegitimate address
6675 to be legitimate. If we find one, return the new, valid address.
6676 This macro is used in only one place: `memory_address' in explow.c.
6678 OLDX is the address as it was before break_out_memory_refs was called.
6679 In some cases it is useful to look at this to decide what needs to be done.
6681 MODE and WIN are passed so that this macro can use
6682 GO_IF_LEGITIMATE_ADDRESS.
6684 It is always safe for this macro to do nothing. It exists to recognize
6685 opportunities to optimize the output.
6687 For the 80386, we handle X+REG by loading X into a register R and
6688 using R+REG. R will go in a general reg and indexing will be used.
6689 However, if REG is a broken-out memory address or multiplication,
6690 nothing needs to be done because REG can certainly go in a general reg.
6692 When -fpic is used, special handling is needed for symbolic references.
6693 See comments by legitimize_pic_address in i386.c for details. */
6696 legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
, enum machine_mode mode
)
6701 if (TARGET_DEBUG_ADDR
)
6703 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6704 GET_MODE_NAME (mode
));
6708 log
= tls_symbolic_operand (x
, mode
);
6710 return legitimize_tls_address (x
, log
, false);
6711 if (GET_CODE (x
) == CONST
6712 && GET_CODE (XEXP (x
, 0)) == PLUS
6713 && (log
= tls_symbolic_operand (XEXP (XEXP (x
, 0), 0), Pmode
)))
6715 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0), log
, false);
6716 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
6719 if (flag_pic
&& SYMBOLIC_CONST (x
))
6720 return legitimize_pic_address (x
, 0);
6722 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6723 if (GET_CODE (x
) == ASHIFT
6724 && GET_CODE (XEXP (x
, 1)) == CONST_INT
6725 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (x
, 1)))) < 4)
6728 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
6729 GEN_INT (1 << log
));
6732 if (GET_CODE (x
) == PLUS
)
6734 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6736 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
6737 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
6738 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)))) < 4)
6741 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
6742 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
6743 GEN_INT (1 << log
));
6746 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
6747 && GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
6748 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x
, 1), 1)))) < 4)
6751 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
6752 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
6753 GEN_INT (1 << log
));
6756 /* Put multiply first if it isn't already. */
6757 if (GET_CODE (XEXP (x
, 1)) == MULT
)
6759 rtx tmp
= XEXP (x
, 0);
6760 XEXP (x
, 0) = XEXP (x
, 1);
6765 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6766 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6767 created by virtual register instantiation, register elimination, and
6768 similar optimizations. */
6769 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
6772 x
= gen_rtx_PLUS (Pmode
,
6773 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
6774 XEXP (XEXP (x
, 1), 0)),
6775 XEXP (XEXP (x
, 1), 1));
6779 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6780 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6781 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
6782 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
6783 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
6784 && CONSTANT_P (XEXP (x
, 1)))
6787 rtx other
= NULL_RTX
;
6789 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
6791 constant
= XEXP (x
, 1);
6792 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
6794 else if (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 1), 1)) == CONST_INT
)
6796 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
6797 other
= XEXP (x
, 1);
6805 x
= gen_rtx_PLUS (Pmode
,
6806 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
6807 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
6808 plus_constant (other
, INTVAL (constant
)));
6812 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
6815 if (GET_CODE (XEXP (x
, 0)) == MULT
)
6818 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
6821 if (GET_CODE (XEXP (x
, 1)) == MULT
)
6824 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
6828 && GET_CODE (XEXP (x
, 1)) == REG
6829 && GET_CODE (XEXP (x
, 0)) == REG
)
6832 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
6835 x
= legitimize_pic_address (x
, 0);
6838 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
6841 if (GET_CODE (XEXP (x
, 0)) == REG
)
6843 rtx temp
= gen_reg_rtx (Pmode
);
6844 rtx val
= force_operand (XEXP (x
, 1), temp
);
6846 emit_move_insn (temp
, val
);
6852 else if (GET_CODE (XEXP (x
, 1)) == REG
)
6854 rtx temp
= gen_reg_rtx (Pmode
);
6855 rtx val
= force_operand (XEXP (x
, 0), temp
);
6857 emit_move_insn (temp
, val
);
6867 /* Print an integer constant expression in assembler syntax. Addition
6868 and subtraction are the only arithmetic that may appear in these
6869 expressions. FILE is the stdio stream to write to, X is the rtx, and
6870 CODE is the operand print code from the output string. */
6873 output_pic_addr_const (FILE *file
, rtx x
, int code
)
6877 switch (GET_CODE (x
))
6887 /* Mark the decl as referenced so that cgraph will output the function. */
6888 if (SYMBOL_REF_DECL (x
))
6889 mark_decl_referenced (SYMBOL_REF_DECL (x
));
6891 assemble_name (file
, XSTR (x
, 0));
6892 if (!TARGET_MACHO
&& code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
6893 fputs ("@PLT", file
);
6900 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
6901 assemble_name (asm_out_file
, buf
);
6905 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
6909 /* This used to output parentheses around the expression,
6910 but that does not work on the 386 (either ATT or BSD assembler). */
6911 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6915 if (GET_MODE (x
) == VOIDmode
)
6917 /* We can use %d if the number is <32 bits and positive. */
6918 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
6919 fprintf (file
, "0x%lx%08lx",
6920 (unsigned long) CONST_DOUBLE_HIGH (x
),
6921 (unsigned long) CONST_DOUBLE_LOW (x
));
6923 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
6926 /* We can't handle floating point constants;
6927 PRINT_OPERAND must handle them. */
6928 output_operand_lossage ("floating constant misused");
6932 /* Some assemblers need integer constants to appear first. */
6933 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
)
6935 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6937 output_pic_addr_const (file
, XEXP (x
, 1), code
);
6939 else if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
6941 output_pic_addr_const (file
, XEXP (x
, 1), code
);
6943 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6951 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
6952 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6954 output_pic_addr_const (file
, XEXP (x
, 1), code
);
6956 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
6960 if (XVECLEN (x
, 0) != 1)
6962 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
6963 switch (XINT (x
, 1))
6966 fputs ("@GOT", file
);
6969 fputs ("@GOTOFF", file
);
6971 case UNSPEC_GOTPCREL
:
6972 fputs ("@GOTPCREL(%rip)", file
);
6974 case UNSPEC_GOTTPOFF
:
6975 /* FIXME: This might be @TPOFF in Sun ld too. */
6976 fputs ("@GOTTPOFF", file
);
6979 fputs ("@TPOFF", file
);
6983 fputs ("@TPOFF", file
);
6985 fputs ("@NTPOFF", file
);
6988 fputs ("@DTPOFF", file
);
6990 case UNSPEC_GOTNTPOFF
:
6992 fputs ("@GOTTPOFF(%rip)", file
);
6994 fputs ("@GOTNTPOFF", file
);
6996 case UNSPEC_INDNTPOFF
:
6997 fputs ("@INDNTPOFF", file
);
7000 output_operand_lossage ("invalid UNSPEC as operand");
7006 output_operand_lossage ("invalid expression as operand");
7010 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
7011 We need to handle our special PIC relocations. */
7014 i386_dwarf_output_addr_const (FILE *file
, rtx x
)
7017 fprintf (file
, "%s", TARGET_64BIT
? ASM_QUAD
: ASM_LONG
);
7021 fprintf (file
, "%s", ASM_LONG
);
7024 output_pic_addr_const (file
, x
, '\0');
7026 output_addr_const (file
, x
);
7030 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
7031 We need to emit DTP-relative relocations. */
7034 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
7036 fputs (ASM_LONG
, file
);
7037 output_addr_const (file
, x
);
7038 fputs ("@DTPOFF", file
);
7044 fputs (", 0", file
);
7051 /* In the name of slightly smaller debug output, and to cater to
7052 general assembler losage, recognize PIC+GOTOFF and turn it back
7053 into a direct symbol reference. */
7056 ix86_delegitimize_address (rtx orig_x
)
7060 if (GET_CODE (x
) == MEM
)
7065 if (GET_CODE (x
) != CONST
7066 || GET_CODE (XEXP (x
, 0)) != UNSPEC
7067 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
7068 || GET_CODE (orig_x
) != MEM
)
7070 return XVECEXP (XEXP (x
, 0), 0, 0);
7073 if (GET_CODE (x
) != PLUS
7074 || GET_CODE (XEXP (x
, 1)) != CONST
)
7077 if (GET_CODE (XEXP (x
, 0)) == REG
7078 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7079 /* %ebx + GOT/GOTOFF */
7081 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
7083 /* %ebx + %reg * scale + GOT/GOTOFF */
7085 if (GET_CODE (XEXP (y
, 0)) == REG
7086 && REGNO (XEXP (y
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7088 else if (GET_CODE (XEXP (y
, 1)) == REG
7089 && REGNO (XEXP (y
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
7093 if (GET_CODE (y
) != REG
7094 && GET_CODE (y
) != MULT
7095 && GET_CODE (y
) != ASHIFT
)
7101 x
= XEXP (XEXP (x
, 1), 0);
7102 if (GET_CODE (x
) == UNSPEC
7103 && ((XINT (x
, 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
7104 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& GET_CODE (orig_x
) != MEM
)))
7107 return gen_rtx_PLUS (Pmode
, y
, XVECEXP (x
, 0, 0));
7108 return XVECEXP (x
, 0, 0);
7111 if (GET_CODE (x
) == PLUS
7112 && GET_CODE (XEXP (x
, 0)) == UNSPEC
7113 && GET_CODE (XEXP (x
, 1)) == CONST_INT
7114 && ((XINT (XEXP (x
, 0), 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
7115 || (XINT (XEXP (x
, 0), 1) == UNSPEC_GOTOFF
7116 && GET_CODE (orig_x
) != MEM
)))
7118 x
= gen_rtx_PLUS (VOIDmode
, XVECEXP (XEXP (x
, 0), 0, 0), XEXP (x
, 1));
7120 return gen_rtx_PLUS (Pmode
, y
, x
);
7128 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
7133 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
7135 enum rtx_code second_code
, bypass_code
;
7136 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
7137 if (bypass_code
!= NIL
|| second_code
!= NIL
)
7139 code
= ix86_fp_compare_code_to_integer (code
);
7143 code
= reverse_condition (code
);
7154 if (mode
!= CCmode
&& mode
!= CCNOmode
&& mode
!= CCGCmode
)
7159 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7160 Those same assemblers have the same but opposite lossage on cmov. */
7163 suffix
= fp
? "nbe" : "a";
7166 if (mode
== CCNOmode
|| mode
== CCGOCmode
)
7168 else if (mode
== CCmode
|| mode
== CCGCmode
)
7179 if (mode
== CCNOmode
|| mode
== CCGOCmode
)
7181 else if (mode
== CCmode
|| mode
== CCGCmode
)
7190 suffix
= fp
? "nb" : "ae";
7193 if (mode
!= CCmode
&& mode
!= CCGCmode
&& mode
!= CCNOmode
)
7203 suffix
= fp
? "u" : "p";
7206 suffix
= fp
? "nu" : "np";
7211 fputs (suffix
, file
);
7214 /* Print the name of register X to FILE based on its machine mode and number.
7215 If CODE is 'w', pretend the mode is HImode.
7216 If CODE is 'b', pretend the mode is QImode.
7217 If CODE is 'k', pretend the mode is SImode.
7218 If CODE is 'q', pretend the mode is DImode.
7219 If CODE is 'h', pretend the reg is the `high' byte register.
7220 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7223 print_reg (rtx x
, int code
, FILE *file
)
7225 if (REGNO (x
) == ARG_POINTER_REGNUM
7226 || REGNO (x
) == FRAME_POINTER_REGNUM
7227 || REGNO (x
) == FLAGS_REG
7228 || REGNO (x
) == FPSR_REG
)
7231 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
7234 if (code
== 'w' || MMX_REG_P (x
))
7236 else if (code
== 'b')
7238 else if (code
== 'k')
7240 else if (code
== 'q')
7242 else if (code
== 'y')
7244 else if (code
== 'h')
7247 code
= GET_MODE_SIZE (GET_MODE (x
));
7249 /* Irritatingly, AMD extended registers use different naming convention
7250 from the normal registers. */
7251 if (REX_INT_REG_P (x
))
7258 error ("extended registers have no high halves");
7261 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7264 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7267 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7270 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7273 error ("unsupported operand size for extended register");
7281 if (STACK_TOP_P (x
))
7283 fputs ("st(0)", file
);
7290 if (! ANY_FP_REG_P (x
))
7291 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
7296 fputs (hi_reg_name
[REGNO (x
)], file
);
7299 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
7301 fputs (qi_reg_name
[REGNO (x
)], file
);
7304 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
7306 fputs (qi_high_reg_name
[REGNO (x
)], file
);
7313 /* Locate some local-dynamic symbol still in use by this function
7314 so that we can print its name in some tls_local_dynamic_base
7318 get_some_local_dynamic_name (void)
7322 if (cfun
->machine
->some_ld_name
)
7323 return cfun
->machine
->some_ld_name
;
7325 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
7327 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
7328 return cfun
->machine
->some_ld_name
;
7334 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
7338 if (GET_CODE (x
) == SYMBOL_REF
7339 && local_dynamic_symbolic_operand (x
, Pmode
))
7341 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
7349 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7350 C -- print opcode suffix for set/cmov insn.
7351 c -- like C, but print reversed condition
7352 F,f -- likewise, but for floating-point.
7353 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7355 R -- print the prefix for register names.
7356 z -- print the opcode suffix for the size of the current operand.
7357 * -- print a star (in certain assembler syntax)
7358 A -- print an absolute memory reference.
7359 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7360 s -- print a shift double count, followed by the assembler's argument
7362 b -- print the QImode name of the register for the indicated operand.
7363 %b0 would print %al if operands[0] is reg 0.
7364 w -- likewise, print the HImode name of the register.
7365 k -- likewise, print the SImode name of the register.
7366 q -- likewise, print the DImode name of the register.
7367 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7368 y -- print "st(0)" instead of "st" as a register.
7369 D -- print condition for SSE cmp instruction.
7370 P -- if PIC, print an @PLT suffix.
7371 X -- don't print any sort of PIC '@' suffix for a symbol.
7372 & -- print some in-use local-dynamic symbol name.
7376 print_operand (FILE *file
, rtx x
, int code
)
7383 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7388 assemble_name (file
, get_some_local_dynamic_name ());
7392 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7394 else if (ASSEMBLER_DIALECT
== ASM_INTEL
)
7396 /* Intel syntax. For absolute addresses, registers should not
7397 be surrounded by braces. */
7398 if (GET_CODE (x
) != REG
)
7401 PRINT_OPERAND (file
, x
, 0);
7409 PRINT_OPERAND (file
, x
, 0);
7414 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7419 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7424 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7429 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7434 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7439 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7444 /* 387 opcodes don't get size suffixes if the operands are
7446 if (STACK_REG_P (x
))
7449 /* Likewise if using Intel opcodes. */
7450 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
7453 /* This is the size of op from size of operand. */
7454 switch (GET_MODE_SIZE (GET_MODE (x
)))
7457 #ifdef HAVE_GAS_FILDS_FISTS
7463 if (GET_MODE (x
) == SFmode
)
7478 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
7480 #ifdef GAS_MNEMONICS
7506 if (GET_CODE (x
) == CONST_INT
|| ! SHIFT_DOUBLE_OMITS_COUNT
)
7508 PRINT_OPERAND (file
, x
, 0);
7514 /* Little bit of braindamage here. The SSE compare instructions
7515 use completely different names for the comparisons than the
7516 fp conditional moves. */
7517 switch (GET_CODE (x
))
7532 fputs ("unord", file
);
7536 fputs ("neq", file
);
7540 fputs ("nlt", file
);
7544 fputs ("nle", file
);
7547 fputs ("ord", file
);
7555 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7556 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7558 switch (GET_MODE (x
))
7560 case HImode
: putc ('w', file
); break;
7562 case SFmode
: putc ('l', file
); break;
7564 case DFmode
: putc ('q', file
); break;
7572 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
7575 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7576 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7579 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
7582 /* Like above, but reverse condition */
7584 /* Check to see if argument to %c is really a constant
7585 and not a condition code which needs to be reversed. */
7586 if (!COMPARISON_P (x
))
7588 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7591 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
7594 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7595 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7598 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
7604 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
7607 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
7610 int pred_val
= INTVAL (XEXP (x
, 0));
7612 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
7613 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
7615 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
7616 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
7618 /* Emit hints only in the case default branch prediction
7619 heuristics would fail. */
7620 if (taken
!= cputaken
)
7622 /* We use 3e (DS) prefix for taken branches and
7623 2e (CS) prefix for not taken branches. */
7625 fputs ("ds ; ", file
);
7627 fputs ("cs ; ", file
);
7634 output_operand_lossage ("invalid operand code `%c'", code
);
7638 if (GET_CODE (x
) == REG
)
7639 print_reg (x
, code
, file
);
7641 else if (GET_CODE (x
) == MEM
)
7643 /* No `byte ptr' prefix for call instructions. */
7644 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
7647 switch (GET_MODE_SIZE (GET_MODE (x
)))
7649 case 1: size
= "BYTE"; break;
7650 case 2: size
= "WORD"; break;
7651 case 4: size
= "DWORD"; break;
7652 case 8: size
= "QWORD"; break;
7653 case 12: size
= "XWORD"; break;
7654 case 16: size
= "XMMWORD"; break;
7659 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7662 else if (code
== 'w')
7664 else if (code
== 'k')
7668 fputs (" PTR ", file
);
7672 /* Avoid (%rip) for call operands. */
7673 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
7674 && GET_CODE (x
) != CONST_INT
)
7675 output_addr_const (file
, x
);
7676 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
7677 output_operand_lossage ("invalid constraints for operand");
7682 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
7687 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
7688 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
7690 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7692 fprintf (file
, "0x%08lx", l
);
7695 /* These float cases don't actually occur as immediate operands. */
7696 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
7700 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
7701 fprintf (file
, "%s", dstr
);
7704 else if (GET_CODE (x
) == CONST_DOUBLE
7705 && GET_MODE (x
) == XFmode
)
7709 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
7710 fprintf (file
, "%s", dstr
);
7717 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
7719 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7722 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
7723 || GET_CODE (x
) == LABEL_REF
)
7725 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7728 fputs ("OFFSET FLAT:", file
);
7731 if (GET_CODE (x
) == CONST_INT
)
7732 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
7734 output_pic_addr_const (file
, x
, code
);
7736 output_addr_const (file
, x
);
7740 /* Print a memory operand whose address is ADDR. */
7743 print_operand_address (FILE *file
, rtx addr
)
7745 struct ix86_address parts
;
7746 rtx base
, index
, disp
;
7749 if (! ix86_decompose_address (addr
, &parts
))
7753 index
= parts
.index
;
7755 scale
= parts
.scale
;
7763 if (USER_LABEL_PREFIX
[0] == 0)
7765 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
7771 if (!base
&& !index
)
7773 /* Displacement only requires special attention. */
7775 if (GET_CODE (disp
) == CONST_INT
)
7777 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
7779 if (USER_LABEL_PREFIX
[0] == 0)
7781 fputs ("ds:", file
);
7783 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
7786 output_pic_addr_const (file
, disp
, 0);
7788 output_addr_const (file
, disp
);
7790 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7792 && ((GET_CODE (disp
) == SYMBOL_REF
7793 && ! tls_symbolic_operand (disp
, GET_MODE (disp
)))
7794 || GET_CODE (disp
) == LABEL_REF
7795 || (GET_CODE (disp
) == CONST
7796 && GET_CODE (XEXP (disp
, 0)) == PLUS
7797 && (GET_CODE (XEXP (XEXP (disp
, 0), 0)) == SYMBOL_REF
7798 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) == LABEL_REF
)
7799 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)))
7800 fputs ("(%rip)", file
);
7804 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7809 output_pic_addr_const (file
, disp
, 0);
7810 else if (GET_CODE (disp
) == LABEL_REF
)
7811 output_asm_label (disp
);
7813 output_addr_const (file
, disp
);
7818 print_reg (base
, 0, file
);
7822 print_reg (index
, 0, file
);
7824 fprintf (file
, ",%d", scale
);
7830 rtx offset
= NULL_RTX
;
7834 /* Pull out the offset of a symbol; print any symbol itself. */
7835 if (GET_CODE (disp
) == CONST
7836 && GET_CODE (XEXP (disp
, 0)) == PLUS
7837 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
7839 offset
= XEXP (XEXP (disp
, 0), 1);
7840 disp
= gen_rtx_CONST (VOIDmode
,
7841 XEXP (XEXP (disp
, 0), 0));
7845 output_pic_addr_const (file
, disp
, 0);
7846 else if (GET_CODE (disp
) == LABEL_REF
)
7847 output_asm_label (disp
);
7848 else if (GET_CODE (disp
) == CONST_INT
)
7851 output_addr_const (file
, disp
);
7857 print_reg (base
, 0, file
);
7860 if (INTVAL (offset
) >= 0)
7862 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
7866 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
7873 print_reg (index
, 0, file
);
7875 fprintf (file
, "*%d", scale
);
7883 output_addr_const_extra (FILE *file
, rtx x
)
7887 if (GET_CODE (x
) != UNSPEC
)
7890 op
= XVECEXP (x
, 0, 0);
7891 switch (XINT (x
, 1))
7893 case UNSPEC_GOTTPOFF
:
7894 output_addr_const (file
, op
);
7895 /* FIXME: This might be @TPOFF in Sun ld. */
7896 fputs ("@GOTTPOFF", file
);
7899 output_addr_const (file
, op
);
7900 fputs ("@TPOFF", file
);
7903 output_addr_const (file
, op
);
7905 fputs ("@TPOFF", file
);
7907 fputs ("@NTPOFF", file
);
7910 output_addr_const (file
, op
);
7911 fputs ("@DTPOFF", file
);
7913 case UNSPEC_GOTNTPOFF
:
7914 output_addr_const (file
, op
);
7916 fputs ("@GOTTPOFF(%rip)", file
);
7918 fputs ("@GOTNTPOFF", file
);
7920 case UNSPEC_INDNTPOFF
:
7921 output_addr_const (file
, op
);
7922 fputs ("@INDNTPOFF", file
);
7932 /* Split one or more DImode RTL references into pairs of SImode
7933 references. The RTL can be REG, offsettable MEM, integer constant, or
7934 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7935 split and "num" is its length. lo_half and hi_half are output arrays
7936 that parallel "operands". */
7939 split_di (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
7943 rtx op
= operands
[num
];
7945 /* simplify_subreg refuses to split volatile memory addresses,
7946 but we still have to handle them. */
7947 if (GET_CODE (op
) == MEM
)
7949 lo_half
[num
] = adjust_address (op
, SImode
, 0);
7950 hi_half
[num
] = adjust_address (op
, SImode
, 4);
7954 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
7955 GET_MODE (op
) == VOIDmode
7956 ? DImode
: GET_MODE (op
), 0);
7957 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
7958 GET_MODE (op
) == VOIDmode
7959 ? DImode
: GET_MODE (op
), 4);
7963 /* Split one or more TImode RTL references into pairs of DImode
7964 references. The RTL can be REG, offsettable MEM, integer constant, or
7965 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
7966 split and "num" is its length. lo_half and hi_half are output arrays
7967 that parallel "operands". */
7970 split_ti (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
7974 rtx op
= operands
[num
];
7976 /* simplify_subreg refuses to split volatile memory addresses, but we
7977 still have to handle them. */
7978 if (GET_CODE (op
) == MEM
)
7980 lo_half
[num
] = adjust_address (op
, DImode
, 0);
7981 hi_half
[num
] = adjust_address (op
, DImode
, 8);
7985 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
7986 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
7991 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7992 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7993 is the expression of the binary operation. The output may either be
7994 emitted here, or returned to the caller, like all output_* functions.
7996 There is no guarantee that the operands are the same mode, as they
7997 might be within FLOAT or FLOAT_EXTEND expressions. */
7999 #ifndef SYSV386_COMPAT
8000 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8001 wants to fix the assemblers because that causes incompatibility
8002 with gcc. No-one wants to fix gcc because that causes
8003 incompatibility with assemblers... You can use the option of
8004 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8005 #define SYSV386_COMPAT 1
8009 output_387_binary_op (rtx insn
, rtx
*operands
)
8011 static char buf
[30];
8014 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]) | SSE_REG_P (operands
[2]);
8016 #ifdef ENABLE_CHECKING
8017 /* Even if we do not want to check the inputs, this documents input
8018 constraints, which helps in understanding the following code. */
8019 if (STACK_REG_P (operands
[0])
8020 && ((REG_P (operands
[1])
8021 && REGNO (operands
[0]) == REGNO (operands
[1])
8022 && (STACK_REG_P (operands
[2]) || GET_CODE (operands
[2]) == MEM
))
8023 || (REG_P (operands
[2])
8024 && REGNO (operands
[0]) == REGNO (operands
[2])
8025 && (STACK_REG_P (operands
[1]) || GET_CODE (operands
[1]) == MEM
)))
8026 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
8032 switch (GET_CODE (operands
[3]))
8035 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8036 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8044 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8045 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8053 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8054 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8062 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8063 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8077 if (GET_MODE (operands
[0]) == SFmode
)
8078 strcat (buf
, "ss\t{%2, %0|%0, %2}");
8080 strcat (buf
, "sd\t{%2, %0|%0, %2}");
8085 switch (GET_CODE (operands
[3]))
8089 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
8091 rtx temp
= operands
[2];
8092 operands
[2] = operands
[1];
8096 /* know operands[0] == operands[1]. */
8098 if (GET_CODE (operands
[2]) == MEM
)
8104 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8106 if (STACK_TOP_P (operands
[0]))
8107 /* How is it that we are storing to a dead operand[2]?
8108 Well, presumably operands[1] is dead too. We can't
8109 store the result to st(0) as st(0) gets popped on this
8110 instruction. Instead store to operands[2] (which I
8111 think has to be st(1)). st(1) will be popped later.
8112 gcc <= 2.8.1 didn't have this check and generated
8113 assembly code that the Unixware assembler rejected. */
8114 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8116 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8120 if (STACK_TOP_P (operands
[0]))
8121 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8123 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8128 if (GET_CODE (operands
[1]) == MEM
)
8134 if (GET_CODE (operands
[2]) == MEM
)
8140 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8143 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8144 derived assemblers, confusingly reverse the direction of
8145 the operation for fsub{r} and fdiv{r} when the
8146 destination register is not st(0). The Intel assembler
8147 doesn't have this brain damage. Read !SYSV386_COMPAT to
8148 figure out what the hardware really does. */
8149 if (STACK_TOP_P (operands
[0]))
8150 p
= "{p\t%0, %2|rp\t%2, %0}";
8152 p
= "{rp\t%2, %0|p\t%0, %2}";
8154 if (STACK_TOP_P (operands
[0]))
8155 /* As above for fmul/fadd, we can't store to st(0). */
8156 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8158 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8163 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
8166 if (STACK_TOP_P (operands
[0]))
8167 p
= "{rp\t%0, %1|p\t%1, %0}";
8169 p
= "{p\t%1, %0|rp\t%0, %1}";
8171 if (STACK_TOP_P (operands
[0]))
8172 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8174 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8179 if (STACK_TOP_P (operands
[0]))
8181 if (STACK_TOP_P (operands
[1]))
8182 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8184 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8187 else if (STACK_TOP_P (operands
[1]))
8190 p
= "{\t%1, %0|r\t%0, %1}";
8192 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8198 p
= "{r\t%2, %0|\t%0, %2}";
8200 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8213 /* Output code to initialize control word copies used by
8214 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
8215 is set to a copy with both rounding-control bits (0xc00) set, which
     selects rounding toward zero (truncation). */
8217 emit_i387_cw_initialization (rtx normal
, rtx round_down
)
8219 rtx reg
= gen_reg_rtx (HImode
);
8221 emit_insn (gen_x86_fnstcw_1 (normal
));
8222 emit_move_insn (reg
, normal
);
8223 if (!TARGET_PARTIAL_REG_STALL
&& !optimize_size
8225 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
8227 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0xc00)));
8228 emit_move_insn (round_down
, reg
);
8231 /* Output code for INSN to convert a float to a signed int. OPERANDS
8232 are the insn operands. The output may be [HSD]Imode and the input
8233 operand may be [SDX]Fmode. */
8236 output_fix_trunc (rtx insn
, rtx
*operands
)
8238 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
8239 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
8241 /* Jump through a hoop or two for DImode, since the hardware has no
8242 non-popping instruction. We used to do this a different way, but
8243 that was somewhat fragile and broke with post-reload splitters. */
8244 if (dimode_p
&& !stack_top_dies
)
8245 output_asm_insn ("fld\t%y1", operands
);
8247 if (!STACK_TOP_P (operands
[1]))
8250 if (GET_CODE (operands
[0]) != MEM
)
8253 output_asm_insn ("fldcw\t%3", operands
);
8254 if (stack_top_dies
|| dimode_p
)
8255 output_asm_insn ("fistp%z0\t%0", operands
);
8257 output_asm_insn ("fist%z0\t%0", operands
);
8258 output_asm_insn ("fldcw\t%2", operands
);
8263 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8264 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8265 when fucom should be used. */
8268 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
8271 rtx cmp_op0
= operands
[0];
8272 rtx cmp_op1
= operands
[1];
8273 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]);
8278 cmp_op1
= operands
[2];
8282 if (GET_MODE (operands
[0]) == SFmode
)
8284 return "ucomiss\t{%1, %0|%0, %1}";
8286 return "comiss\t{%1, %0|%0, %1}";
8289 return "ucomisd\t{%1, %0|%0, %1}";
8291 return "comisd\t{%1, %0|%0, %1}";
8294 if (! STACK_TOP_P (cmp_op0
))
8297 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
8299 if (STACK_REG_P (cmp_op1
)
8301 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
8302 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
8304 /* If both the top of the 387 stack dies, and the other operand
8305 is also a stack register that dies, then this must be a
8306 `fcompp' float compare */
8310 /* There is no double popping fcomi variant. Fortunately,
8311 eflags is immune from the fstp's cc clobbering. */
8313 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
8315 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
8316 return TARGET_USE_FFREEP
? "ffreep\t%y0" : "fstp\t%y0";
8323 return "fucompp\n\tfnstsw\t%0";
8325 return "fcompp\n\tfnstsw\t%0";
8338 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8340 static const char * const alt
[24] =
8352 "fcomi\t{%y1, %0|%0, %y1}",
8353 "fcomip\t{%y1, %0|%0, %y1}",
8354 "fucomi\t{%y1, %0|%0, %y1}",
8355 "fucomip\t{%y1, %0|%0, %y1}",
8362 "fcom%z2\t%y2\n\tfnstsw\t%0",
8363 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8364 "fucom%z2\t%y2\n\tfnstsw\t%0",
8365 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8367 "ficom%z2\t%y2\n\tfnstsw\t%0",
8368 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8376 mask
= eflags_p
<< 3;
8377 mask
|= (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
) << 2;
8378 mask
|= unordered_p
<< 1;
8379 mask
|= stack_top_dies
;
8392 ix86_output_addr_vec_elt (FILE *file
, int value
)
8394 const char *directive
= ASM_LONG
;
8399 directive
= ASM_QUAD
;
8405 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
8409 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
8412 fprintf (file
, "%s%s%d-%s%d\n",
8413 ASM_LONG
, LPREFIX
, value
, LPREFIX
, rel
);
8414 else if (HAVE_AS_GOTOFF_IN_DATA
)
8415 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
8417 else if (TARGET_MACHO
)
8419 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
8420 machopic_output_function_base_name (file
);
8421 fprintf(file
, "\n");
8425 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
8426 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
8429 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8433 ix86_expand_clear (rtx dest
)
8437 /* We play register width games, which are only valid after reload. */
8438 if (!reload_completed
)
8441 /* Avoid HImode and its attendant prefix byte. */
8442 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
8443 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
8445 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
8447 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8448 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
8450 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, 17));
8451 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
8457 /* X is an unchanging MEM. If it is a constant pool reference, return
8458 the constant pool rtx, else NULL. */
8461 maybe_get_pool_constant (rtx x
)
8463 x
= ix86_delegitimize_address (XEXP (x
, 0));
8465 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8466 return get_pool_constant (x
);
8472 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
8474 int strict
= (reload_in_progress
|| reload_completed
);
8476 enum tls_model model
;
8481 model
= tls_symbolic_operand (op1
, Pmode
);
8484 op1
= legitimize_tls_address (op1
, model
, true);
8485 op1
= force_operand (op1
, op0
);
8490 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
8495 rtx temp
= ((reload_in_progress
8496 || ((op0
&& GET_CODE (op0
) == REG
)
8498 ? op0
: gen_reg_rtx (Pmode
));
8499 op1
= machopic_indirect_data_reference (op1
, temp
);
8500 op1
= machopic_legitimize_pic_address (op1
, mode
,
8501 temp
== op1
? 0 : temp
);
8503 else if (MACHOPIC_INDIRECT
)
8504 op1
= machopic_indirect_data_reference (op1
, 0);
8508 if (GET_CODE (op0
) == MEM
)
8509 op1
= force_reg (Pmode
, op1
);
8511 op1
= legitimize_address (op1
, op1
, Pmode
);
8512 #endif /* TARGET_MACHO */
8516 if (GET_CODE (op0
) == MEM
8517 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
8518 || !push_operand (op0
, mode
))
8519 && GET_CODE (op1
) == MEM
)
8520 op1
= force_reg (mode
, op1
);
8522 if (push_operand (op0
, mode
)
8523 && ! general_no_elim_operand (op1
, mode
))
8524 op1
= copy_to_mode_reg (mode
, op1
);
8526 /* Force large constants in 64bit compilation into register
8527 to get them CSEed. */
8528 if (TARGET_64BIT
&& mode
== DImode
8529 && immediate_operand (op1
, mode
)
8530 && !x86_64_zero_extended_value (op1
)
8531 && !register_operand (op0
, mode
)
8532 && optimize
&& !reload_completed
&& !reload_in_progress
)
8533 op1
= copy_to_mode_reg (mode
, op1
);
8535 if (FLOAT_MODE_P (mode
))
8537 /* If we are loading a floating point constant to a register,
8538 force the value to memory now, since we'll get better code
8539 out the back end. */
8543 else if (GET_CODE (op1
) == CONST_DOUBLE
)
8545 op1
= validize_mem (force_const_mem (mode
, op1
));
8546 if (!register_operand (op0
, mode
))
8548 rtx temp
= gen_reg_rtx (mode
);
8549 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
8550 emit_move_insn (op0
, temp
);
8557 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
8561 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
8563 /* Force constants other than zero into memory. We do not know how
8564 the instructions used to build constants modify the upper 64 bits
8565 of the register; once we have that information we may be able
8566 to handle some of them more efficiently. */
8567 if ((reload_in_progress
| reload_completed
) == 0
8568 && register_operand (operands
[0], mode
)
8569 && CONSTANT_P (operands
[1]) && operands
[1] != CONST0_RTX (mode
))
8570 operands
[1] = validize_mem (force_const_mem (mode
, operands
[1]));
8572 /* Make operand1 a register if it isn't already. */
8574 && !register_operand (operands
[0], mode
)
8575 && !register_operand (operands
[1], mode
))
8577 rtx temp
= force_reg (GET_MODE (operands
[1]), operands
[1]);
8578 emit_move_insn (operands
[0], temp
);
8582 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], operands
[1]));
8585 /* Attempt to expand a binary operator. Make the expansion closer to the
8586 actual machine than just general_operand, which will allow 3 separate
8587 memory references (one output, two input) in a single insn. */
8590 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
8593 int matching_memory
;
8594 rtx src1
, src2
, dst
, op
, clob
;
8600 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8601 if (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
8602 && (rtx_equal_p (dst
, src2
)
8603 || immediate_operand (src1
, mode
)))
8610 /* If the destination is memory, and we do not have matching source
8611 operands, do things in registers. */
8612 matching_memory
= 0;
8613 if (GET_CODE (dst
) == MEM
)
8615 if (rtx_equal_p (dst
, src1
))
8616 matching_memory
= 1;
8617 else if (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
8618 && rtx_equal_p (dst
, src2
))
8619 matching_memory
= 2;
8621 dst
= gen_reg_rtx (mode
);
8624 /* Both source operands cannot be in memory. */
8625 if (GET_CODE (src1
) == MEM
&& GET_CODE (src2
) == MEM
)
8627 if (matching_memory
!= 2)
8628 src2
= force_reg (mode
, src2
);
8630 src1
= force_reg (mode
, src1
);
8633 /* If the operation is not commutable, source 1 cannot be a constant
8634 or non-matching memory. */
8635 if ((CONSTANT_P (src1
)
8636 || (!matching_memory
&& GET_CODE (src1
) == MEM
))
8637 && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
8638 src1
= force_reg (mode
, src1
);
8640 /* If optimizing, copy to regs to improve CSE */
8641 if (optimize
&& ! no_new_pseudos
)
8643 if (GET_CODE (dst
) == MEM
)
8644 dst
= gen_reg_rtx (mode
);
8645 if (GET_CODE (src1
) == MEM
)
8646 src1
= force_reg (mode
, src1
);
8647 if (GET_CODE (src2
) == MEM
)
8648 src2
= force_reg (mode
, src2
);
8651 /* Emit the instruction. */
8653 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
8654 if (reload_in_progress
)
8656 /* Reload doesn't know about the flags register, and doesn't know that
8657 it doesn't want to clobber it. We can only do this with PLUS. */
8664 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
8665 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
8668 /* Fix up the destination if needed. */
8669 if (dst
!= operands
[0])
8670 emit_move_insn (operands
[0], dst
);
8673 /* Return TRUE or FALSE depending on whether the binary operator CODE meets
8674 the operand constraints tested below.  OPERANDS[0] is the destination,
   OPERANDS[1] is source 1 and OPERANDS[2] is source 2; MODE is unused.
   NOTE(review): the statement guarded by each test is not visible in this
   extract; presumably every test rejects the operands -- confirm against
   the complete file. */
8677 ix86_binary_operator_ok (enum rtx_code code
,
8678 enum machine_mode mode ATTRIBUTE_UNUSED
,
8681 /* Both source operands cannot be in memory. */
8682 if (GET_CODE (operands
[1]) == MEM
&& GET_CODE (operands
[2]) == MEM
)
8684 /* If the operation is not commutative, source 1 cannot be a constant. */
8685 if (CONSTANT_P (operands
[1]) && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
8687 /* If the destination is memory, it must be the same rtx as source 1, or
   as source 2 when the operation is commutative. */
8688 if (GET_CODE (operands
[0]) == MEM
8689 && ! (rtx_equal_p (operands
[0], operands
[1])
8690 || (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
8691 && rtx_equal_p (operands
[0], operands
[2]))))
8693 /* If the operation is not commutative and source 1 is memory, the
8694 destination must match it. */
8695 if (GET_CODE (operands
[1]) == MEM
8696 && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
8697 && ! rtx_equal_p (operands
[0], operands
[1]))
8702 /* Attempt to expand a unary operator. Make the expansion closer to the
8703 actual machine than just general_operand, which would allow 2 separate
8704 memory references (one output, one input) in a single insn.
   CODE is the rtx code of the operation and MODE the mode it is done in.
   The result ends up in OPERANDS[0]; when it had to be computed into a
   fresh register first, a move into OPERANDS[0] is emitted at the end. */
8707 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
8710 int matching_memory
;
8711 rtx src
, dst
, op
, clob
;
8716 /* If the destination is memory, and we do not have matching source
8717 operands, do things in registers. MATCHING_MEMORY records that DST is
   a MEM that is rtx_equal_p to SRC. */
8718 matching_memory
= 0;
8719 if (GET_CODE (dst
) == MEM
)
8721 if (rtx_equal_p (dst
, src
))
8722 matching_memory
= 1;
8724 dst
= gen_reg_rtx (mode
);
8727 /* When source operand is memory, destination must match. */
8728 if (!matching_memory
&& GET_CODE (src
) == MEM
)
8729 src
= force_reg (mode
, src
);
8731 /* If optimizing, copy to regs to improve CSE. This is only done while
   new pseudos may still be created. */
8732 if (optimize
&& ! no_new_pseudos
)
8734 if (GET_CODE (dst
) == MEM
)
8735 dst
= gen_reg_rtx (mode
);
8736 if (GET_CODE (src
) == MEM
)
8737 src
= force_reg (mode
, src
);
8740 /* Emit the instruction. OP is the plain SET of DST to CODE applied to
   SRC. */
8742 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
8743 if (reload_in_progress
|| code
== NOT
)
8745 /* Reload doesn't know about the flags register, and doesn't know that
8746 it doesn't want to clobber it. */
   /* Pair the SET with a clobber of the flags register in one PARALLEL. */
8753 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
8754 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
8757 /* Fix up the destination if needed: DST differs from OPERANDS[0] when
   the result was computed into a fresh register above. */
8758 if (dst
!= operands
[0])
8759 emit_move_insn (operands
[0], dst
);
8762 /* Return TRUE or FALSE depending on whether the unary operator meets the
8763 appropriate constraints. CODE and MODE are not examined. OPERANDS[0]
   and OPERANDS[1] are the two operands that are compared below; note that
   OPERANDS is read even though it is marked ATTRIBUTE_UNUSED. */
8766 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
8767 enum machine_mode mode ATTRIBUTE_UNUSED
,
8768 rtx operands
[2] ATTRIBUTE_UNUSED
)
8770 /* If either operand is memory, source and destination must be the same
   rtx. NOTE(review): the statements selected by this test are missing
   from this extract. */
8771 if ((GET_CODE (operands
[0]) == MEM
8772 || GET_CODE (operands
[1]) == MEM
)
8773 && ! rtx_equal_p (operands
[0], operands
[1]))
8778 /* Return TRUE or FALSE depending on whether the first SET in INSN
8779 has source and destination with matching CC modes, and that the
8780 CC mode is at least as constrained as REQ_MODE.
   NOTE(review): the dispatch that surrounds the REQ_MODE tests below and
   the statements they guard are missing from this extract. */
8783 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
8786 enum machine_mode set_mode
;
   /* Examine the insn pattern itself, or its first element when the
   pattern is a PARALLEL. */
8788 set
= PATTERN (insn
);
8789 if (GET_CODE (set
) == PARALLEL
)
8790 set
= XVECEXP (set
, 0, 0);
8791 if (GET_CODE (set
) != SET
)
8793 if (GET_CODE (SET_SRC (set
)) != COMPARE
)
   /* SET_MODE is the mode of the destination of the SET. */
8796 set_mode
= GET_MODE (SET_DEST (set
));
8800 if (req_mode
!= CCNOmode
8801 && (req_mode
!= CCmode
8802 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
8806 if (req_mode
== CCGCmode
)
8810 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
8814 if (req_mode
== CCZmode
)
   /* Finally the COMPARE source must have the same mode as the
   destination. */
8824 return (GET_MODE (SET_SRC (set
)) == set_mode
);
8827 /* Generate insn patterns to do an integer compare of OP0 with OP1.
   One insn is emitted that sets the flags register, in the CC mode that
   SELECT_CC_MODE picks for CODE and the operands, to COMPARE (OP0, OP1).
   The value returned is the rtx (CODE flags 0) for the user of the
   flags. */
8830 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
8832 enum machine_mode cmpmode
;
8835 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
8836 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
8838 /* This is very simple, but making the interface the same as in the
8839 FP case makes the rest of the code easier. */
8840 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
8841 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
8843 /* Return the test that should be put into the flags user, i.e.
8844 the bcc, scc, or cmov instruction. */
8845 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
8848 /* Figure out whether to use ordered or unordered fp comparisons.
8849 Return the appropriate mode to use: CCFPUmode when TARGET_IEEE_FP is
   set, CCFPmode otherwise. CODE does not influence the result. */
8852 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
8854 /* ??? In order to make all comparisons reversible, we do all comparisons
8855 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8856 all forms of trapping and nontrapping comparisons, we can make inequality
8857 comparisons trapping again, since it results in better code when using
8858 FCOM based compares. */
8859 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
8863 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
8865 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
8866 return ix86_fp_compare_mode (code
);
8869 /* Only zero flag is needed. */
8871 case NE
: /* ZF!=0 */
8873 /* Codes needing carry flag. */
8874 case GEU
: /* CF=0 */
8875 case GTU
: /* CF=0 & ZF=0 */
8876 case LTU
: /* CF=1 */
8877 case LEU
: /* CF=1 | ZF=1 */
8879 /* Codes possibly doable only with sign flag when
8880 comparing against zero. */
8881 case GE
: /* SF=OF or SF=0 */
8882 case LT
: /* SF<>OF or SF=1 */
8883 if (op1
== const0_rtx
)
8886 /* For other cases Carry flag is not required. */
8888 /* Codes doable only with sign flag when comparing
8889 against zero, but we miss jump instruction for it
8890 so we need to use relational tests against overflow
8891 that thus needs to be zero. */
8892 case GT
: /* ZF=0 & SF=OF */
8893 case LE
: /* ZF=1 | SF<>OF */
8894 if (op1
== const0_rtx
)
8898 /* strcmp pattern do (use flags) and combine may ask us for proper
8907 /* Return the fixed registers used for condition codes. */
8910 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
8917 /* If two condition code modes are compatible, return a condition code
8918 mode which is compatible with both. Otherwise, return
8921 static enum machine_mode
8922 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
8927 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
8930 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
8931 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
8959 /* These are only compatible with themselves, which we already
8965 /* Return true if we should use an FCOMI instruction for this fp comparison.
   That is the case when the overall cost from ix86_fp_comparison_cost
   equals the FCOMI cost, either for CODE itself or for CODE with its
   operands swapped (swap_condition). Note that CODE is read even though
   it is marked ATTRIBUTE_UNUSED. */
8968 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED
)
8970 enum rtx_code swapped_code
= swap_condition (code
);
8971 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
8972 || (ix86_fp_comparison_cost (swapped_code
)
8973 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
8976 /* Swap, force into registers, or otherwise massage the two operands
8977 to a fp comparison. The operands are updated in place; the new
8978 comparison code is returned. */
8980 static enum rtx_code
8981 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
8983 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
8984 rtx op0
= *pop0
, op1
= *pop1
;
8985 enum machine_mode op_mode
= GET_MODE (op0
);
8986 int is_sse
= SSE_REG_P (op0
) | SSE_REG_P (op1
);
8988 /* All of the unordered compare instructions only work on registers.
8989 The same is true of the XFmode compare instructions. The same is
8990 true of the fcomi compare instructions. */
8993 && (fpcmp_mode
== CCFPUmode
8994 || op_mode
== XFmode
8995 || ix86_use_fcomi_compare (code
)))
8997 op0
= force_reg (op_mode
, op0
);
8998 op1
= force_reg (op_mode
, op1
);
9002 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
9003 things around if they appear profitable, otherwise force op0
9006 if (standard_80387_constant_p (op0
) == 0
9007 || (GET_CODE (op0
) == MEM
9008 && ! (standard_80387_constant_p (op1
) == 0
9009 || GET_CODE (op1
) == MEM
)))
9012 tmp
= op0
, op0
= op1
, op1
= tmp
;
9013 code
= swap_condition (code
);
9016 if (GET_CODE (op0
) != REG
)
9017 op0
= force_reg (op_mode
, op0
);
9019 if (CONSTANT_P (op1
))
9021 if (standard_80387_constant_p (op1
))
9022 op1
= force_reg (op_mode
, op1
);
9024 op1
= validize_mem (force_const_mem (op_mode
, op1
));
9028 /* Try to rearrange the comparison to make it cheaper. */
9029 if (ix86_fp_comparison_cost (code
)
9030 > ix86_fp_comparison_cost (swap_condition (code
))
9031 && (GET_CODE (op1
) == REG
|| !no_new_pseudos
))
9034 tmp
= op0
, op0
= op1
, op1
= tmp
;
9035 code
= swap_condition (code
);
9036 if (GET_CODE (op0
) != REG
)
9037 op0
= force_reg (op_mode
, op0
);
9045 /* Convert comparison codes we use to represent FP comparison to integer
9046 code that will result in proper branch. Return UNKNOWN if no such code
9048 static enum rtx_code
9049 ix86_fp_compare_code_to_integer (enum rtx_code code
)
9078 /* Split comparison code CODE into comparisons we can do using branch
9079 instructions. BYPASS_CODE is comparison code for branch that will
9080 branch around FIRST_CODE and SECOND_CODE. If one of the branches
9081 is not required, set its value to NIL.
9082 We never require more than two branches. */
9084 ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*bypass_code
,
9085 enum rtx_code
*first_code
,
9086 enum rtx_code
*second_code
)
9092 /* The fcomi comparison sets flags as follows:
9102 case GT
: /* GTU - CF=0 & ZF=0 */
9103 case GE
: /* GEU - CF=0 */
9104 case ORDERED
: /* PF=0 */
9105 case UNORDERED
: /* PF=1 */
9106 case UNEQ
: /* EQ - ZF=1 */
9107 case UNLT
: /* LTU - CF=1 */
9108 case UNLE
: /* LEU - CF=1 | ZF=1 */
9109 case LTGT
: /* EQ - ZF=0 */
9111 case LT
: /* LTU - CF=1 - fails on unordered */
9113 *bypass_code
= UNORDERED
;
9115 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
9117 *bypass_code
= UNORDERED
;
9119 case EQ
: /* EQ - ZF=1 - fails on unordered */
9121 *bypass_code
= UNORDERED
;
9123 case NE
: /* NE - ZF=0 - fails on unordered */
9125 *second_code
= UNORDERED
;
9127 case UNGE
: /* GEU - CF=0 - fails on unordered */
9129 *second_code
= UNORDERED
;
9131 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
9133 *second_code
= UNORDERED
;
9138 if (!TARGET_IEEE_FP
)
9145 /* Return the cost of a comparison done using fcom plus arithmetic
9146 operations on AX. All of the following functions use the number of
9147 instructions as the cost metric. In the future this should be tweaked
9148 to compute bytes for optimize_size and to take into account the
   performance of various instructions on various CPUs.
   NOTE(review): the values returned are not visible in this extract. */
9150 ix86_fp_comparison_arithmetics_cost (enum rtx_code code
)
9152 if (!TARGET_IEEE_FP
)
9154 /* The cost of code output by ix86_expand_fp_compare. */
9182 /* Return the cost of a comparison done using the fcomi operation.
9183 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9185 ix86_fp_comparison_fcomi_cost (enum rtx_code code
)
9187 enum rtx_code bypass_code
, first_code
, second_code
;
9188 /* Return arbitrarily high cost when instruction is not supported - this
9189 prevents gcc from using it. NOTE(review): the test and the value
   returned for that case are missing from this extract. */
9192 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
   /* Base cost of 2, plus 1 when a bypass or a second test is required. */
9193 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 2;
9196 /* Return the cost of a comparison done using the sahf operation.
9197 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9199 ix86_fp_comparison_sahf_cost (enum rtx_code code
)
9201 enum rtx_code bypass_code
, first_code
, second_code
;
9202 /* Return arbitrarily high cost when instruction is not preferred - this
9203 keeps gcc from using it. The instruction counts as not preferred when
   neither TARGET_USE_SAHF nor optimize_size is set. */
9204 if (!TARGET_USE_SAHF
&& !optimize_size
)
9206 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
   /* Base cost of 3, plus 1 when a bypass or a second test is required. */
9207 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 3;
9210 /* Compute the cost of the comparison CODE done using any method, by
9211 comparing the arithmetics, sahf and fcomi costs.
   See ix86_fp_comparison_arithmetics_cost for the metrics.
   NOTE(review): the statements under the two tests and the return are
   missing from this extract; presumably MIN is lowered to the cheaper
   cost and returned -- confirm against the complete file. */
9213 ix86_fp_comparison_cost (enum rtx_code code
)
   /* The 1024 initializer of ARITHMETICS_COST is overwritten below before
   it is read. */
9215 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
9218 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
9219 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
9221 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
9222 if (min
> sahf_cost
)
9224 if (min
> fcomi_cost
)
9229 /* Generate insn patterns to do a floating point compare of OP0 with OP1
   under comparison CODE. SCRATCH is used as the HImode destination of
   the fnstsw; a fresh HImode pseudo is assigned to it at the gen_reg_rtx
   calls below. *SECOND_TEST and *BYPASS_TEST are cleared to NULL_RTX and,
   on the fcomi/sahf path, set to extra tests on the flags register when
   ix86_fp_comparison_codes reports a second or bypass code. The value
   returned is a comparison rtx on the flags register in INTCMP_MODE.
   NOTE(review): the switch and case labels around the per-code sequences,
   and the null checks presumably guarding the pointer stores, are missing
   from this extract. */
9232 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
,
9233 rtx
*second_test
, rtx
*bypass_test
)
9235 enum machine_mode fpcmp_mode
, intcmp_mode
;
9237 int cost
= ix86_fp_comparison_cost (code
);
9238 enum rtx_code bypass_code
, first_code
, second_code
;
9240 fpcmp_mode
= ix86_fp_compare_mode (code
);
9241 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
9244 *second_test
= NULL_RTX
;
9246 *bypass_test
= NULL_RTX
;
9248 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
9250 /* Do fcomi/sahf based test when profitable, i.e. when the arithmetic
   sequence costs more than COST and the caller supplied a place to
   return every bypass or second test that is needed. */
9251 if ((bypass_code
== NIL
|| bypass_test
)
9252 && (second_code
== NIL
|| second_test
)
9253 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
9257 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
9258 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
9264 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
9265 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
9267 scratch
= gen_reg_rtx (HImode
);
9268 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
9269 emit_insn (gen_x86_sahf_1 (scratch
));
9272 /* The FP codes work out to act like unsigned. */
9273 intcmp_mode
= fpcmp_mode
;
9275 if (bypass_code
!= NIL
)
9276 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
9277 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
9279 if (second_code
!= NIL
)
9280 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
9281 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
9286 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9287 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
9288 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
9290 scratch
= gen_reg_rtx (HImode
);
9291 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
9293 /* In the unordered case, we have to check C2 for NaN's, which
9294 doesn't happen to work out to anything nice combination-wise.
9295 So do some bit twiddling on the value we've got in AH to come
9296 up with an appropriate set of condition codes.
   In the high byte of the x87 status word C0 is 0x01, C2 is 0x04 and C3
   is 0x40, so the mask 0x45 used below selects C3, C2 and C0 together
   (layout per the x87 status word definition -- the SOURCE itself only
   names C2 and AH). */
9298 intcmp_mode
= CCNOmode
;
9303 if (code
== GT
|| !TARGET_IEEE_FP
)
9305 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
9310 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
9311 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
9312 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
9313 intcmp_mode
= CCmode
;
9319 if (code
== LT
&& TARGET_IEEE_FP
)
9321 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
9322 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
9323 intcmp_mode
= CCmode
;
9328 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
9334 if (code
== GE
|| !TARGET_IEEE_FP
)
9336 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
9341 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
9342 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
9349 if (code
== LE
&& TARGET_IEEE_FP
)
9351 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
9352 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
9353 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
9354 intcmp_mode
= CCmode
;
9359 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
9365 if (code
== EQ
&& TARGET_IEEE_FP
)
9367 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
9368 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
9369 intcmp_mode
= CCmode
;
9374 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
9381 if (code
== NE
&& TARGET_IEEE_FP
)
9383 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
9384 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
9390 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
9396 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
9400 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
9409 /* Return the test that should be put into the flags user, i.e.
9410 the bcc, scc, or cmov instruction. */
9411 return gen_rtx_fmt_ee (code
, VOIDmode
,
9412 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
/* Expand a comparison of the global operands ix86_compare_op0 and
   ix86_compare_op1 under CODE and return the resulting test rtx.
   Operands whose mode class is MODE_FLOAT go through
   ix86_expand_fp_compare (with no scratch register supplied); everything
   else goes through ix86_expand_int_compare. *SECOND_TEST and
   *BYPASS_TEST are reset to NULL_RTX and are only handed on in the
   floating point case.
   NOTE(review): callers in this file pass NULL for both pointers, so the
   two stores below are presumably guarded by null checks that are missing
   from this extract -- confirm. */
9417 ix86_expand_compare (enum rtx_code code
, rtx
*second_test
, rtx
*bypass_test
)
9420 op0
= ix86_compare_op0
;
9421 op1
= ix86_compare_op1
;
9424 *second_test
= NULL_RTX
;
9426 *bypass_test
= NULL_RTX
;
9428 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
9429 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
9430 second_test
, bypass_test
);
9432 ret
= ix86_expand_int_compare (code
, op0
, op1
);
9437 /* Return true if CODE will result in a nontrivial jump sequence, that
   is, if ix86_fp_comparison_codes reports a bypass code or a second code
   other than NIL for it. */
9439 ix86_fp_jump_nontrivial_p (enum rtx_code code
)
9441 enum rtx_code bypass_code
, first_code
, second_code
;
9444 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
9445 return bypass_code
!= NIL
|| second_code
!= NIL
;
9449 ix86_expand_branch (enum rtx_code code
, rtx label
)
9453 switch (GET_MODE (ix86_compare_op0
))
9459 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
9460 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
9461 gen_rtx_LABEL_REF (VOIDmode
, label
),
9463 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
9472 enum rtx_code bypass_code
, first_code
, second_code
;
9474 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
9477 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
9479 /* Check whether we will use the natural sequence with one jump. If
9480 so, we can expand jump early. Otherwise delay expansion by
9481 creating compound insn to not confuse optimizers. */
9482 if (bypass_code
== NIL
&& second_code
== NIL
9485 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
9486 gen_rtx_LABEL_REF (VOIDmode
, label
),
9491 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
9492 ix86_compare_op0
, ix86_compare_op1
);
9493 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
9494 gen_rtx_LABEL_REF (VOIDmode
, label
),
9496 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
9498 use_fcomi
= ix86_use_fcomi_compare (code
);
9499 vec
= rtvec_alloc (3 + !use_fcomi
);
9500 RTVEC_ELT (vec
, 0) = tmp
;
9502 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
9504 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
9507 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
9509 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
9517 /* Expand DImode branch into multiple compare+branch. */
9519 rtx lo
[2], hi
[2], label2
;
9520 enum rtx_code code1
, code2
, code3
;
9522 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
9524 tmp
= ix86_compare_op0
;
9525 ix86_compare_op0
= ix86_compare_op1
;
9526 ix86_compare_op1
= tmp
;
9527 code
= swap_condition (code
);
9529 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
9530 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
9532 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9533 avoid two branches. This costs one extra insn, so disable when
9534 optimizing for size. */
9536 if ((code
== EQ
|| code
== NE
)
9538 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
9543 if (hi
[1] != const0_rtx
)
9544 xor1
= expand_binop (SImode
, xor_optab
, xor1
, hi
[1],
9545 NULL_RTX
, 0, OPTAB_WIDEN
);
9548 if (lo
[1] != const0_rtx
)
9549 xor0
= expand_binop (SImode
, xor_optab
, xor0
, lo
[1],
9550 NULL_RTX
, 0, OPTAB_WIDEN
);
9552 tmp
= expand_binop (SImode
, ior_optab
, xor1
, xor0
,
9553 NULL_RTX
, 0, OPTAB_WIDEN
);
9555 ix86_compare_op0
= tmp
;
9556 ix86_compare_op1
= const0_rtx
;
9557 ix86_expand_branch (code
, label
);
9561 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9562 op1 is a constant and the low word is zero, then we can just
9563 examine the high word. */
9565 if (GET_CODE (hi
[1]) == CONST_INT
&& lo
[1] == const0_rtx
)
9568 case LT
: case LTU
: case GE
: case GEU
:
9569 ix86_compare_op0
= hi
[0];
9570 ix86_compare_op1
= hi
[1];
9571 ix86_expand_branch (code
, label
);
9577 /* Otherwise, we need two or three jumps. */
9579 label2
= gen_label_rtx ();
9582 code2
= swap_condition (code
);
9583 code3
= unsigned_condition (code
);
9587 case LT
: case GT
: case LTU
: case GTU
:
9590 case LE
: code1
= LT
; code2
= GT
; break;
9591 case GE
: code1
= GT
; code2
= LT
; break;
9592 case LEU
: code1
= LTU
; code2
= GTU
; break;
9593 case GEU
: code1
= GTU
; code2
= LTU
; break;
9595 case EQ
: code1
= NIL
; code2
= NE
; break;
9596 case NE
: code2
= NIL
; break;
9604 * if (hi(a) < hi(b)) goto true;
9605 * if (hi(a) > hi(b)) goto false;
9606 * if (lo(a) < lo(b)) goto true;
9610 ix86_compare_op0
= hi
[0];
9611 ix86_compare_op1
= hi
[1];
9614 ix86_expand_branch (code1
, label
);
9616 ix86_expand_branch (code2
, label2
);
9618 ix86_compare_op0
= lo
[0];
9619 ix86_compare_op1
= lo
[1];
9620 ix86_expand_branch (code3
, label
);
9623 emit_label (label2
);
9632 /* Split branch based on floating point condition. */
9634 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
9635 rtx target1
, rtx target2
, rtx tmp
)
9638 rtx label
= NULL_RTX
;
9640 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
9643 if (target2
!= pc_rtx
)
9646 code
= reverse_condition_maybe_unordered (code
);
9651 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
9652 tmp
, &second
, &bypass
);
9654 if (split_branch_probability
>= 0)
9656 /* Distribute the probabilities across the jumps.
9657 Assume the BYPASS and SECOND to be always test
9659 probability
= split_branch_probability
;
9661 /* A value of 1 is low enough that PROBABILITY does not need
9662 to be adjusted for it. Later we may run some experiments and see
9663 if unordered values are more frequent in practice. */
9665 bypass_probability
= 1;
9667 second_probability
= 1;
9669 if (bypass
!= NULL_RTX
)
9671 label
= gen_label_rtx ();
9672 i
= emit_jump_insn (gen_rtx_SET
9674 gen_rtx_IF_THEN_ELSE (VOIDmode
,
9676 gen_rtx_LABEL_REF (VOIDmode
,
9679 if (bypass_probability
>= 0)
9681 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
9682 GEN_INT (bypass_probability
),
9685 i
= emit_jump_insn (gen_rtx_SET
9687 gen_rtx_IF_THEN_ELSE (VOIDmode
,
9688 condition
, target1
, target2
)));
9689 if (probability
>= 0)
9691 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
9692 GEN_INT (probability
),
9694 if (second
!= NULL_RTX
)
9696 i
= emit_jump_insn (gen_rtx_SET
9698 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
9700 if (second_probability
>= 0)
9702 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
9703 GEN_INT (second_probability
),
9706 if (label
!= NULL_RTX
)
/* Expand a store-flag operation: compare the global operands
   ix86_compare_op0 and ix86_compare_op1 under CODE and store the result
   as a QImode value. Returns 0 (FAIL) for a DImode comparison, subject
   to a further condition that is not visible in this extract, and 1
   (DONE) once the insns have been emitted.
   NOTE(review): the relation between DEST, TMP and TMPREG is set up on
   lines missing from this extract. */
9711 ix86_expand_setcc (enum rtx_code code
, rtx dest
)
9713 rtx ret
, tmp
, tmpreg
, equiv
;
9714 rtx second_test
, bypass_test
;
9716 if (GET_MODE (ix86_compare_op0
) == DImode
9718 return 0; /* FAIL */
9720 if (GET_MODE (dest
) != QImode
)
   /* Emit the compare and turn the returned test into a QImode value. */
9723 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9724 PUT_MODE (ret
, QImode
);
9729 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
   /* A floating point compare may need an additional test. Its value is
   computed into TMP2 and merged into TMP with an AND or an IOR. */
9730 if (bypass_test
|| second_test
)
9732 rtx test
= second_test
;
9734 rtx tmp2
= gen_reg_rtx (QImode
);
9741 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
9743 PUT_MODE (test
, QImode
);
9744 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
9747 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
9749 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
9752 /* Attach a REG_EQUAL note describing the comparison result to the last
   insn emitted. */
9753 equiv
= simplify_gen_relational (code
, QImode
,
9754 GET_MODE (ix86_compare_op0
),
9755 ix86_compare_op0
, ix86_compare_op1
);
9756 set_unique_reg_note (get_last_insn (), REG_EQUAL
, equiv
);
9758 return 1; /* DONE */
9761 /* Expand comparison setting or clearing carry flag. Return true when
9762 successful and store the resulting comparison rtx in *POP. A result
   is only accepted when its code is LTU or GEU. OP0 and OP1 are the
   operands of comparison CODE; the mode is taken from OP0, or from OP1
   when OP0 has VOIDmode.
   NOTE(review): the switch, case labels and return statements of this
   function are missing from this extract. */
9764 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
9766 enum machine_mode mode
=
9767 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
9769 /* Do not handle DImode compares that go through a special path. Also we
9770 can't deal with FP compares yet. This is possible to add.
   NOTE(review): the FLOAT_MODE_P branch just below does attempt floating
   point compares, so the second sentence looks stale. */
9771 if ((mode
== DImode
&& !TARGET_64BIT
))
9773 if (FLOAT_MODE_P (mode
))
9775 rtx second_test
= NULL
, bypass_test
= NULL
;
9776 rtx compare_op
, compare_seq
;
9778 /* Shortcut: following common codes never translate into carry flag compares. */
9779 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
9780 || code
== ORDERED
|| code
== UNORDERED
)
9783 /* These comparisons require zero flag; swap operands so they won't. */
9784 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
9790 code
= swap_condition (code
);
9793 /* Try to expand the comparison and verify that we end up with a carry flag
9794 based comparison. This fails only when we decide to expand the
9795 comparison using arithmetic, which is not a common scenario. */
9797 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
9798 &second_test
, &bypass_test
);
9799 compare_seq
= get_insns ();
9802 if (second_test
|| bypass_test
)
9804 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
9805 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
9806 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
9808 code
= GET_CODE (compare_op
);
9809 if (code
!= LTU
&& code
!= GEU
)
9811 emit_insn (compare_seq
);
9815 if (!INTEGRAL_MODE_P (mode
))
9823 /* Convert a==0 into (unsigned)a<1. */
9826 if (op1
!= const0_rtx
)
9829 code
= (code
== EQ
? LTU
: GEU
);
9832 /* Convert a>b into b<a or a>=b+1. The constant case below adds 1 to
   OP1. */
9835 if (GET_CODE (op1
) == CONST_INT
)
9837 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
9838 /* Bail out on overflow. We still can swap operands but that
9839 would force loading of the constant into register. */
9840 if (op1
== const0_rtx
9841 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
9843 code
= (code
== GTU
? GEU
: LTU
);
9850 code
= (code
== GTU
? LTU
: GEU
);
9854 /* Convert a>=0 into (unsigned)a<0x80000000.
   NOTE(review): the constant is built with 1 << (bitsize - 1) on a plain
   int; for a 32-bit mode that shifts into the sign bit of a 32-bit int,
   which ISO C leaves undefined -- consider a wider or unsigned type. */
9857 if (mode
== DImode
|| op1
!= const0_rtx
)
9859 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
9860 code
= (code
== LT
? GEU
: LTU
);
9864 if (mode
== DImode
|| op1
!= constm1_rtx
)
9866 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
9867 code
= (code
== LE
? GEU
: LTU
);
9873 /* Swapping operands may cause constant to appear as first operand. */
9874 if (!nonimmediate_operand (op0
, VOIDmode
))
9878 op0
= force_reg (mode
, op0
);
   /* Expand through the global compare operands and check that the test
   that comes back really is a carry flag test. */
9880 ix86_compare_op0
= op0
;
9881 ix86_compare_op1
= op1
;
9882 *pop
= ix86_expand_compare (code
, NULL
, NULL
);
9883 if (GET_CODE (*pop
) != LTU
&& GET_CODE (*pop
) != GEU
)
9889 ix86_expand_int_movcc (rtx operands
[])
9891 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
9892 rtx compare_seq
, compare_op
;
9893 rtx second_test
, bypass_test
;
9894 enum machine_mode mode
= GET_MODE (operands
[0]);
9895 bool sign_bit_compare_p
= false;;
9898 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9899 compare_seq
= get_insns ();
9902 compare_code
= GET_CODE (compare_op
);
9904 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
9905 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
9906 sign_bit_compare_p
= true;
9908 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9909 HImode insns, we'd be swallowed in word prefix ops. */
9911 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
9912 && (mode
!= DImode
|| TARGET_64BIT
)
9913 && GET_CODE (operands
[2]) == CONST_INT
9914 && GET_CODE (operands
[3]) == CONST_INT
)
9916 rtx out
= operands
[0];
9917 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
9918 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
9922 /* Sign bit compares are better done using shifts than we do by using
9924 if (sign_bit_compare_p
9925 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
9926 ix86_compare_op1
, &compare_op
))
9928 /* Detect overlap between destination and compare sources. */
9931 if (!sign_bit_compare_p
)
9935 compare_code
= GET_CODE (compare_op
);
9937 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
9938 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
9941 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
9944 /* To simplify rest of code, restrict to the GEU case. */
9945 if (compare_code
== LTU
)
9947 HOST_WIDE_INT tmp
= ct
;
9950 compare_code
= reverse_condition (compare_code
);
9951 code
= reverse_condition (code
);
9956 PUT_CODE (compare_op
,
9957 reverse_condition_maybe_unordered
9958 (GET_CODE (compare_op
)));
9960 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
9964 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
9965 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
9966 tmp
= gen_reg_rtx (mode
);
9969 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
9971 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
9975 if (code
== GT
|| code
== GE
)
9976 code
= reverse_condition (code
);
9979 HOST_WIDE_INT tmp
= ct
;
9984 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
9985 ix86_compare_op1
, VOIDmode
, 0, -1);
9998 tmp
= expand_simple_binop (mode
, PLUS
,
10000 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
10011 tmp
= expand_simple_binop (mode
, IOR
,
10013 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
10015 else if (diff
== -1 && ct
)
10025 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
10027 tmp
= expand_simple_binop (mode
, PLUS
,
10028 copy_rtx (tmp
), GEN_INT (cf
),
10029 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
10037 * andl cf - ct, dest
10047 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
10050 tmp
= expand_simple_binop (mode
, AND
,
10052 gen_int_mode (cf
- ct
, mode
),
10053 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
10055 tmp
= expand_simple_binop (mode
, PLUS
,
10056 copy_rtx (tmp
), GEN_INT (ct
),
10057 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
10060 if (!rtx_equal_p (tmp
, out
))
10061 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
10063 return 1; /* DONE */
10069 tmp
= ct
, ct
= cf
, cf
= tmp
;
10071 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
10073 /* We may be reversing unordered compare to normal compare, that
10074 is not valid in general (we may convert non-trapping condition
10075 to trapping one), however on i386 we currently emit all
10076 comparisons unordered. */
10077 compare_code
= reverse_condition_maybe_unordered (compare_code
);
10078 code
= reverse_condition_maybe_unordered (code
);
10082 compare_code
= reverse_condition (compare_code
);
10083 code
= reverse_condition (code
);
10087 compare_code
= NIL
;
10088 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
10089 && GET_CODE (ix86_compare_op1
) == CONST_INT
)
10091 if (ix86_compare_op1
== const0_rtx
10092 && (code
== LT
|| code
== GE
))
10093 compare_code
= code
;
10094 else if (ix86_compare_op1
== constm1_rtx
)
10098 else if (code
== GT
)
10103 /* Optimize dest = (op0 < 0) ? -1 : cf. */
10104 if (compare_code
!= NIL
10105 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
10106 && (cf
== -1 || ct
== -1))
10108 /* If lea code below could be used, only optimize
10109 if it results in a 2 insn sequence. */
10111 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
10112 || diff
== 3 || diff
== 5 || diff
== 9)
10113 || (compare_code
== LT
&& ct
== -1)
10114 || (compare_code
== GE
&& cf
== -1))
10117 * notl op1 (if necessary)
10125 code
= reverse_condition (code
);
10128 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
10129 ix86_compare_op1
, VOIDmode
, 0, -1);
10131 out
= expand_simple_binop (mode
, IOR
,
10133 out
, 1, OPTAB_DIRECT
);
10134 if (out
!= operands
[0])
10135 emit_move_insn (operands
[0], out
);
10137 return 1; /* DONE */
10142 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
10143 || diff
== 3 || diff
== 5 || diff
== 9)
10144 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
10145 && (mode
!= DImode
|| x86_64_sign_extended_value (GEN_INT (cf
))))
10151 * lea cf(dest*(ct-cf)),dest
10155 * This also catches the degenerate setcc-only case.
10161 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
10162 ix86_compare_op1
, VOIDmode
, 0, 1);
10165 /* On x86_64 the lea instruction operates on Pmode, so we need
10166 to get arithmetics done in proper mode to match. */
10168 tmp
= copy_rtx (out
);
10172 out1
= copy_rtx (out
);
10173 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
10177 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
10183 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
10186 if (!rtx_equal_p (tmp
, out
))
10189 out
= force_operand (tmp
, copy_rtx (out
));
10191 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
10193 if (!rtx_equal_p (out
, operands
[0]))
10194 emit_move_insn (operands
[0], copy_rtx (out
));
10196 return 1; /* DONE */
10200 * General case: Jumpful:
10201 * xorl dest,dest cmpl op1, op2
10202 * cmpl op1, op2 movl ct, dest
10203 * setcc dest jcc 1f
10204 * decl dest movl cf, dest
10205 * andl (cf-ct),dest 1:
10208 * Size 20. Size 14.
10210 * This is reasonably steep, but branch mispredict costs are
10211 * high on modern cpus, so consider failing only if optimizing
10215 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
10216 && BRANCH_COST
>= 2)
10222 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
10223 /* We may be reversing unordered compare to normal compare,
10224 that is not valid in general (we may convert non-trapping
10225 condition to trapping one), however on i386 we currently
10226 emit all comparisons unordered. */
10227 code
= reverse_condition_maybe_unordered (code
);
10230 code
= reverse_condition (code
);
10231 if (compare_code
!= NIL
)
10232 compare_code
= reverse_condition (compare_code
);
10236 if (compare_code
!= NIL
)
10238 /* notl op1 (if needed)
10243 For x < 0 (resp. x <= -1) there will be no notl,
10244 so if possible swap the constants to get rid of the
10246 True/false will be -1/0 while code below (store flag
10247 followed by decrement) is 0/-1, so the constants need
10248 to be exchanged once more. */
10250 if (compare_code
== GE
|| !cf
)
10252 code
= reverse_condition (code
);
10257 HOST_WIDE_INT tmp
= cf
;
10262 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
10263 ix86_compare_op1
, VOIDmode
, 0, -1);
10267 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
10268 ix86_compare_op1
, VOIDmode
, 0, 1);
10270 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
10271 copy_rtx (out
), 1, OPTAB_DIRECT
);
10274 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
10275 gen_int_mode (cf
- ct
, mode
),
10276 copy_rtx (out
), 1, OPTAB_DIRECT
);
10278 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
10279 copy_rtx (out
), 1, OPTAB_DIRECT
);
10280 if (!rtx_equal_p (out
, operands
[0]))
10281 emit_move_insn (operands
[0], copy_rtx (out
));
10283 return 1; /* DONE */
10287 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
10289 /* Try a few things more with specific constants and a variable. */
10292 rtx var
, orig_out
, out
, tmp
;
10294 if (BRANCH_COST
<= 2)
10295 return 0; /* FAIL */
10297 /* If one of the two operands is an interesting constant, load a
10298 constant with the above and mask it in with a logical operation. */
10300 if (GET_CODE (operands
[2]) == CONST_INT
)
10303 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
10304 operands
[3] = constm1_rtx
, op
= and_optab
;
10305 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
10306 operands
[3] = const0_rtx
, op
= ior_optab
;
10308 return 0; /* FAIL */
10310 else if (GET_CODE (operands
[3]) == CONST_INT
)
10313 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
10314 operands
[2] = constm1_rtx
, op
= and_optab
;
10315 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
10316 operands
[2] = const0_rtx
, op
= ior_optab
;
10318 return 0; /* FAIL */
10321 return 0; /* FAIL */
10323 orig_out
= operands
[0];
10324 tmp
= gen_reg_rtx (mode
);
10327 /* Recurse to get the constant loaded. */
10328 if (ix86_expand_int_movcc (operands
) == 0)
10329 return 0; /* FAIL */
10331 /* Mask in the interesting variable. */
10332 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
10334 if (!rtx_equal_p (out
, orig_out
))
10335 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
10337 return 1; /* DONE */
10341 * For comparison with above,
10351 if (! nonimmediate_operand (operands
[2], mode
))
10352 operands
[2] = force_reg (mode
, operands
[2]);
10353 if (! nonimmediate_operand (operands
[3], mode
))
10354 operands
[3] = force_reg (mode
, operands
[3]);
10356 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
10358 rtx tmp
= gen_reg_rtx (mode
);
10359 emit_move_insn (tmp
, operands
[3]);
10362 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
10364 rtx tmp
= gen_reg_rtx (mode
);
10365 emit_move_insn (tmp
, operands
[2]);
10369 if (! register_operand (operands
[2], VOIDmode
)
10371 || ! register_operand (operands
[3], VOIDmode
)))
10372 operands
[2] = force_reg (mode
, operands
[2]);
10375 && ! register_operand (operands
[3], VOIDmode
))
10376 operands
[3] = force_reg (mode
, operands
[3]);
10378 emit_insn (compare_seq
);
10379 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
10380 gen_rtx_IF_THEN_ELSE (mode
,
10381 compare_op
, operands
[2],
10384 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
10385 gen_rtx_IF_THEN_ELSE (mode
,
10387 copy_rtx (operands
[3]),
10388 copy_rtx (operands
[0]))));
10390 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
10391 gen_rtx_IF_THEN_ELSE (mode
,
10393 copy_rtx (operands
[2]),
10394 copy_rtx (operands
[0]))));
10396 return 1; /* DONE */
10400 ix86_expand_fp_movcc (rtx operands
[])
10402 enum rtx_code code
;
10404 rtx compare_op
, second_test
, bypass_test
;
10406 /* For SF/DFmode conditional moves based on comparisons
10407 in same mode, we may want to use SSE min/max instructions. */
10408 if (((TARGET_SSE_MATH
&& GET_MODE (operands
[0]) == SFmode
)
10409 || (TARGET_SSE2
&& TARGET_SSE_MATH
&& GET_MODE (operands
[0]) == DFmode
))
10410 && GET_MODE (ix86_compare_op0
) == GET_MODE (operands
[0])
10411 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
10412 && (!TARGET_IEEE_FP
10413 || (GET_CODE (operands
[1]) != LTGT
&& GET_CODE (operands
[1]) != UNEQ
))
10414 /* We may be called from the post-reload splitter. */
10415 && (!REG_P (operands
[0])
10416 || SSE_REG_P (operands
[0])
10417 || REGNO (operands
[0]) >= FIRST_PSEUDO_REGISTER
))
10419 rtx op0
= ix86_compare_op0
, op1
= ix86_compare_op1
;
10420 code
= GET_CODE (operands
[1]);
10422 /* See if we have (cross) match between comparison operands and
10423 conditional move operands. */
10424 if (rtx_equal_p (operands
[2], op1
))
10429 code
= reverse_condition_maybe_unordered (code
);
10431 if (rtx_equal_p (operands
[2], op0
) && rtx_equal_p (operands
[3], op1
))
10433 /* Check for min operation. */
10434 if (code
== LT
|| code
== UNLE
)
10442 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
10443 if (memory_operand (op0
, VOIDmode
))
10444 op0
= force_reg (GET_MODE (operands
[0]), op0
);
10445 if (GET_MODE (operands
[0]) == SFmode
)
10446 emit_insn (gen_minsf3 (operands
[0], op0
, op1
));
10448 emit_insn (gen_mindf3 (operands
[0], op0
, op1
));
10451 /* Check for max operation. */
10452 if (code
== GT
|| code
== UNGE
)
10460 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
10461 if (memory_operand (op0
, VOIDmode
))
10462 op0
= force_reg (GET_MODE (operands
[0]), op0
);
10463 if (GET_MODE (operands
[0]) == SFmode
)
10464 emit_insn (gen_maxsf3 (operands
[0], op0
, op1
));
10466 emit_insn (gen_maxdf3 (operands
[0], op0
, op1
));
10470 /* Manage condition to be sse_comparison_operator. In case we are
10471 in non-ieee mode, try to canonicalize the destination operand
10472 to be first in the comparison - this helps reload to avoid extra
10474 if (!sse_comparison_operator (operands
[1], VOIDmode
)
10475 || (rtx_equal_p (operands
[0], ix86_compare_op1
) && !TARGET_IEEE_FP
))
10477 rtx tmp
= ix86_compare_op0
;
10478 ix86_compare_op0
= ix86_compare_op1
;
10479 ix86_compare_op1
= tmp
;
10480 operands
[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands
[1])),
10481 VOIDmode
, ix86_compare_op0
,
10484 /* Similarly try to manage result to be first operand of conditional
10485 move. We also don't support the NE comparison on SSE, so try to
10487 if ((rtx_equal_p (operands
[0], operands
[3])
10488 && (!TARGET_IEEE_FP
|| GET_CODE (operands
[1]) != EQ
))
10489 || (GET_CODE (operands
[1]) == NE
&& TARGET_IEEE_FP
))
10491 rtx tmp
= operands
[2];
10492 operands
[2] = operands
[3];
10494 operands
[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10495 (GET_CODE (operands
[1])),
10496 VOIDmode
, ix86_compare_op0
,
10499 if (GET_MODE (operands
[0]) == SFmode
)
10500 emit_insn (gen_sse_movsfcc (operands
[0], operands
[1],
10501 operands
[2], operands
[3],
10502 ix86_compare_op0
, ix86_compare_op1
));
10504 emit_insn (gen_sse_movdfcc (operands
[0], operands
[1],
10505 operands
[2], operands
[3],
10506 ix86_compare_op0
, ix86_compare_op1
));
10510 /* The floating point conditional move instructions don't directly
10511 support conditions resulting from a signed integer comparison. */
10513 code
= GET_CODE (operands
[1]);
10514 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
10516 /* The floating point conditional move instructions don't directly
10517 support signed integer comparisons. */
10519 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
10521 if (second_test
!= NULL
|| bypass_test
!= NULL
)
10523 tmp
= gen_reg_rtx (QImode
);
10524 ix86_expand_setcc (code
, tmp
);
10526 ix86_compare_op0
= tmp
;
10527 ix86_compare_op1
= const0_rtx
;
10528 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
10530 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
10532 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
10533 emit_move_insn (tmp
, operands
[3]);
10536 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
10538 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
10539 emit_move_insn (tmp
, operands
[2]);
10543 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
10544 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
10549 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
10550 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
10555 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
10556 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
10564 /* Expand conditional increment or decrement using adc/sbb instructions.
10565 The default case using setcc followed by the conditional move can be
10566 done by generic code. */
10568 ix86_expand_int_addcc (rtx operands
[])
10570 enum rtx_code code
= GET_CODE (operands
[1]);
10572 rtx val
= const0_rtx
;
10573 bool fpcmp
= false;
10574 enum machine_mode mode
= GET_MODE (operands
[0]);
10576 if (operands
[3] != const1_rtx
10577 && operands
[3] != constm1_rtx
)
10579 if (!ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
10580 ix86_compare_op1
, &compare_op
))
10582 code
= GET_CODE (compare_op
);
10584 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
10585 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
10588 code
= ix86_fp_compare_code_to_integer (code
);
10595 PUT_CODE (compare_op
,
10596 reverse_condition_maybe_unordered
10597 (GET_CODE (compare_op
)));
10599 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
10601 PUT_MODE (compare_op
, mode
);
10603 /* Construct either adc or sbb insn. */
10604 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
10606 switch (GET_MODE (operands
[0]))
10609 emit_insn (gen_subqi3_carry (operands
[0], operands
[2], val
, compare_op
));
10612 emit_insn (gen_subhi3_carry (operands
[0], operands
[2], val
, compare_op
));
10615 emit_insn (gen_subsi3_carry (operands
[0], operands
[2], val
, compare_op
));
10618 emit_insn (gen_subdi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
10626 switch (GET_MODE (operands
[0]))
10629 emit_insn (gen_addqi3_carry (operands
[0], operands
[2], val
, compare_op
));
10632 emit_insn (gen_addhi3_carry (operands
[0], operands
[2], val
, compare_op
));
10635 emit_insn (gen_addsi3_carry (operands
[0], operands
[2], val
, compare_op
));
10638 emit_insn (gen_adddi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
10644 return 1; /* DONE */
10648 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10649 works for floating point parameters and non-offsettable memories.
10650 For pushes, it returns just stack offsets; the values will be saved
10651 in the right order. Maximally three parts are generated. */
10654 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
10659 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
10661 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
10663 if (GET_CODE (operand
) == REG
&& MMX_REGNO_P (REGNO (operand
)))
10665 if (size
< 2 || size
> 3)
10668 /* Optimize constant pool reference to immediates. This is used by fp
10669 moves, that force all constants to memory to allow combining. */
10670 if (GET_CODE (operand
) == MEM
&& RTX_UNCHANGING_P (operand
))
10672 rtx tmp
= maybe_get_pool_constant (operand
);
10677 if (GET_CODE (operand
) == MEM
&& !offsettable_memref_p (operand
))
10679 /* The only non-offsettable memories we handle are pushes. */
10680 if (! push_operand (operand
, VOIDmode
))
10683 operand
= copy_rtx (operand
);
10684 PUT_MODE (operand
, Pmode
);
10685 parts
[0] = parts
[1] = parts
[2] = operand
;
10687 else if (!TARGET_64BIT
)
10689 if (mode
== DImode
)
10690 split_di (&operand
, 1, &parts
[0], &parts
[1]);
10693 if (REG_P (operand
))
10695 if (!reload_completed
)
10697 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
10698 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
10700 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
10702 else if (offsettable_memref_p (operand
))
10704 operand
= adjust_address (operand
, SImode
, 0);
10705 parts
[0] = operand
;
10706 parts
[1] = adjust_address (operand
, SImode
, 4);
10708 parts
[2] = adjust_address (operand
, SImode
, 8);
10710 else if (GET_CODE (operand
) == CONST_DOUBLE
)
10715 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
10719 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
10720 parts
[2] = gen_int_mode (l
[2], SImode
);
10723 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
10728 parts
[1] = gen_int_mode (l
[1], SImode
);
10729 parts
[0] = gen_int_mode (l
[0], SImode
);
10737 if (mode
== TImode
)
10738 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
10739 if (mode
== XFmode
|| mode
== TFmode
)
10741 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
10742 if (REG_P (operand
))
10744 if (!reload_completed
)
10746 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
10747 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
10749 else if (offsettable_memref_p (operand
))
10751 operand
= adjust_address (operand
, DImode
, 0);
10752 parts
[0] = operand
;
10753 parts
[1] = adjust_address (operand
, upper_mode
, 8);
10755 else if (GET_CODE (operand
) == CONST_DOUBLE
)
10760 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
10761 real_to_target (l
, &r
, mode
);
10762 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10763 if (HOST_BITS_PER_WIDE_INT
>= 64)
10766 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
10767 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
10770 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
10771 if (upper_mode
== SImode
)
10772 parts
[1] = gen_int_mode (l
[2], SImode
);
10773 else if (HOST_BITS_PER_WIDE_INT
>= 64)
10776 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
10777 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
10780 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
10790 /* Emit insns to perform a move or push of DI, DF, and XF values.
10791 Return false when normal moves are needed; true when all required
10792 insns have been emitted. Operands 2-4 contain the destination parts
10793 in the correct order; operands 5-7 contain the matching source parts. */
10796 ix86_split_long_move (rtx operands
[])
10801 int collisions
= 0;
10802 enum machine_mode mode
= GET_MODE (operands
[0]);
10804 /* The DFmode expanders may ask us to move double.
10805 For 64bit target this is single move. By hiding the fact
10806 here we simplify i386.md splitters. */
10807 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
10809 /* Optimize constant pool reference to immediates. This is used by
10810 fp moves, that force all constants to memory to allow combining. */
10812 if (GET_CODE (operands
[1]) == MEM
10813 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
10814 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
10815 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
10816 if (push_operand (operands
[0], VOIDmode
))
10818 operands
[0] = copy_rtx (operands
[0]);
10819 PUT_MODE (operands
[0], Pmode
);
10822 operands
[0] = gen_lowpart (DImode
, operands
[0]);
10823 operands
[1] = gen_lowpart (DImode
, operands
[1]);
10824 emit_move_insn (operands
[0], operands
[1]);
10828 /* The only non-offsettable memory we handle is push. */
10829 if (push_operand (operands
[0], VOIDmode
))
10831 else if (GET_CODE (operands
[0]) == MEM
10832 && ! offsettable_memref_p (operands
[0]))
10835 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
10836 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
10838 /* When emitting push, take care for source operands on the stack. */
10839 if (push
&& GET_CODE (operands
[1]) == MEM
10840 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
10843 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
10844 XEXP (part
[1][2], 0));
10845 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
10846 XEXP (part
[1][1], 0));
10849 /* We need to do copy in the right order in case an address register
10850 of the source overlaps the destination. */
10851 if (REG_P (part
[0][0]) && GET_CODE (part
[1][0]) == MEM
)
10853 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
10855 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
10858 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
10861 /* Collision in the middle part can be handled by reordering. */
10862 if (collisions
== 1 && nparts
== 3
10863 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
10866 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
10867 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
10870 /* If there are more collisions, we can't handle it by reordering.
10871 Do an lea to the last part and use only one colliding move. */
10872 else if (collisions
> 1)
10878 base
= part
[0][nparts
- 1];
10880 /* Handle the case when the last part isn't valid for lea.
10881 Happens in 64-bit mode storing the 12-byte XFmode. */
10882 if (GET_MODE (base
) != Pmode
)
10883 base
= gen_rtx_REG (Pmode
, REGNO (base
));
10885 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
10886 part
[1][0] = replace_equiv_address (part
[1][0], base
);
10887 part
[1][1] = replace_equiv_address (part
[1][1],
10888 plus_constant (base
, UNITS_PER_WORD
));
10890 part
[1][2] = replace_equiv_address (part
[1][2],
10891 plus_constant (base
, 8));
10901 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
10902 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, GEN_INT (-4)));
10903 emit_move_insn (part
[0][2], part
[1][2]);
10908 /* In 64bit mode we don't have 32bit push available. In case this is
10909 a register, it is OK - we will just use the larger counterpart. We also
10910 retype memory - this comes from the attempt to avoid a REX prefix when
10911 moving the second half of a TFmode value. */
10912 if (GET_MODE (part
[1][1]) == SImode
)
10914 if (GET_CODE (part
[1][1]) == MEM
)
10915 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
10916 else if (REG_P (part
[1][1]))
10917 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
10920 if (GET_MODE (part
[1][0]) == SImode
)
10921 part
[1][0] = part
[1][1];
10924 emit_move_insn (part
[0][1], part
[1][1]);
10925 emit_move_insn (part
[0][0], part
[1][0]);
10929 /* Choose correct order to not overwrite the source before it is copied. */
10930 if ((REG_P (part
[0][0])
10931 && REG_P (part
[1][1])
10932 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
10934 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
10936 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
10940 operands
[2] = part
[0][2];
10941 operands
[3] = part
[0][1];
10942 operands
[4] = part
[0][0];
10943 operands
[5] = part
[1][2];
10944 operands
[6] = part
[1][1];
10945 operands
[7] = part
[1][0];
10949 operands
[2] = part
[0][1];
10950 operands
[3] = part
[0][0];
10951 operands
[5] = part
[1][1];
10952 operands
[6] = part
[1][0];
10959 operands
[2] = part
[0][0];
10960 operands
[3] = part
[0][1];
10961 operands
[4] = part
[0][2];
10962 operands
[5] = part
[1][0];
10963 operands
[6] = part
[1][1];
10964 operands
[7] = part
[1][2];
10968 operands
[2] = part
[0][0];
10969 operands
[3] = part
[0][1];
10970 operands
[5] = part
[1][0];
10971 operands
[6] = part
[1][1];
10974 emit_move_insn (operands
[2], operands
[5]);
10975 emit_move_insn (operands
[3], operands
[6]);
10977 emit_move_insn (operands
[4], operands
[7]);
10983 ix86_split_ashldi (rtx
*operands
, rtx scratch
)
10985 rtx low
[2], high
[2];
10988 if (GET_CODE (operands
[2]) == CONST_INT
)
10990 split_di (operands
, 2, low
, high
);
10991 count
= INTVAL (operands
[2]) & 63;
10995 emit_move_insn (high
[0], low
[1]);
10996 emit_move_insn (low
[0], const0_rtx
);
10999 emit_insn (gen_ashlsi3 (high
[0], high
[0], GEN_INT (count
- 32)));
11003 if (!rtx_equal_p (operands
[0], operands
[1]))
11004 emit_move_insn (operands
[0], operands
[1]);
11005 emit_insn (gen_x86_shld_1 (high
[0], low
[0], GEN_INT (count
)));
11006 emit_insn (gen_ashlsi3 (low
[0], low
[0], GEN_INT (count
)));
11011 if (!rtx_equal_p (operands
[0], operands
[1]))
11012 emit_move_insn (operands
[0], operands
[1]);
11014 split_di (operands
, 1, low
, high
);
11016 emit_insn (gen_x86_shld_1 (high
[0], low
[0], operands
[2]));
11017 emit_insn (gen_ashlsi3 (low
[0], low
[0], operands
[2]));
11019 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
11021 if (! no_new_pseudos
)
11022 scratch
= force_reg (SImode
, const0_rtx
);
11024 emit_move_insn (scratch
, const0_rtx
);
11026 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2],
11030 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
11035 ix86_split_ashrdi (rtx
*operands
, rtx scratch
)
11037 rtx low
[2], high
[2];
11040 if (GET_CODE (operands
[2]) == CONST_INT
)
11042 split_di (operands
, 2, low
, high
);
11043 count
= INTVAL (operands
[2]) & 63;
11047 emit_move_insn (high
[0], high
[1]);
11048 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (31)));
11049 emit_move_insn (low
[0], high
[0]);
11052 else if (count
>= 32)
11054 emit_move_insn (low
[0], high
[1]);
11056 if (! reload_completed
)
11057 emit_insn (gen_ashrsi3 (high
[0], low
[0], GEN_INT (31)));
11060 emit_move_insn (high
[0], low
[0]);
11061 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (31)));
11065 emit_insn (gen_ashrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
11069 if (!rtx_equal_p (operands
[0], operands
[1]))
11070 emit_move_insn (operands
[0], operands
[1]);
11071 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
11072 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (count
)));
11077 if (!rtx_equal_p (operands
[0], operands
[1]))
11078 emit_move_insn (operands
[0], operands
[1]);
11080 split_di (operands
, 1, low
, high
);
11082 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
11083 emit_insn (gen_ashrsi3 (high
[0], high
[0], operands
[2]));
11085 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
11087 if (! no_new_pseudos
)
11088 scratch
= gen_reg_rtx (SImode
);
11089 emit_move_insn (scratch
, high
[0]);
11090 emit_insn (gen_ashrsi3 (scratch
, scratch
, GEN_INT (31)));
11091 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
11095 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
11100 ix86_split_lshrdi (rtx
*operands
, rtx scratch
)
11102 rtx low
[2], high
[2];
11105 if (GET_CODE (operands
[2]) == CONST_INT
)
11107 split_di (operands
, 2, low
, high
);
11108 count
= INTVAL (operands
[2]) & 63;
11112 emit_move_insn (low
[0], high
[1]);
11113 emit_move_insn (high
[0], const0_rtx
);
11116 emit_insn (gen_lshrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
11120 if (!rtx_equal_p (operands
[0], operands
[1]))
11121 emit_move_insn (operands
[0], operands
[1]);
11122 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
11123 emit_insn (gen_lshrsi3 (high
[0], high
[0], GEN_INT (count
)));
11128 if (!rtx_equal_p (operands
[0], operands
[1]))
11129 emit_move_insn (operands
[0], operands
[1]);
11131 split_di (operands
, 1, low
, high
);
11133 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
11134 emit_insn (gen_lshrsi3 (high
[0], high
[0], operands
[2]));
11136 /* Heh. By reversing the arguments, we can reuse this pattern. */
11137 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
11139 if (! no_new_pseudos
)
11140 scratch
= force_reg (SImode
, const0_rtx
);
11142 emit_move_insn (scratch
, const0_rtx
);
11144 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
11148 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
11152 /* Helper function for the string operations below. Test whether the
11153 bits of VARIABLE selected by VALUE are zero. If so, jump to the label. */
11155 ix86_expand_aligntest (rtx variable
, int value
)
11157 rtx label
= gen_label_rtx ();
11158 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
11159 if (GET_MODE (variable
) == DImode
)
11160 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
11162 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
11163 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
11168 /* Decrease COUNTREG by VALUE. */
11170 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
11172 if (GET_MODE (countreg
) == DImode
)
11173 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
11175 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
11178 /* Zero extend possibly SImode EXP to Pmode register. */
11180 ix86_zero_extend_to_Pmode (rtx exp
)
11183 if (GET_MODE (exp
) == VOIDmode
)
11184 return force_reg (Pmode
, exp
);
11185 if (GET_MODE (exp
) == Pmode
)
11186 return copy_to_mode_reg (Pmode
, exp
);
11187 r
= gen_reg_rtx (Pmode
);
11188 emit_insn (gen_zero_extendsidi2 (r
, exp
));
11192 /* Expand string move (memcpy) operation. Use i386 string operations when
11193 profitable. expand_clrmem contains similar code. */
11195 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
)
11197 rtx srcreg
, destreg
, countreg
, srcexp
, destexp
;
11198 enum machine_mode counter_mode
;
11199 HOST_WIDE_INT align
= 0;
11200 unsigned HOST_WIDE_INT count
= 0;
11202 if (GET_CODE (align_exp
) == CONST_INT
)
11203 align
= INTVAL (align_exp
);
11205 /* Can't use any of this if the user has appropriated esi or edi. */
11206 if (global_regs
[4] || global_regs
[5])
11209 /* This simple hack avoids all inlining code and simplifies code below. */
11210 if (!TARGET_ALIGN_STRINGOPS
)
11213 if (GET_CODE (count_exp
) == CONST_INT
)
11215 count
= INTVAL (count_exp
);
11216 if (!TARGET_INLINE_ALL_STRINGOPS
&& count
> 64)
11220 /* Figure out proper mode for counter. For 32bits it is always SImode,
11221 for 64bits use SImode when possible, otherwise DImode.
11222 Set count to number of bytes copied when known at compile time. */
11223 if (!TARGET_64BIT
|| GET_MODE (count_exp
) == SImode
11224 || x86_64_zero_extended_value (count_exp
))
11225 counter_mode
= SImode
;
11227 counter_mode
= DImode
;
11229 if (counter_mode
!= SImode
&& counter_mode
!= DImode
)
11232 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
11233 if (destreg
!= XEXP (dst
, 0))
11234 dst
= replace_equiv_address_nv (dst
, destreg
);
11235 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
11236 if (srcreg
!= XEXP (src
, 0))
11237 src
= replace_equiv_address_nv (src
, srcreg
);
11239 /* When optimizing for size emit simple rep ; movsb instruction for
11240 counts not divisible by 4. */
11242 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
11244 emit_insn (gen_cld ());
11245 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
11246 destexp
= gen_rtx_PLUS (Pmode
, destreg
, countreg
);
11247 srcexp
= gen_rtx_PLUS (Pmode
, srcreg
, countreg
);
11248 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
, countreg
,
11252 /* For constant aligned (or small unaligned) copies use rep movsl
11253 followed by code copying the rest. For PentiumPro ensure 8 byte
11254 alignment to allow rep movsl acceleration. */
11256 else if (count
!= 0
11258 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
11259 || optimize_size
|| count
< (unsigned int) 64))
11261 unsigned HOST_WIDE_INT offset
= 0;
11262 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
11263 rtx srcmem
, dstmem
;
11265 emit_insn (gen_cld ());
11266 if (count
& ~(size
- 1))
11268 countreg
= copy_to_mode_reg (counter_mode
,
11269 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
11270 & (TARGET_64BIT
? -1 : 0x3fffffff)));
11271 countreg
= ix86_zero_extend_to_Pmode (countreg
);
11273 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
11274 GEN_INT (size
== 4 ? 2 : 3));
11275 srcexp
= gen_rtx_PLUS (Pmode
, destexp
, srcreg
);
11276 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
11278 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
,
11279 countreg
, destexp
, srcexp
));
11280 offset
= count
& ~(size
- 1);
11282 if (size
== 8 && (count
& 0x04))
11284 srcmem
= adjust_automodify_address_nv (src
, SImode
, srcreg
,
11286 dstmem
= adjust_automodify_address_nv (dst
, SImode
, destreg
,
11288 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11293 srcmem
= adjust_automodify_address_nv (src
, HImode
, srcreg
,
11295 dstmem
= adjust_automodify_address_nv (dst
, HImode
, destreg
,
11297 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11302 srcmem
= adjust_automodify_address_nv (src
, QImode
, srcreg
,
11304 dstmem
= adjust_automodify_address_nv (dst
, QImode
, destreg
,
11306 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11309 /* The generic code based on the glibc implementation:
11310 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11311 allowing accelerated copying there)
11312 - copy the data using rep movsl
11313 - copy the rest. */
11318 rtx srcmem
, dstmem
;
11319 int desired_alignment
= (TARGET_PENTIUMPRO
11320 && (count
== 0 || count
>= (unsigned int) 260)
11321 ? 8 : UNITS_PER_WORD
);
11322 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11323 dst
= change_address (dst
, BLKmode
, destreg
);
11324 src
= change_address (src
, BLKmode
, srcreg
);
11326 /* In case we don't know anything about the alignment, default to
11327 library version, since it is usually equally fast and result in
11330 Also emit call when we know that the count is large and call overhead
11331 will not be important. */
11332 if (!TARGET_INLINE_ALL_STRINGOPS
11333 && (align
< UNITS_PER_WORD
|| !TARGET_REP_MOVL_OPTIMAL
))
11336 if (TARGET_SINGLE_STRINGOP
)
11337 emit_insn (gen_cld ());
11339 countreg2
= gen_reg_rtx (Pmode
);
11340 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
11342 /* We don't use loops to align destination and to copy parts smaller
11343 than 4 bytes, because gcc is able to optimize such code better (in
11344 the case the destination or the count really is aligned, gcc is often
11345 able to predict the branches) and also it is friendlier to the
11346 hardware branch prediction.
11348 Using loops is beneficial for generic case, because we can
11349 handle small counts using the loops. Many CPUs (such as Athlon)
11350 have large REP prefix setup costs.
11352 This is quite costly. Maybe we can revisit this decision later or
11353 add some customizability to this code. */
11355 if (count
== 0 && align
< desired_alignment
)
11357 label
= gen_label_rtx ();
11358 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
11359 LEU
, 0, counter_mode
, 1, label
);
11363 rtx label
= ix86_expand_aligntest (destreg
, 1);
11364 srcmem
= change_address (src
, QImode
, srcreg
);
11365 dstmem
= change_address (dst
, QImode
, destreg
);
11366 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11367 ix86_adjust_counter (countreg
, 1);
11368 emit_label (label
);
11369 LABEL_NUSES (label
) = 1;
11373 rtx label
= ix86_expand_aligntest (destreg
, 2);
11374 srcmem
= change_address (src
, HImode
, srcreg
);
11375 dstmem
= change_address (dst
, HImode
, destreg
);
11376 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11377 ix86_adjust_counter (countreg
, 2);
11378 emit_label (label
);
11379 LABEL_NUSES (label
) = 1;
11381 if (align
<= 4 && desired_alignment
> 4)
11383 rtx label
= ix86_expand_aligntest (destreg
, 4);
11384 srcmem
= change_address (src
, SImode
, srcreg
);
11385 dstmem
= change_address (dst
, SImode
, destreg
);
11386 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11387 ix86_adjust_counter (countreg
, 4);
11388 emit_label (label
);
11389 LABEL_NUSES (label
) = 1;
11392 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
11394 emit_label (label
);
11395 LABEL_NUSES (label
) = 1;
11398 if (!TARGET_SINGLE_STRINGOP
)
11399 emit_insn (gen_cld ());
11402 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
11404 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, GEN_INT (3));
11408 emit_insn (gen_lshrsi3 (countreg2
, countreg
, const2_rtx
));
11409 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, const2_rtx
);
11411 srcexp
= gen_rtx_PLUS (Pmode
, destexp
, srcreg
);
11412 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
11413 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
,
11414 countreg2
, destexp
, srcexp
));
11418 emit_label (label
);
11419 LABEL_NUSES (label
) = 1;
11421 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
11423 srcmem
= change_address (src
, SImode
, srcreg
);
11424 dstmem
= change_address (dst
, SImode
, destreg
);
11425 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11427 if ((align
<= 4 || count
== 0) && TARGET_64BIT
)
11429 rtx label
= ix86_expand_aligntest (countreg
, 4);
11430 srcmem
= change_address (src
, SImode
, srcreg
);
11431 dstmem
= change_address (dst
, SImode
, destreg
);
11432 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11433 emit_label (label
);
11434 LABEL_NUSES (label
) = 1;
11436 if (align
> 2 && count
!= 0 && (count
& 2))
11438 srcmem
= change_address (src
, HImode
, srcreg
);
11439 dstmem
= change_address (dst
, HImode
, destreg
);
11440 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11442 if (align
<= 2 || count
== 0)
11444 rtx label
= ix86_expand_aligntest (countreg
, 2);
11445 srcmem
= change_address (src
, HImode
, srcreg
);
11446 dstmem
= change_address (dst
, HImode
, destreg
);
11447 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11448 emit_label (label
);
11449 LABEL_NUSES (label
) = 1;
11451 if (align
> 1 && count
!= 0 && (count
& 1))
11453 srcmem
= change_address (src
, QImode
, srcreg
);
11454 dstmem
= change_address (dst
, QImode
, destreg
);
11455 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11457 if (align
<= 1 || count
== 0)
11459 rtx label
= ix86_expand_aligntest (countreg
, 1);
11460 srcmem
= change_address (src
, QImode
, srcreg
);
11461 dstmem
= change_address (dst
, QImode
, destreg
);
11462 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11463 emit_label (label
);
11464 LABEL_NUSES (label
) = 1;
11471 /* Expand string clear operation (bzero). Use i386 string operations when
11472 profitable. expand_movmem contains similar code.

   DST is the MEM to clear.  COUNT_EXP and ALIGN_EXP are only read for
   their values when they are CONST_INTs; otherwise the locals COUNT and
   ALIGN keep their initial 0, which the tests below use as the
   "not known at compile time" case.

   Three strategies are visible: a byte-wise rep stos when not optimizing
   or optimizing for size, a rep stos over SIZE-byte units plus explicit
   4/2/1-byte tail stores for compile-time counts, and a generic path that
   aligns the destination at run time before the rep stos.

   NOTE(review): a number of lines of this routine (braces, else arms,
   return statements, a few operands) are not present in this copy;
   confirm the exact control flow against the upstream file.  */
11474 ix86_expand_clrmem (rtx dst
, rtx count_exp
, rtx align_exp
)
11476 rtx destreg
, zeroreg
, countreg
, destexp
;
11477 enum machine_mode counter_mode
;
11478 HOST_WIDE_INT align
= 0;
11479 unsigned HOST_WIDE_INT count
= 0;
11481 if (GET_CODE (align_exp
) == CONST_INT
)
11482 align
= INTVAL (align_exp
);
11484 /* Can't use any of this if the user has appropriated esi. */
/* NOTE(review): everything emitted below is a store-string insn
   (gen_rep_stos / gen_strset) addressed through DESTREG; confirm that
   index 4 really is the register that has to be guarded here.  */
11485 if (global_regs
[4])
11488 /* This simple hack avoids all inlining code and simplifies code below. */
11489 if (!TARGET_ALIGN_STRINGOPS
)
11492 if (GET_CODE (count_exp
) == CONST_INT
)
11494 count
= INTVAL (count_exp
);
11495 if (!TARGET_INLINE_ALL_STRINGOPS
&& count
> 64)
11498 /* Figure out proper mode for counter. For 32bits it is always SImode,
11499 for 64bits use SImode when possible, otherwise DImode.
11500 Set count to number of bytes copied when known at compile time. */
11501 if (!TARGET_64BIT
|| GET_MODE (count_exp
) == SImode
11502 || x86_64_zero_extended_value (count_exp
))
11503 counter_mode
= SImode
;
11505 counter_mode
= DImode
;
/* Force the destination address into a Pmode register and make DST
   refer to that register.  */
11507 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
11508 if (destreg
!= XEXP (dst
, 0))
11509 dst
= replace_equiv_address_nv (dst
, destreg
);
11511 emit_insn (gen_cld ());
11513 /* When optimizing for size emit simple rep ; stosb instruction for
11514 counts not divisible by 4. */
11516 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
11518 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
11519 zeroreg
= copy_to_mode_reg (QImode
, const0_rtx
);
11520 destexp
= gen_rtx_PLUS (Pmode
, destreg
, countreg
);
11521 emit_insn (gen_rep_stos (destreg
, countreg
, dst
, zeroreg
, destexp
));
/* Compile-time count: clear the bulk with one rep stos over SIZE-byte
   units, then store the remaining 4/2/1 bytes individually.  */
11523 else if (count
!= 0
11525 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
11526 || optimize_size
|| count
< (unsigned int) 64))
11528 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
11529 unsigned HOST_WIDE_INT offset
= 0;
11531 zeroreg
= copy_to_mode_reg (size
== 4 ? SImode
: DImode
, const0_rtx
);
/* Only emit the rep stos when at least one whole SIZE-byte unit exists.  */
11532 if (count
& ~(size
- 1))
11534 countreg
= copy_to_mode_reg (counter_mode
,
11535 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
11536 & (TARGET_64BIT
? -1 : 0x3fffffff)));
11537 countreg
= ix86_zero_extend_to_Pmode (countreg
);
11538 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
, GEN_INT (size
== 4 ? 2 : 3));
11539 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
11540 emit_insn (gen_rep_stos (destreg
, countreg
, dst
, zeroreg
, destexp
));
11541 offset
= count
& ~(size
- 1);
11543 if (size
== 8 && (count
& 0x04))
11545 rtx mem
= adjust_automodify_address_nv (dst
, SImode
, destreg
,
11547 emit_insn (gen_strset (destreg
, mem
,
11548 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
11553 rtx mem
= adjust_automodify_address_nv (dst
, HImode
, destreg
,
11555 emit_insn (gen_strset (destreg
, mem
,
11556 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
11561 rtx mem
= adjust_automodify_address_nv (dst
, QImode
, destreg
,
11563 emit_insn (gen_strset (destreg
, mem
,
11564 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
11571 /* Compute desired alignment of the string operation. */
11572 int desired_alignment
= (TARGET_PENTIUMPRO
11573 && (count
== 0 || count
>= (unsigned int) 260)
11574 ? 8 : UNITS_PER_WORD
);
11576 /* In case we don't know anything about the alignment, default to
11577 library version, since it is usually equally fast and result in
11580 Also emit call when we know that the count is large and call overhead
11581 will not be important. */
11582 if (!TARGET_INLINE_ALL_STRINGOPS
11583 && (align
< UNITS_PER_WORD
|| !TARGET_REP_MOVL_OPTIMAL
))
11586 if (TARGET_SINGLE_STRINGOP
)
11587 emit_insn (gen_cld ());
11589 countreg2
= gen_reg_rtx (Pmode
);
11590 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
11591 zeroreg
= copy_to_mode_reg (Pmode
, const0_rtx
);
11592 /* Get rid of MEM_OFFSET, it won't be accurate. */
11593 dst
= change_address (dst
, BLKmode
, destreg
);
/* With an unknown count and insufficient alignment, branch to LABEL when
   the count is at most desired_alignment - 1.  */
11595 if (count
== 0 && align
< desired_alignment
)
11597 label
= gen_label_rtx ();
11598 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
11599 LEU
, 0, counter_mode
, 1, label
);
/* Alignment prologue: each step tests one low bit of DESTREG, stores 1,
   2 or 4 bytes and adjusts COUNTREG by the same amount.  */
11603 rtx label
= ix86_expand_aligntest (destreg
, 1);
11604 emit_insn (gen_strset (destreg
, dst
,
11605 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
11606 ix86_adjust_counter (countreg
, 1);
11607 emit_label (label
);
11608 LABEL_NUSES (label
) = 1;
11612 rtx label
= ix86_expand_aligntest (destreg
, 2);
11613 emit_insn (gen_strset (destreg
, dst
,
11614 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
11615 ix86_adjust_counter (countreg
, 2);
11616 emit_label (label
);
11617 LABEL_NUSES (label
) = 1;
11619 if (align
<= 4 && desired_alignment
> 4)
11621 rtx label
= ix86_expand_aligntest (destreg
, 4);
11622 emit_insn (gen_strset (destreg
, dst
,
11624 ? gen_rtx_SUBREG (SImode
, zeroreg
, 0)
11626 ix86_adjust_counter (countreg
, 4);
11627 emit_label (label
);
11628 LABEL_NUSES (label
) = 1;
11631 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
11633 emit_label (label
);
11634 LABEL_NUSES (label
) = 1;
11638 if (!TARGET_SINGLE_STRINGOP
)
11639 emit_insn (gen_cld ());
/* Bulk clear: COUNTREG2 is COUNTREG shifted right (the DImode form is
   paired with a shift of 3 in DESTEXP, the SImode form with 2), then one
   rep stos.  */
11642 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
11644 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, GEN_INT (3));
11648 emit_insn (gen_lshrsi3 (countreg2
, countreg
, const2_rtx
));
11649 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, const2_rtx
);
11651 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
11652 emit_insn (gen_rep_stos (destreg
, countreg2
, dst
, zeroreg
, destexp
));
11656 emit_label (label
);
11657 LABEL_NUSES (label
) = 1;
/* Tail: for each of 4, 2 and 1 bytes, store unconditionally when the
   constant count has that bit set, or emit a run-time test on COUNTREG
   when the count or alignment does not settle it.  */
11660 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
11661 emit_insn (gen_strset (destreg
, dst
,
11662 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
11663 if (TARGET_64BIT
&& (align
<= 4 || count
== 0))
11665 rtx label
= ix86_expand_aligntest (countreg
, 4);
11666 emit_insn (gen_strset (destreg
, dst
,
11667 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
11668 emit_label (label
);
11669 LABEL_NUSES (label
) = 1;
11671 if (align
> 2 && count
!= 0 && (count
& 2))
11672 emit_insn (gen_strset (destreg
, dst
,
11673 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
11674 if (align
<= 2 || count
== 0)
11676 rtx label
= ix86_expand_aligntest (countreg
, 2);
11677 emit_insn (gen_strset (destreg
, dst
,
11678 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
11679 emit_label (label
);
11680 LABEL_NUSES (label
) = 1;
11682 if (align
> 1 && count
!= 0 && (count
& 1))
11683 emit_insn (gen_strset (destreg
, dst
,
11684 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
11685 if (align
<= 1 || count
== 0)
11687 rtx label
= ix86_expand_aligntest (countreg
, 1);
11688 emit_insn (gen_strset (destreg
, dst
,
11689 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
11690 emit_label (label
);
11691 LABEL_NUSES (label
) = 1;
11697 /* Expand strlen.  OUT receives the computed length, SRC is the MEM for
   the string, EOSCHAR the terminator being searched for and ALIGN the
   alignment rtx.

   Two strategies are visible below: the unrolled scan done by
   ix86_expand_strlensi_unroll_1, whose result is turned into a length by
   subtracting the start address, and an UNSPEC_SCAS based scan whose
   result is complemented and then decremented by one.

   NOTE(review): several lines (return statements, else arms, part of the
   conditions) are not present in this copy; confirm against upstream.  */
11699 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
11701 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
11703 /* The generic case of strlen expander is long. Avoid expanding
11704 it unless TARGET_INLINE_ALL_STRINGOPS. */
11706 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
11707 && !TARGET_INLINE_ALL_STRINGOPS
11709 && (GET_CODE (align
) != CONST_INT
|| INTVAL (align
) < 4))
11712 addr
= force_reg (Pmode
, XEXP (src
, 0));
11713 scratch1
= gen_reg_rtx (Pmode
);
/* Unrolled variant: only for a zero terminator when optimizing above -O1.  */
11715 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
11718 /* Well it seems that some optimizer does not combine a call like
11719 foo(strlen(bar), strlen(bar));
11720 when the move and the subtraction is done here. It does calculate
11721 the length just once when these instructions are done inside of
11722 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11723 often used and I use one fewer register for the lifetime of
11724 output_strlen_unroll() this is better. */
11726 emit_move_insn (out
, addr
);
11728 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
11730 /* strlensi_unroll_1 returns the address of the zero at the end of
11731 the string, like memchr(), so compute the length by subtracting
11732 the start address. */
11734 emit_insn (gen_subdi3 (out
, out
, addr
));
11736 emit_insn (gen_subsi3 (out
, out
, addr
));
/* Scan variant: SCRATCH3 holds a copy of the start address, SCRATCH4 is
   loaded with -1, and both are handed to the strlenqi_1 pattern.  */
11741 scratch2
= gen_reg_rtx (Pmode
);
11742 scratch3
= gen_reg_rtx (Pmode
);
11743 scratch4
= force_reg (Pmode
, constm1_rtx
);
11745 emit_move_insn (scratch3
, addr
);
11746 eoschar
= force_reg (QImode
, eoschar
);
11748 emit_insn (gen_cld ());
11749 src
= replace_equiv_address_nv (src
, scratch3
);
11751 /* If .md starts supporting :P, this can be done in .md. */
11752 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
11753 scratch4
), UNSPEC_SCAS
);
11754 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
/* OUT = ~SCRATCH1 - 1, emitted in DImode or SImode form.  */
11757 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
11758 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
11762 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
11763 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
11769 /* Expand the appropriate insns for doing strlen if not just doing
11772 out = result, initialized with the start address
11773 align_rtx = alignment of the address.
11774 scratch = scratch register, initialized with the startaddress when
11775 not aligned, otherwise undefined
11777 This is just the body. It needs the initializations mentioned above and
11778 some address computing at the end. These things are done in i386.md.

   Shape of the emitted code, as far as it is visible here: up to three
   byte-wise compares against zero until OUT is 4-byte aligned, then a
   loop that loads an SImode word and advances OUT by 4 per iteration,
   then a fix-up that moves OUT back to the zero byte itself.

   NOTE(review): the first sentence above and many code lines (conditions,
   braces, trailing operands) are incomplete in this copy; confirm against
   the upstream file.  */
11781 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
11785 rtx align_2_label
= NULL_RTX
;
11786 rtx align_3_label
= NULL_RTX
;
11787 rtx align_4_label
= gen_label_rtx ();
11788 rtx end_0_label
= gen_label_rtx ();
11790 rtx tmpreg
= gen_reg_rtx (SImode
);
11791 rtx scratch
= gen_reg_rtx (SImode
);
11795 if (GET_CODE (align_rtx
) == CONST_INT
)
11796 align
= INTVAL (align_rtx
);
11798 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11800 /* Is there a known alignment and is it less than 4? */
11803 rtx scratch1
= gen_reg_rtx (Pmode
);
11804 emit_move_insn (scratch1
, out
);
11805 /* Is there a known alignment and is it not 2? */
11808 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
11809 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
11811 /* Leave just the 3 lower bits. */
/* NOTE(review): the mask used is GEN_INT (3), i.e. the two low bits.  */
11812 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
11813 NULL_RTX
, 0, OPTAB_WIDEN
);
/* Dispatch on address & 3: 0 goes to the word loop, 2 to the 2-byte
   entry, above 2 to the 3-byte entry; the remaining case falls through.  */
11815 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
11816 Pmode
, 1, align_4_label
);
11817 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
11818 Pmode
, 1, align_2_label
);
11819 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
11820 Pmode
, 1, align_3_label
);
11824 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11825 check if is aligned to 4 - byte. */
11827 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
11828 NULL_RTX
, 0, OPTAB_WIDEN
);
11830 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
11831 Pmode
, 1, align_4_label
);
11834 mem
= change_address (src
, QImode
, out
);
11836 /* Now compare the bytes. */
11838 /* Compare the first n unaligned byte on a byte per byte basis. */
11839 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
11840 QImode
, 1, end_0_label
);
11842 /* Increment the address. */
11844 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
11846 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
11848 /* Not needed with an alignment of 2 */
11851 emit_label (align_2_label
);
11853 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
11857 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
11859 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
11861 emit_label (align_3_label
);
11864 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
11868 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
11870 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
/* NOTE(review): the comment that follows has lost its closing line in
   this copy; it has been left untouched so that no code token changes.
   Restore the terminator from the upstream file.  */
11873 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11874 align this loop. It gives only huge programs, but does not help to
11876 emit_label (align_4_label
);
11878 mem
= change_address (src
, SImode
, out
);
11879 emit_move_insn (scratch
, mem
);
11881 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
11883 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
11885 /* This formula yields a nonzero result iff one of the bytes is zero.
11886 This saves three branches inside loop and many cycles. */
/* tmpreg = (word - 0x01010101) & ~word & 0x80808080.  */
11888 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
11889 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
11890 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
11891 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
11892 gen_int_mode (0x80808080, SImode
)));
11893 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
/* Branch-free variant: REG is TMPREG shifted right by 16 and REG2 is
   OUT + 2; both are selected through IF_THEN_ELSE on the flags produced
   by the 0x8080 test.  */
11898 rtx reg
= gen_reg_rtx (SImode
);
11899 rtx reg2
= gen_reg_rtx (Pmode
);
11900 emit_move_insn (reg
, tmpreg
);
11901 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
11903 /* If zero is not in the first two bytes, move two bytes forward. */
11904 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
11905 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
11906 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
11907 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
11908 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
11911 /* Emit lea manually to avoid clobbering of flags. */
/* NOTE(review): every other gen_rtx_SET in this function passes VOIDmode
   as the mode of the SET; SImode here looks inconsistent -- confirm.  */
11912 emit_insn (gen_rtx_SET (SImode
, reg2
,
11913 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
11915 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
11916 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
11917 emit_insn (gen_rtx_SET (VOIDmode
, out
,
11918 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
/* Branching variant: jump to END_2_LABEL when the 0x8080 test is
   non-zero, otherwise shift TMPREG down by 16 and advance OUT by 2.  */
11925 rtx end_2_label
= gen_label_rtx ();
11926 /* Is zero in the first two bytes? */
11928 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
11929 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
11930 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
11931 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11932 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
11934 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
11935 JUMP_LABEL (tmp
) = end_2_label
;
11937 /* Not in the first two. Move two bytes forward. */
11938 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
11940 emit_insn (gen_adddi3 (out
, out
, const2_rtx
));
11942 emit_insn (gen_addsi3 (out
, out
, const2_rtx
));
11944 emit_label (end_2_label
);
11948 /* Avoid branch in fixing the byte. */
/* Doubling the low byte of TMPREG through addqi3_cc sets the carry that
   the subtract-with-carry of 3 below consumes.
   NOTE(review): register 17 is written as a literal here while the code
   above uses FLAGS_REG; presumably the same register -- confirm.  */
11949 tmpreg
= gen_lowpart (QImode
, tmpreg
);
11950 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
11951 cmp
= gen_rtx_LTU (Pmode
, gen_rtx_REG (CCmode
, 17), const0_rtx
);
11953 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3), cmp
));
11955 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3), cmp
));
11957 emit_label (end_0_label
);
/* Emit a call insn.  RETVAL, when used, becomes the destination of a SET
   wrapped around the CALL; FNADDR is a MEM whose address is the callee;
   CALLARG1 is the second operand of the CALL rtx; POP, when used, is
   added to the stack pointer in a SET placed in a PARALLEL with the
   call; SIBCALL selects the extra handling of non-constant addresses
   on 64-bit targets.

   NOTE(review): CALLARG2 is marked ATTRIBUTE_UNUSED yet is read through
   INTVAL below.  The "#endif" for TARGET_MACHO is visible but its
   matching "#if"/"#else" lines are not present in this copy, so that use
   presumably sits in a conditionally compiled arm -- confirm.  */
11961 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
11962 rtx callarg2 ATTRIBUTE_UNUSED
,
11963 rtx pop
, int sibcall
)
11965 rtx use
= NULL
, call
;
11967 if (pop
== const0_rtx
)
11969 if (TARGET_64BIT
&& pop
)
11973 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
11974 fnaddr
= machopic_indirect_call_target (fnaddr
);
11976 /* Static functions and indirect calls don't need the pic register. */
11977 if (! TARGET_64BIT
&& flag_pic
11978 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
11979 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
11980 use_reg (&use
, pic_offset_table_rtx
);
/* On 64-bit targets a non-negative CALLARG2 is copied into hard register
   0 in QImode, and that register is recorded as used by the call.  */
11982 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
11984 rtx al
= gen_rtx_REG (QImode
, 0);
11985 emit_move_insn (al
, callarg2
);
11986 use_reg (&use
, al
);
11988 #endif /* TARGET_MACHO */
/* Addresses that call_insn_operand rejects are forced into a register.  */
11990 if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
11992 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
11993 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
/* 64-bit sibcalls through a non-constant address go via R11.  */
11995 if (sibcall
&& TARGET_64BIT
11996 && !constant_call_address_operand (XEXP (fnaddr
, 0), Pmode
))
11999 addr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
12000 fnaddr
= gen_rtx_REG (Pmode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
12001 emit_move_insn (fnaddr
, addr
);
12002 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
12005 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
12007 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
12010 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
12011 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
12012 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
12015 call
= emit_call_insn (call
);
12017 CALL_INSN_FUNCTION_USAGE (call
) = use
;
12021 /* Clear stack slot assignments remembered from previous functions.
12022 This is called from INIT_EXPANDERS once before RTL is emitted for each
12025 static struct machine_function
*
12026 ix86_init_machine_status (void)
12028 struct machine_function
*f
;
12030 f
= ggc_alloc_cleared (sizeof (struct machine_function
));
12031 f
->use_fast_prologue_epilogue_nregs
= -1;
12036 /* Return a MEM corresponding to a stack slot with mode MODE.
12037 Allocate a new slot if necessary.
12039 The RTL for a function can have several slots available: N is
12040 which slot to use.

   Slots are remembered on the ix86_stack_locals list, keyed by the
   (MODE, N) pair.

   NOTE(review): the statements guarded by the range check and by the
   list lookup, and the final return, are not present in this copy.  */
12043 assign_386_stack_local (enum machine_mode mode
, int n
)
12045 struct stack_local_entry
*s
;
/* N must lie in [0, MAX_386_STACK_LOCALS).  */
12047 if (n
< 0 || n
>= MAX_386_STACK_LOCALS
)
/* Look for an entry already recorded for this mode and slot number.  */
12050 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
12051 if (s
->mode
== mode
&& s
->n
== n
)
/* Otherwise allocate a fresh entry and push it on the head of the list.  */
12054 s
= (struct stack_local_entry
*)
12055 ggc_alloc (sizeof (struct stack_local_entry
));
12058 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
12060 s
->next
= ix86_stack_locals
;
12061 ix86_stack_locals
= s
;
12065 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12067 static GTY(()) rtx ix86_tls_symbol
;
12069 ix86_tls_get_addr (void)
12072 if (!ix86_tls_symbol
)
12074 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
12075 (TARGET_GNU_TLS
&& !TARGET_64BIT
)
12076 ? "___tls_get_addr"
12077 : "__tls_get_addr");
12080 return ix86_tls_symbol
;
12083 /* Calculate the length of the memory address in the instruction
12084 encoding. Does not include the one-byte modrm, opcode, or prefix.

   ADDR is first filtered for auto-modify codes (PRE_DEC, POST_INC,
   PRE_MODIFY, POST_MODIFY), then decomposed with ix86_decompose_address
   into base, index and displacement, which drive the cases below.

   NOTE(review): the statements that assign the length in each case, and
   a few condition lines, are not present in this copy.  */
12087 memory_address_length (rtx addr
)
12089 struct ix86_address parts
;
12090 rtx base
, index
, disp
;
12093 if (GET_CODE (addr
) == PRE_DEC
12094 || GET_CODE (addr
) == POST_INC
12095 || GET_CODE (addr
) == PRE_MODIFY
12096 || GET_CODE (addr
) == POST_MODIFY
)
12099 if (! ix86_decompose_address (addr
, &parts
))
12103 index
= parts
.index
;
/* NOTE(review): the two lines below are the tail of a comment whose
   opening line is missing in this copy; they are left untouched.  */
12108 - esp as the base always wants an index,
12109 - ebp as the base always wants a displacement. */
12111 /* Register Indirect. */
12112 if (base
&& !index
&& !disp
)
12114 /* esp (for its index) and ebp (for its displacement) need
12115 the two-byte modrm form. */
12116 if (addr
== stack_pointer_rtx
12117 || addr
== arg_pointer_rtx
12118 || addr
== frame_pointer_rtx
12119 || addr
== hard_frame_pointer_rtx
)
12123 /* Direct Addressing. */
12124 else if (disp
&& !base
&& !index
)
12129 /* Find the length of the displacement constant. */
/* A CONST_INT displacement accepted by constraint letter 'K' is the
   short case.  */
12132 if (GET_CODE (disp
) == CONST_INT
12133 && CONST_OK_FOR_LETTER_P (INTVAL (disp
), 'K')
12139 /* ebp always wants a displacement. */
12140 else if (base
== hard_frame_pointer_rtx
)
12143 /* An index requires the two-byte modrm form.... */
12145 /* ...like esp, which always wants an index. */
12146 || base
== stack_pointer_rtx
12147 || base
== arg_pointer_rtx
12148 || base
== frame_pointer_rtx
)
12155 /* Compute default value for "length_immediate" attribute. When SHORTFORM
12156 is set, expect that the insn has an 8-bit immediate alternative.

   The operands of INSN are scanned from last to first for a constant;
   the insn's mode attribute then selects the size.

   NOTE(review): the case labels and the returned sizes are not present
   in this copy.  */
12158 ix86_attr_length_immediate_default (rtx insn
, int shortform
)
12162 extract_insn_cached (insn
);
12163 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
12164 if (CONSTANT_P (recog_data
.operand
[i
]))
/* Short form: a CONST_INT accepted by constraint letter 'K'.  */
12169 && GET_CODE (recog_data
.operand
[i
]) == CONST_INT
12170 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data
.operand
[i
]), 'K'))
12174 switch (get_attr_mode (insn
))
12185 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
12190 fatal_insn ("unknown insn mode", insn
);
12196 /* Compute default value for "length_address" attribute.

   For an insn of type TYPE_LEA the address is the source of the SET
   (taken from the first element when the pattern is a PARALLEL).  For
   any other insn the operands are scanned from last to first and the
   first MEM found supplies the address.  Both cases defer to
   memory_address_length.

   NOTE(review): the fall-through results and the body of the
   ENABLE_CHECKING section are not present in this copy.  */
12198 ix86_attr_length_address_default (rtx insn
)
12202 if (get_attr_type (insn
) == TYPE_LEA
)
12204 rtx set
= PATTERN (insn
);
12205 if (GET_CODE (set
) == SET
)
12207 else if (GET_CODE (set
) == PARALLEL
12208 && GET_CODE (XVECEXP (set
, 0, 0)) == SET
)
12209 set
= XVECEXP (set
, 0, 0);
12212 #ifdef ENABLE_CHECKING
12218 return memory_address_length (SET_SRC (set
));
12221 extract_insn_cached (insn
);
12222 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
12223 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
12225 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
12231 /* Return the maximum number of instructions a cpu can issue.

   NOTE(review): only some of the case labels are visible in this copy;
   the switch header, the remaining labels and the values returned for
   each group are missing, so the per-processor rates cannot be read off
   here.  */
12234 ix86_issue_rate (void)
12238 case PROCESSOR_PENTIUM
:
12242 case PROCESSOR_PENTIUMPRO
:
12243 case PROCESSOR_PENTIUM4
:
12244 case PROCESSOR_ATHLON
:
12246 case PROCESSOR_NOCONA
:
12254 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
12255 by DEP_INSN and nothing set by DEP_INSN. */
12258 ix86_flags_dependant (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
12262 /* Simplify the test for uninteresting insns. */
12263 if (insn_type
!= TYPE_SETCC
12264 && insn_type
!= TYPE_ICMOV
12265 && insn_type
!= TYPE_FCMOV
12266 && insn_type
!= TYPE_IBR
)
12269 if ((set
= single_set (dep_insn
)) != 0)
12271 set
= SET_DEST (set
);
12274 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
12275 && XVECLEN (PATTERN (dep_insn
), 0) == 2
12276 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
12277 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
12279 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
12280 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
12285 if (GET_CODE (set
) != REG
|| REGNO (set
) != FLAGS_REG
)
12288 /* This test is true if the dependent insn reads the flags but
12289 not any other potentially set register. */
12290 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
12293 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
12299 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12300 address with operands set by DEP_INSN.

   For a TYPE_LEA insn the address examined is the source of its SET
   (first element of a PARALLEL if need be); otherwise it is the address
   of the first MEM found when scanning the operands from last to first.
   The answer comes from modified_in_p on that address.

   NOTE(review): part of the first condition and the fall-through
   results are not present in this copy.  */
12303 ix86_agi_dependant (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
12307 if (insn_type
== TYPE_LEA
12310 addr
= PATTERN (insn
);
12311 if (GET_CODE (addr
) == SET
)
12313 else if (GET_CODE (addr
) == PARALLEL
12314 && GET_CODE (XVECEXP (addr
, 0, 0)) == SET
)
12315 addr
= XVECEXP (addr
, 0, 0);
12318 addr
= SET_SRC (addr
);
12323 extract_insn_cached (insn
);
12324 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
12325 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
12327 addr
= XEXP (recog_data
.operand
[i
], 0);
12334 return modified_in_p (addr
, dep_insn
);
/* Adjust COST, the scheduling cost of the dependence LINK of INSN on
   DEP_INSN.  The adjustment is made per processor: the visible case
   labels are PROCESSOR_PENTIUM, PROCESSOR_PENTIUMPRO and
   PROCESSOR_ATHLON, and each case consults the type and memory
   attributes of the two insns together with ix86_agi_dependant and
   ix86_flags_dependant.

   NOTE(review): the switch header, some case labels and every statement
   that actually changes or returns COST are not present in this copy;
   the amounts involved cannot be read off here.  */
12338 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
12340 enum attr_type insn_type
, dep_insn_type
;
12341 enum attr_memory memory
;
12343 int dep_insn_code_number
;
12345 /* Anti and output dependencies have zero cost on all CPUs. */
12346 if (REG_NOTE_KIND (link
) != 0)
12349 dep_insn_code_number
= recog_memoized (dep_insn
);
12351 /* If we can't recognize the insns, we can't really do anything. */
12352 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
12355 insn_type
= get_attr_type (insn
);
12356 dep_insn_type
= get_attr_type (dep_insn
);
12360 case PROCESSOR_PENTIUM
:
12361 /* Address Generation Interlock adds a cycle of latency. */
12362 if (ix86_agi_dependant (insn
, dep_insn
, insn_type
))
12365 /* ??? Compares pair with jump/setcc. */
12366 if (ix86_flags_dependant (insn
, dep_insn
, insn_type
))
12369 /* Floating point stores require value to be ready one cycle earlier. */
12370 if (insn_type
== TYPE_FMOV
12371 && get_attr_memory (insn
) == MEMORY_STORE
12372 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
12376 case PROCESSOR_PENTIUMPRO
:
12377 memory
= get_attr_memory (insn
);
12379 /* INT->FP conversion is expensive. */
12380 if (get_attr_fp_int_src (dep_insn
))
12383 /* There is one cycle extra latency between an FP op and a store. */
/* Matches an FMOV INSN that stores to memory the value DEP_INSN set.  */
12384 if (insn_type
== TYPE_FMOV
12385 && (set
= single_set (dep_insn
)) != NULL_RTX
12386 && (set2
= single_set (insn
)) != NULL_RTX
12387 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
12388 && GET_CODE (SET_DEST (set2
)) == MEM
)
12391 /* Show ability of reorder buffer to hide latency of load by executing
12392 in parallel with previous instruction in case
12393 previous instruction is not needed to compute the address. */
12394 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
12395 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
12397 /* Claim moves to take one cycle, as core can issue one load
12398 at time and the next load can start cycle later. */
12399 if (dep_insn_type
== TYPE_IMOV
12400 || dep_insn_type
== TYPE_FMOV
)
12408 memory
= get_attr_memory (insn
);
/* NOTE(review): the comment that follows has lost its closing line in
   this copy; it has been left untouched so that no code token changes.
   Restore the terminator from the upstream file.  */
12410 /* The esp dependency is resolved before the instruction is really
12412 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
12413 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
12416 /* INT->FP conversion is expensive. */
12417 if (get_attr_fp_int_src (dep_insn
))
12420 /* Show ability of reorder buffer to hide latency of load by executing
12421 in parallel with previous instruction in case
12422 previous instruction is not needed to compute the address. */
12423 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
12424 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
12426 /* Claim moves to take one cycle, as core can issue one load
12427 at time and the next load can start cycle later. */
12428 if (dep_insn_type
== TYPE_IMOV
12429 || dep_insn_type
== TYPE_FMOV
)
12438 case PROCESSOR_ATHLON
:
12440 memory
= get_attr_memory (insn
);
12442 /* Show ability of reorder buffer to hide latency of load by executing
12443 in parallel with previous instruction in case
12444 previous instruction is not needed to compute the address. */
12445 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
12446 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
12448 enum attr_unit unit
= get_attr_unit (insn
);
12451 /* Because of the difference between the length of integer and
12452 floating unit pipeline preparation stages, the memory operands
12453 for floating point are cheaper.
12455 ??? For Athlon the difference is most probably 2. */
12456 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
12459 loadcost
= TARGET_ATHLON
? 2 : 0;
12461 if (cost
>= loadcost
)
12474 /* How many alternative schedules to try. This should be as wide as the
12475 scheduling freedom in the DFA, but no wider. Making this value too
12476 large results in extra work for the scheduler.

   The value depends on ix86_tune: PROCESSOR_PENTIUM is one group and
   PROCESSOR_PENTIUMPRO together with PROCESSOR_K6 another.

   NOTE(review): the values returned for each group and the default are
   not present in this copy.  */
12479 ia32_multipass_dfa_lookahead (void)
12481 if (ix86_tune
== PROCESSOR_PENTIUM
)
12484 if (ix86_tune
== PROCESSOR_PENTIUMPRO
12485 || ix86_tune
== PROCESSOR_K6
)
12493 /* Compute the alignment given to a constant that is being placed in memory.
12494 EXP is the constant and ALIGN is the alignment that the object would
ordinarily have.
12496 The value of this function is used instead of that alignment to align
the object. */
/* EXP is a constant tree node and ALIGN its default alignment.  Three
   situations are singled out: a REAL_CST of mode DFmode with ALIGN below
   64, a REAL_CST whose mode satisfies ALIGN_MODE_128 with ALIGN below
   128, and, when not optimizing for size, a STRING_CST of length 31 or
   more with ALIGN below BITS_PER_WORD, which yields BITS_PER_WORD.

   NOTE(review): the return statements of the two REAL_CST cases and the
   default return are not present in this copy.  */
12500 ix86_constant_alignment (tree exp
, int align
)
12502 if (TREE_CODE (exp
) == REAL_CST
)
12504 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
12506 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
12509 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
12510 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
12511 return BITS_PER_WORD
;
12516 /* Compute the alignment for a static variable.
12517 TYPE is the data type, and ALIGN is the alignment that
12518 the object would ordinarily have. The value of this function is used
12519 instead of that alignment to align the object.

   The tests below first look at aggregates whose constant TYPE_SIZE
   reaches a threshold (256, then 128), and then at the machine mode of
   the element, the type itself or the first field, comparing it with
   DFmode / DCmode / XCmode or testing it with ALIGN_MODE_128.

   NOTE(review): every return statement and one guarding condition are
   not present in this copy, so the alignment chosen in each case cannot
   be read off here.  */
12522 ix86_data_alignment (tree type
, int align
)
/* Aggregates with a constant size of at least 256, or with a non-zero
   high word in the size.  */
12524 if (AGGREGATE_TYPE_P (type
)
12525 && TYPE_SIZE (type
)
12526 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
12527 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 256
12528 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 256)
12531 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12532 to 16byte boundary. */
12535 if (AGGREGATE_TYPE_P (type
)
12536 && TYPE_SIZE (type
)
12537 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
12538 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
12539 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
/* Arrays: decided by the mode of the element type.  */
12543 if (TREE_CODE (type
) == ARRAY_TYPE
)
12545 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
12547 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
12550 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
12553 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
12555 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
/* Records and unions: decided by the mode of the first field.  */
12558 else if ((TREE_CODE (type
) == RECORD_TYPE
12559 || TREE_CODE (type
) == UNION_TYPE
12560 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
12561 && TYPE_FIELDS (type
))
12563 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
12565 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
12568 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
12569 || TREE_CODE (type
) == INTEGER_TYPE
)
12571 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
12573 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
12580 /* Compute the alignment for a local variable.
12581 TYPE is the data type, and ALIGN is the alignment that
12582 the object would ordinarily have. The value of this function is used
12583 instead of that alignment to align the object.
The tests run in order: one size-based test on aggregates, then tests on
the mode of the array element, of the complex type, of the first field of
a record/union, or of the scalar type itself.
NOTE(review): the value returned by each test and any condition guarding
the aggregate test are not visible in this excerpt. */
12586 ix86_local_alignment (tree type
, int align
)
12588 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12589 to 16byte boundary. The size is compared against 128 to agree with
the same rule in ix86_data_alignment; the previous bound of 16 also
matched aggregates far smaller than 16 bytes if, as the bounds of 64 and
128 used throughout suggest, TYPE_SIZE is measured in bits. */
12592 if (AGGREGATE_TYPE_P (type
)
12593 && TYPE_SIZE (type
)
12594 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
12595 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
12596 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
/* Arrays: decided by the mode of the element type. */
12599 if (TREE_CODE (type
) == ARRAY_TYPE
)
12601 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
12603 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
/* Complex types: DCmode is tested against 64, XCmode against 128. */
12606 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
12608 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
12610 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
/* Records and unions with at least one field: decided by the mode of the
first field (TYPE_FIELDS) only. */
12613 else if ((TREE_CODE (type
) == RECORD_TYPE
12614 || TREE_CODE (type
) == UNION_TYPE
12615 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
12616 && TYPE_FIELDS (type
))
12618 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
12620 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
/* Real, vector and integer types: decided by the type's own mode. */
12623 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
12624 || TREE_CODE (type
) == INTEGER_TYPE
)
12627 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
12629 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
12635 /* Emit RTL insns to initialize the variable parts of a trampoline.
12636 FNADDR is an RTX for the address of the function's pure code.
12637 CXT is an RTX for the static chain value for the function.
TRAMP is used as the base address of every store emitted here.
Two byte layouts are written: a fixed 10-byte one addressed with constant
offsets, and one addressed relative to a running OFFSET.
NOTE(review): the condition selecting between the two layouts (presumably
32-bit versus 64-bit target), the updates of OFFSET and the value operands
of the two DImode stores are not visible in this excerpt. */
12639 x86_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
12643 /* Compute offset from the end of the jmp to the target function. */
/* DISP = FNADDR - (TRAMP + 10); TRAMP + 10 is the first byte after the
4-byte displacement stored at offset 6 below. */
12644 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
12645 plus_constant (tramp
, 10),
12646 NULL_RTX
, 1, OPTAB_DIRECT
);
/* Byte 0 holds 0xb9 and bytes 1-4 hold CXT. */
12647 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
12648 gen_int_mode (0xb9, QImode
));
12649 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
/* Byte 5 holds 0xe9 (the jmp referred to above) and bytes 6-9 hold DISP. */
12650 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
12651 gen_int_mode (0xe9, QImode
));
12652 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
12657 /* Try to load address using shorter movl instead of movabs.
12658 We may want to support movq for kernel mode, but kernel does not use
12659 trampolines at the moment. */
12660 if (x86_64_zero_extended_value (fnaddr
))
/* Short form: the 16-bit value 0xbb41 followed by the low 32 bits of
FNADDR. */
12662 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
12663 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
12664 gen_int_mode (0xbb41, HImode
));
12665 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
12666 gen_lowpart (SImode
, fnaddr
));
/* Long form: the 16-bit value 0xbb49 followed by a DImode store. */
12671 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
12672 gen_int_mode (0xbb49, HImode
));
12673 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
12677 /* Load static chain using movabs to r10. */
12678 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
12679 gen_int_mode (0xba49, HImode
));
12680 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
12683 /* Jump to the address in r11: the 16-bit value 0xff49 followed by the
byte 0xe3. */
12684 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
12685 gen_int_mode (0xff49, HImode
));
12686 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
12687 gen_int_mode (0xe3, QImode
));
/* Check OFFSET against TRAMPOLINE_SIZE.  NOTE(review): the action taken
when the check fires is not visible in this excerpt. */
12689 if (offset
> TRAMPOLINE_SIZE
)
/* Where ENABLE_EXECUTE_STACK is defined, emit a call to the library
routine __enable_execute_stack with TRAMP as its single Pmode argument. */
12693 #ifdef ENABLE_EXECUTE_STACK
12694 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
12695 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
/* Register builtin NAME, with function type TYPE and function code CODE,
through lang_hooks.builtin_function as a BUILT_IN_MD builtin.  Nothing is
registered unless MASK shares at least one bit with target_flags; when MASK
includes MASK_64BIT, TARGET_64BIT must hold as well.  MASK is expanded more
than once, so it should be free of side effects.  */
12699 #define def_builtin(MASK, NAME, TYPE, CODE) \
12701 if ((MASK) & target_flags \
12702 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12703 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12704 NULL, NULL_TREE); \
/* One row of a builtin description table (bdesc_comi, bdesc_2arg and
bdesc_1arg use this type). */
12707 struct builtin_description
/* MASK_* bits the builtin is gated on, e.g. MASK_SSE or
MASK_SSE2 | MASK_64BIT (presumably handed to def_builtin - confirm). */
12709 const unsigned int mask
;
/* Insn code (CODE_FOR_*) associated with the builtin. */
12710 const enum insn_code icode
;
/* Builtin name such as "__builtin_ia32_addps"; 0 in many table rows. */
12711 const char *const name
;
/* IX86_BUILTIN_* enumerator identifying the builtin. */
12712 const enum ix86_builtins code
;
/* rtx comparison code for the compare builtins; 0 in the other rows. */
12713 const enum rtx_code comparison
;
/* 0 or 1 in the tables; set on the cmpgt/cmpge/cmpngt/cmpnge rows, which
reuse LT/LE (presumably a request to swap the operands - confirm). */
12714 const unsigned int flag
;
/* Table of the comi/ucomi compare builtins: the SSE (comi*, ucomi*) rows
use CODE_FOR_sse_comi / CODE_FOR_sse_ucomi, the SSE2 (comisd*, ucomisd*)
rows use CODE_FOR_sse2_comi / CODE_FOR_sse2_ucomi.  In every group the
eq/lt/le rows carry UNEQ/UNLT/UNLE, the gt/ge rows carry GT/GE and the neq
row carries LTGT; the flag field is 0 throughout. */
12717 static const struct builtin_description bdesc_comi
[] =
12719 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
12720 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
12721 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
12722 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
12723 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
12724 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
12725 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
12726 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
12727 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
12728 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
12729 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
12730 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
12731 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
12732 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
12733 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
12734 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
12735 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
12736 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
12737 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
12738 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
12739 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
12740 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
12741 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
12742 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
/* Table of the two-operand SSE, MMX, SSE2 and SSE3 builtins.  Rows whose
name field is 0 carry no name here (presumably they are registered
individually elsewhere - confirm).  For the mask-compare rows the
comparison field holds the rtx code; the cmpgt/cmpge/cmpngt/cmpnge rows
reuse LT/LE with the flag field set to 1 (presumably a request to swap the
operands - confirm against the expander). */
12745 static const struct builtin_description bdesc_2arg
[] =
12748 { MASK_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
12749 { MASK_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
12750 { MASK_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
12751 { MASK_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
12752 { MASK_SSE
, CODE_FOR_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
12753 { MASK_SSE
, CODE_FOR_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
12754 { MASK_SSE
, CODE_FOR_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
12755 { MASK_SSE
, CODE_FOR_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
12757 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
12758 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
12759 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
12760 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, 1 },
12761 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, 1 },
12762 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
12763 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, EQ
, 0 },
12764 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, LT
, 0 },
12765 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, LE
, 0 },
12766 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, LT
, 1 },
12767 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, LE
, 1 },
12768 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, UNORDERED
, 0 },
12769 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
12770 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
12771 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
12772 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
12773 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, EQ
, 0 },
12774 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, LT
, 0 },
12775 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, LE
, 0 },
12776 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
12778 { MASK_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
12779 { MASK_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
12780 { MASK_SSE
, CODE_FOR_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
12781 { MASK_SSE
, CODE_FOR_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
12783 { MASK_SSE
, CODE_FOR_sse_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
12784 { MASK_SSE
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
12785 { MASK_SSE
, CODE_FOR_sse_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
12786 { MASK_SSE
, CODE_FOR_sse_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
12788 { MASK_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
12789 { MASK_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
12790 { MASK_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
12791 { MASK_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
12792 { MASK_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
12795 { MASK_MMX
, CODE_FOR_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
12796 { MASK_MMX
, CODE_FOR_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
12797 { MASK_MMX
, CODE_FOR_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
12798 { MASK_MMX
, CODE_FOR_mmx_adddi3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, 0, 0 },
12799 { MASK_MMX
, CODE_FOR_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
12800 { MASK_MMX
, CODE_FOR_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
12801 { MASK_MMX
, CODE_FOR_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
12802 { MASK_MMX
, CODE_FOR_mmx_subdi3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, 0, 0 },
12804 { MASK_MMX
, CODE_FOR_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
12805 { MASK_MMX
, CODE_FOR_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
12806 { MASK_MMX
, CODE_FOR_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
12807 { MASK_MMX
, CODE_FOR_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
12808 { MASK_MMX
, CODE_FOR_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
12809 { MASK_MMX
, CODE_FOR_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
12810 { MASK_MMX
, CODE_FOR_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
12811 { MASK_MMX
, CODE_FOR_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
12813 { MASK_MMX
, CODE_FOR_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
12814 { MASK_MMX
, CODE_FOR_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
12815 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
12817 { MASK_MMX
, CODE_FOR_mmx_anddi3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
12818 { MASK_MMX
, CODE_FOR_mmx_nanddi3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
12819 { MASK_MMX
, CODE_FOR_mmx_iordi3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
12820 { MASK_MMX
, CODE_FOR_mmx_xordi3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
12822 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
12823 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
12825 { MASK_MMX
, CODE_FOR_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
12826 { MASK_MMX
, CODE_FOR_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
12827 { MASK_MMX
, CODE_FOR_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
12828 { MASK_MMX
, CODE_FOR_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
12829 { MASK_MMX
, CODE_FOR_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
12830 { MASK_MMX
, CODE_FOR_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
12832 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
12833 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
12834 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
12835 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
12837 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
12838 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
12839 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
12840 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
12841 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
12842 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
12845 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
12846 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
12847 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
12849 { MASK_SSE
, CODE_FOR_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
12850 { MASK_SSE
, CODE_FOR_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
12851 { MASK_SSE
| MASK_64BIT
, CODE_FOR_cvtsi2ssq
, 0, IX86_BUILTIN_CVTSI642SS
, 0, 0 },
12853 { MASK_MMX
, CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
12854 { MASK_MMX
, CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
12855 { MASK_MMX
, CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
12856 { MASK_MMX
, CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
12857 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
12858 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
12860 { MASK_MMX
, CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
12861 { MASK_MMX
, CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
12862 { MASK_MMX
, CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
12863 { MASK_MMX
, CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
12864 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
12865 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
12867 { MASK_MMX
, CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
12868 { MASK_MMX
, CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
12869 { MASK_MMX
, CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
12870 { MASK_MMX
, CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
12872 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
12873 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
12876 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
12877 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
12878 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
12879 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
12880 { MASK_SSE2
, CODE_FOR_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
12881 { MASK_SSE2
, CODE_FOR_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
12882 { MASK_SSE2
, CODE_FOR_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
12883 { MASK_SSE2
, CODE_FOR_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
12885 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
12886 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
12887 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
12888 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, 1 },
12889 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, 1 },
12890 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
12891 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, EQ
, 0 },
12892 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, LT
, 0 },
12893 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, LE
, 0 },
12894 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, LT
, 1 },
12895 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, LE
, 1 },
12896 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, UNORDERED
, 0 },
12897 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
12898 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
12899 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
12900 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
12901 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, EQ
, 0 },
12902 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, LT
, 0 },
12903 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, LE
, 0 },
12904 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, UNORDERED
, 0 },
12906 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
12907 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
12908 { MASK_SSE2
, CODE_FOR_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
12909 { MASK_SSE2
, CODE_FOR_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
12911 { MASK_SSE2
, CODE_FOR_sse2_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
12912 { MASK_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
12913 { MASK_SSE2
, CODE_FOR_sse2_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
12914 { MASK_SSE2
, CODE_FOR_sse2_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
12916 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
12917 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
12918 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
12921 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
12922 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
12923 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
12924 { MASK_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
12925 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
12926 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
12927 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
12928 { MASK_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
/* The eight saturating add/subtract rows below use V16QI/V8HI insn codes,
i.e. 128-bit operations, so they are gated on MASK_SSE2 like every other
*128 builtin in this table rather than on MASK_MMX. */
12930 { MASK_SSE2
, CODE_FOR_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
12931 { MASK_SSE2
, CODE_FOR_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
12932 { MASK_SSE2
, CODE_FOR_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
12933 { MASK_SSE2
, CODE_FOR_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
12934 { MASK_SSE2
, CODE_FOR_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
12935 { MASK_SSE2
, CODE_FOR_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
12936 { MASK_SSE2
, CODE_FOR_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
12937 { MASK_SSE2
, CODE_FOR_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
12939 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
12940 { MASK_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
12942 { MASK_SSE2
, CODE_FOR_sse2_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
12943 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
12944 { MASK_SSE2
, CODE_FOR_sse2_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
12945 { MASK_SSE2
, CODE_FOR_sse2_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
12947 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
12948 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
12950 { MASK_SSE2
, CODE_FOR_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
12951 { MASK_SSE2
, CODE_FOR_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
12952 { MASK_SSE2
, CODE_FOR_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
12953 { MASK_SSE2
, CODE_FOR_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
12954 { MASK_SSE2
, CODE_FOR_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
12955 { MASK_SSE2
, CODE_FOR_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
12957 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
12958 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
12959 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
12960 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
12962 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
12963 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
12964 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
12965 { MASK_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, 0, 0 },
12966 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
12967 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
12968 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
12969 { MASK_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, 0, 0 },
12971 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
12972 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
12973 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
12975 { MASK_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
12976 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
12978 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, 0, IX86_BUILTIN_PMULUDQ
, 0, 0 },
12979 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, 0, IX86_BUILTIN_PMULUDQ128
, 0, 0 },
12981 { MASK_SSE2
, CODE_FOR_ashlv8hi3_ti
, 0, IX86_BUILTIN_PSLLW128
, 0, 0 },
12982 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
12983 { MASK_SSE2
, CODE_FOR_ashlv4si3_ti
, 0, IX86_BUILTIN_PSLLD128
, 0, 0 },
12984 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
12985 { MASK_SSE2
, CODE_FOR_ashlv2di3_ti
, 0, IX86_BUILTIN_PSLLQ128
, 0, 0 },
12986 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
12988 { MASK_SSE2
, CODE_FOR_lshrv8hi3_ti
, 0, IX86_BUILTIN_PSRLW128
, 0, 0 },
12989 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
12990 { MASK_SSE2
, CODE_FOR_lshrv4si3_ti
, 0, IX86_BUILTIN_PSRLD128
, 0, 0 },
12991 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
12992 { MASK_SSE2
, CODE_FOR_lshrv2di3_ti
, 0, IX86_BUILTIN_PSRLQ128
, 0, 0 },
12993 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
12995 { MASK_SSE2
, CODE_FOR_ashrv8hi3_ti
, 0, IX86_BUILTIN_PSRAW128
, 0, 0 },
12996 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
12997 { MASK_SSE2
, CODE_FOR_ashrv4si3_ti
, 0, IX86_BUILTIN_PSRAD128
, 0, 0 },
12998 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
13000 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
13002 { MASK_SSE2
, CODE_FOR_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
13003 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_cvtsi2sdq
, 0, IX86_BUILTIN_CVTSI642SD
, 0, 0 },
13004 { MASK_SSE2
, CODE_FOR_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
13005 { MASK_SSE2
, CODE_FOR_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 },
13008 { MASK_SSE3
, CODE_FOR_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, 0, 0 },
13009 { MASK_SSE3
, CODE_FOR_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, 0, 0 },
13010 { MASK_SSE3
, CODE_FOR_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, 0, 0 },
13011 { MASK_SSE3
, CODE_FOR_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, 0, 0 },
13012 { MASK_SSE3
, CODE_FOR_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, 0, 0 },
13013 { MASK_SSE3
, CODE_FOR_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, 0, 0 }
/* Table of the one-operand SSE, SSE2 and SSE3 builtins.  Every row has a
name field of 0 and 0 in both the comparison and flag fields (presumably the
names are supplied where the builtins are registered - confirm).  Rows whose
mask includes MASK_64BIT are the 64-bit (...SI64) conversion variants. */
13016 static const struct builtin_description bdesc_1arg
[] =
13018 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
13019 { MASK_SSE
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
13021 { MASK_SSE
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
13022 { MASK_SSE
, CODE_FOR_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
13023 { MASK_SSE
, CODE_FOR_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
13025 { MASK_SSE
, CODE_FOR_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
13026 { MASK_SSE
, CODE_FOR_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
13027 { MASK_SSE
| MASK_64BIT
, CODE_FOR_cvtss2siq
, 0, IX86_BUILTIN_CVTSS2SI64
, 0, 0 },
13028 { MASK_SSE
, CODE_FOR_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
13029 { MASK_SSE
, CODE_FOR_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 },
13030 { MASK_SSE
| MASK_64BIT
, CODE_FOR_cvttss2siq
, 0, IX86_BUILTIN_CVTTSS2SI64
, 0, 0 },
13032 { MASK_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, 0, 0 },
13033 { MASK_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, 0, 0 },
13034 { MASK_SSE2
, CODE_FOR_sse2_movq2dq
, 0, IX86_BUILTIN_MOVQ2DQ
, 0, 0 },
13035 { MASK_SSE2
, CODE_FOR_sse2_movdq2q
, 0, IX86_BUILTIN_MOVDQ2Q
, 0, 0 },
13037 { MASK_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, 0, 0 },
13039 { MASK_SSE2
, CODE_FOR_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, 0, 0 },
13040 { MASK_SSE2
, CODE_FOR_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, 0, 0 },
13042 { MASK_SSE2
, CODE_FOR_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, 0, 0 },
13043 { MASK_SSE2
, CODE_FOR_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, 0, 0 },
13044 { MASK_SSE2
, CODE_FOR_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, 0, 0 },
13045 { MASK_SSE2
, CODE_FOR_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, 0, 0 },
13046 { MASK_SSE2
, CODE_FOR_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, 0, 0 },
13048 { MASK_SSE2
, CODE_FOR_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, 0, 0 },
13050 { MASK_SSE2
, CODE_FOR_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, 0, 0 },
13051 { MASK_SSE2
, CODE_FOR_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, 0, 0 },
13052 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_cvtsd2siq
, 0, IX86_BUILTIN_CVTSD2SI64
, 0, 0 },
13053 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_cvttsd2siq
, 0, IX86_BUILTIN_CVTTSD2SI64
, 0, 0 },
13055 { MASK_SSE2
, CODE_FOR_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, 0, 0 },
13056 { MASK_SSE2
, CODE_FOR_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, 0, 0 },
13057 { MASK_SSE2
, CODE_FOR_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, 0, 0 },
13059 { MASK_SSE2
, CODE_FOR_sse2_movq
, 0, IX86_BUILTIN_MOVQ
, 0, 0 },
13062 { MASK_SSE3
, CODE_FOR_movshdup
, 0, IX86_BUILTIN_MOVSHDUP
, 0, 0 },
13063 { MASK_SSE3
, CODE_FOR_movsldup
, 0, IX86_BUILTIN_MOVSLDUP
, 0, 0 },
13064 { MASK_SSE3
, CODE_FOR_movddup
, 0, IX86_BUILTIN_MOVDDUP
, 0, 0 }
/* Set up the target builtins by calling ix86_init_mmx_sse_builtins.
NOTE(review): any condition guarding that call is not visible in this
excerpt. */
13068 ix86_init_builtins (void)
13071 ix86_init_mmx_sse_builtins ();
13074 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13075 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX builtins. */
13078 ix86_init_mmx_sse_builtins (void)
13080 const struct builtin_description
* d
;
13083 tree V16QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V16QImode
);
13084 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
13085 tree V2SF_type_node
= build_vector_type_for_mode (float_type_node
, V2SFmode
);
13086 tree V2DI_type_node
= build_vector_type_for_mode (intDI_type_node
, V2DImode
);
13087 tree V2DF_type_node
= build_vector_type_for_mode (double_type_node
, V2DFmode
);
13088 tree V4SF_type_node
= build_vector_type_for_mode (float_type_node
, V4SFmode
);
13089 tree V4SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V4SImode
);
13090 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
13091 tree V8QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V8QImode
);
13092 tree V8HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V8HImode
);
13094 tree pchar_type_node
= build_pointer_type (char_type_node
);
13095 tree pcchar_type_node
= build_pointer_type (
13096 build_type_variant (char_type_node
, 1, 0));
13097 tree pfloat_type_node
= build_pointer_type (float_type_node
);
13098 tree pcfloat_type_node
= build_pointer_type (
13099 build_type_variant (float_type_node
, 1, 0));
13100 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
13101 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
13102 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
13105 tree int_ftype_v4sf_v4sf
13106 = build_function_type_list (integer_type_node
,
13107 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
13108 tree v4si_ftype_v4sf_v4sf
13109 = build_function_type_list (V4SI_type_node
,
13110 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
13111 /* MMX/SSE/integer conversions. */
13112 tree int_ftype_v4sf
13113 = build_function_type_list (integer_type_node
,
13114 V4SF_type_node
, NULL_TREE
);
13115 tree int64_ftype_v4sf
13116 = build_function_type_list (long_long_integer_type_node
,
13117 V4SF_type_node
, NULL_TREE
);
13118 tree int_ftype_v8qi
13119 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
13120 tree v4sf_ftype_v4sf_int
13121 = build_function_type_list (V4SF_type_node
,
13122 V4SF_type_node
, integer_type_node
, NULL_TREE
);
13123 tree v4sf_ftype_v4sf_int64
13124 = build_function_type_list (V4SF_type_node
,
13125 V4SF_type_node
, long_long_integer_type_node
,
13127 tree v4sf_ftype_v4sf_v2si
13128 = build_function_type_list (V4SF_type_node
,
13129 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
13130 tree int_ftype_v4hi_int
13131 = build_function_type_list (integer_type_node
,
13132 V4HI_type_node
, integer_type_node
, NULL_TREE
);
13133 tree v4hi_ftype_v4hi_int_int
13134 = build_function_type_list (V4HI_type_node
, V4HI_type_node
,
13135 integer_type_node
, integer_type_node
,
13137 /* Miscellaneous. */
13138 tree v8qi_ftype_v4hi_v4hi
13139 = build_function_type_list (V8QI_type_node
,
13140 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
13141 tree v4hi_ftype_v2si_v2si
13142 = build_function_type_list (V4HI_type_node
,
13143 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
13144 tree v4sf_ftype_v4sf_v4sf_int
13145 = build_function_type_list (V4SF_type_node
,
13146 V4SF_type_node
, V4SF_type_node
,
13147 integer_type_node
, NULL_TREE
);
13148 tree v2si_ftype_v4hi_v4hi
13149 = build_function_type_list (V2SI_type_node
,
13150 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
13151 tree v4hi_ftype_v4hi_int
13152 = build_function_type_list (V4HI_type_node
,
13153 V4HI_type_node
, integer_type_node
, NULL_TREE
);
13154 tree v4hi_ftype_v4hi_di
13155 = build_function_type_list (V4HI_type_node
,
13156 V4HI_type_node
, long_long_unsigned_type_node
,
13158 tree v2si_ftype_v2si_di
13159 = build_function_type_list (V2SI_type_node
,
13160 V2SI_type_node
, long_long_unsigned_type_node
,
13162 tree void_ftype_void
13163 = build_function_type (void_type_node
, void_list_node
);
13164 tree void_ftype_unsigned
13165 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
13166 tree void_ftype_unsigned_unsigned
13167 = build_function_type_list (void_type_node
, unsigned_type_node
,
13168 unsigned_type_node
, NULL_TREE
);
13169 tree void_ftype_pcvoid_unsigned_unsigned
13170 = build_function_type_list (void_type_node
, const_ptr_type_node
,
13171 unsigned_type_node
, unsigned_type_node
,
13173 tree unsigned_ftype_void
13174 = build_function_type (unsigned_type_node
, void_list_node
);
13176 = build_function_type (long_long_unsigned_type_node
, void_list_node
);
13177 tree v4sf_ftype_void
13178 = build_function_type (V4SF_type_node
, void_list_node
);
13179 tree v2si_ftype_v4sf
13180 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
13181 /* Loads/stores. */
13182 tree void_ftype_v8qi_v8qi_pchar
13183 = build_function_type_list (void_type_node
,
13184 V8QI_type_node
, V8QI_type_node
,
13185 pchar_type_node
, NULL_TREE
);
13186 tree v4sf_ftype_pcfloat
13187 = build_function_type_list (V4SF_type_node
, pcfloat_type_node
, NULL_TREE
);
13188 /* @@@ the type is bogus */
13189 tree v4sf_ftype_v4sf_pv2si
13190 = build_function_type_list (V4SF_type_node
,
13191 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
13192 tree void_ftype_pv2si_v4sf
13193 = build_function_type_list (void_type_node
,
13194 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
13195 tree void_ftype_pfloat_v4sf
13196 = build_function_type_list (void_type_node
,
13197 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
13198 tree void_ftype_pdi_di
13199 = build_function_type_list (void_type_node
,
13200 pdi_type_node
, long_long_unsigned_type_node
,
13202 tree void_ftype_pv2di_v2di
13203 = build_function_type_list (void_type_node
,
13204 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
13205 /* Normal vector unops. */
13206 tree v4sf_ftype_v4sf
13207 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
13209 /* Normal vector binops. */
13210 tree v4sf_ftype_v4sf_v4sf
13211 = build_function_type_list (V4SF_type_node
,
13212 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
13213 tree v8qi_ftype_v8qi_v8qi
13214 = build_function_type_list (V8QI_type_node
,
13215 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
13216 tree v4hi_ftype_v4hi_v4hi
13217 = build_function_type_list (V4HI_type_node
,
13218 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
13219 tree v2si_ftype_v2si_v2si
13220 = build_function_type_list (V2SI_type_node
,
13221 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
13222 tree di_ftype_di_di
13223 = build_function_type_list (long_long_unsigned_type_node
,
13224 long_long_unsigned_type_node
,
13225 long_long_unsigned_type_node
, NULL_TREE
);
13227 tree v2si_ftype_v2sf
13228 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
13229 tree v2sf_ftype_v2si
13230 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
13231 tree v2si_ftype_v2si
13232 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
13233 tree v2sf_ftype_v2sf
13234 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
13235 tree v2sf_ftype_v2sf_v2sf
13236 = build_function_type_list (V2SF_type_node
,
13237 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
13238 tree v2si_ftype_v2sf_v2sf
13239 = build_function_type_list (V2SI_type_node
,
13240 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
13241 tree pint_type_node
= build_pointer_type (integer_type_node
);
13242 tree pcint_type_node
= build_pointer_type (
13243 build_type_variant (integer_type_node
, 1, 0));
13244 tree pdouble_type_node
= build_pointer_type (double_type_node
);
13245 tree pcdouble_type_node
= build_pointer_type (
13246 build_type_variant (double_type_node
, 1, 0));
13247 tree int_ftype_v2df_v2df
13248 = build_function_type_list (integer_type_node
,
13249 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
13252 = build_function_type (intTI_type_node
, void_list_node
);
13253 tree v2di_ftype_void
13254 = build_function_type (V2DI_type_node
, void_list_node
);
13255 tree ti_ftype_ti_ti
13256 = build_function_type_list (intTI_type_node
,
13257 intTI_type_node
, intTI_type_node
, NULL_TREE
);
13258 tree void_ftype_pcvoid
13259 = build_function_type_list (void_type_node
, const_ptr_type_node
, NULL_TREE
);
13261 = build_function_type_list (V2DI_type_node
,
13262 long_long_unsigned_type_node
, NULL_TREE
);
13264 = build_function_type_list (long_long_unsigned_type_node
,
13265 V2DI_type_node
, NULL_TREE
);
13266 tree v4sf_ftype_v4si
13267 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
13268 tree v4si_ftype_v4sf
13269 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
13270 tree v2df_ftype_v4si
13271 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
13272 tree v4si_ftype_v2df
13273 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
13274 tree v2si_ftype_v2df
13275 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
13276 tree v4sf_ftype_v2df
13277 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
13278 tree v2df_ftype_v2si
13279 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
13280 tree v2df_ftype_v4sf
13281 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
13282 tree int_ftype_v2df
13283 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
13284 tree int64_ftype_v2df
13285 = build_function_type_list (long_long_integer_type_node
,
13286 V2DF_type_node
, NULL_TREE
);
13287 tree v2df_ftype_v2df_int
13288 = build_function_type_list (V2DF_type_node
,
13289 V2DF_type_node
, integer_type_node
, NULL_TREE
);
13290 tree v2df_ftype_v2df_int64
13291 = build_function_type_list (V2DF_type_node
,
13292 V2DF_type_node
, long_long_integer_type_node
,
13294 tree v4sf_ftype_v4sf_v2df
13295 = build_function_type_list (V4SF_type_node
,
13296 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
13297 tree v2df_ftype_v2df_v4sf
13298 = build_function_type_list (V2DF_type_node
,
13299 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
13300 tree v2df_ftype_v2df_v2df_int
13301 = build_function_type_list (V2DF_type_node
,
13302 V2DF_type_node
, V2DF_type_node
,
13305 tree v2df_ftype_v2df_pv2si
13306 = build_function_type_list (V2DF_type_node
,
13307 V2DF_type_node
, pv2si_type_node
, NULL_TREE
);
13308 tree void_ftype_pv2si_v2df
13309 = build_function_type_list (void_type_node
,
13310 pv2si_type_node
, V2DF_type_node
, NULL_TREE
);
13311 tree void_ftype_pdouble_v2df
13312 = build_function_type_list (void_type_node
,
13313 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
13314 tree void_ftype_pint_int
13315 = build_function_type_list (void_type_node
,
13316 pint_type_node
, integer_type_node
, NULL_TREE
);
13317 tree void_ftype_v16qi_v16qi_pchar
13318 = build_function_type_list (void_type_node
,
13319 V16QI_type_node
, V16QI_type_node
,
13320 pchar_type_node
, NULL_TREE
);
13321 tree v2df_ftype_pcdouble
13322 = build_function_type_list (V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
13323 tree v2df_ftype_v2df_v2df
13324 = build_function_type_list (V2DF_type_node
,
13325 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
13326 tree v16qi_ftype_v16qi_v16qi
13327 = build_function_type_list (V16QI_type_node
,
13328 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
13329 tree v8hi_ftype_v8hi_v8hi
13330 = build_function_type_list (V8HI_type_node
,
13331 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
13332 tree v4si_ftype_v4si_v4si
13333 = build_function_type_list (V4SI_type_node
,
13334 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
13335 tree v2di_ftype_v2di_v2di
13336 = build_function_type_list (V2DI_type_node
,
13337 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
13338 tree v2di_ftype_v2df_v2df
13339 = build_function_type_list (V2DI_type_node
,
13340 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
13341 tree v2df_ftype_v2df
13342 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
13343 tree v2df_ftype_double
13344 = build_function_type_list (V2DF_type_node
, double_type_node
, NULL_TREE
);
13345 tree v2df_ftype_double_double
13346 = build_function_type_list (V2DF_type_node
,
13347 double_type_node
, double_type_node
, NULL_TREE
);
13348 tree int_ftype_v8hi_int
13349 = build_function_type_list (integer_type_node
,
13350 V8HI_type_node
, integer_type_node
, NULL_TREE
);
13351 tree v8hi_ftype_v8hi_int_int
13352 = build_function_type_list (V8HI_type_node
,
13353 V8HI_type_node
, integer_type_node
,
13354 integer_type_node
, NULL_TREE
);
13355 tree v2di_ftype_v2di_int
13356 = build_function_type_list (V2DI_type_node
,
13357 V2DI_type_node
, integer_type_node
, NULL_TREE
);
13358 tree v4si_ftype_v4si_int
13359 = build_function_type_list (V4SI_type_node
,
13360 V4SI_type_node
, integer_type_node
, NULL_TREE
);
13361 tree v8hi_ftype_v8hi_int
13362 = build_function_type_list (V8HI_type_node
,
13363 V8HI_type_node
, integer_type_node
, NULL_TREE
);
13364 tree v8hi_ftype_v8hi_v2di
13365 = build_function_type_list (V8HI_type_node
,
13366 V8HI_type_node
, V2DI_type_node
, NULL_TREE
);
13367 tree v4si_ftype_v4si_v2di
13368 = build_function_type_list (V4SI_type_node
,
13369 V4SI_type_node
, V2DI_type_node
, NULL_TREE
);
13370 tree v4si_ftype_v8hi_v8hi
13371 = build_function_type_list (V4SI_type_node
,
13372 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
13373 tree di_ftype_v8qi_v8qi
13374 = build_function_type_list (long_long_unsigned_type_node
,
13375 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
13376 tree di_ftype_v2si_v2si
13377 = build_function_type_list (long_long_unsigned_type_node
,
13378 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
13379 tree v2di_ftype_v16qi_v16qi
13380 = build_function_type_list (V2DI_type_node
,
13381 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
13382 tree v2di_ftype_v4si_v4si
13383 = build_function_type_list (V2DI_type_node
,
13384 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
13385 tree int_ftype_v16qi
13386 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
13387 tree v16qi_ftype_pcchar
13388 = build_function_type_list (V16QI_type_node
, pcchar_type_node
, NULL_TREE
);
13389 tree void_ftype_pchar_v16qi
13390 = build_function_type_list (void_type_node
,
13391 pchar_type_node
, V16QI_type_node
, NULL_TREE
);
13392 tree v4si_ftype_pcint
13393 = build_function_type_list (V4SI_type_node
, pcint_type_node
, NULL_TREE
);
13394 tree void_ftype_pcint_v4si
13395 = build_function_type_list (void_type_node
,
13396 pcint_type_node
, V4SI_type_node
, NULL_TREE
);
13397 tree v2di_ftype_v2di
13398 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
13401 tree float128_type
;
13403 /* The __float80 type. */
13404 if (TYPE_MODE (long_double_type_node
) == XFmode
)
13405 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
13409 /* The __float80 type. */
13410 float80_type
= make_node (REAL_TYPE
);
13411 TYPE_PRECISION (float80_type
) = 80;
13412 layout_type (float80_type
);
13413 (*lang_hooks
.types
.register_builtin_type
) (float80_type
, "__float80");
13416 float128_type
= make_node (REAL_TYPE
);
13417 TYPE_PRECISION (float128_type
) = 128;
13418 layout_type (float128_type
);
13419 (*lang_hooks
.types
.register_builtin_type
) (float128_type
, "__float128");
13421 /* Add all builtins that are more or less simple operations on two operands. */
13423 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
13425 /* Use one of the operands; the target can have a different mode for
13426 mask-generating compares. */
13427 enum machine_mode mode
;
13432 mode
= insn_data
[d
->icode
].operand
[1].mode
;
13437 type
= v16qi_ftype_v16qi_v16qi
;
13440 type
= v8hi_ftype_v8hi_v8hi
;
13443 type
= v4si_ftype_v4si_v4si
;
13446 type
= v2di_ftype_v2di_v2di
;
13449 type
= v2df_ftype_v2df_v2df
;
13452 type
= ti_ftype_ti_ti
;
13455 type
= v4sf_ftype_v4sf_v4sf
;
13458 type
= v8qi_ftype_v8qi_v8qi
;
13461 type
= v4hi_ftype_v4hi_v4hi
;
13464 type
= v2si_ftype_v2si_v2si
;
13467 type
= di_ftype_di_di
;
13474 /* Override for comparisons. */
13475 if (d
->icode
== CODE_FOR_maskcmpv4sf3
13476 || d
->icode
== CODE_FOR_maskncmpv4sf3
13477 || d
->icode
== CODE_FOR_vmmaskcmpv4sf3
13478 || d
->icode
== CODE_FOR_vmmaskncmpv4sf3
)
13479 type
= v4si_ftype_v4sf_v4sf
;
13481 if (d
->icode
== CODE_FOR_maskcmpv2df3
13482 || d
->icode
== CODE_FOR_maskncmpv2df3
13483 || d
->icode
== CODE_FOR_vmmaskcmpv2df3
13484 || d
->icode
== CODE_FOR_vmmaskncmpv2df3
)
13485 type
= v2di_ftype_v2df_v2df
;
13487 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
13490 /* Add the remaining MMX insns with somewhat more complicated types. */
13491 def_builtin (MASK_MMX
, "__builtin_ia32_mmx_zero", di_ftype_void
, IX86_BUILTIN_MMX_ZERO
);
13492 def_builtin (MASK_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
13493 def_builtin (MASK_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
13494 def_builtin (MASK_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
13495 def_builtin (MASK_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
13497 def_builtin (MASK_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
13498 def_builtin (MASK_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
13499 def_builtin (MASK_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
13501 def_builtin (MASK_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
13502 def_builtin (MASK_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
13504 def_builtin (MASK_MMX
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
13505 def_builtin (MASK_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
13507 /* comi/ucomi insns. */
13508 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
13509 if (d
->mask
== MASK_SSE2
)
13510 def_builtin (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
13512 def_builtin (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
13514 def_builtin (MASK_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
13515 def_builtin (MASK_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
13516 def_builtin (MASK_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
13518 def_builtin (MASK_SSE
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
13519 def_builtin (MASK_SSE
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
13520 def_builtin (MASK_SSE
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
13521 def_builtin (MASK_SSE
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
13522 def_builtin (MASK_SSE
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
13523 def_builtin (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64
, IX86_BUILTIN_CVTSI642SS
);
13524 def_builtin (MASK_SSE
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
13525 def_builtin (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI64
);
13526 def_builtin (MASK_SSE
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
13527 def_builtin (MASK_SSE
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
13528 def_builtin (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI64
);
13530 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pextrw", int_ftype_v4hi_int
, IX86_BUILTIN_PEXTRW
);
13531 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int
, IX86_BUILTIN_PINSRW
);
13533 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
13535 def_builtin (MASK_SSE
, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADAPS
);
13536 def_builtin (MASK_SSE
, "__builtin_ia32_loadups", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADUPS
);
13537 def_builtin (MASK_SSE
, "__builtin_ia32_loadss", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADSS
);
13538 def_builtin (MASK_SSE
, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREAPS
);
13539 def_builtin (MASK_SSE
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
13540 def_builtin (MASK_SSE
, "__builtin_ia32_storess", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STORESS
);
13542 def_builtin (MASK_SSE
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
13543 def_builtin (MASK_SSE
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
13544 def_builtin (MASK_SSE
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
13545 def_builtin (MASK_SSE
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
13547 def_builtin (MASK_SSE
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
13548 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
13549 def_builtin (MASK_SSE
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
13550 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
13552 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
13554 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
13556 def_builtin (MASK_SSE
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
13557 def_builtin (MASK_SSE
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
13558 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
13559 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
13560 def_builtin (MASK_SSE
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
13561 def_builtin (MASK_SSE
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
13563 def_builtin (MASK_SSE
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
13565 /* Original 3DNow! */
13566 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
13567 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
13568 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
13569 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
13570 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
13571 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
13572 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
13573 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
13574 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
13575 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
13576 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
13577 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
13578 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
13579 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
13580 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
13581 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
13582 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
13583 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
13584 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
13585 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
13587 /* 3DNow! extension as used in the Athlon CPU. */
13588 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
13589 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
13590 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
13591 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
13592 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
13593 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
13595 def_builtin (MASK_SSE
, "__builtin_ia32_setzerops", v4sf_ftype_void
, IX86_BUILTIN_SSE_ZERO
);
13598 def_builtin (MASK_SSE2
, "__builtin_ia32_pextrw128", int_ftype_v8hi_int
, IX86_BUILTIN_PEXTRW128
);
13599 def_builtin (MASK_SSE2
, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int
, IX86_BUILTIN_PINSRW128
);
13601 def_builtin (MASK_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
13602 def_builtin (MASK_SSE2
, "__builtin_ia32_movq2dq", v2di_ftype_di
, IX86_BUILTIN_MOVQ2DQ
);
13603 def_builtin (MASK_SSE2
, "__builtin_ia32_movdq2q", di_ftype_v2di
, IX86_BUILTIN_MOVDQ2Q
);
13605 def_builtin (MASK_SSE2
, "__builtin_ia32_loadapd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADAPD
);
13606 def_builtin (MASK_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADUPD
);
13607 def_builtin (MASK_SSE2
, "__builtin_ia32_loadsd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADSD
);
13608 def_builtin (MASK_SSE2
, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREAPD
);
13609 def_builtin (MASK_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
13610 def_builtin (MASK_SSE2
, "__builtin_ia32_storesd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STORESD
);
13612 def_builtin (MASK_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si
, IX86_BUILTIN_LOADHPD
);
13613 def_builtin (MASK_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si
, IX86_BUILTIN_LOADLPD
);
13614 def_builtin (MASK_SSE2
, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df
, IX86_BUILTIN_STOREHPD
);
13615 def_builtin (MASK_SSE2
, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df
, IX86_BUILTIN_STORELPD
);
13617 def_builtin (MASK_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
13618 def_builtin (MASK_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
13619 def_builtin (MASK_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
13620 def_builtin (MASK_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
13621 def_builtin (MASK_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
13623 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
13624 def_builtin (MASK_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
13625 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
13626 def_builtin (MASK_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
13628 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
13629 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
13631 def_builtin (MASK_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
13633 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
13634 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
13636 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
13637 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
13638 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
13639 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
13640 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
13642 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
13644 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
13645 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
13646 def_builtin (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTSD2SI64
);
13647 def_builtin (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI64
);
13649 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
13650 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
13651 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
13653 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
13654 def_builtin (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64
, IX86_BUILTIN_CVTSI642SD
);
13655 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
13656 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
13658 def_builtin (MASK_SSE2
, "__builtin_ia32_setpd1", v2df_ftype_double
, IX86_BUILTIN_SETPD1
);
13659 def_builtin (MASK_SSE2
, "__builtin_ia32_setpd", v2df_ftype_double_double
, IX86_BUILTIN_SETPD
);
13660 def_builtin (MASK_SSE2
, "__builtin_ia32_setzeropd", ti_ftype_void
, IX86_BUILTIN_CLRPD
);
13661 def_builtin (MASK_SSE2
, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADPD1
);
13662 def_builtin (MASK_SSE2
, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADRPD
);
13663 def_builtin (MASK_SSE2
, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREPD1
);
13664 def_builtin (MASK_SSE2
, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STORERPD
);
13666 def_builtin (MASK_SSE2
, "__builtin_ia32_clflush", void_ftype_pcvoid
, IX86_BUILTIN_CLFLUSH
);
13667 def_builtin (MASK_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
13668 def_builtin (MASK_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
13670 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQA
);
13671 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQU
);
13672 def_builtin (MASK_SSE2
, "__builtin_ia32_loadd", v4si_ftype_pcint
, IX86_BUILTIN_LOADD
);
13673 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQA
);
13674 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQU
);
13675 def_builtin (MASK_SSE2
, "__builtin_ia32_stored", void_ftype_pcint_v4si
, IX86_BUILTIN_STORED
);
13676 def_builtin (MASK_SSE2
, "__builtin_ia32_movq", v2di_ftype_v2di
, IX86_BUILTIN_MOVQ
);
13678 def_builtin (MASK_SSE
, "__builtin_ia32_setzero128", v2di_ftype_void
, IX86_BUILTIN_CLRTI
);
13680 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si
, IX86_BUILTIN_PMULUDQ
);
13681 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si
, IX86_BUILTIN_PMULUDQ128
);
13683 def_builtin (MASK_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSLLW128
);
13684 def_builtin (MASK_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSLLD128
);
13685 def_builtin (MASK_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
13687 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRLW128
);
13688 def_builtin (MASK_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRLD128
);
13689 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
13691 def_builtin (MASK_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRAW128
);
13692 def_builtin (MASK_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRAD128
);
13694 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLDQI128
);
13695 def_builtin (MASK_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
13696 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
13697 def_builtin (MASK_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
13699 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
13700 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
13701 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
13702 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
13704 def_builtin (MASK_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
13705 def_builtin (MASK_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
13707 def_builtin (MASK_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
13709 /* Prescott New Instructions. */
13710 def_builtin (MASK_SSE3
, "__builtin_ia32_monitor",
13711 void_ftype_pcvoid_unsigned_unsigned
,
13712 IX86_BUILTIN_MONITOR
);
13713 def_builtin (MASK_SSE3
, "__builtin_ia32_mwait",
13714 void_ftype_unsigned_unsigned
,
13715 IX86_BUILTIN_MWAIT
);
13716 def_builtin (MASK_SSE3
, "__builtin_ia32_movshdup",
13718 IX86_BUILTIN_MOVSHDUP
);
13719 def_builtin (MASK_SSE3
, "__builtin_ia32_movsldup",
13721 IX86_BUILTIN_MOVSLDUP
);
13722 def_builtin (MASK_SSE3
, "__builtin_ia32_lddqu",
13723 v16qi_ftype_pcchar
, IX86_BUILTIN_LDDQU
);
13724 def_builtin (MASK_SSE3
, "__builtin_ia32_loadddup",
13725 v2df_ftype_pcdouble
, IX86_BUILTIN_LOADDDUP
);
13726 def_builtin (MASK_SSE3
, "__builtin_ia32_movddup",
13727 v2df_ftype_v2df
, IX86_BUILTIN_MOVDDUP
);
13730 /* Errors in the source file can cause expand_expr to return const0_rtx
13731 where we expect a vector. To avoid crashing, use one of the vector
13732 clear instructions. */
/* X is an already-expanded operand and MODE the vector mode the insn
   wants for it.  The visible code compares X with const0_rtx, allocates
   a fresh pseudo of MODE with gen_reg_rtx, and emits an insn clearing
   that pseudo: gen_mmx_clrdi when MODE satisfies VALID_MMX_REG_MODE or
   VALID_MMX_REG_MODE_3DNOW, gen_sse_clrv4sf (with CONST0_RTX (V4SFmode))
   for the remaining case.
   NOTE(review): this listing has lost lines (the embedded numbers jump
   13734 -> 13736 -> 13738 -> 13740 and 13742 -> 13744).  The return
   type, the braces, the statement guarded by the const0_rtx test, the
   `else' before the SSE clear and the final return are not shown;
   presumably X is returned unchanged when it is not const0_rtx --
   confirm against the pristine file.  */
13734 safe_vector_operand (rtx x
, enum machine_mode mode
)
13736 if (x
!= const0_rtx
)
13738 x
= gen_reg_rtx (mode
);
/* The clear patterns are given a DImode (MMX) or V4SFmode (SSE) operand;
   for any other MODE the new pseudo is passed as a SUBREG at byte
   offset 0 in that mode.  */
13740 if (VALID_MMX_REG_MODE (mode
) || VALID_MMX_REG_MODE_3DNOW (mode
))
13741 emit_insn (gen_mmx_clrdi (mode
== DImode
? x
13742 : gen_rtx_SUBREG (DImode
, x
, 0)));
13744 emit_insn (gen_sse_clrv4sf (mode
== V4SFmode
? x
13745 : gen_rtx_SUBREG (V4SFmode
, x
, 0),
13746 CONST0_RTX (V4SFmode
)));
13750 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* ICODE is the insn pattern to generate, ARGLIST the builtin's argument
   list (the first two TREE_VALUEs are used), TARGET a suggested place
   for the result.  Both arguments are expanded with expand_expr, the
   operand modes are read from insn_data[ICODE] (operand 0 is the result,
   operands 1 and 2 the inputs), and the pattern is built with
   GEN_FCN (ICODE) (target, op0, op1).
   NOTE(review): lines are missing from this listing (return type,
   braces, the declaration of `pat', the head of the TARGET test at
   13769, the body of the mode-mismatch test at 13785, and everything
   after 13798 that emits PAT and returns); confirm against the
   pristine file.  */
13753 ix86_expand_binop_builtin (enum insn_code icode
, tree arglist
, rtx target
)
13756 tree arg0
= TREE_VALUE (arglist
);
13757 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13758 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13759 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13760 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
13761 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
13762 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
/* Vector inputs go through safe_vector_operand, which guards against
   expand_expr having produced const0_rtx.  */
13764 if (VECTOR_MODE_P (mode0
))
13765 op0
= safe_vector_operand (op0
, mode0
);
13766 if (VECTOR_MODE_P (mode1
))
13767 op1
= safe_vector_operand (op1
, mode1
);
/* TARGET is replaced by a fresh TMODE pseudo when it has the wrong mode
   or the operand 0 predicate rejects it.  The first alternative of this
   condition is not visible here (presumably a null-TARGET test).  */
13770 || GET_MODE (target
) != tmode
13771 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13772 target
= gen_reg_rtx (tmode
);
/* An SImode second operand for a pattern that wants TImode is first
   loaded into a V4SImode pseudo with sse2_loadd and then viewed as
   TImode through gen_lowpart.  */
13774 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
13776 rtx x
= gen_reg_rtx (V4SImode
);
13777 emit_insn (gen_sse2_loadd (x
, op1
));
13778 op1
= gen_lowpart (TImode
, x
);
13781 /* In case the insn wants input operands in modes different from
13782 the result, abort. */
13783 if ((GET_MODE (op0
) != mode0
&& GET_MODE (op0
) != VOIDmode
)
13784 || (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
))
/* Inputs rejected by their operand predicate are copied into a register
   of the required mode.  */
13787 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13788 op0
= copy_to_mode_reg (mode0
, op0
);
13789 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13790 op1
= copy_to_mode_reg (mode1
, op1
);
13792 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13793 yet one of the two must not be a memory. This is normally enforced
13794 by expanders, but we didn't bother to create one here. */
13795 if (GET_CODE (op0
) == MEM
&& GET_CODE (op1
) == MEM
)
13796 op0
= copy_to_mode_reg (mode0
, op0
);
13798 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13805 /* Subroutine of ix86_expand_builtin to take care of stores. */
/* ICODE is the store pattern; ARGLIST supplies two arguments, the first
   being used as an address and the second as the value to store.  The
   address is copied into a Pmode register and wrapped in a MEM of the
   pattern's operand 0 mode; the value is copied into a register of the
   operand 1 mode; the pattern is then built as GEN_FCN (ICODE) (op0, op1).
   NOTE(review): the return type, braces, the declaration of `pat' and
   the statements after 13824 (emitting PAT, returning) are missing from
   this listing; confirm against the pristine file.  */
13808 ix86_expand_store_builtin (enum insn_code icode
, tree arglist
)
13811 tree arg0
= TREE_VALUE (arglist
);
13812 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13813 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13814 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13815 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
13816 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
/* Guard a vector value against const0_rtx coming out of expand_expr.  */
13818 if (VECTOR_MODE_P (mode1
))
13819 op1
= safe_vector_operand (op1
, mode1
);
/* Unlike the other helpers, no predicate is consulted here: the address
   and the value are unconditionally forced into registers.  */
13821 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
13822 op1
= copy_to_mode_reg (mode1
, op1
);
13824 pat
= GEN_FCN (icode
) (op0
, op1
);
13830 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
/* ICODE is the pattern, ARGLIST holds the single argument, TARGET is a
   suggested result location and DO_LOAD is a flag supplied by the
   caller (ix86_expand_builtin passes 1 for the load builtins and 0
   otherwise).  Operand 0 of the pattern is the result (TMODE) and
   operand 1 the input (MODE0); the pattern is built as
   GEN_FCN (ICODE) (target, op0).
   NOTE(review): lines are missing from this listing.  DO_LOAD is never
   referenced in the visible text; presumably it selects between the
   gen_rtx_MEM wrapping at 13847 and the safe_vector_operand/predicate
   path at 13850-13854 (lines 13846 and 13848-13849 are absent).  The
   head of the TARGET test, the declaration of `pat' and the emit/return
   tail are also missing; confirm against the pristine file.  */
13833 ix86_expand_unop_builtin (enum insn_code icode
, tree arglist
,
13834 rtx target
, int do_load
)
13837 tree arg0
= TREE_VALUE (arglist
);
13838 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13839 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
13840 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
/* TARGET is replaced by a fresh TMODE pseudo when its mode differs or
   the operand 0 predicate rejects it (first alternative not visible).  */
13843 || GET_MODE (target
) != tmode
13844 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13845 target
= gen_reg_rtx (tmode
);
/* Treat the argument as an address: copy it to a Pmode register and
   reference MODE0 memory through it.  */
13847 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
/* Treat the argument as a value: sanitize vectors, then force it into a
   register if the operand 1 predicate rejects it.  */
13850 if (VECTOR_MODE_P (mode0
))
13851 op0
= safe_vector_operand (op0
, mode0
);
13853 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13854 op0
= copy_to_mode_reg (mode0
, op0
);
13857 pat
= GEN_FCN (icode
) (target
, op0
);
13864 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13865 sqrtss, rsqrtss, rcpss. */
/* ICODE is a pattern that takes two input operands although the builtin
   has only one argument: the pattern is built as
   GEN_FCN (ICODE) (target, op0, op1), and both inputs are checked
   against MODE0 (the mode of operand 1).  ARGLIST supplies the single
   argument; TARGET is a suggested result location.
   NOTE(review): OP1 is declared without an initializer and this listing
   shows no assignment before it is tested at 13888 -- line 13887 is
   absent; presumably it sets op1 from op0, confirm against the pristine
   file.  The return type, braces, the head of the TARGET test, the
   declaration of `pat' and the emit/return tail are missing as well.  */
13868 ix86_expand_unop1_builtin (enum insn_code icode
, tree arglist
, rtx target
)
13871 tree arg0
= TREE_VALUE (arglist
);
13872 rtx op1
, op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13873 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
13874 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
/* TARGET is replaced by a fresh TMODE pseudo when its mode differs or
   the operand 0 predicate rejects it (first alternative not visible).  */
13877 || GET_MODE (target
) != tmode
13878 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13879 target
= gen_reg_rtx (tmode
);
13881 if (VECTOR_MODE_P (mode0
))
13882 op0
= safe_vector_operand (op0
, mode0
);
13884 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13885 op0
= copy_to_mode_reg (mode0
, op0
);
/* The second input is validated with the operand 2 predicate but in
   MODE0, i.e. it is expected to have the same mode as the first.  */
13888 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
13889 op1
= copy_to_mode_reg (mode0
, op1
);
13891 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13898 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* D describes the builtin: D->icode is the pattern and D->comparison
   the rtx comparison code.  ARGLIST supplies the two values to compare.
   After the operands are validated, a comparison rtx
   gen_rtx_fmt_ee (comparison, mode0, op0, op1) is built as OP2 and the
   pattern is generated as GEN_FCN (D->icode) (target, op0, op1, op2).
   NOTE(review): the parameter list is cut after ARGLIST (the code uses
   `target', so a TARGET parameter presumably follows on the missing
   line 13902).  Also missing from this listing: the return type,
   braces, declarations of `op2' and `pat', the condition and the rest
   of the operand swap around 13921-13928, the head of the TARGET test,
   and the emit/return tail.  The comment that starts at 13920 is cut
   off and never closed in this listing, so no further comments are
   inserted below that point; confirm everything against the pristine
   file.  */
13901 ix86_expand_sse_compare (const struct builtin_description
*d
, tree arglist
,
13905 tree arg0
= TREE_VALUE (arglist
);
13906 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13907 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13908 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
/* Operand 0 of the pattern is the result; operands 1 and 2 are the
   values being compared.  */
13910 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
13911 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
13912 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
13913 enum rtx_code comparison
= d
->comparison
;
/* Guard vector inputs against const0_rtx coming out of expand_expr.  */
13915 if (VECTOR_MODE_P (mode0
))
13916 op0
= safe_vector_operand (op0
, mode0
);
13917 if (VECTOR_MODE_P (mode1
))
13918 op1
= safe_vector_operand (op1
, mode1
);
13920 /* Swap operands if we have a comparison that isn't available in
13924 rtx tmp
= gen_reg_rtx (mode1
);
13925 emit_move_insn (tmp
, op1
);
13931 || GET_MODE (target
) != tmode
13932 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
13933 target
= gen_reg_rtx (tmode
);
13935 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
13936 op0
= copy_to_mode_reg (mode0
, op0
);
13937 if (! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
13938 op1
= copy_to_mode_reg (mode1
, op1
);
13940 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
13941 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
13948 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* D describes the builtin: D->icode is the comparison pattern (its
   operands 0 and 1 are the two inputs) and D->comparison the rtx
   comparison code.  ARGLIST supplies the two values.  The visible code
   allocates an SImode pseudo, zeroes it, and then refers to it through
   a QImode SUBREG at offset 0; after the inputs are validated it builds
   GEN_FCN (D->icode) (op0, op1), emits a SET of the STRICT_LOW_PART of
   that SUBREG from a QImode comparison expression, and returns
   SUBREG_REG (target), i.e. the full SImode pseudo.
   NOTE(review): the parameter list is cut after ARGLIST (presumably a
   TARGET parameter follows on the missing line 13952).  Also missing
   from this listing: the return type, braces, declarations of `op2'
   and `pat', the operand swap after 13969, the emission of PAT, and the
   operands of the final comparison after 13994.  OP2 is computed at
   13987 but not used in any visible statement.  The comment starting at
   13969 is cut off and never closed in this listing, so no further
   comments are inserted below that point; confirm everything against
   the pristine file.  */
13951 ix86_expand_sse_comi (const struct builtin_description
*d
, tree arglist
,
13955 tree arg0
= TREE_VALUE (arglist
);
13956 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13957 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13958 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
/* This pattern has no result operand: operands 0 and 1 are both
   inputs.  */
13960 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
13961 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
13962 enum rtx_code comparison
= d
->comparison
;
/* Guard vector inputs against const0_rtx coming out of expand_expr.  */
13964 if (VECTOR_MODE_P (mode0
))
13965 op0
= safe_vector_operand (op0
, mode0
);
13966 if (VECTOR_MODE_P (mode1
))
13967 op1
= safe_vector_operand (op1
, mode1
);
13969 /* Swap operands if we have a comparison that isn't available in
13978 target
= gen_reg_rtx (SImode
);
13979 emit_move_insn (target
, const0_rtx
);
13980 target
= gen_rtx_SUBREG (QImode
, target
, 0);
13982 if (! (*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
13983 op0
= copy_to_mode_reg (mode0
, op0
);
13984 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
13985 op1
= copy_to_mode_reg (mode1
, op1
);
13987 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
13988 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
13992 emit_insn (gen_rtx_SET (VOIDmode
,
13993 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
13994 gen_rtx_fmt_ee (comparison
, QImode
,
13998 return SUBREG_REG (target
);
14001 /* Expand an expression EXP that calls a built-in function,
14002 with result going to TARGET if that's convenient
14003 (and in mode MODE if that's convenient).
14004 SUBTARGET may be used as the target for computing one of EXP's operands.
14005 IGNORE is nonzero if the value is to be ignored. */
/* Dispatch on the builtin's function code.  FNDECL is taken from
   operand 0 of operand 0 of EXP, ARGLIST from operand 1 of EXP, and
   FCODE from DECL_FUNCTION_CODE (fndecl).  Builtins that need special
   treatment are handled case by case; anything else is looked up, in
   this order, in the tables bdesc_2arg, bdesc_1arg and bdesc_comi by
   comparing D->code with FCODE, and is handed to the matching helper
   (ix86_expand_sse_compare / ix86_expand_binop_builtin,
   ix86_expand_unop_builtin, ix86_expand_sse_comi).  SUBTARGET, MODE and
   IGNORE are marked ATTRIBUTE_UNUSED.
   NOTE(review): many lines are missing from this listing -- the return
   type, braces, the `switch' head, the declaration of `i', most
   `return'/`break' statements that end a case, the heads of the TARGET
   tests, the statements that emit PAT, and the final statement after
   the last comment.  The case bodies below must therefore not be read
   as falling through into each other; confirm against the pristine
   file.  */
14008 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
14009 enum machine_mode mode ATTRIBUTE_UNUSED
,
14010 int ignore ATTRIBUTE_UNUSED
)
14012 const struct builtin_description
*d
;
14014 enum insn_code icode
;
14015 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
14016 tree arglist
= TREE_OPERAND (exp
, 1);
14017 tree arg0
, arg1
, arg2
;
14018 rtx op0
, op1
, op2
, pat
;
14019 enum machine_mode tmode
, mode0
, mode1
, mode2
;
14020 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
14024 case IX86_BUILTIN_EMMS
:
14025 emit_insn (gen_emms ());
14028 case IX86_BUILTIN_SFENCE
:
14029 emit_insn (gen_sfence ());
/* Word extract: operand 2 is the element selector.  When its predicate
   rejects the expanded argument an error is reported (upper bound 3 for
   the MMX form, 7 for the SSE2 form) and a fresh TMODE pseudo is
   returned instead of generating the insn.  */
14032 case IX86_BUILTIN_PEXTRW
:
14033 case IX86_BUILTIN_PEXTRW128
:
14034 icode
= (fcode
== IX86_BUILTIN_PEXTRW
14035 ? CODE_FOR_mmx_pextrw
14036 : CODE_FOR_sse2_pextrw
);
14037 arg0
= TREE_VALUE (arglist
);
14038 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14039 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14040 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14041 tmode
= insn_data
[icode
].operand
[0].mode
;
14042 mode0
= insn_data
[icode
].operand
[1].mode
;
14043 mode1
= insn_data
[icode
].operand
[2].mode
;
14045 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
14046 op0
= copy_to_mode_reg (mode0
, op0
);
14047 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
14049 error ("selector must be an integer constant in the range 0..%i",
14050 fcode
== IX86_BUILTIN_PEXTRW
? 3:7);
14051 return gen_reg_rtx (tmode
);
14054 || GET_MODE (target
) != tmode
14055 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14056 target
= gen_reg_rtx (tmode
);
14057 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
/* Word insert: three arguments, operand 3 being the selector.
   NOTE(review): the range reported here (0..15 / 0..255) differs from
   the 0..3 / 0..7 reported for the extract case above; confirm which
   bound is intended.  */
14063 case IX86_BUILTIN_PINSRW
:
14064 case IX86_BUILTIN_PINSRW128
:
14065 icode
= (fcode
== IX86_BUILTIN_PINSRW
14066 ? CODE_FOR_mmx_pinsrw
14067 : CODE_FOR_sse2_pinsrw
);
14068 arg0
= TREE_VALUE (arglist
);
14069 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14070 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
14071 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14072 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14073 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
14074 tmode
= insn_data
[icode
].operand
[0].mode
;
14075 mode0
= insn_data
[icode
].operand
[1].mode
;
14076 mode1
= insn_data
[icode
].operand
[2].mode
;
14077 mode2
= insn_data
[icode
].operand
[3].mode
;
14079 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
14080 op0
= copy_to_mode_reg (mode0
, op0
);
14081 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
14082 op1
= copy_to_mode_reg (mode1
, op1
);
14083 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
14085 error ("selector must be an integer constant in the range 0..%i",
14086 fcode
== IX86_BUILTIN_PINSRW
? 15:255);
14090 || GET_MODE (target
) != tmode
14091 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14092 target
= gen_reg_rtx (tmode
);
14093 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
/* Masked stores: a different pattern is chosen when TARGET_64BIT.  These
   patterns have no result operand; operands 0..2 are all inputs.  */
14099 case IX86_BUILTIN_MASKMOVQ
:
14100 case IX86_BUILTIN_MASKMOVDQU
:
14101 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
14102 ? (TARGET_64BIT
? CODE_FOR_mmx_maskmovq_rex
: CODE_FOR_mmx_maskmovq
)
14103 : (TARGET_64BIT
? CODE_FOR_sse2_maskmovdqu_rex64
14104 : CODE_FOR_sse2_maskmovdqu
));
14105 /* Note the arg order is different from the operand order. */
14106 arg1
= TREE_VALUE (arglist
);
14107 arg2
= TREE_VALUE (TREE_CHAIN (arglist
));
14108 arg0
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
14109 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14110 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14111 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
14112 mode0
= insn_data
[icode
].operand
[0].mode
;
14113 mode1
= insn_data
[icode
].operand
[1].mode
;
14114 mode2
= insn_data
[icode
].operand
[2].mode
;
14116 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
14117 op0
= copy_to_mode_reg (mode0
, op0
);
14118 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
14119 op1
= copy_to_mode_reg (mode1
, op1
);
14120 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
14121 op2
= copy_to_mode_reg (mode2
, op2
);
14122 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
/* The cases below simply delegate to the helper routines; the final
   argument of ix86_expand_unop_builtin is 1 for the load builtins.  */
14128 case IX86_BUILTIN_SQRTSS
:
14129 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2
, arglist
, target
);
14130 case IX86_BUILTIN_RSQRTSS
:
14131 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2
, arglist
, target
);
14132 case IX86_BUILTIN_RCPSS
:
14133 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2
, arglist
, target
);
14135 case IX86_BUILTIN_LOADAPS
:
14136 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps
, arglist
, target
, 1);
14138 case IX86_BUILTIN_LOADUPS
:
14139 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, arglist
, target
, 1);
14141 case IX86_BUILTIN_STOREAPS
:
14142 return ix86_expand_store_builtin (CODE_FOR_sse_movaps
, arglist
);
14144 case IX86_BUILTIN_STOREUPS
:
14145 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, arglist
);
14147 case IX86_BUILTIN_LOADSS
:
14148 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss
, arglist
, target
, 1);
14150 case IX86_BUILTIN_STORESS
:
14151 return ix86_expand_store_builtin (CODE_FOR_sse_storess
, arglist
);
/* Half-vector loads: the first argument is a value, the second an
   address that is copied to a Pmode register and wrapped in a MEM of
   the operand 2 mode.  */
14153 case IX86_BUILTIN_LOADHPS
:
14154 case IX86_BUILTIN_LOADLPS
:
14155 case IX86_BUILTIN_LOADHPD
:
14156 case IX86_BUILTIN_LOADLPD
:
14157 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_movhps
14158 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_movlps
14159 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_movhpd
14160 : CODE_FOR_sse2_movsd
);
14161 arg0
= TREE_VALUE (arglist
);
14162 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14163 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14164 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14165 tmode
= insn_data
[icode
].operand
[0].mode
;
14166 mode0
= insn_data
[icode
].operand
[1].mode
;
14167 mode1
= insn_data
[icode
].operand
[2].mode
;
14169 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
14170 op0
= copy_to_mode_reg (mode0
, op0
);
14171 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
14173 || GET_MODE (target
) != tmode
14174 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14175 target
= gen_reg_rtx (tmode
);
14176 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
/* Half-vector stores reuse the same patterns as the loads above: the
   first argument is the address, and the resulting MEM is passed both
   as the pattern's output and as its first input.  */
14182 case IX86_BUILTIN_STOREHPS
:
14183 case IX86_BUILTIN_STORELPS
:
14184 case IX86_BUILTIN_STOREHPD
:
14185 case IX86_BUILTIN_STORELPD
:
14186 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_movhps
14187 : fcode
== IX86_BUILTIN_STORELPS
? CODE_FOR_sse_movlps
14188 : fcode
== IX86_BUILTIN_STOREHPD
? CODE_FOR_sse2_movhpd
14189 : CODE_FOR_sse2_movsd
);
14190 arg0
= TREE_VALUE (arglist
);
14191 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14192 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14193 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14194 mode0
= insn_data
[icode
].operand
[1].mode
;
14195 mode1
= insn_data
[icode
].operand
[2].mode
;
14197 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
14198 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
14199 op1
= copy_to_mode_reg (mode1
, op1
);
14201 pat
= GEN_FCN (icode
) (op0
, op0
, op1
);
14207 case IX86_BUILTIN_MOVNTPS
:
14208 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, arglist
);
14209 case IX86_BUILTIN_MOVNTQ
:
14210 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, arglist
);
/* MXCSR access goes through an SImode stack slot obtained from
   assign_386_stack_local: the argument is moved into the slot before
   ldmxcsr, and stmxcsr writes the slot, whose contents are then copied
   into an SImode register and returned.  */
14212 case IX86_BUILTIN_LDMXCSR
:
14213 op0
= expand_expr (TREE_VALUE (arglist
), NULL_RTX
, VOIDmode
, 0);
14214 target
= assign_386_stack_local (SImode
, 0);
14215 emit_move_insn (target
, op0
);
14216 emit_insn (gen_ldmxcsr (target
));
14219 case IX86_BUILTIN_STMXCSR
:
14220 target
= assign_386_stack_local (SImode
, 0);
14221 emit_insn (gen_stmxcsr (target
));
14222 return copy_to_mode_reg (SImode
, target
);
/* Two-source shuffles: operand 3 is the mask; if its predicate rejects
   the expanded argument an error is reported and a fresh TMODE pseudo
   is returned.  */
14224 case IX86_BUILTIN_SHUFPS
:
14225 case IX86_BUILTIN_SHUFPD
:
14226 icode
= (fcode
== IX86_BUILTIN_SHUFPS
14227 ? CODE_FOR_sse_shufps
14228 : CODE_FOR_sse2_shufpd
);
14229 arg0
= TREE_VALUE (arglist
);
14230 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14231 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
14232 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14233 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14234 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
14235 tmode
= insn_data
[icode
].operand
[0].mode
;
14236 mode0
= insn_data
[icode
].operand
[1].mode
;
14237 mode1
= insn_data
[icode
].operand
[2].mode
;
14238 mode2
= insn_data
[icode
].operand
[3].mode
;
14240 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
14241 op0
= copy_to_mode_reg (mode0
, op0
);
14242 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
14243 op1
= copy_to_mode_reg (mode1
, op1
);
14244 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
14246 /* @@@ better error message */
14247 error ("mask must be an immediate");
14248 return gen_reg_rtx (tmode
);
14251 || GET_MODE (target
) != tmode
14252 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14253 target
= gen_reg_rtx (tmode
);
14254 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
/* One-source shuffles.  Note that in this case MODE1 and MODE2 hold the
   modes of operands 1 and 2 (the other cases name them MODE0/MODE1).  */
14260 case IX86_BUILTIN_PSHUFW
:
14261 case IX86_BUILTIN_PSHUFD
:
14262 case IX86_BUILTIN_PSHUFHW
:
14263 case IX86_BUILTIN_PSHUFLW
:
14264 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
14265 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
14266 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
14267 : CODE_FOR_mmx_pshufw
);
14268 arg0
= TREE_VALUE (arglist
);
14269 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14270 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14271 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14272 tmode
= insn_data
[icode
].operand
[0].mode
;
14273 mode1
= insn_data
[icode
].operand
[1].mode
;
14274 mode2
= insn_data
[icode
].operand
[2].mode
;
14276 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
14277 op0
= copy_to_mode_reg (mode1
, op0
);
14278 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
14280 /* @@@ better error message */
14281 error ("mask must be an immediate");
14285 || GET_MODE (target
) != tmode
14286 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14287 target
= gen_reg_rtx (tmode
);
14288 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
/* Whole-register byte shifts use the TImode shift patterns.  An input
   rejected by the predicate is copied to a register and re-viewed in
   MODE1 through a subreg; the result is produced in a V2DImode pseudo
   that is handed to the pattern as a TMODE subreg.  */
14294 case IX86_BUILTIN_PSLLDQI128
:
14295 case IX86_BUILTIN_PSRLDQI128
:
14296 icode
= ( fcode
== IX86_BUILTIN_PSLLDQI128
? CODE_FOR_sse2_ashlti3
14297 : CODE_FOR_sse2_lshrti3
);
14298 arg0
= TREE_VALUE (arglist
);
14299 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14300 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14301 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14302 tmode
= insn_data
[icode
].operand
[0].mode
;
14303 mode1
= insn_data
[icode
].operand
[1].mode
;
14304 mode2
= insn_data
[icode
].operand
[2].mode
;
14306 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
14308 op0
= copy_to_reg (op0
);
14309 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
14311 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
14313 error ("shift must be an immediate");
14316 target
= gen_reg_rtx (V2DImode
);
14317 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, V2DImode
, 0), op0
, op1
);
/* The IX86_BUILTIN_P* cases from here to PSWAPDSF delegate directly to
   the binop/unop helpers with the corresponding pattern code.  */
14323 case IX86_BUILTIN_FEMMS
:
14324 emit_insn (gen_femms ());
14327 case IX86_BUILTIN_PAVGUSB
:
14328 return ix86_expand_binop_builtin (CODE_FOR_pavgusb
, arglist
, target
);
14330 case IX86_BUILTIN_PF2ID
:
14331 return ix86_expand_unop_builtin (CODE_FOR_pf2id
, arglist
, target
, 0);
14333 case IX86_BUILTIN_PFACC
:
14334 return ix86_expand_binop_builtin (CODE_FOR_pfacc
, arglist
, target
);
14336 case IX86_BUILTIN_PFADD
:
14337 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3
, arglist
, target
);
14339 case IX86_BUILTIN_PFCMPEQ
:
14340 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3
, arglist
, target
);
14342 case IX86_BUILTIN_PFCMPGE
:
14343 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3
, arglist
, target
);
14345 case IX86_BUILTIN_PFCMPGT
:
14346 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3
, arglist
, target
);
14348 case IX86_BUILTIN_PFMAX
:
14349 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3
, arglist
, target
);
14351 case IX86_BUILTIN_PFMIN
:
14352 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3
, arglist
, target
);
14354 case IX86_BUILTIN_PFMUL
:
14355 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3
, arglist
, target
);
14357 case IX86_BUILTIN_PFRCP
:
14358 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2
, arglist
, target
, 0);
14360 case IX86_BUILTIN_PFRCPIT1
:
14361 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3
, arglist
, target
);
14363 case IX86_BUILTIN_PFRCPIT2
:
14364 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3
, arglist
, target
);
14366 case IX86_BUILTIN_PFRSQIT1
:
14367 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3
, arglist
, target
);
14369 case IX86_BUILTIN_PFRSQRT
:
14370 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2
, arglist
, target
, 0);
14372 case IX86_BUILTIN_PFSUB
:
14373 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3
, arglist
, target
);
14375 case IX86_BUILTIN_PFSUBR
:
14376 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3
, arglist
, target
);
14378 case IX86_BUILTIN_PI2FD
:
14379 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2
, arglist
, target
, 0);
14381 case IX86_BUILTIN_PMULHRW
:
14382 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3
, arglist
, target
);
14384 case IX86_BUILTIN_PF2IW
:
14385 return ix86_expand_unop_builtin (CODE_FOR_pf2iw
, arglist
, target
, 0);
14387 case IX86_BUILTIN_PFNACC
:
14388 return ix86_expand_binop_builtin (CODE_FOR_pfnacc
, arglist
, target
);
14390 case IX86_BUILTIN_PFPNACC
:
14391 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc
, arglist
, target
);
14393 case IX86_BUILTIN_PI2FW
:
14394 return ix86_expand_unop_builtin (CODE_FOR_pi2fw
, arglist
, target
, 0);
14396 case IX86_BUILTIN_PSWAPDSI
:
14397 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2
, arglist
, target
, 0);
14399 case IX86_BUILTIN_PSWAPDSF
:
14400 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2
, arglist
, target
, 0);
/* Zeroing builtins: each allocates a fresh pseudo and emits the matching
   clear pattern into it.  */
14402 case IX86_BUILTIN_SSE_ZERO
:
14403 target
= gen_reg_rtx (V4SFmode
);
14404 emit_insn (gen_sse_clrv4sf (target
, CONST0_RTX (V4SFmode
)));
14407 case IX86_BUILTIN_MMX_ZERO
:
14408 target
= gen_reg_rtx (DImode
);
14409 emit_insn (gen_mmx_clrdi (target
));
14412 case IX86_BUILTIN_CLRTI
:
14413 target
= gen_reg_rtx (V2DImode
);
14414 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode
, target
, V2DImode
, 0)));
14418 case IX86_BUILTIN_SQRTSD
:
14419 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2
, arglist
, target
);
14420 case IX86_BUILTIN_LOADAPD
:
14421 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd
, arglist
, target
, 1);
14422 case IX86_BUILTIN_LOADUPD
:
14423 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, arglist
, target
, 1);
14425 case IX86_BUILTIN_STOREAPD
:
14426 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
14427 case IX86_BUILTIN_STOREUPD
:
14428 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, arglist
);
14430 case IX86_BUILTIN_LOADSD
:
14431 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd
, arglist
, target
, 1);
14433 case IX86_BUILTIN_STORESD
:
14434 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd
, arglist
);
/* Build a V2DF from one double: spill it to a stack slot, load it with
   sse2_loadsd and replicate it with shufpd using mask 0.
   NOTE(review): the slot is allocated in DFmode but read back through a
   V2DFmode reference; confirm that is what sse2_loadsd expects.  */
14436 case IX86_BUILTIN_SETPD1
:
14437 target
= assign_386_stack_local (DFmode
, 0);
14438 arg0
= TREE_VALUE (arglist
);
14439 emit_move_insn (adjust_address (target
, DFmode
, 0),
14440 expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0));
14441 op0
= gen_reg_rtx (V2DFmode
);
14442 emit_insn (gen_sse2_loadsd (op0
, adjust_address (target
, V2DFmode
, 0)));
14443 emit_insn (gen_sse2_shufpd (op0
, op0
, op0
, const0_rtx
));
/* Build a V2DF from two doubles: store them at byte offsets 0 and 8 of a
   V2DFmode stack slot and load the slot with movapd.  */
14446 case IX86_BUILTIN_SETPD
:
14447 target
= assign_386_stack_local (V2DFmode
, 0);
14448 arg0
= TREE_VALUE (arglist
);
14449 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14450 emit_move_insn (adjust_address (target
, DFmode
, 0),
14451 expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0));
14452 emit_move_insn (adjust_address (target
, DFmode
, 8),
14453 expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0));
14454 op0
= gen_reg_rtx (V2DFmode
);
14455 emit_insn (gen_sse2_movapd (op0
, target
));
/* LOADRPD and LOADPD1 load into a fresh V2DF pseudo and then shuffle it
   in place with shufpd (mask 1 and mask 0 respectively).  */
14458 case IX86_BUILTIN_LOADRPD
:
14459 target
= ix86_expand_unop_builtin (CODE_FOR_sse2_movapd
, arglist
,
14460 gen_reg_rtx (V2DFmode
), 1);
14461 emit_insn (gen_sse2_shufpd (target
, target
, target
, const1_rtx
));
14464 case IX86_BUILTIN_LOADPD1
:
14465 target
= ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd
, arglist
,
14466 gen_reg_rtx (V2DFmode
), 1);
14467 emit_insn (gen_sse2_shufpd (target
, target
, target
, const0_rtx
));
/* NOTE(review): STOREPD1 and STORERPD are expanded exactly like STOREAPD
   (a plain movapd store); no shuffle is visible here, so any replication
   or reversal presumably happens elsewhere -- confirm.  */
14470 case IX86_BUILTIN_STOREPD1
:
14471 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
14472 case IX86_BUILTIN_STORERPD
:
14473 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
14475 case IX86_BUILTIN_CLRPD
:
14476 target
= gen_reg_rtx (V2DFmode
);
14477 emit_insn (gen_sse_clrv2df (target
));
14480 case IX86_BUILTIN_MFENCE
:
14481 emit_insn (gen_sse2_mfence ());
14483 case IX86_BUILTIN_LFENCE
:
14484 emit_insn (gen_sse2_lfence ());
/* The address argument is forced into a Pmode register if the pattern's
   operand 0 predicate does not accept it as expanded.  */
14487 case IX86_BUILTIN_CLFLUSH
:
14488 arg0
= TREE_VALUE (arglist
);
14489 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14490 icode
= CODE_FOR_sse2_clflush
;
14491 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, Pmode
))
14492 op0
= copy_to_mode_reg (Pmode
, op0
);
14494 emit_insn (gen_sse2_clflush (op0
));
14497 case IX86_BUILTIN_MOVNTPD
:
14498 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, arglist
);
14499 case IX86_BUILTIN_MOVNTDQ
:
14500 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, arglist
);
14501 case IX86_BUILTIN_MOVNTI
:
14502 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, arglist
);
14504 case IX86_BUILTIN_LOADDQA
:
14505 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa
, arglist
, target
, 1);
14506 case IX86_BUILTIN_LOADDQU
:
14507 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu
, arglist
, target
, 1);
14508 case IX86_BUILTIN_LOADD
:
14509 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd
, arglist
, target
, 1);
14511 case IX86_BUILTIN_STOREDQA
:
14512 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa
, arglist
);
14513 case IX86_BUILTIN_STOREDQU
:
14514 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu
, arglist
);
14515 case IX86_BUILTIN_STORED
:
14516 return ix86_expand_store_builtin (CODE_FOR_sse2_stored
, arglist
);
/* MONITOR and MWAIT copy their operands into SImode registers before
   emitting the insn.
   NOTE(review): the lines between the copies (14525, 14527, 14529,
   14539, 14541) are missing; presumably each copy is guarded by a
   test -- confirm against the pristine file.  */
14518 case IX86_BUILTIN_MONITOR
:
14519 arg0
= TREE_VALUE (arglist
);
14520 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14521 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
14522 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14523 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14524 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
14526 op0
= copy_to_mode_reg (SImode
, op0
);
14528 op1
= copy_to_mode_reg (SImode
, op1
);
14530 op2
= copy_to_mode_reg (SImode
, op2
);
14531 emit_insn (gen_monitor (op0
, op1
, op2
));
14534 case IX86_BUILTIN_MWAIT
:
14535 arg0
= TREE_VALUE (arglist
);
14536 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14537 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14538 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14540 op0
= copy_to_mode_reg (SImode
, op0
);
14542 op1
= copy_to_mode_reg (SImode
, op1
);
14543 emit_insn (gen_mwait (op0
, op1
));
14546 case IX86_BUILTIN_LOADDDUP
:
14547 return ix86_expand_unop_builtin (CODE_FOR_loadddup
, arglist
, target
, 1);
/* NOTE(review): the LDDQU call below is cut off after `target,' in this
   listing (lines 14551-14556, including the end of the switch, are
   missing).  */
14549 case IX86_BUILTIN_LDDQU
:
14550 return ix86_expand_unop_builtin (CODE_FOR_lddqu
, arglist
, target
,
/* Table-driven fallback: linear search of the three descriptor tables
   for an entry whose code equals FCODE.  */
14557 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
14558 if (d
->code
== fcode
)
14560 /* Compares are treated specially. */
14561 if (d
->icode
== CODE_FOR_maskcmpv4sf3
14562 || d
->icode
== CODE_FOR_vmmaskcmpv4sf3
14563 || d
->icode
== CODE_FOR_maskncmpv4sf3
14564 || d
->icode
== CODE_FOR_vmmaskncmpv4sf3
14565 || d
->icode
== CODE_FOR_maskcmpv2df3
14566 || d
->icode
== CODE_FOR_vmmaskcmpv2df3
14567 || d
->icode
== CODE_FOR_maskncmpv2df3
14568 || d
->icode
== CODE_FOR_vmmaskncmpv2df3
)
14569 return ix86_expand_sse_compare (d
, arglist
, target
);
14571 return ix86_expand_binop_builtin (d
->icode
, arglist
, target
);
14574 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
14575 if (d
->code
== fcode
)
14576 return ix86_expand_unop_builtin (d
->icode
, arglist
, target
, 0);
14578 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
14579 if (d
->code
== fcode
)
14580 return ix86_expand_sse_comi (d
, arglist
, target
);
14582 /* @@@ Should really do something sensible here. */
14586 /* Store OPERAND to the memory after reload is completed. This means
14587 that we can't easily use assign_stack_local. */
14589 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
14592 if (!reload_completed
)
14594 if (TARGET_RED_ZONE
)
14596 result
= gen_rtx_MEM (mode
,
14597 gen_rtx_PLUS (Pmode
,
14599 GEN_INT (-RED_ZONE_SIZE
)));
14600 emit_move_insn (result
, operand
);
14602 else if (!TARGET_RED_ZONE
&& TARGET_64BIT
)
14608 operand
= gen_lowpart (DImode
, operand
);
14612 gen_rtx_SET (VOIDmode
,
14613 gen_rtx_MEM (DImode
,
14614 gen_rtx_PRE_DEC (DImode
,
14615 stack_pointer_rtx
)),
14621 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
14630 split_di (&operand
, 1, operands
, operands
+ 1);
14632 gen_rtx_SET (VOIDmode
,
14633 gen_rtx_MEM (SImode
,
14634 gen_rtx_PRE_DEC (Pmode
,
14635 stack_pointer_rtx
)),
14638 gen_rtx_SET (VOIDmode
,
14639 gen_rtx_MEM (SImode
,
14640 gen_rtx_PRE_DEC (Pmode
,
14641 stack_pointer_rtx
)),
14646 /* It is better to store HImodes as SImodes. */
14647 if (!TARGET_PARTIAL_REG_STALL
)
14648 operand
= gen_lowpart (SImode
, operand
);
14652 gen_rtx_SET (VOIDmode
,
14653 gen_rtx_MEM (GET_MODE (operand
),
14654 gen_rtx_PRE_DEC (SImode
,
14655 stack_pointer_rtx
)),
14661 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
14666 /* Free operand from the memory. */
14668 ix86_free_from_memory (enum machine_mode mode
)
14670 if (!TARGET_RED_ZONE
)
14674 if (mode
== DImode
|| TARGET_64BIT
)
14676 else if (mode
== HImode
&& TARGET_PARTIAL_REG_STALL
)
14680 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14681 to pop or add instruction if registers are available. */
14682 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
14683 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
14688 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14689 QImode must go into class Q_REGS.
14690 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14691 movdf to do mem-to-mem moves through integer regs. */
14693 ix86_preferred_reload_class (rtx x
, enum reg_class
class)
14695 if (GET_CODE (x
) == CONST_VECTOR
&& x
!= CONST0_RTX (GET_MODE (x
)))
14697 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
14699 /* SSE can't load any constant directly yet. */
14700 if (SSE_CLASS_P (class))
14702 /* Floats can load 0 and 1. */
14703 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x
))
14705 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14706 if (MAYBE_SSE_CLASS_P (class))
14707 return (reg_class_subset_p (class, GENERAL_REGS
)
14708 ? GENERAL_REGS
: FLOAT_REGS
);
14712 /* General regs can load everything. */
14713 if (reg_class_subset_p (class, GENERAL_REGS
))
14714 return GENERAL_REGS
;
14715 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14716 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14719 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x
))
14721 if (GET_MODE (x
) == QImode
&& ! reg_class_subset_p (class, Q_REGS
))
14726 /* If we are copying between general and FP registers, we need a memory
14727 location. The same is true for SSE and MMX registers.
14729 The macro can't work reliably when one of the CLASSES is class containing
14730 registers from multiple units (SSE, MMX, integer). We avoid this by never
14731 combining those units in single alternative in the machine description.
14732 Ensure that this constraint holds to avoid unexpected surprises.
14734 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14735 enforce these sanity checks. */
14737 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
14738 enum machine_mode mode
, int strict
)
14740 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
14741 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
14742 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
14743 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
14744 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
14745 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
14752 return (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
)
14753 || ((SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
)
14754 || MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
14755 && ((mode
!= SImode
&& (mode
!= DImode
|| !TARGET_64BIT
))
14756 || (!TARGET_INTER_UNIT_MOVES
&& !optimize_size
))));
14758 /* Return the cost of moving data from a register in class CLASS1 to
14759 one in class CLASS2.
14761 It is not required that the cost always equal 2 when FROM is the same as TO;
14762 on some machines it is expensive to move between registers if they are not
14763 general registers. */
14765 ix86_register_move_cost (enum machine_mode mode
, enum reg_class class1
,
14766 enum reg_class class2
)
14768 /* In case we require secondary memory, compute cost of the store followed
14769 by load. In order to avoid bad register allocation choices, we need
14770 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14772 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
14776 cost
+= MAX (MEMORY_MOVE_COST (mode
, class1
, 0),
14777 MEMORY_MOVE_COST (mode
, class1
, 1));
14778 cost
+= MAX (MEMORY_MOVE_COST (mode
, class2
, 0),
14779 MEMORY_MOVE_COST (mode
, class2
, 1));
14781 /* In case of copying from general_purpose_register we may emit multiple
14782 stores followed by single load causing memory size mismatch stall.
14783 Count this as arbitrarily high cost of 20. */
14784 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
14787 /* In the case of FP/MMX moves, the registers actually overlap, and we
14788 have to switch modes in order to treat them differently. */
14789 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
14790 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
14796 /* Moves between SSE/MMX and integer unit are expensive. */
14797 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
14798 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
14799 return ix86_cost
->mmxsse_to_integer
;
14800 if (MAYBE_FLOAT_CLASS_P (class1
))
14801 return ix86_cost
->fp_move
;
14802 if (MAYBE_SSE_CLASS_P (class1
))
14803 return ix86_cost
->sse_move
;
14804 if (MAYBE_MMX_CLASS_P (class1
))
14805 return ix86_cost
->mmx_move
;
14809 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14811 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
14813 /* Flags and only flags can only hold CCmode values. */
14814 if (CC_REGNO_P (regno
))
14815 return GET_MODE_CLASS (mode
) == MODE_CC
;
14816 if (GET_MODE_CLASS (mode
) == MODE_CC
14817 || GET_MODE_CLASS (mode
) == MODE_RANDOM
14818 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
14820 if (FP_REGNO_P (regno
))
14821 return VALID_FP_MODE_P (mode
);
14822 if (SSE_REGNO_P (regno
))
14823 return (TARGET_SSE
? VALID_SSE_REG_MODE (mode
) : 0);
14824 if (MMX_REGNO_P (regno
))
14826 ? VALID_MMX_REG_MODE (mode
) || VALID_MMX_REG_MODE_3DNOW (mode
) : 0);
14827 /* We handle both integer and floats in the general purpose registers.
14828 In future we should be able to handle vector modes as well. */
14829 if (!VALID_INT_MODE_P (mode
) && !VALID_FP_MODE_P (mode
))
14831 /* Take care for QImode values - they can be in non-QI regs, but then
14832 they do cause partial register stalls. */
14833 if (regno
< 4 || mode
!= QImode
|| TARGET_64BIT
)
14835 return reload_in_progress
|| reload_completed
|| !TARGET_PARTIAL_REG_STALL
;
14838 /* Return the cost of moving data of mode M between a
14839 register and memory. A value of 2 is the default; this cost is
14840 relative to those in `REGISTER_MOVE_COST'.
14842 If moving between registers and memory is more expensive than
14843 between two registers, you should define this macro to express the
14846 Model also increased moving costs of QImode registers in non
14850 ix86_memory_move_cost (enum machine_mode mode
, enum reg_class
class, int in
)
14852 if (FLOAT_CLASS_P (class))
14869 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
14871 if (SSE_CLASS_P (class))
14874 switch (GET_MODE_SIZE (mode
))
14888 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
14890 if (MMX_CLASS_P (class))
14893 switch (GET_MODE_SIZE (mode
))
14904 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
14906 switch (GET_MODE_SIZE (mode
))
14910 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
14911 : ix86_cost
->movzbl_load
);
14913 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
14914 : ix86_cost
->int_store
[0] + 4);
14917 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
14919 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14920 if (mode
== TFmode
)
14922 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
14923 * (((int) GET_MODE_SIZE (mode
)
14924 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
14928 /* Compute a (partial) cost for rtx X. Return true if the complete
14929 cost has been computed, and false if subexpressions should be
14930 scanned. In either case, *TOTAL contains the cost result. */
14933 ix86_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
14935 enum machine_mode mode
= GET_MODE (x
);
14943 if (TARGET_64BIT
&& !x86_64_sign_extended_value (x
))
14945 else if (TARGET_64BIT
&& !x86_64_zero_extended_value (x
))
14947 else if (flag_pic
&& SYMBOLIC_CONST (x
)
14949 || (!GET_CODE (x
) != LABEL_REF
14950 && (GET_CODE (x
) != SYMBOL_REF
14951 || !SYMBOL_REF_LOCAL_P (x
)))))
14958 if (mode
== VOIDmode
)
14961 switch (standard_80387_constant_p (x
))
14966 default: /* Other constants */
14971 /* Start with (MEM (SYMBOL_REF)), since that's where
14972 it'll probably end up. Add a penalty for size. */
14973 *total
= (COSTS_N_INSNS (1)
14974 + (flag_pic
!= 0 && !TARGET_64BIT
)
14975 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
14981 /* The zero extensions is often completely free on x86_64, so make
14982 it as cheap as possible. */
14983 if (TARGET_64BIT
&& mode
== DImode
14984 && GET_MODE (XEXP (x
, 0)) == SImode
)
14986 else if (TARGET_ZERO_EXTEND_WITH_AND
)
14987 *total
= COSTS_N_INSNS (ix86_cost
->add
);
14989 *total
= COSTS_N_INSNS (ix86_cost
->movzx
);
14993 *total
= COSTS_N_INSNS (ix86_cost
->movsx
);
14997 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
14998 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
15000 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
15003 *total
= COSTS_N_INSNS (ix86_cost
->add
);
15006 if ((value
== 2 || value
== 3)
15007 && ix86_cost
->lea
<= ix86_cost
->shift_const
)
15009 *total
= COSTS_N_INSNS (ix86_cost
->lea
);
15019 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
15021 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
15023 if (INTVAL (XEXP (x
, 1)) > 32)
15024 *total
= COSTS_N_INSNS(ix86_cost
->shift_const
+ 2);
15026 *total
= COSTS_N_INSNS(ix86_cost
->shift_const
* 2);
15030 if (GET_CODE (XEXP (x
, 1)) == AND
)
15031 *total
= COSTS_N_INSNS(ix86_cost
->shift_var
* 2);
15033 *total
= COSTS_N_INSNS(ix86_cost
->shift_var
* 6 + 2);
15038 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
15039 *total
= COSTS_N_INSNS (ix86_cost
->shift_const
);
15041 *total
= COSTS_N_INSNS (ix86_cost
->shift_var
);
15046 if (FLOAT_MODE_P (mode
))
15048 *total
= COSTS_N_INSNS (ix86_cost
->fmul
);
15053 rtx op0
= XEXP (x
, 0);
15054 rtx op1
= XEXP (x
, 1);
15056 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
15058 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
15059 for (nbits
= 0; value
!= 0; value
&= value
- 1)
15063 /* This is arbitrary. */
15066 /* Compute costs correctly for widening multiplication. */
15067 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op1
) == ZERO_EXTEND
)
15068 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
15069 == GET_MODE_SIZE (mode
))
15071 int is_mulwiden
= 0;
15072 enum machine_mode inner_mode
= GET_MODE (op0
);
15074 if (GET_CODE (op0
) == GET_CODE (op1
))
15075 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
15076 else if (GET_CODE (op1
) == CONST_INT
)
15078 if (GET_CODE (op0
) == SIGN_EXTEND
)
15079 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
15082 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
15086 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
15089 *total
= COSTS_N_INSNS (ix86_cost
->mult_init
[MODE_INDEX (mode
)]
15090 + nbits
* ix86_cost
->mult_bit
)
15091 + rtx_cost (op0
, outer_code
) + rtx_cost (op1
, outer_code
);
15100 if (FLOAT_MODE_P (mode
))
15101 *total
= COSTS_N_INSNS (ix86_cost
->fdiv
);
15103 *total
= COSTS_N_INSNS (ix86_cost
->divide
[MODE_INDEX (mode
)]);
15107 if (FLOAT_MODE_P (mode
))
15108 *total
= COSTS_N_INSNS (ix86_cost
->fadd
);
15109 else if (GET_MODE_CLASS (mode
) == MODE_INT
15110 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
15112 if (GET_CODE (XEXP (x
, 0)) == PLUS
15113 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
15114 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == CONST_INT
15115 && CONSTANT_P (XEXP (x
, 1)))
15117 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
15118 if (val
== 2 || val
== 4 || val
== 8)
15120 *total
= COSTS_N_INSNS (ix86_cost
->lea
);
15121 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
15122 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
15124 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
15128 else if (GET_CODE (XEXP (x
, 0)) == MULT
15129 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)
15131 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
15132 if (val
== 2 || val
== 4 || val
== 8)
15134 *total
= COSTS_N_INSNS (ix86_cost
->lea
);
15135 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
15136 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
15140 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
15142 *total
= COSTS_N_INSNS (ix86_cost
->lea
);
15143 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
15144 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
15145 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
15152 if (FLOAT_MODE_P (mode
))
15154 *total
= COSTS_N_INSNS (ix86_cost
->fadd
);
15162 if (!TARGET_64BIT
&& mode
== DImode
)
15164 *total
= (COSTS_N_INSNS (ix86_cost
->add
) * 2
15165 + (rtx_cost (XEXP (x
, 0), outer_code
)
15166 << (GET_MODE (XEXP (x
, 0)) != DImode
))
15167 + (rtx_cost (XEXP (x
, 1), outer_code
)
15168 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
15174 if (FLOAT_MODE_P (mode
))
15176 *total
= COSTS_N_INSNS (ix86_cost
->fchs
);
15182 if (!TARGET_64BIT
&& mode
== DImode
)
15183 *total
= COSTS_N_INSNS (ix86_cost
->add
* 2);
15185 *total
= COSTS_N_INSNS (ix86_cost
->add
);
15189 if (!TARGET_SSE_MATH
|| !VALID_SSE_REG_MODE (mode
))
15194 if (FLOAT_MODE_P (mode
))
15195 *total
= COSTS_N_INSNS (ix86_cost
->fabs
);
15199 if (FLOAT_MODE_P (mode
))
15200 *total
= COSTS_N_INSNS (ix86_cost
->fsqrt
);
15204 if (XINT (x
, 1) == UNSPEC_TP
)
#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
/* Emit, into the init section of the assembly output, a "pushl $<name>"
   line for the constructor SYMBOL.  PRIORITY is ignored.  */
static void
ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
{
  const char *ctor_name = XSTR (symbol, 0);

  init_section ();
  fputs ("\tpushl $", asm_out_file);
  assemble_name (asm_out_file, ctor_name);
  fputc ('\n', asm_out_file);
}
#endif
15226 static int current_machopic_label_num
;
15228 /* Given a symbol name and its associated stub, write out the
15229 definition of the stub. */
15232 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
15234 unsigned int length
;
15235 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
15236 int label
= ++current_machopic_label_num
;
15238 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15239 symb
= (*targetm
.strip_name_encoding
) (symb
);
15241 length
= strlen (stub
);
15242 binder_name
= alloca (length
+ 32);
15243 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
15245 length
= strlen (symb
);
15246 symbol_name
= alloca (length
+ 32);
15247 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
15249 sprintf (lazy_ptr_name
, "L%d$lz", label
);
15252 machopic_picsymbol_stub_section ();
15254 machopic_symbol_stub_section ();
15256 fprintf (file
, "%s:\n", stub
);
15257 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
15261 fprintf (file
, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label
, label
);
15262 fprintf (file
, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name
, label
);
15263 fprintf (file
, "\tjmp %%edx\n");
15266 fprintf (file
, "\tjmp *%s\n", lazy_ptr_name
);
15268 fprintf (file
, "%s:\n", binder_name
);
15272 fprintf (file
, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name
, label
);
15273 fprintf (file
, "\tpushl %%eax\n");
15276 fprintf (file
, "\t pushl $%s\n", lazy_ptr_name
);
15278 fprintf (file
, "\tjmp dyld_stub_binding_helper\n");
15280 machopic_lazy_symbol_ptr_section ();
15281 fprintf (file
, "%s:\n", lazy_ptr_name
);
15282 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
15283 fprintf (file
, "\t.long %s\n", binder_name
);
15285 #endif /* TARGET_MACHO */
15287 /* Order the registers for register allocator. */
15290 x86_order_regs_for_local_alloc (void)
15295 /* First allocate the local general purpose registers. */
15296 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
15297 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
15298 reg_alloc_order
[pos
++] = i
;
15300 /* Global general purpose registers. */
15301 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
15302 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
15303 reg_alloc_order
[pos
++] = i
;
15305 /* x87 registers come first in case we are doing FP math
15307 if (!TARGET_SSE_MATH
)
15308 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
15309 reg_alloc_order
[pos
++] = i
;
15311 /* SSE registers. */
15312 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
15313 reg_alloc_order
[pos
++] = i
;
15314 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
15315 reg_alloc_order
[pos
++] = i
;
15317 /* x87 registers. */
15318 if (TARGET_SSE_MATH
)
15319 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
15320 reg_alloc_order
[pos
++] = i
;
15322 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
15323 reg_alloc_order
[pos
++] = i
;
15325 /* Initialize the rest of array as we do not allocate some registers
15327 while (pos
< FIRST_PSEUDO_REGISTER
)
15328 reg_alloc_order
[pos
++] = 0;
15331 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15332 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15335 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15336 struct attribute_spec.handler. */
15338 ix86_handle_struct_attribute (tree
*node
, tree name
,
15339 tree args ATTRIBUTE_UNUSED
,
15340 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
15343 if (DECL_P (*node
))
15345 if (TREE_CODE (*node
) == TYPE_DECL
)
15346 type
= &TREE_TYPE (*node
);
15351 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
15352 || TREE_CODE (*type
) == UNION_TYPE
)))
15354 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name
));
15355 *no_add_attrs
= true;
15358 else if ((is_attribute_p ("ms_struct", name
)
15359 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
15360 || ((is_attribute_p ("gcc_struct", name
)
15361 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
15363 warning ("`%s' incompatible attribute ignored",
15364 IDENTIFIER_POINTER (name
));
15365 *no_add_attrs
= true;
15372 ix86_ms_bitfield_layout_p (tree record_type
)
15374 return (TARGET_USE_MS_BITFIELD_LAYOUT
&&
15375 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
15376 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
15379 /* Returns an expression indicating where the this parameter is
15380 located on entry to the FUNCTION. */
15383 x86_this_parameter (tree function
)
15385 tree type
= TREE_TYPE (function
);
15389 int n
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
15390 return gen_rtx_REG (DImode
, x86_64_int_parameter_registers
[n
]);
15393 if (ix86_function_regparm (type
, function
) > 0)
15397 parm
= TYPE_ARG_TYPES (type
);
15398 /* Figure out whether or not the function has a variable number of
15400 for (; parm
; parm
= TREE_CHAIN (parm
))
15401 if (TREE_VALUE (parm
) == void_type_node
)
15403 /* If not, the this parameter is in the first argument. */
15407 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
15409 return gen_rtx_REG (SImode
, regno
);
15413 if (aggregate_value_p (TREE_TYPE (type
), type
))
15414 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 8));
15416 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 4));
15419 /* Determine whether x86_output_mi_thunk can succeed. */
15422 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED
,
15423 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
15424 HOST_WIDE_INT vcall_offset
, tree function
)
15426 /* 64-bit can handle anything. */
15430 /* For 32-bit, everything's fine if we have one free register. */
15431 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
15434 /* Need a free register for vcall_offset. */
15438 /* Need a free register for GOT references. */
15439 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
15442 /* Otherwise ok. */
15446 /* Output the assembler code for a thunk function. THUNK_DECL is the
15447 declaration for the thunk function itself, FUNCTION is the decl for
15448 the target function. DELTA is an immediate constant offset to be
15449 added to THIS. If VCALL_OFFSET is nonzero, the word at
15450 *(*this + vcall_offset) should be added to THIS. */
15453 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED
,
15454 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
15455 HOST_WIDE_INT vcall_offset
, tree function
)
15458 rtx
this = x86_this_parameter (function
);
15461 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15462 pull it in now and let DELTA benefit. */
15465 else if (vcall_offset
)
15467 /* Put the this parameter into %eax. */
15469 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, 0);
15470 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
15473 this_reg
= NULL_RTX
;
15475 /* Adjust the this parameter by a fixed constant. */
15478 xops
[0] = GEN_INT (delta
);
15479 xops
[1] = this_reg
? this_reg
: this;
15482 if (!x86_64_general_operand (xops
[0], DImode
))
15484 tmp
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 2 /* R10 */);
15486 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
15490 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
15493 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
15496 /* Adjust the this parameter by a value stored in the vtable. */
15500 tmp
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 2 /* R10 */);
15503 int tmp_regno
= 2 /* ECX */;
15504 if (lookup_attribute ("fastcall",
15505 TYPE_ATTRIBUTES (TREE_TYPE (function
))))
15506 tmp_regno
= 0 /* EAX */;
15507 tmp
= gen_rtx_REG (SImode
, tmp_regno
);
15510 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
15513 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
15515 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
15517 /* Adjust the this parameter. */
15518 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
15519 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
15521 rtx tmp2
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
15522 xops
[0] = GEN_INT (vcall_offset
);
15524 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
15525 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
15527 xops
[1] = this_reg
;
15529 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
15531 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
15534 /* If necessary, drop THIS back to its stack slot. */
15535 if (this_reg
&& this_reg
!= this)
15537 xops
[0] = this_reg
;
15539 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
15542 xops
[0] = XEXP (DECL_RTL (function
), 0);
15545 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
15546 output_asm_insn ("jmp\t%P0", xops
);
15549 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, xops
[0]), UNSPEC_GOTPCREL
);
15550 tmp
= gen_rtx_CONST (Pmode
, tmp
);
15551 tmp
= gen_rtx_MEM (QImode
, tmp
);
15553 output_asm_insn ("jmp\t%A0", xops
);
15558 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
15559 output_asm_insn ("jmp\t%P0", xops
);
15564 rtx sym_ref
= XEXP (DECL_RTL (function
), 0);
15565 tmp
= (gen_rtx_SYMBOL_REF
15567 machopic_indirection_name (sym_ref
, /*stub_p=*/true)));
15568 tmp
= gen_rtx_MEM (QImode
, tmp
);
15570 output_asm_insn ("jmp\t%0", xops
);
15573 #endif /* TARGET_MACHO */
15575 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
15576 output_set_got (tmp
);
15579 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
15580 output_asm_insn ("jmp\t{*}%1", xops
);
15586 x86_file_start (void)
15588 default_file_start ();
15589 if (X86_FILE_START_VERSION_DIRECTIVE
)
15590 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
15591 if (X86_FILE_START_FLTUSED
)
15592 fputs ("\t.global\t__fltused\n", asm_out_file
);
15593 if (ix86_asm_dialect
== ASM_INTEL
)
15594 fputs ("\t.intel_syntax\n", asm_out_file
);
15598 x86_field_alignment (tree field
, int computed
)
15600 enum machine_mode mode
;
15601 tree type
= TREE_TYPE (field
);
15603 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
15605 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
15606 ? get_inner_array_type (type
) : type
);
15607 if (mode
== DFmode
|| mode
== DCmode
15608 || GET_MODE_CLASS (mode
) == MODE_INT
15609 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
15610 return MIN (32, computed
);
15614 /* Output assembler code to FILE to increment profiler label # LABELNO
15615 for profiling a function entry. */
15617 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
15622 #ifndef NO_PROFILE_COUNTERS
15623 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
15625 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
15629 #ifndef NO_PROFILE_COUNTERS
15630 fprintf (file
, "\tmovq\t$%sP%d,%%r11\n", LPREFIX
, labelno
);
15632 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
15636 #ifndef NO_PROFILE_COUNTERS
15637 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15638 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
15640 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
15644 #ifndef NO_PROFILE_COUNTERS
15645 fprintf (file
, "\tmovl\t$%sP%d,%%%s\n", LPREFIX
, labelno
,
15646 PROFILE_COUNT_REGISTER
);
15648 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
15652 /* We don't have exact information about the insn sizes, but we may assume
15653 quite safely that we are informed about all 1 byte insns and memory
15654 address sizes. This is enough to eliminate unnecessary padding in
15658 min_insn_size (rtx insn
)
15662 if (!INSN_P (insn
) || !active_insn_p (insn
))
15665 /* Discard alignments we've emit and jump instructions. */
15666 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
15667 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
15669 if (GET_CODE (insn
) == JUMP_INSN
15670 && (GET_CODE (PATTERN (insn
)) == ADDR_VEC
15671 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
))
15674 /* Important case - calls are always 5 bytes.
15675 It is common to have many calls in the row. */
15676 if (GET_CODE (insn
) == CALL_INSN
15677 && symbolic_reference_mentioned_p (PATTERN (insn
))
15678 && !SIBLING_CALL_P (insn
))
15680 if (get_attr_length (insn
) <= 1)
15683 /* For normal instructions we may rely on the sizes of addresses
15684 and the presence of symbol to require 4 bytes of encoding.
15685 This is not the case for jumps where references are PC relative. */
15686 if (GET_CODE (insn
) != JUMP_INSN
)
15688 l
= get_attr_length_address (insn
);
15689 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
15698 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
15702 ix86_avoid_jump_misspredicts (void)
15704 rtx insn
, start
= get_insns ();
15705 int nbytes
= 0, njumps
= 0;
15708 /* Look for all minimal intervals of instructions containing 4 jumps.
15709 The intervals are bounded by START and INSN. NBYTES is the total
15710 size of instructions in the interval including INSN and not including
15711 START. When the NBYTES is smaller than 16 bytes, it is possible
15712 that the end of START and INSN ends up in the same 16byte page.
15714 The smallest offset in the page INSN can start is the case where START
15715 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
15716 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
15718 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
15721 nbytes
+= min_insn_size (insn
);
15723 fprintf(dump_file
, "Insn %i estimated to %i bytes\n",
15724 INSN_UID (insn
), min_insn_size (insn
));
15725 if ((GET_CODE (insn
) == JUMP_INSN
15726 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
15727 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
15728 || GET_CODE (insn
) == CALL_INSN
)
15735 start
= NEXT_INSN (start
);
15736 if ((GET_CODE (start
) == JUMP_INSN
15737 && GET_CODE (PATTERN (start
)) != ADDR_VEC
15738 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
15739 || GET_CODE (start
) == CALL_INSN
)
15740 njumps
--, isjump
= 1;
15743 nbytes
-= min_insn_size (start
);
15748 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
15749 INSN_UID (start
), INSN_UID (insn
), nbytes
);
15751 if (njumps
== 3 && isjump
&& nbytes
< 16)
15753 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
15756 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
15757 INSN_UID (insn
), padsize
);
15758 emit_insn_before (gen_align (GEN_INT (padsize
)), insn
);
15763 /* AMD Athlon works faster
15764 when RET is not destination of conditional jump or directly preceded
15765 by other jump instruction. We avoid the penalty by inserting NOP just
15766 before the RET instructions in such cases. */
15768 ix86_pad_returns (void)
15772 for (e
= EXIT_BLOCK_PTR
->pred
; e
; e
= e
->pred_next
)
15774 basic_block bb
= e
->src
;
15775 rtx ret
= BB_END (bb
);
15777 bool replace
= false;
15779 if (GET_CODE (ret
) != JUMP_INSN
|| GET_CODE (PATTERN (ret
)) != RETURN
15780 || !maybe_hot_bb_p (bb
))
15782 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
15783 if (active_insn_p (prev
) || GET_CODE (prev
) == CODE_LABEL
)
15785 if (prev
&& GET_CODE (prev
) == CODE_LABEL
)
15788 for (e
= bb
->pred
; e
; e
= e
->pred_next
)
15789 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
15790 && !(e
->flags
& EDGE_FALLTHRU
))
15795 prev
= prev_active_insn (ret
);
15797 && ((GET_CODE (prev
) == JUMP_INSN
&& any_condjump_p (prev
))
15798 || GET_CODE (prev
) == CALL_INSN
))
15800 /* Empty functions get branch mispredict even when the jump destination
15801 is not visible to us. */
15802 if (!prev
&& cfun
->function_frequency
> FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
15807 emit_insn_before (gen_return_internal_long (), ret
);
15813 /* Implement machine specific optimizations. We implement padding of returns
15814 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
15818 if (TARGET_ATHLON_K8
&& optimize
&& !optimize_size
)
15819 ix86_pad_returns ();
15820 if (TARGET_FOUR_JUMP_LIMIT
&& optimize
&& !optimize_size
)
15821 ix86_avoid_jump_misspredicts ();
15824 /* Return nonzero when QImode register that must be represented via REX prefix
15827 x86_extended_QIreg_mentioned_p (rtx insn
)
15830 extract_insn_cached (insn
);
15831 for (i
= 0; i
< recog_data
.n_operands
; i
++)
15832 if (REG_P (recog_data
.operand
[i
])
15833 && REGNO (recog_data
.operand
[i
]) >= 4)
15838 /* Return nonzero when P points to register encoded via REX prefix.
15839 Called via for_each_rtx. */
15841 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
15843 unsigned int regno
;
15846 regno
= REGNO (*p
);
15847 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
15850 /* Return true when INSN mentions register that must be encoded using REX
15853 x86_extended_reg_mentioned_p (rtx insn
)
15855 return for_each_rtx (&PATTERN (insn
), extended_reg_mentioned_1
, NULL
);
15858 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15859 optabs would emit if we didn't have TFmode patterns. */
15862 x86_emit_floatuns (rtx operands
[2])
15864 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
15865 enum machine_mode mode
, inmode
;
15867 inmode
= GET_MODE (operands
[1]);
15868 if (inmode
!= SImode
15869 && inmode
!= DImode
)
15873 in
= force_reg (inmode
, operands
[1]);
15874 mode
= GET_MODE (out
);
15875 neglab
= gen_label_rtx ();
15876 donelab
= gen_label_rtx ();
15877 i1
= gen_reg_rtx (Pmode
);
15878 f0
= gen_reg_rtx (mode
);
15880 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, Pmode
, 0, neglab
);
15882 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_FLOAT (mode
, in
)));
15883 emit_jump_insn (gen_jump (donelab
));
15886 emit_label (neglab
);
15888 i0
= expand_simple_binop (Pmode
, LSHIFTRT
, in
, const1_rtx
, NULL
, 1, OPTAB_DIRECT
);
15889 i1
= expand_simple_binop (Pmode
, AND
, in
, const1_rtx
, NULL
, 1, OPTAB_DIRECT
);
15890 i0
= expand_simple_binop (Pmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
15891 expand_float (f0
, i0
, 0);
15892 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
15894 emit_label (donelab
);
15897 /* Initialize vector TARGET via VALS. */
15899 ix86_expand_vector_init (rtx target
, rtx vals
)
15901 enum machine_mode mode
= GET_MODE (target
);
15902 int elt_size
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
15903 int n_elts
= (GET_MODE_SIZE (mode
) / elt_size
);
15906 for (i
= n_elts
- 1; i
>= 0; i
--)
15907 if (GET_CODE (XVECEXP (vals
, 0, i
)) != CONST_INT
15908 && GET_CODE (XVECEXP (vals
, 0, i
)) != CONST_DOUBLE
)
15911 /* Few special cases first...
15912 ... constants are best loaded from constant pool. */
15915 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
15919 /* ... values where only first field is non-constant are best loaded
15920 from the pool and overwritten via move later. */
15923 rtx op
= simplify_gen_subreg (mode
, XVECEXP (vals
, 0, 0),
15924 GET_MODE_INNER (mode
), 0);
15926 op
= force_reg (mode
, op
);
15927 XVECEXP (vals
, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode
));
15928 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
15929 switch (GET_MODE (target
))
15932 emit_insn (gen_sse2_movsd (target
, target
, op
));
15935 emit_insn (gen_sse_movss (target
, target
, op
));
15943 /* And the busy sequence doing rotations. */
15944 switch (GET_MODE (target
))
15949 simplify_gen_subreg (V2DFmode
, XVECEXP (vals
, 0, 0), DFmode
, 0);
15951 simplify_gen_subreg (V2DFmode
, XVECEXP (vals
, 0, 1), DFmode
, 0);
15953 vecop0
= force_reg (V2DFmode
, vecop0
);
15954 vecop1
= force_reg (V2DFmode
, vecop1
);
15955 emit_insn (gen_sse2_unpcklpd (target
, vecop0
, vecop1
));
15961 simplify_gen_subreg (V4SFmode
, XVECEXP (vals
, 0, 0), SFmode
, 0);
15963 simplify_gen_subreg (V4SFmode
, XVECEXP (vals
, 0, 1), SFmode
, 0);
15965 simplify_gen_subreg (V4SFmode
, XVECEXP (vals
, 0, 2), SFmode
, 0);
15967 simplify_gen_subreg (V4SFmode
, XVECEXP (vals
, 0, 3), SFmode
, 0);
15968 rtx tmp1
= gen_reg_rtx (V4SFmode
);
15969 rtx tmp2
= gen_reg_rtx (V4SFmode
);
15971 vecop0
= force_reg (V4SFmode
, vecop0
);
15972 vecop1
= force_reg (V4SFmode
, vecop1
);
15973 vecop2
= force_reg (V4SFmode
, vecop2
);
15974 vecop3
= force_reg (V4SFmode
, vecop3
);
15975 emit_insn (gen_sse_unpcklps (tmp1
, vecop1
, vecop3
));
15976 emit_insn (gen_sse_unpcklps (tmp2
, vecop0
, vecop2
));
15977 emit_insn (gen_sse_unpcklps (target
, tmp2
, tmp1
));
15985 /* Worker function for TARGET_MD_ASM_CLOBBERS.
15987 We do this in the new i386 backend to maintain source compatibility
15988 with the old cc0-based compiler. */
15991 ix86_md_asm_clobbers (tree clobbers
)
15993 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
15995 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
15997 clobbers
= tree_cons (NULL_TREE
, build_string (7, "dirflag"),
16002 /* Worker function for REVERSE_CONDITION. */
16005 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
16007 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
16008 ? reverse_condition (code
)
16009 : reverse_condition_maybe_unordered (code
));
16012 /* Output code to perform an x87 FP register move, from OPERANDS[1]
16016 output_387_reg_move (rtx insn
, rtx
*operands
)
16018 if (REG_P (operands
[1])
16019 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
16021 if (REGNO (operands
[0]) == FIRST_STACK_REG
16022 && TARGET_USE_FFREEP
)
16023 return "ffreep\t%y0";
16024 return "fstp\t%y0";
16026 if (STACK_TOP_P (operands
[0]))
16027 return "fld%z1\t%y1";
16031 /* Output code to perform a conditional jump to LABEL, if C2 flag in
16032 FP status register is set. */
16035 ix86_emit_fp_unordered_jump (rtx label
)
16037 rtx reg
= gen_reg_rtx (HImode
);
16040 emit_insn (gen_x86_fnstsw_1 (reg
));
16042 if (TARGET_USE_SAHF
)
16044 emit_insn (gen_x86_sahf_1 (reg
));
16046 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
16047 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
16051 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
16053 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
16054 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
16057 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
16058 gen_rtx_LABEL_REF (VOIDmode
, label
),
16060 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
16061 emit_jump_insn (temp
);
16064 /* Output code to perform a log1p XFmode calculation. */
16066 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
16068 rtx label1
= gen_label_rtx ();
16069 rtx label2
= gen_label_rtx ();
16071 rtx tmp
= gen_reg_rtx (XFmode
);
16072 rtx tmp2
= gen_reg_rtx (XFmode
);
16074 emit_insn (gen_absxf2 (tmp
, op1
));
16075 emit_insn (gen_cmpxf (tmp
,
16076 CONST_DOUBLE_FROM_REAL_VALUE (
16077 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
16079 emit_jump_insn (gen_bge (label1
));
16081 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
16082 emit_insn (gen_fyl2xp1_xf3 (op0
, tmp2
, op1
));
16083 emit_jump (label2
);
16085 emit_label (label1
);
16086 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
16087 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
16088 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
16089 emit_insn (gen_fyl2x_xf3 (op0
, tmp2
, tmp
));
16091 emit_label (label2
);
16094 #include "gt-i386.h"