1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
/* Stack-probe limit; -1 means "no limit" (probe the whole allocation).
   Targets may pre-define this in their header, hence the guard.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.
   QI/HI/SI/DImode map to 0..3; anything else (TImode etc.) to 4,
   matching the 5-entry cost arrays below.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
64 /* Processor costs (relative to an add) */
66 struct processor_costs size_cost
= { /* costs for tunning for size */
67 2, /* cost of an add instruction */
68 3, /* cost of a lea instruction */
69 2, /* variable shift costs */
70 3, /* constant shift costs */
71 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
72 0, /* cost of multiply per each bit set */
73 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
74 3, /* cost of movsx */
75 3, /* cost of movzx */
78 2, /* cost for loading QImode using movzbl */
79 {2, 2, 2}, /* cost of loading integer registers
80 in QImode, HImode and SImode.
81 Relative to reg-reg move (2). */
82 {2, 2, 2}, /* cost of storing integer registers */
83 2, /* cost of reg,reg fld/fst */
84 {2, 2, 2}, /* cost of loading fp registers
85 in SFmode, DFmode and XFmode */
86 {2, 2, 2}, /* cost of loading integer registers */
87 3, /* cost of moving MMX register */
88 {3, 3}, /* cost of loading MMX registers
89 in SImode and DImode */
90 {3, 3}, /* cost of storing MMX registers
91 in SImode and DImode */
92 3, /* cost of moving SSE register */
93 {3, 3, 3}, /* cost of loading SSE registers
94 in SImode, DImode and TImode */
95 {3, 3, 3}, /* cost of storing SSE registers
96 in SImode, DImode and TImode */
97 3, /* MMX or SSE register to integer */
98 0, /* size of prefetch block */
99 0, /* number of parallel prefetches */
101 2, /* cost of FADD and FSUB insns. */
102 2, /* cost of FMUL instruction. */
103 2, /* cost of FDIV instruction. */
104 2, /* cost of FABS instruction. */
105 2, /* cost of FCHS instruction. */
106 2, /* cost of FSQRT instruction. */
109 /* Processor costs (relative to an add) */
111 struct processor_costs i386_cost
= { /* 386 specific costs */
112 1, /* cost of an add instruction */
113 1, /* cost of a lea instruction */
114 3, /* variable shift costs */
115 2, /* constant shift costs */
116 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
117 1, /* cost of multiply per each bit set */
118 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
119 3, /* cost of movsx */
120 2, /* cost of movzx */
121 15, /* "large" insn */
123 4, /* cost for loading QImode using movzbl */
124 {2, 4, 2}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {8, 8, 8}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
131 {8, 8, 8}, /* cost of loading integer registers */
132 2, /* cost of moving MMX register */
133 {4, 8}, /* cost of loading MMX registers
134 in SImode and DImode */
135 {4, 8}, /* cost of storing MMX registers
136 in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
139 in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
141 in SImode, DImode and TImode */
142 3, /* MMX or SSE register to integer */
143 0, /* size of prefetch block */
144 0, /* number of parallel prefetches */
146 23, /* cost of FADD and FSUB insns. */
147 27, /* cost of FMUL instruction. */
148 88, /* cost of FDIV instruction. */
149 22, /* cost of FABS instruction. */
150 24, /* cost of FCHS instruction. */
151 122, /* cost of FSQRT instruction. */
155 struct processor_costs i486_cost
= { /* 486 specific costs */
156 1, /* cost of an add instruction */
157 1, /* cost of a lea instruction */
158 3, /* variable shift costs */
159 2, /* constant shift costs */
160 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
161 1, /* cost of multiply per each bit set */
162 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
163 3, /* cost of movsx */
164 2, /* cost of movzx */
165 15, /* "large" insn */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of loading integer registers */
176 2, /* cost of moving MMX register */
177 {4, 8}, /* cost of loading MMX registers
178 in SImode and DImode */
179 {4, 8}, /* cost of storing MMX registers
180 in SImode and DImode */
181 2, /* cost of moving SSE register */
182 {4, 8, 16}, /* cost of loading SSE registers
183 in SImode, DImode and TImode */
184 {4, 8, 16}, /* cost of storing SSE registers
185 in SImode, DImode and TImode */
186 3, /* MMX or SSE register to integer */
187 0, /* size of prefetch block */
188 0, /* number of parallel prefetches */
190 8, /* cost of FADD and FSUB insns. */
191 16, /* cost of FMUL instruction. */
192 73, /* cost of FDIV instruction. */
193 3, /* cost of FABS instruction. */
194 3, /* cost of FCHS instruction. */
195 83, /* cost of FSQRT instruction. */
199 struct processor_costs pentium_cost
= {
200 1, /* cost of an add instruction */
201 1, /* cost of a lea instruction */
202 4, /* variable shift costs */
203 1, /* constant shift costs */
204 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
205 0, /* cost of multiply per each bit set */
206 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
207 3, /* cost of movsx */
208 2, /* cost of movzx */
209 8, /* "large" insn */
211 6, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {2, 2, 6}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
219 {4, 4, 6}, /* cost of loading integer registers */
220 8, /* cost of moving MMX register */
221 {8, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {8, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
234 3, /* cost of FADD and FSUB insns. */
235 3, /* cost of FMUL instruction. */
236 39, /* cost of FDIV instruction. */
237 1, /* cost of FABS instruction. */
238 1, /* cost of FCHS instruction. */
239 70, /* cost of FSQRT instruction. */
243 struct processor_costs pentiumpro_cost
= {
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
246 1, /* variable shift costs */
247 1, /* constant shift costs */
248 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
249 0, /* cost of multiply per each bit set */
250 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
251 1, /* cost of movsx */
252 1, /* cost of movzx */
253 8, /* "large" insn */
255 2, /* cost for loading QImode using movzbl */
256 {4, 4, 4}, /* cost of loading integer registers
257 in QImode, HImode and SImode.
258 Relative to reg-reg move (2). */
259 {2, 2, 2}, /* cost of storing integer registers */
260 2, /* cost of reg,reg fld/fst */
261 {2, 2, 6}, /* cost of loading fp registers
262 in SFmode, DFmode and XFmode */
263 {4, 4, 6}, /* cost of loading integer registers */
264 2, /* cost of moving MMX register */
265 {2, 2}, /* cost of loading MMX registers
266 in SImode and DImode */
267 {2, 2}, /* cost of storing MMX registers
268 in SImode and DImode */
269 2, /* cost of moving SSE register */
270 {2, 2, 8}, /* cost of loading SSE registers
271 in SImode, DImode and TImode */
272 {2, 2, 8}, /* cost of storing SSE registers
273 in SImode, DImode and TImode */
274 3, /* MMX or SSE register to integer */
275 32, /* size of prefetch block */
276 6, /* number of parallel prefetches */
278 3, /* cost of FADD and FSUB insns. */
279 5, /* cost of FMUL instruction. */
280 56, /* cost of FDIV instruction. */
281 2, /* cost of FABS instruction. */
282 2, /* cost of FCHS instruction. */
283 56, /* cost of FSQRT instruction. */
287 struct processor_costs k6_cost
= {
288 1, /* cost of an add instruction */
289 2, /* cost of a lea instruction */
290 1, /* variable shift costs */
291 1, /* constant shift costs */
292 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
293 0, /* cost of multiply per each bit set */
294 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
295 2, /* cost of movsx */
296 2, /* cost of movzx */
297 8, /* "large" insn */
299 3, /* cost for loading QImode using movzbl */
300 {4, 5, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 3, 2}, /* cost of storing integer registers */
304 4, /* cost of reg,reg fld/fst */
305 {6, 6, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
307 {4, 4, 4}, /* cost of loading integer registers */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 6, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 1, /* number of parallel prefetches */
322 2, /* cost of FADD and FSUB insns. */
323 2, /* cost of FMUL instruction. */
324 56, /* cost of FDIV instruction. */
325 2, /* cost of FABS instruction. */
326 2, /* cost of FCHS instruction. */
327 56, /* cost of FSQRT instruction. */
331 struct processor_costs athlon_cost
= {
332 1, /* cost of an add instruction */
333 2, /* cost of a lea instruction */
334 1, /* variable shift costs */
335 1, /* constant shift costs */
336 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
337 0, /* cost of multiply per each bit set */
338 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
339 1, /* cost of movsx */
340 1, /* cost of movzx */
341 8, /* "large" insn */
343 4, /* cost for loading QImode using movzbl */
344 {3, 4, 3}, /* cost of loading integer registers
345 in QImode, HImode and SImode.
346 Relative to reg-reg move (2). */
347 {3, 4, 3}, /* cost of storing integer registers */
348 4, /* cost of reg,reg fld/fst */
349 {4, 4, 12}, /* cost of loading fp registers
350 in SFmode, DFmode and XFmode */
351 {6, 6, 8}, /* cost of loading integer registers */
352 2, /* cost of moving MMX register */
353 {4, 4}, /* cost of loading MMX registers
354 in SImode and DImode */
355 {4, 4}, /* cost of storing MMX registers
356 in SImode and DImode */
357 2, /* cost of moving SSE register */
358 {4, 4, 6}, /* cost of loading SSE registers
359 in SImode, DImode and TImode */
360 {4, 4, 5}, /* cost of storing SSE registers
361 in SImode, DImode and TImode */
362 5, /* MMX or SSE register to integer */
363 64, /* size of prefetch block */
364 6, /* number of parallel prefetches */
366 4, /* cost of FADD and FSUB insns. */
367 4, /* cost of FMUL instruction. */
368 24, /* cost of FDIV instruction. */
369 2, /* cost of FABS instruction. */
370 2, /* cost of FCHS instruction. */
371 35, /* cost of FSQRT instruction. */
375 struct processor_costs k8_cost
= {
376 1, /* cost of an add instruction */
377 2, /* cost of a lea instruction */
378 1, /* variable shift costs */
379 1, /* constant shift costs */
380 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
381 0, /* cost of multiply per each bit set */
382 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
383 1, /* cost of movsx */
384 1, /* cost of movzx */
385 8, /* "large" insn */
387 4, /* cost for loading QImode using movzbl */
388 {3, 4, 3}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {3, 4, 3}, /* cost of storing integer registers */
392 4, /* cost of reg,reg fld/fst */
393 {4, 4, 12}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {6, 6, 8}, /* cost of loading integer registers */
396 2, /* cost of moving MMX register */
397 {3, 3}, /* cost of loading MMX registers
398 in SImode and DImode */
399 {4, 4}, /* cost of storing MMX registers
400 in SImode and DImode */
401 2, /* cost of moving SSE register */
402 {4, 3, 6}, /* cost of loading SSE registers
403 in SImode, DImode and TImode */
404 {4, 4, 5}, /* cost of storing SSE registers
405 in SImode, DImode and TImode */
406 5, /* MMX or SSE register to integer */
407 64, /* size of prefetch block */
408 6, /* number of parallel prefetches */
410 4, /* cost of FADD and FSUB insns. */
411 4, /* cost of FMUL instruction. */
412 19, /* cost of FDIV instruction. */
413 2, /* cost of FABS instruction. */
414 2, /* cost of FCHS instruction. */
415 35, /* cost of FSQRT instruction. */
419 struct processor_costs pentium4_cost
= {
420 1, /* cost of an add instruction */
421 3, /* cost of a lea instruction */
422 4, /* variable shift costs */
423 4, /* constant shift costs */
424 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
425 0, /* cost of multiply per each bit set */
426 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
427 1, /* cost of movsx */
428 1, /* cost of movzx */
429 16, /* "large" insn */
431 2, /* cost for loading QImode using movzbl */
432 {4, 5, 4}, /* cost of loading integer registers
433 in QImode, HImode and SImode.
434 Relative to reg-reg move (2). */
435 {2, 3, 2}, /* cost of storing integer registers */
436 2, /* cost of reg,reg fld/fst */
437 {2, 2, 6}, /* cost of loading fp registers
438 in SFmode, DFmode and XFmode */
439 {4, 4, 6}, /* cost of loading integer registers */
440 2, /* cost of moving MMX register */
441 {2, 2}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {2, 2}, /* cost of storing MMX registers
444 in SImode and DImode */
445 12, /* cost of moving SSE register */
446 {12, 12, 12}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {2, 2, 8}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 10, /* MMX or SSE register to integer */
451 64, /* size of prefetch block */
452 6, /* number of parallel prefetches */
454 5, /* cost of FADD and FSUB insns. */
455 7, /* cost of FMUL instruction. */
456 43, /* cost of FDIV instruction. */
457 2, /* cost of FABS instruction. */
458 2, /* cost of FCHS instruction. */
459 43, /* cost of FSQRT instruction. */
463 struct processor_costs nocona_cost
= {
464 1, /* cost of an add instruction */
465 1, /* cost of a lea instruction */
466 1, /* variable shift costs */
467 1, /* constant shift costs */
468 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
469 0, /* cost of multiply per each bit set */
470 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
471 1, /* cost of movsx */
472 1, /* cost of movzx */
473 16, /* "large" insn */
475 4, /* cost for loading QImode using movzbl */
476 {4, 4, 4}, /* cost of loading integer registers
477 in QImode, HImode and SImode.
478 Relative to reg-reg move (2). */
479 {4, 4, 4}, /* cost of storing integer registers */
480 3, /* cost of reg,reg fld/fst */
481 {12, 12, 12}, /* cost of loading fp registers
482 in SFmode, DFmode and XFmode */
483 {4, 4, 4}, /* cost of loading integer registers */
484 6, /* cost of moving MMX register */
485 {12, 12}, /* cost of loading MMX registers
486 in SImode and DImode */
487 {12, 12}, /* cost of storing MMX registers
488 in SImode and DImode */
489 6, /* cost of moving SSE register */
490 {12, 12, 12}, /* cost of loading SSE registers
491 in SImode, DImode and TImode */
492 {12, 12, 12}, /* cost of storing SSE registers
493 in SImode, DImode and TImode */
494 8, /* MMX or SSE register to integer */
495 128, /* size of prefetch block */
496 8, /* number of parallel prefetches */
498 6, /* cost of FADD and FSUB insns. */
499 8, /* cost of FMUL instruction. */
500 40, /* cost of FDIV instruction. */
501 3, /* cost of FABS instruction. */
502 3, /* cost of FCHS instruction. */
503 44, /* cost of FSQRT instruction. */
506 const struct processor_costs
*ix86_cost
= &pentium_cost
;
/* Processor feature/optimization bitmasks.  One bit per PROCESSOR_*
   enumerator; OR them together to describe the set of CPUs a tuning
   flag applies to.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
520 const int x86_use_leave
= m_386
| m_K6
| m_ATHLON_K8
;
521 const int x86_push_memory
= m_386
| m_K6
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
522 const int x86_zero_extend_with_and
= m_486
| m_PENT
;
523 const int x86_movx
= m_ATHLON_K8
| m_PPRO
| m_PENT4
| m_NOCONA
/* m_386 | m_K6 */;
524 const int x86_double_with_add
= ~m_386
;
525 const int x86_use_bit_test
= m_386
;
526 const int x86_unroll_strlen
= m_486
| m_PENT
| m_PPRO
| m_ATHLON_K8
| m_K6
;
527 const int x86_cmove
= m_PPRO
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
528 const int x86_3dnow_a
= m_ATHLON_K8
;
529 const int x86_deep_branch
= m_PPRO
| m_K6
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
530 const int x86_branch_hints
= m_PENT4
| m_NOCONA
;
531 const int x86_use_sahf
= m_PPRO
| m_K6
| m_PENT4
| m_NOCONA
;
532 const int x86_partial_reg_stall
= m_PPRO
;
533 const int x86_use_loop
= m_K6
;
534 const int x86_use_fiop
= ~(m_PPRO
| m_ATHLON_K8
| m_PENT
);
535 const int x86_use_mov0
= m_K6
;
536 const int x86_use_cltd
= ~(m_PENT
| m_K6
);
537 const int x86_read_modify_write
= ~m_PENT
;
538 const int x86_read_modify
= ~(m_PENT
| m_PPRO
);
539 const int x86_split_long_moves
= m_PPRO
;
540 const int x86_promote_QImode
= m_K6
| m_PENT
| m_386
| m_486
| m_ATHLON_K8
;
541 const int x86_fast_prefix
= ~(m_PENT
| m_486
| m_386
);
542 const int x86_single_stringop
= m_386
| m_PENT4
| m_NOCONA
;
543 const int x86_qimode_math
= ~(0);
544 const int x86_promote_qi_regs
= 0;
545 const int x86_himode_math
= ~(m_PPRO
);
546 const int x86_promote_hi_regs
= m_PPRO
;
547 const int x86_sub_esp_4
= m_ATHLON_K8
| m_PPRO
| m_PENT4
| m_NOCONA
;
548 const int x86_sub_esp_8
= m_ATHLON_K8
| m_PPRO
| m_386
| m_486
| m_PENT4
| m_NOCONA
;
549 const int x86_add_esp_4
= m_ATHLON_K8
| m_K6
| m_PENT4
| m_NOCONA
;
550 const int x86_add_esp_8
= m_ATHLON_K8
| m_PPRO
| m_K6
| m_386
| m_486
| m_PENT4
| m_NOCONA
;
551 const int x86_integer_DFmode_moves
= ~(m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_PPRO
);
552 const int x86_partial_reg_dependency
= m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
553 const int x86_memory_mismatch_stall
= m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
554 const int x86_accumulate_outgoing_args
= m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_PPRO
;
555 const int x86_prologue_using_move
= m_ATHLON_K8
| m_PPRO
;
556 const int x86_epilogue_using_move
= m_ATHLON_K8
| m_PPRO
;
557 const int x86_decompose_lea
= m_PENT4
| m_NOCONA
;
558 const int x86_shift1
= ~m_486
;
559 const int x86_arch_always_fancy_math_387
= m_PENT
| m_PPRO
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
560 const int x86_sse_partial_reg_dependency
= m_PENT4
| m_NOCONA
| m_PPRO
;
561 /* Set for machines where the type and dependencies are resolved on SSE register
562 parts instead of whole registers, so we may maintain just lower part of
563 scalar values in proper format leaving the upper part undefined. */
564 const int x86_sse_partial_regs
= m_ATHLON_K8
;
565 /* Athlon optimizes partial-register FPS special case, thus avoiding the
566 need for extra instructions beforehand */
567 const int x86_sse_partial_regs_for_cvtsd2ss
= 0;
568 const int x86_sse_typeless_stores
= m_ATHLON_K8
;
569 const int x86_sse_load0_by_pxor
= m_PPRO
| m_PENT4
| m_NOCONA
;
570 const int x86_use_ffreep
= m_ATHLON_K8
;
571 const int x86_rep_movl_optimal
= m_386
| m_PENT
| m_PPRO
| m_K6
;
572 const int x86_inter_unit_moves
= ~(m_ATHLON_K8
);
573 const int x86_ext_80387_constants
= m_K6
| m_ATHLON
| m_PENT4
| m_NOCONA
| m_PPRO
;
574 /* Some CPU cores are not able to predict more than 4 branch instructions in
575 the 16 byte window. */
576 const int x86_four_jump_limit
= m_PPRO
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
583 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
584 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
585 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
586 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
588 /* Array of the smallest class containing reg number REGNO, indexed by
589 REGNO. Used by REGNO_REG_CLASS in i386.h. */
591 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
594 AREG
, DREG
, CREG
, BREG
,
596 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
598 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
599 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
602 /* flags, fpsr, dirflag, frame */
603 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
604 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
606 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
608 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
609 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
610 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
614 /* The "default" register map used in 32bit mode. */
616 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
618 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
619 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
620 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
621 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
622 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
623 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
624 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
627 static int const x86_64_int_parameter_registers
[6] =
629 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
630 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
/* x86-64 integer value-return registers.  NOTE(review): gcc regno 1 is
   %edx/%rdx (see the SVR4 DWARF numbering comment below), so the second
   entry's old "RDI" comment was wrong -- corrected to RDX.  */
static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
638 /* The "default" register map used in 64bit mode. */
639 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
641 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
642 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
643 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
644 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
645 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
646 8,9,10,11,12,13,14,15, /* extended integer registers */
647 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
650 /* Define the register numbers to be used in Dwarf debugging information.
651 The SVR4 reference port C compiler uses the following register numbers
652 in its Dwarf output code:
653 0 for %eax (gcc regno = 0)
654 1 for %ecx (gcc regno = 2)
655 2 for %edx (gcc regno = 1)
656 3 for %ebx (gcc regno = 3)
657 4 for %esp (gcc regno = 7)
658 5 for %ebp (gcc regno = 6)
659 6 for %esi (gcc regno = 4)
660 7 for %edi (gcc regno = 5)
661 The following three DWARF register numbers are never generated by
662 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
663 believes these numbers have these meanings.
664 8 for %eip (no gcc equivalent)
665 9 for %eflags (gcc regno = 17)
666 10 for %trapno (no gcc equivalent)
667 It is not at all clear how we should number the FP stack registers
668 for the x86 architecture. If the version of SDB on x86/svr4 were
669 a bit less brain dead with respect to floating-point then we would
670 have a precedent to follow with respect to DWARF register numbers
671 for x86 FP registers, but the SDB on x86/svr4 is so completely
672 broken with respect to FP registers that it is hardly worth thinking
673 of it as something to strive for compatibility with.
674 The version of x86/svr4 SDB I have at the moment does (partially)
675 seem to believe that DWARF register number 11 is associated with
676 the x86 register %st(0), but that's about all. Higher DWARF
677 register numbers don't seem to be associated with anything in
678 particular, and even for DWARF regno 11, SDB only seems to under-
679 stand that it should say that a variable lives in %st(0) (when
680 asked via an `=' command) if we said it was in DWARF regno 11,
681 but SDB still prints garbage when asked for the value of the
682 variable in question (via a `/' command).
683 (Also note that the labels SDB prints for various FP stack regs
684 when doing an `x' command are all wrong.)
685 Note that these problems generally don't affect the native SVR4
686 C compiler because it doesn't allow the use of -O with -g and
687 because when it is *not* optimizing, it allocates a memory
688 location for each floating-point variable, and the memory
689 location is what gets described in the DWARF AT_location
690 attribute for the variable in question.
691 Regardless of the severe mental illness of the x86/svr4 SDB, we
692 do something sensible here and we use the following DWARF
693 register numbers. Note that these are all stack-top-relative
695 11 for %st(0) (gcc regno = 8)
696 12 for %st(1) (gcc regno = 9)
697 13 for %st(2) (gcc regno = 10)
698 14 for %st(3) (gcc regno = 11)
699 15 for %st(4) (gcc regno = 12)
700 16 for %st(5) (gcc regno = 13)
701 17 for %st(6) (gcc regno = 14)
702 18 for %st(7) (gcc regno = 15)
704 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
706 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
707 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
708 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
709 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
710 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
711 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
712 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
715 /* Test and compare insns in i386.md store the information needed to
716 generate branch and scc insns here. */
718 rtx ix86_compare_op0
= NULL_RTX
;
719 rtx ix86_compare_op1
= NULL_RTX
;
/* Number of temporary stack slots tracked per function (see
   ix86_init_machine_status users).  */
#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
725 /* Define the structure for the machine field in struct function. */
727 struct stack_local_entry
GTY(())
732 struct stack_local_entry
*next
;
735 /* Structure describing stack frame layout.
736 Stack grows downward:
742 saved frame pointer if frame_pointer_needed
743 <- HARD_FRAME_POINTER
749 > to_allocate <- FRAME_POINTER
761 int outgoing_arguments_size
;
764 HOST_WIDE_INT to_allocate
;
765 /* The offsets relative to ARG_POINTER. */
766 HOST_WIDE_INT frame_pointer_offset
;
767 HOST_WIDE_INT hard_frame_pointer_offset
;
768 HOST_WIDE_INT stack_pointer_offset
;
770 /* When save_regs_using_mov is set, emit prologue using
771 move instead of push instructions. */
772 bool save_regs_using_mov
;
775 /* Used to enable/disable debugging features. */
776 const char *ix86_debug_arg_string
, *ix86_debug_addr_string
;
777 /* Code model option as passed by user. */
778 const char *ix86_cmodel_string
;
780 enum cmodel ix86_cmodel
;
782 const char *ix86_asm_string
;
783 enum asm_dialect ix86_asm_dialect
= ASM_ATT
;
785 const char *ix86_tls_dialect_string
;
786 enum tls_dialect ix86_tls_dialect
= TLS_DIALECT_GNU
;
788 /* Which unit we are generating floating point math for. */
789 enum fpmath_unit ix86_fpmath
;
791 /* Which cpu are we scheduling for. */
792 enum processor_type ix86_tune
;
793 /* Which instruction set architecture to use. */
794 enum processor_type ix86_arch
;
796 /* Strings to hold which cpu and instruction set architecture to use. */
797 const char *ix86_tune_string
; /* for -mtune=<xxx> */
798 const char *ix86_arch_string
; /* for -march=<xxx> */
799 const char *ix86_fpmath_string
; /* for -mfpmath=<xxx> */
801 /* # of registers to use to pass arguments. */
802 const char *ix86_regparm_string
;
804 /* true if sse prefetch instruction is not NOOP. */
805 int x86_prefetch_sse
;
807 /* ix86_regparm_string as a number */
810 /* Alignment to use for loops and jumps: */
812 /* Power of two alignment for loops. */
813 const char *ix86_align_loops_string
;
815 /* Power of two alignment for non-loop jumps. */
816 const char *ix86_align_jumps_string
;
818 /* Power of two alignment for stack boundary in bytes. */
819 const char *ix86_preferred_stack_boundary_string
;
821 /* Preferred alignment for stack boundary in bits. */
822 int ix86_preferred_stack_boundary
;
824 /* Values 1-5: see jump.c */
825 int ix86_branch_cost
;
826 const char *ix86_branch_cost_string
;
828 /* Power of two alignment for functions. */
829 const char *ix86_align_funcs_string
;
831 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
832 static char internal_label_prefix
[16];
833 static int internal_label_prefix_len
;
835 static int local_symbolic_operand (rtx
, enum machine_mode
);
836 static int tls_symbolic_operand_1 (rtx
, enum tls_model
);
837 static void output_pic_addr_const (FILE *, rtx
, int);
838 static void put_condition_code (enum rtx_code
, enum machine_mode
,
840 static const char *get_some_local_dynamic_name (void);
841 static int get_some_local_dynamic_name_1 (rtx
*, void *);
842 static rtx
maybe_get_pool_constant (rtx
);
843 static rtx
ix86_expand_int_compare (enum rtx_code
, rtx
, rtx
);
844 static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code
, rtx
*,
846 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
847 static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode
,
849 static rtx
get_thread_pointer (int);
850 static rtx
legitimize_tls_address (rtx
, enum tls_model
, int);
851 static void get_pc_thunk_name (char [32], unsigned int);
852 static rtx
gen_push (rtx
);
853 static int memory_address_length (rtx addr
);
854 static int ix86_flags_dependant (rtx
, rtx
, enum attr_type
);
855 static int ix86_agi_dependant (rtx
, rtx
, enum attr_type
);
856 static struct machine_function
* ix86_init_machine_status (void);
857 static int ix86_split_to_parts (rtx
, rtx
*, enum machine_mode
);
858 static int ix86_nsaved_regs (void);
859 static void ix86_emit_save_regs (void);
860 static void ix86_emit_save_regs_using_mov (rtx
, HOST_WIDE_INT
);
861 static void ix86_emit_restore_regs_using_mov (rtx
, HOST_WIDE_INT
, int);
862 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT
);
863 static HOST_WIDE_INT
ix86_GOT_alias_set (void);
864 static void ix86_adjust_counter (rtx
, HOST_WIDE_INT
);
865 static rtx
ix86_expand_aligntest (rtx
, int);
866 static void ix86_expand_strlensi_unroll_1 (rtx
, rtx
, rtx
);
867 static int ix86_issue_rate (void);
868 static int ix86_adjust_cost (rtx
, rtx
, rtx
, int);
869 static int ia32_multipass_dfa_lookahead (void);
870 static void ix86_init_mmx_sse_builtins (void);
871 static rtx
x86_this_parameter (tree
);
872 static void x86_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
,
873 HOST_WIDE_INT
, tree
);
874 static bool x86_can_output_mi_thunk (tree
, HOST_WIDE_INT
, HOST_WIDE_INT
, tree
);
875 static void x86_file_start (void);
876 static void ix86_reorg (void);
877 static bool ix86_expand_carry_flag_compare (enum rtx_code
, rtx
, rtx
, rtx
*);
878 static tree
ix86_build_builtin_va_list (void);
879 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*, enum machine_mode
,
881 static tree
ix86_gimplify_va_arg (tree
, tree
, tree
*, tree
*);
885 rtx base
, index
, disp
;
887 enum ix86_address_seg
{ SEG_DEFAULT
, SEG_FS
, SEG_GS
} seg
;
890 static int ix86_decompose_address (rtx
, struct ix86_address
*);
891 static int ix86_address_cost (rtx
);
892 static bool ix86_cannot_force_const_mem (rtx
);
893 static rtx
ix86_delegitimize_address (rtx
);
895 struct builtin_description
;
896 static rtx
ix86_expand_sse_comi (const struct builtin_description
*,
898 static rtx
ix86_expand_sse_compare (const struct builtin_description
*,
900 static rtx
ix86_expand_unop1_builtin (enum insn_code
, tree
, rtx
);
901 static rtx
ix86_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
902 static rtx
ix86_expand_binop_builtin (enum insn_code
, tree
, rtx
);
903 static rtx
ix86_expand_store_builtin (enum insn_code
, tree
);
904 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
905 static enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code
);
906 static void ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*,
907 enum rtx_code
*, enum rtx_code
*);
908 static rtx
ix86_expand_fp_compare (enum rtx_code
, rtx
, rtx
, rtx
, rtx
*, rtx
*);
909 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code
);
910 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code
);
911 static int ix86_fp_comparison_sahf_cost (enum rtx_code code
);
912 static int ix86_fp_comparison_cost (enum rtx_code code
);
913 static unsigned int ix86_select_alt_pic_regnum (void);
914 static int ix86_save_reg (unsigned int, int);
915 static void ix86_compute_frame_layout (struct ix86_frame
*);
916 static int ix86_comp_type_attributes (tree
, tree
);
917 static int ix86_function_regparm (tree
, tree
);
918 const struct attribute_spec ix86_attribute_table
[];
919 static bool ix86_function_ok_for_sibcall (tree
, tree
);
920 static tree
ix86_handle_cdecl_attribute (tree
*, tree
, tree
, int, bool *);
921 static tree
ix86_handle_regparm_attribute (tree
*, tree
, tree
, int, bool *);
922 static int ix86_value_regno (enum machine_mode
);
923 static bool contains_128bit_aligned_vector_p (tree
);
924 static rtx
ix86_struct_value_rtx (tree
, int);
925 static bool ix86_ms_bitfield_layout_p (tree
);
926 static tree
ix86_handle_struct_attribute (tree
*, tree
, tree
, int, bool *);
927 static int extended_reg_mentioned_1 (rtx
*, void *);
928 static bool ix86_rtx_costs (rtx
, int, int, int *);
929 static int min_insn_size (rtx
);
930 static tree
ix86_md_asm_clobbers (tree clobbers
);
932 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
933 static void ix86_svr3_asm_out_constructor (rtx
, int);
936 /* Register class used for passing given 64bit part of the argument.
937 These represent classes as documented by the PS ABI, with the exception
938 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
939 use SF or DFmode move instead of DImode to avoid reformatting penalties.
941 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
942 whenever possible (upper half does contain padding).
944 enum x86_64_reg_class
947 X86_64_INTEGER_CLASS
,
948 X86_64_INTEGERSI_CLASS
,
957 static const char * const x86_64_reg_class_name
[] =
958 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
960 #define MAX_CLASSES 4
961 static int classify_argument (enum machine_mode
, tree
,
962 enum x86_64_reg_class
[MAX_CLASSES
], int);
963 static int examine_argument (enum machine_mode
, tree
, int, int *, int *);
964 static rtx
construct_container (enum machine_mode
, tree
, int, int, int,
966 static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class
,
967 enum x86_64_reg_class
);
969 /* Table of constants used by fldpi, fldln2, etc.... */
970 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
971 static bool ext_80387_constants_init
= 0;
972 static void init_ext_80387_constants (void);
974 /* Initialize the GCC target structure. */
975 #undef TARGET_ATTRIBUTE_TABLE
976 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
977 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
978 # undef TARGET_MERGE_DECL_ATTRIBUTES
979 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
982 #undef TARGET_COMP_TYPE_ATTRIBUTES
983 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
985 #undef TARGET_INIT_BUILTINS
986 #define TARGET_INIT_BUILTINS ix86_init_builtins
988 #undef TARGET_EXPAND_BUILTIN
989 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
991 #undef TARGET_ASM_FUNCTION_EPILOGUE
992 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
994 #undef TARGET_ASM_OPEN_PAREN
995 #define TARGET_ASM_OPEN_PAREN ""
996 #undef TARGET_ASM_CLOSE_PAREN
997 #define TARGET_ASM_CLOSE_PAREN ""
999 #undef TARGET_ASM_ALIGNED_HI_OP
1000 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1001 #undef TARGET_ASM_ALIGNED_SI_OP
1002 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1004 #undef TARGET_ASM_ALIGNED_DI_OP
1005 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1008 #undef TARGET_ASM_UNALIGNED_HI_OP
1009 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1010 #undef TARGET_ASM_UNALIGNED_SI_OP
1011 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1012 #undef TARGET_ASM_UNALIGNED_DI_OP
1013 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1015 #undef TARGET_SCHED_ADJUST_COST
1016 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1017 #undef TARGET_SCHED_ISSUE_RATE
1018 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1019 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
1020 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE hook_int_void_1
1021 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1022 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1023 ia32_multipass_dfa_lookahead
1025 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1026 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1029 #undef TARGET_HAVE_TLS
1030 #define TARGET_HAVE_TLS true
1032 #undef TARGET_CANNOT_FORCE_CONST_MEM
1033 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1035 #undef TARGET_DELEGITIMIZE_ADDRESS
1036 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1038 #undef TARGET_MS_BITFIELD_LAYOUT_P
1039 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1041 #undef TARGET_ASM_OUTPUT_MI_THUNK
1042 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1043 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1044 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1046 #undef TARGET_ASM_FILE_START
1047 #define TARGET_ASM_FILE_START x86_file_start
1049 #undef TARGET_RTX_COSTS
1050 #define TARGET_RTX_COSTS ix86_rtx_costs
1051 #undef TARGET_ADDRESS_COST
1052 #define TARGET_ADDRESS_COST ix86_address_cost
1054 #undef TARGET_FIXED_CONDITION_CODE_REGS
1055 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1056 #undef TARGET_CC_MODES_COMPATIBLE
1057 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1059 #undef TARGET_MACHINE_DEPENDENT_REORG
1060 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1062 #undef TARGET_BUILD_BUILTIN_VA_LIST
1063 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1065 #undef TARGET_MD_ASM_CLOBBERS
1066 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1068 #undef TARGET_PROMOTE_PROTOTYPES
1069 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1070 #undef TARGET_STRUCT_VALUE_RTX
1071 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1072 #undef TARGET_SETUP_INCOMING_VARARGS
1073 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1075 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1076 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1078 struct gcc_target targetm
= TARGET_INITIALIZER
;
1081 /* The svr4 ABI for the i386 says that records and unions are returned
1083 #ifndef DEFAULT_PCC_STRUCT_RETURN
1084 #define DEFAULT_PCC_STRUCT_RETURN 1
1087 /* Sometimes certain combinations of command options do not make
1088 sense on a particular target machine. You can define a macro
1089 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1090 defined, is executed once just after all the command options have
1093 Don't use this macro to turn on various extra optimizations for
1094 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1097 override_options (void)
1100 /* Comes from final.c -- no real reason to change it. */
1101 #define MAX_CODE_ALIGN 16
1105 const struct processor_costs
*cost
; /* Processor costs */
1106 const int target_enable
; /* Target flags to enable. */
1107 const int target_disable
; /* Target flags to disable. */
1108 const int align_loop
; /* Default alignments. */
1109 const int align_loop_max_skip
;
1110 const int align_jump
;
1111 const int align_jump_max_skip
;
1112 const int align_func
;
1114 const processor_target_table
[PROCESSOR_max
] =
1116 {&i386_cost
, 0, 0, 4, 3, 4, 3, 4},
1117 {&i486_cost
, 0, 0, 16, 15, 16, 15, 16},
1118 {&pentium_cost
, 0, 0, 16, 7, 16, 7, 16},
1119 {&pentiumpro_cost
, 0, 0, 16, 15, 16, 7, 16},
1120 {&k6_cost
, 0, 0, 32, 7, 32, 7, 32},
1121 {&athlon_cost
, 0, 0, 16, 7, 16, 7, 16},
1122 {&pentium4_cost
, 0, 0, 0, 0, 0, 0, 0},
1123 {&k8_cost
, 0, 0, 16, 7, 16, 7, 16},
1124 {&nocona_cost
, 0, 0, 0, 0, 0, 0, 0}
1127 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
1130 const char *const name
; /* processor name or nickname. */
1131 const enum processor_type processor
;
1132 const enum pta_flags
1138 PTA_PREFETCH_SSE
= 16,
1144 const processor_alias_table
[] =
1146 {"i386", PROCESSOR_I386
, 0},
1147 {"i486", PROCESSOR_I486
, 0},
1148 {"i586", PROCESSOR_PENTIUM
, 0},
1149 {"pentium", PROCESSOR_PENTIUM
, 0},
1150 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
1151 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
1152 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1153 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1154 {"c3-2", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_SSE
},
1155 {"i686", PROCESSOR_PENTIUMPRO
, 0},
1156 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
1157 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
1158 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1159 {"pentium3m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1160 {"pentium-m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
| PTA_SSE2
},
1161 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1162 | PTA_MMX
| PTA_PREFETCH_SSE
},
1163 {"pentium4m", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1164 | PTA_MMX
| PTA_PREFETCH_SSE
},
1165 {"prescott", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
1166 | PTA_MMX
| PTA_PREFETCH_SSE
},
1167 {"nocona", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_64BIT
1168 | PTA_MMX
| PTA_PREFETCH_SSE
},
1169 {"k6", PROCESSOR_K6
, PTA_MMX
},
1170 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1171 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1172 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1174 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
1175 | PTA_3DNOW
| PTA_3DNOW_A
},
1176 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1177 | PTA_3DNOW_A
| PTA_SSE
},
1178 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1179 | PTA_3DNOW_A
| PTA_SSE
},
1180 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1181 | PTA_3DNOW_A
| PTA_SSE
},
1182 {"x86-64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_64BIT
1183 | PTA_SSE
| PTA_SSE2
},
1184 {"k8", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1185 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1186 {"opteron", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1187 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1188 {"athlon64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1189 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1190 {"athlon-fx", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1191 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1194 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
1196 /* Set the default values for switches whose default depends on TARGET_64BIT
1197 in case they weren't overwritten by command line options. */
1200 if (flag_omit_frame_pointer
== 2)
1201 flag_omit_frame_pointer
= 1;
1202 if (flag_asynchronous_unwind_tables
== 2)
1203 flag_asynchronous_unwind_tables
= 1;
1204 if (flag_pcc_struct_return
== 2)
1205 flag_pcc_struct_return
= 0;
1209 if (flag_omit_frame_pointer
== 2)
1210 flag_omit_frame_pointer
= 0;
1211 if (flag_asynchronous_unwind_tables
== 2)
1212 flag_asynchronous_unwind_tables
= 0;
1213 if (flag_pcc_struct_return
== 2)
1214 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
1217 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1218 SUBTARGET_OVERRIDE_OPTIONS
;
1221 if (!ix86_tune_string
&& ix86_arch_string
)
1222 ix86_tune_string
= ix86_arch_string
;
1223 if (!ix86_tune_string
)
1224 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
1225 if (!ix86_arch_string
)
1226 ix86_arch_string
= TARGET_64BIT
? "x86-64" : "i386";
1228 if (ix86_cmodel_string
!= 0)
1230 if (!strcmp (ix86_cmodel_string
, "small"))
1231 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1233 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string
);
1234 else if (!strcmp (ix86_cmodel_string
, "32"))
1235 ix86_cmodel
= CM_32
;
1236 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
1237 ix86_cmodel
= CM_KERNEL
;
1238 else if (!strcmp (ix86_cmodel_string
, "medium") && !flag_pic
)
1239 ix86_cmodel
= CM_MEDIUM
;
1240 else if (!strcmp (ix86_cmodel_string
, "large") && !flag_pic
)
1241 ix86_cmodel
= CM_LARGE
;
1243 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
1247 ix86_cmodel
= CM_32
;
1249 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1251 if (ix86_asm_string
!= 0)
1253 if (!strcmp (ix86_asm_string
, "intel"))
1254 ix86_asm_dialect
= ASM_INTEL
;
1255 else if (!strcmp (ix86_asm_string
, "att"))
1256 ix86_asm_dialect
= ASM_ATT
;
1258 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
1260 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
1261 error ("code model `%s' not supported in the %s bit mode",
1262 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
1263 if (ix86_cmodel
== CM_LARGE
)
1264 sorry ("code model `large' not supported yet");
1265 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
1266 sorry ("%i-bit mode not compiled in",
1267 (target_flags
& MASK_64BIT
) ? 64 : 32);
1269 for (i
= 0; i
< pta_size
; i
++)
1270 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
1272 ix86_arch
= processor_alias_table
[i
].processor
;
1273 /* Default cpu tuning to the architecture. */
1274 ix86_tune
= ix86_arch
;
1275 if (processor_alias_table
[i
].flags
& PTA_MMX
1276 && !(target_flags_explicit
& MASK_MMX
))
1277 target_flags
|= MASK_MMX
;
1278 if (processor_alias_table
[i
].flags
& PTA_3DNOW
1279 && !(target_flags_explicit
& MASK_3DNOW
))
1280 target_flags
|= MASK_3DNOW
;
1281 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
1282 && !(target_flags_explicit
& MASK_3DNOW_A
))
1283 target_flags
|= MASK_3DNOW_A
;
1284 if (processor_alias_table
[i
].flags
& PTA_SSE
1285 && !(target_flags_explicit
& MASK_SSE
))
1286 target_flags
|= MASK_SSE
;
1287 if (processor_alias_table
[i
].flags
& PTA_SSE2
1288 && !(target_flags_explicit
& MASK_SSE2
))
1289 target_flags
|= MASK_SSE2
;
1290 if (processor_alias_table
[i
].flags
& PTA_SSE3
1291 && !(target_flags_explicit
& MASK_SSE3
))
1292 target_flags
|= MASK_SSE3
;
1293 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1294 x86_prefetch_sse
= true;
1295 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1296 error ("CPU you selected does not support x86-64 instruction set");
1301 error ("bad value (%s) for -march= switch", ix86_arch_string
);
1303 for (i
= 0; i
< pta_size
; i
++)
1304 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
1306 ix86_tune
= processor_alias_table
[i
].processor
;
1307 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1308 error ("CPU you selected does not support x86-64 instruction set");
1310 /* Intel CPUs have always interpreted SSE prefetch instructions as
1311 NOPs; so, we can enable SSE prefetch instructions even when
1312 -mtune (rather than -march) points us to a processor that has them.
1313 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1314 higher processors. */
1315 if (TARGET_CMOVE
&& (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
))
1316 x86_prefetch_sse
= true;
1320 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
1323 ix86_cost
= &size_cost
;
1325 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
1326 target_flags
|= processor_target_table
[ix86_tune
].target_enable
;
1327 target_flags
&= ~processor_target_table
[ix86_tune
].target_disable
;
1329 /* Arrange to set up i386_stack_locals for all functions. */
1330 init_machine_status
= ix86_init_machine_status
;
1332 /* Validate -mregparm= value. */
1333 if (ix86_regparm_string
)
1335 i
= atoi (ix86_regparm_string
);
1336 if (i
< 0 || i
> REGPARM_MAX
)
1337 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
1343 ix86_regparm
= REGPARM_MAX
;
1345 /* If the user has provided any of the -malign-* options,
1346 warn and use that value only if -falign-* is not set.
1347 Remove this code in GCC 3.2 or later. */
1348 if (ix86_align_loops_string
)
1350 warning ("-malign-loops is obsolete, use -falign-loops");
1351 if (align_loops
== 0)
1353 i
= atoi (ix86_align_loops_string
);
1354 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1355 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1357 align_loops
= 1 << i
;
1361 if (ix86_align_jumps_string
)
1363 warning ("-malign-jumps is obsolete, use -falign-jumps");
1364 if (align_jumps
== 0)
1366 i
= atoi (ix86_align_jumps_string
);
1367 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1368 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1370 align_jumps
= 1 << i
;
1374 if (ix86_align_funcs_string
)
1376 warning ("-malign-functions is obsolete, use -falign-functions");
1377 if (align_functions
== 0)
1379 i
= atoi (ix86_align_funcs_string
);
1380 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1381 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1383 align_functions
= 1 << i
;
1387 /* Default align_* from the processor table. */
1388 if (align_loops
== 0)
1390 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
1391 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
1393 if (align_jumps
== 0)
1395 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
1396 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
1398 if (align_functions
== 0)
1400 align_functions
= processor_target_table
[ix86_tune
].align_func
;
1403 /* Validate -mpreferred-stack-boundary= value, or provide default.
1404 The default of 128 bits is for Pentium III's SSE __m128, but we
1405 don't want additional code to keep the stack aligned when
1406 optimizing for code size. */
1407 ix86_preferred_stack_boundary
= (optimize_size
1408 ? TARGET_64BIT
? 128 : 32
1410 if (ix86_preferred_stack_boundary_string
)
1412 i
= atoi (ix86_preferred_stack_boundary_string
);
1413 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
1414 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
1415 TARGET_64BIT
? 4 : 2);
1417 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
1420 /* Validate -mbranch-cost= value, or provide default. */
1421 ix86_branch_cost
= processor_target_table
[ix86_tune
].cost
->branch_cost
;
1422 if (ix86_branch_cost_string
)
1424 i
= atoi (ix86_branch_cost_string
);
1426 error ("-mbranch-cost=%d is not between 0 and 5", i
);
1428 ix86_branch_cost
= i
;
1431 if (ix86_tls_dialect_string
)
1433 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
1434 ix86_tls_dialect
= TLS_DIALECT_GNU
;
1435 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
1436 ix86_tls_dialect
= TLS_DIALECT_SUN
;
1438 error ("bad value (%s) for -mtls-dialect= switch",
1439 ix86_tls_dialect_string
);
1442 /* Keep nonleaf frame pointers. */
1443 if (TARGET_OMIT_LEAF_FRAME_POINTER
)
1444 flag_omit_frame_pointer
= 1;
1446 /* If we're doing fast math, we don't care about comparison order
1447 wrt NaNs. This lets us use a shorter comparison sequence. */
1448 if (flag_unsafe_math_optimizations
)
1449 target_flags
&= ~MASK_IEEE_FP
;
1451 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1452 since the insns won't need emulation. */
1453 if (x86_arch_always_fancy_math_387
& (1 << ix86_arch
))
1454 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
1456 /* Turn on SSE2 builtins for -msse3. */
1458 target_flags
|= MASK_SSE2
;
1460 /* Turn on SSE builtins for -msse2. */
1462 target_flags
|= MASK_SSE
;
1466 if (TARGET_ALIGN_DOUBLE
)
1467 error ("-malign-double makes no sense in the 64bit mode");
1469 error ("-mrtd calling convention not supported in the 64bit mode");
1470 /* Enable by default the SSE and MMX builtins. */
1471 target_flags
|= (MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
);
1472 ix86_fpmath
= FPMATH_SSE
;
1476 ix86_fpmath
= FPMATH_387
;
1477 /* i386 ABI does not specify red zone. It still makes sense to use it
1478 when the programmer takes care to keep the stack from being destroyed. */
1479 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
1480 target_flags
|= MASK_NO_RED_ZONE
;
1483 if (ix86_fpmath_string
!= 0)
1485 if (! strcmp (ix86_fpmath_string
, "387"))
1486 ix86_fpmath
= FPMATH_387
;
1487 else if (! strcmp (ix86_fpmath_string
, "sse"))
1491 warning ("SSE instruction set disabled, using 387 arithmetics");
1492 ix86_fpmath
= FPMATH_387
;
1495 ix86_fpmath
= FPMATH_SSE
;
1497 else if (! strcmp (ix86_fpmath_string
, "387,sse")
1498 || ! strcmp (ix86_fpmath_string
, "sse,387"))
1502 warning ("SSE instruction set disabled, using 387 arithmetics");
1503 ix86_fpmath
= FPMATH_387
;
1505 else if (!TARGET_80387
)
1507 warning ("387 instruction set disabled, using SSE arithmetics");
1508 ix86_fpmath
= FPMATH_SSE
;
1511 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
1514 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
1517 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1521 target_flags
|= MASK_MMX
;
1522 x86_prefetch_sse
= true;
1525 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1528 target_flags
|= MASK_MMX
;
1529 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1530 extensions it adds. */
1531 if (x86_3dnow_a
& (1 << ix86_arch
))
1532 target_flags
|= MASK_3DNOW_A
;
1534 if ((x86_accumulate_outgoing_args
& TUNEMASK
)
1535 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
1537 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
1539 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1542 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
1543 p
= strchr (internal_label_prefix
, 'X');
1544 internal_label_prefix_len
= p
- internal_label_prefix
;
1550 optimization_options (int level
, int size ATTRIBUTE_UNUSED
)
1552 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1553 make the problem with not enough registers even worse. */
1554 #ifdef INSN_SCHEDULING
1556 flag_schedule_insns
= 0;
1559 /* The default values of these switches depend on the TARGET_64BIT
1560 that is not known at this moment. Mark these values with 2 and
1561 let the user override these. In case there is no command line option
1562 specifying them, we will set the defaults in override_options. */
1564 flag_omit_frame_pointer
= 2;
1565 flag_pcc_struct_return
= 2;
1566 flag_asynchronous_unwind_tables
= 2;
1569 /* Table of valid machine attributes. */
1570 const struct attribute_spec ix86_attribute_table
[] =
1572 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1573 /* Stdcall attribute says callee is responsible for popping arguments
1574 if they are not variable. */
1575 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1576 /* Fastcall attribute says callee is responsible for popping arguments
1577 if they are not variable. */
1578 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1579 /* Cdecl attribute says the callee is a normal C declaration */
1580 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1581 /* Regparm attribute specifies how many integer arguments are to be
1582 passed in registers. */
1583 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute
},
1584 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1585 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute
},
1586 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute
},
1587 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
1589 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
1590 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
1591 { NULL
, 0, 0, false, false, false, NULL
}
1594 /* Decide whether we can make a sibling call to a function. DECL is the
1595 declaration of the function being targeted by the call and EXP is the
1596 CALL_EXPR representing the call. */
1599 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
1601 /* If we are generating position-independent code, we cannot sibcall
1602 optimize any indirect call, or a direct call to a global function,
1603 as the PLT requires %ebx be live. */
1604 if (!TARGET_64BIT
&& flag_pic
&& (!decl
|| TREE_PUBLIC (decl
)))
1607 /* If we are returning floats on the 80387 register stack, we cannot
1608 make a sibcall from a function that doesn't return a float to a
1609 function that does or, conversely, from a function that does return
1610 a float to a function that doesn't; the necessary stack adjustment
1611 would not be executed. */
1612 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp
)))
1613 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)))))
1616 /* If this call is indirect, we'll need to be able to use a call-clobbered
1617 register for the address of the target function. Make sure that all
1618 such registers are not used for passing parameters. */
1619 if (!decl
&& !TARGET_64BIT
)
1623 /* We're looking at the CALL_EXPR, we need the type of the function. */
1624 type
= TREE_OPERAND (exp
, 0); /* pointer expression */
1625 type
= TREE_TYPE (type
); /* pointer type */
1626 type
= TREE_TYPE (type
); /* function type */
1628 if (ix86_function_regparm (type
, NULL
) >= 3)
1630 /* ??? Need to count the actual number of registers to be used,
1631 not the possible number of registers. Fix later. */
1636 /* Otherwise okay. That also includes certain types of indirect calls. */
1640 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1641 arguments as in struct attribute_spec.handler. */
1643 ix86_handle_cdecl_attribute (tree
*node
, tree name
,
1644 tree args ATTRIBUTE_UNUSED
,
1645 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
1647 if (TREE_CODE (*node
) != FUNCTION_TYPE
1648 && TREE_CODE (*node
) != METHOD_TYPE
1649 && TREE_CODE (*node
) != FIELD_DECL
1650 && TREE_CODE (*node
) != TYPE_DECL
)
1652 warning ("`%s' attribute only applies to functions",
1653 IDENTIFIER_POINTER (name
));
1654 *no_add_attrs
= true;
1658 if (is_attribute_p ("fastcall", name
))
1660 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
1662 error ("fastcall and stdcall attributes are not compatible");
1664 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
1666 error ("fastcall and regparm attributes are not compatible");
1669 else if (is_attribute_p ("stdcall", name
))
1671 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
1673 error ("fastcall and stdcall attributes are not compatible");
1680 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name
));
1681 *no_add_attrs
= true;
1687 /* Handle a "regparm" attribute;
1688 arguments as in struct attribute_spec.handler. */
1690 ix86_handle_regparm_attribute (tree
*node
, tree name
, tree args
,
1691 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
1693 if (TREE_CODE (*node
) != FUNCTION_TYPE
1694 && TREE_CODE (*node
) != METHOD_TYPE
1695 && TREE_CODE (*node
) != FIELD_DECL
1696 && TREE_CODE (*node
) != TYPE_DECL
)
1698 warning ("`%s' attribute only applies to functions",
1699 IDENTIFIER_POINTER (name
));
1700 *no_add_attrs
= true;
1706 cst
= TREE_VALUE (args
);
1707 if (TREE_CODE (cst
) != INTEGER_CST
)
1709 warning ("`%s' attribute requires an integer constant argument",
1710 IDENTIFIER_POINTER (name
));
1711 *no_add_attrs
= true;
1713 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
1715 warning ("argument to `%s' attribute larger than %d",
1716 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
1717 *no_add_attrs
= true;
1720 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
1722 error ("fastcall and regparm attributes are not compatible");
1729 /* Return 0 if the attributes for two types are incompatible, 1 if they
1730 are compatible, and 2 if they are nearly compatible (which causes a
1731 warning to be generated). */
1734 ix86_comp_type_attributes (tree type1
, tree type2
)
1736 /* Check for mismatch of non-default calling convention. */
1737 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
1739 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
1742 /* Check for mismatched fastcall types */
1743 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1
))
1744 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2
)))
1747 /* Check for mismatched return types (cdecl vs stdcall). */
1748 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
1749 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
1751 if (ix86_function_regparm (type1
, NULL
)
1752 != ix86_function_regparm (type2
, NULL
))
1757 /* Return the regparm value for a function with the indicated TYPE and DECL.
1758 DECL may be NULL when calling function indirectly
1759 or considering a libcall. */
1762 ix86_function_regparm (tree type
, tree decl
)
1765 int regparm
= ix86_regparm
;
1766 bool user_convention
= false;
1770 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
1773 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
1774 user_convention
= true;
1777 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
1780 user_convention
= true;
1783 /* Use register calling convention for local functions when possible. */
1784 if (!TARGET_64BIT
&& !user_convention
&& decl
1785 && flag_unit_at_a_time
&& !profile_flag
)
1787 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
1790 /* We can't use regparm(3) for nested functions as these use
1791 static chain pointer in third argument. */
1792 if (DECL_CONTEXT (decl
) && !DECL_NO_STATIC_CHAIN (decl
))
1802 /* Return true if EAX is live at the start of the function. Used by
1803 ix86_expand_prologue to determine if we need special help before
1804 calling allocate_stack_worker. */
1807 ix86_eax_live_at_start_p (void)
1809 /* Cheat. Don't bother working forward from ix86_function_regparm
1810 to the function type to whether an actual argument is located in
1811 eax. Instead just look at cfg info, which is still close enough
1812 to correct at this point. This gives false positives for broken
1813 functions that might use uninitialized data that happens to be
1814 allocated in eax, but who cares? */
1815 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR
->global_live_at_end
, 0);
1818 /* Value is the number of bytes of arguments automatically
1819 popped when returning from a subroutine call.
1820 FUNDECL is the declaration node of the function (as a tree),
1821 FUNTYPE is the data type of the function (as a tree),
1822 or for a library call it is an identifier node for the subroutine name.
1823 SIZE is the number of bytes of arguments passed on the stack.
1825 On the 80386, the RTD insn may be used to pop them if the number
1826 of args is fixed, but if the number is variable then the caller
1827 must pop them all. RTD can't be used for library calls now
1828 because the library is compiled with the Unix compiler.
1829 Use of RTD is a selectable option, since it is incompatible with
1830 standard Unix calling sequences. If the option is not selected,
1831 the caller must always pop the args.
1833 The attribute stdcall is equivalent to RTD on a per module basis. */
1836 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
1838 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
1840 /* Cdecl functions override -mrtd, and never pop the stack. */
1841 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
1843 /* Stdcall and fastcall functions will pop the stack if not
1845 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
))
1846 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype
)))
1850 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
1851 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
1852 == void_type_node
)))
1856 /* Lose any fake structure return argument if it is passed on the stack. */
1857 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
1860 int nregs
= ix86_function_regparm (funtype
, fundecl
);
1863 return GET_MODE_SIZE (Pmode
);
1869 /* Argument support functions. */
1871 /* Return true when register may be used to pass function parameters. */
1873 ix86_function_arg_regno_p (int regno
)
1877 return (regno
< REGPARM_MAX
1878 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
1879 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
1881 /* RAX is used as hidden argument to va_arg functions. */
1884 for (i
= 0; i
< REGPARM_MAX
; i
++)
1885 if (regno
== x86_64_int_parameter_registers
[i
])
1890 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1891 for a call to a function whose data type is FNTYPE.
1892 For a library call, FNTYPE is 0. */
1895 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
1896 tree fntype
, /* tree ptr for function decl */
1897 rtx libname
, /* SYMBOL_REF of library name or 0 */
1900 static CUMULATIVE_ARGS zero_cum
;
1901 tree param
, next_param
;
1903 if (TARGET_DEBUG_ARG
)
1905 fprintf (stderr
, "\ninit_cumulative_args (");
1907 fprintf (stderr
, "fntype code = %s, ret code = %s",
1908 tree_code_name
[(int) TREE_CODE (fntype
)],
1909 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
1911 fprintf (stderr
, "no fntype");
1914 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
1919 /* Set up the number of registers to use for passing arguments. */
1921 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
1923 cum
->nregs
= ix86_regparm
;
1924 cum
->sse_nregs
= SSE_REGPARM_MAX
;
1925 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
1926 cum
->warn_sse
= true;
1927 cum
->warn_mmx
= true;
1928 cum
->maybe_vaarg
= false;
1930 /* Use ecx and edx registers if function has fastcall attribute */
1931 if (fntype
&& !TARGET_64BIT
)
1933 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
1941 /* Determine if this function has variable arguments. This is
1942 indicated by the last argument being 'void_type_node' if there
1943 are no variable arguments. If there are variable arguments, then
1944 we won't pass anything in registers */
1946 if (cum
->nregs
|| !TARGET_MMX
|| !TARGET_SSE
)
1948 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
1949 param
!= 0; param
= next_param
)
1951 next_param
= TREE_CHAIN (param
);
1952 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
1963 cum
->maybe_vaarg
= true;
1967 if ((!fntype
&& !libname
)
1968 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
1969 cum
->maybe_vaarg
= 1;
1971 if (TARGET_DEBUG_ARG
)
1972 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
1977 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1978 of this code is to classify each 8bytes of incoming argument by the register
1979 class and assign registers accordingly. */
1981 /* Return the union class of CLASS1 and CLASS2.
1982 See the x86-64 PS ABI for details. */
1984 static enum x86_64_reg_class
1985 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
1987 /* Rule #1: If both classes are equal, this is the resulting class. */
1988 if (class1
== class2
)
1991 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1993 if (class1
== X86_64_NO_CLASS
)
1995 if (class2
== X86_64_NO_CLASS
)
1998 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1999 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
2000 return X86_64_MEMORY_CLASS
;
2002 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2003 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
2004 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
2005 return X86_64_INTEGERSI_CLASS
;
2006 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
2007 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
2008 return X86_64_INTEGER_CLASS
;
2010 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
2011 if (class1
== X86_64_X87_CLASS
|| class1
== X86_64_X87UP_CLASS
2012 || class2
== X86_64_X87_CLASS
|| class2
== X86_64_X87UP_CLASS
)
2013 return X86_64_MEMORY_CLASS
;
2015 /* Rule #6: Otherwise class SSE is used. */
2016 return X86_64_SSE_CLASS
;
2019 /* Classify the argument of type TYPE and mode MODE.
2020 CLASSES will be filled by the register class used to pass each word
2021 of the operand. The number of words is returned. In case the parameter
2022 should be passed in memory, 0 is returned. As a special case for zero
2023 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2025 BIT_OFFSET is used internally for handling records and specifies offset
2026 of the offset in bits modulo 256 to avoid overflow cases.
2028 See the x86-64 PS ABI for details.
2032 classify_argument (enum machine_mode mode
, tree type
,
2033 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
2035 HOST_WIDE_INT bytes
=
2036 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2037 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2039 /* Variable sized entities are always passed/returned in memory. */
2043 if (mode
!= VOIDmode
2044 && MUST_PASS_IN_STACK (mode
, type
))
2047 if (type
&& AGGREGATE_TYPE_P (type
))
2051 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
2053 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2057 for (i
= 0; i
< words
; i
++)
2058 classes
[i
] = X86_64_NO_CLASS
;
2060 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2061 signalize memory class, so handle it as special case. */
2064 classes
[0] = X86_64_NO_CLASS
;
2068 /* Classify each field of record and merge classes. */
2069 if (TREE_CODE (type
) == RECORD_TYPE
)
2071 /* For classes first merge in the field of the subclasses. */
2072 if (TYPE_BINFO (type
) && BINFO_BASE_BINFOS (TYPE_BINFO (type
)))
2074 tree bases
= BINFO_BASE_BINFOS (TYPE_BINFO (type
));
2075 int n_bases
= BINFO_N_BASE_BINFOS (TYPE_BINFO (type
));
2078 for (i
= 0; i
< n_bases
; ++i
)
2080 tree binfo
= TREE_VEC_ELT (bases
, i
);
2082 int offset
= tree_low_cst (BINFO_OFFSET (binfo
), 0) * 8;
2083 tree type
= BINFO_TYPE (binfo
);
2085 num
= classify_argument (TYPE_MODE (type
),
2087 (offset
+ bit_offset
) % 256);
2090 for (i
= 0; i
< num
; i
++)
2092 int pos
= (offset
+ (bit_offset
% 64)) / 8 / 8;
2094 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
2098 /* And now merge the fields of structure. */
2099 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
2101 if (TREE_CODE (field
) == FIELD_DECL
)
2105 /* Bitfields are always classified as integer. Handle them
2106 early, since later code would consider them to be
2107 misaligned integers. */
2108 if (DECL_BIT_FIELD (field
))
2110 for (i
= int_bit_position (field
) / 8 / 8;
2111 i
< (int_bit_position (field
)
2112 + tree_low_cst (DECL_SIZE (field
), 0)
2115 merge_classes (X86_64_INTEGER_CLASS
,
2120 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
2121 TREE_TYPE (field
), subclasses
,
2122 (int_bit_position (field
)
2123 + bit_offset
) % 256);
2126 for (i
= 0; i
< num
; i
++)
2129 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
2131 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
2137 /* Arrays are handled as small records. */
2138 else if (TREE_CODE (type
) == ARRAY_TYPE
)
2141 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
2142 TREE_TYPE (type
), subclasses
, bit_offset
);
2146 /* The partial classes are now full classes. */
2147 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
2148 subclasses
[0] = X86_64_SSE_CLASS
;
2149 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
2150 subclasses
[0] = X86_64_INTEGER_CLASS
;
2152 for (i
= 0; i
< words
; i
++)
2153 classes
[i
] = subclasses
[i
% num
];
2155 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2156 else if (TREE_CODE (type
) == UNION_TYPE
2157 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
2159 /* For classes first merge in the field of the subclasses. */
2160 if (TYPE_BINFO (type
) && BINFO_BASE_BINFOS (TYPE_BINFO (type
)))
2162 tree bases
= BINFO_BASE_BINFOS (TYPE_BINFO (type
));
2163 int n_bases
= BINFO_N_BASE_BINFOS (TYPE_BINFO (type
));
2166 for (i
= 0; i
< n_bases
; ++i
)
2168 tree binfo
= TREE_VEC_ELT (bases
, i
);
2170 int offset
= tree_low_cst (BINFO_OFFSET (binfo
), 0) * 8;
2171 tree type
= BINFO_TYPE (binfo
);
2173 num
= classify_argument (TYPE_MODE (type
),
2175 (offset
+ (bit_offset
% 64)) % 256);
2178 for (i
= 0; i
< num
; i
++)
2180 int pos
= (offset
+ (bit_offset
% 64)) / 8 / 8;
2182 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
2186 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
2188 if (TREE_CODE (field
) == FIELD_DECL
)
2191 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
2192 TREE_TYPE (field
), subclasses
,
2196 for (i
= 0; i
< num
; i
++)
2197 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
2201 else if (TREE_CODE (type
) == SET_TYPE
)
2205 classes
[0] = X86_64_INTEGERSI_CLASS
;
2208 else if (bytes
<= 8)
2210 classes
[0] = X86_64_INTEGER_CLASS
;
2213 else if (bytes
<= 12)
2215 classes
[0] = X86_64_INTEGER_CLASS
;
2216 classes
[1] = X86_64_INTEGERSI_CLASS
;
2221 classes
[0] = X86_64_INTEGER_CLASS
;
2222 classes
[1] = X86_64_INTEGER_CLASS
;
2229 /* Final merger cleanup. */
2230 for (i
= 0; i
< words
; i
++)
2232 /* If one class is MEMORY, everything should be passed in
2234 if (classes
[i
] == X86_64_MEMORY_CLASS
)
2237 /* The X86_64_SSEUP_CLASS should be always preceded by
2238 X86_64_SSE_CLASS. */
2239 if (classes
[i
] == X86_64_SSEUP_CLASS
2240 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
2241 classes
[i
] = X86_64_SSE_CLASS
;
2243 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2244 if (classes
[i
] == X86_64_X87UP_CLASS
2245 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
2246 classes
[i
] = X86_64_SSE_CLASS
;
2251 /* Compute alignment needed. We align all types to natural boundaries with
2252 exception of XFmode that is aligned to 64bits. */
2253 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
2255 int mode_alignment
= GET_MODE_BITSIZE (mode
);
2258 mode_alignment
= 128;
2259 else if (mode
== XCmode
)
2260 mode_alignment
= 256;
2261 if (COMPLEX_MODE_P (mode
))
2262 mode_alignment
/= 2;
2263 /* Misaligned fields are always returned in memory. */
2264 if (bit_offset
% mode_alignment
)
2268 /* for V1xx modes, just use the base mode */
2269 if (VECTOR_MODE_P (mode
)
2270 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
2271 mode
= GET_MODE_INNER (mode
);
2273 /* Classification of atomic types. */
2283 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
2284 classes
[0] = X86_64_INTEGERSI_CLASS
;
2286 classes
[0] = X86_64_INTEGER_CLASS
;
2290 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
2295 if (!(bit_offset
% 64))
2296 classes
[0] = X86_64_SSESF_CLASS
;
2298 classes
[0] = X86_64_SSE_CLASS
;
2301 classes
[0] = X86_64_SSEDF_CLASS
;
2304 classes
[0] = X86_64_X87_CLASS
;
2305 classes
[1] = X86_64_X87UP_CLASS
;
2308 classes
[0] = X86_64_SSE_CLASS
;
2309 classes
[1] = X86_64_SSEUP_CLASS
;
2312 classes
[0] = X86_64_SSE_CLASS
;
2315 classes
[0] = X86_64_SSEDF_CLASS
;
2316 classes
[1] = X86_64_SSEDF_CLASS
;
2320 /* These modes are larger than 16 bytes. */
2328 classes
[0] = X86_64_SSE_CLASS
;
2329 classes
[1] = X86_64_SSEUP_CLASS
;
2335 classes
[0] = X86_64_SSE_CLASS
;
2341 if (VECTOR_MODE_P (mode
))
2345 if (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
)
2347 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
2348 classes
[0] = X86_64_INTEGERSI_CLASS
;
2350 classes
[0] = X86_64_INTEGER_CLASS
;
2351 classes
[1] = X86_64_INTEGER_CLASS
;
2352 return 1 + (bytes
> 8);
2359 /* Examine the argument and return set number of register required in each
2360 class. Return 0 iff parameter should be passed in memory. */
2362 examine_argument (enum machine_mode mode
, tree type
, int in_return
,
2363 int *int_nregs
, int *sse_nregs
)
2365 enum x86_64_reg_class
class[MAX_CLASSES
];
2366 int n
= classify_argument (mode
, type
, class, 0);
2372 for (n
--; n
>= 0; n
--)
2375 case X86_64_INTEGER_CLASS
:
2376 case X86_64_INTEGERSI_CLASS
:
2379 case X86_64_SSE_CLASS
:
2380 case X86_64_SSESF_CLASS
:
2381 case X86_64_SSEDF_CLASS
:
2384 case X86_64_NO_CLASS
:
2385 case X86_64_SSEUP_CLASS
:
2387 case X86_64_X87_CLASS
:
2388 case X86_64_X87UP_CLASS
:
2392 case X86_64_MEMORY_CLASS
:
2397 /* Construct container for the argument used by GCC interface. See
2398 FUNCTION_ARG for the detailed description. */
2400 construct_container (enum machine_mode mode
, tree type
, int in_return
,
2401 int nintregs
, int nsseregs
, const int * intreg
,
2404 enum machine_mode tmpmode
;
2406 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2407 enum x86_64_reg_class
class[MAX_CLASSES
];
2411 int needed_sseregs
, needed_intregs
;
2412 rtx exp
[MAX_CLASSES
];
2415 n
= classify_argument (mode
, type
, class, 0);
2416 if (TARGET_DEBUG_ARG
)
2419 fprintf (stderr
, "Memory class\n");
2422 fprintf (stderr
, "Classes:");
2423 for (i
= 0; i
< n
; i
++)
2425 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
2427 fprintf (stderr
, "\n");
2432 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
, &needed_sseregs
))
2434 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
2437 /* First construct simple cases. Avoid SCmode, since we want to use
2438 single register to pass this type. */
2439 if (n
== 1 && mode
!= SCmode
)
2442 case X86_64_INTEGER_CLASS
:
2443 case X86_64_INTEGERSI_CLASS
:
2444 return gen_rtx_REG (mode
, intreg
[0]);
2445 case X86_64_SSE_CLASS
:
2446 case X86_64_SSESF_CLASS
:
2447 case X86_64_SSEDF_CLASS
:
2448 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
2449 case X86_64_X87_CLASS
:
2450 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
2451 case X86_64_NO_CLASS
:
2452 /* Zero sized array, struct or class. */
2457 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
2459 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
2461 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
2462 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
2463 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
2464 && class[1] == X86_64_INTEGER_CLASS
2465 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
2466 && intreg
[0] + 1 == intreg
[1])
2467 return gen_rtx_REG (mode
, intreg
[0]);
2469 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
2470 && class[2] == X86_64_X87_CLASS
&& class[3] == X86_64_X87UP_CLASS
2472 return gen_rtx_REG (XCmode
, FIRST_STACK_REG
);
2474 /* Otherwise figure out the entries of the PARALLEL. */
2475 for (i
= 0; i
< n
; i
++)
2479 case X86_64_NO_CLASS
:
2481 case X86_64_INTEGER_CLASS
:
2482 case X86_64_INTEGERSI_CLASS
:
2483 /* Merge TImodes on aligned occasions here too. */
2484 if (i
* 8 + 8 > bytes
)
2485 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
2486 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
2490 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2491 if (tmpmode
== BLKmode
)
2493 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2494 gen_rtx_REG (tmpmode
, *intreg
),
2498 case X86_64_SSESF_CLASS
:
2499 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2500 gen_rtx_REG (SFmode
,
2501 SSE_REGNO (sse_regno
)),
2505 case X86_64_SSEDF_CLASS
:
2506 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2507 gen_rtx_REG (DFmode
,
2508 SSE_REGNO (sse_regno
)),
2512 case X86_64_SSE_CLASS
:
2513 if (i
< n
- 1 && class[i
+ 1] == X86_64_SSEUP_CLASS
)
2517 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2518 gen_rtx_REG (tmpmode
,
2519 SSE_REGNO (sse_regno
)),
2521 if (tmpmode
== TImode
)
2529 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
2530 for (i
= 0; i
< nexps
; i
++)
2531 XVECEXP (ret
, 0, i
) = exp
[i
];
2535 /* Update the data in CUM to advance over an argument
2536 of mode MODE and data type TYPE.
2537 (TYPE is null for libcalls where that information may not be available.) */
2540 function_arg_advance (CUMULATIVE_ARGS
*cum
, /* current arg information */
2541 enum machine_mode mode
, /* current arg mode */
2542 tree type
, /* type of the argument or 0 if lib support */
2543 int named
) /* whether or not the argument was named */
2546 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2547 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2549 if (TARGET_DEBUG_ARG
)
2551 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2552 words
, cum
->words
, cum
->nregs
, cum
->sse_nregs
, GET_MODE_NAME (mode
), named
);
2555 int int_nregs
, sse_nregs
;
2556 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
2557 cum
->words
+= words
;
2558 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
2560 cum
->nregs
-= int_nregs
;
2561 cum
->sse_nregs
-= sse_nregs
;
2562 cum
->regno
+= int_nregs
;
2563 cum
->sse_regno
+= sse_nregs
;
2566 cum
->words
+= words
;
2570 if (TARGET_SSE
&& SSE_REG_MODE_P (mode
)
2571 && (!type
|| !AGGREGATE_TYPE_P (type
)))
2573 cum
->sse_words
+= words
;
2574 cum
->sse_nregs
-= 1;
2575 cum
->sse_regno
+= 1;
2576 if (cum
->sse_nregs
<= 0)
2582 else if (TARGET_MMX
&& MMX_REG_MODE_P (mode
)
2583 && (!type
|| !AGGREGATE_TYPE_P (type
)))
2585 cum
->mmx_words
+= words
;
2586 cum
->mmx_nregs
-= 1;
2587 cum
->mmx_regno
+= 1;
2588 if (cum
->mmx_nregs
<= 0)
2596 cum
->words
+= words
;
2597 cum
->nregs
-= words
;
2598 cum
->regno
+= words
;
2600 if (cum
->nregs
<= 0)
2610 /* Define where to put the arguments to a function.
2611 Value is zero to push the argument on the stack,
2612 or a hard register in which to store the argument.
2614 MODE is the argument's machine mode.
2615 TYPE is the data type of the argument (as a tree).
2616 This is null for libcalls where that information may
2618 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2619 the preceding args and about the function being called.
2620 NAMED is nonzero if this argument is a named parameter
2621 (otherwise it is an extra parameter matching an ellipsis). */
2624 function_arg (CUMULATIVE_ARGS
*cum
, /* current arg information */
2625 enum machine_mode mode
, /* current arg mode */
2626 tree type
, /* type of the argument or 0 if lib support */
2627 int named
) /* != 0 for normal args, == 0 for ... args */
2631 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2632 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2633 static bool warnedsse
, warnedmmx
;
2635 /* Handle a hidden AL argument containing number of registers for varargs
2636 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2638 if (mode
== VOIDmode
)
2641 return GEN_INT (cum
->maybe_vaarg
2642 ? (cum
->sse_nregs
< 0
2650 ret
= construct_container (mode
, type
, 0, cum
->nregs
, cum
->sse_nregs
,
2651 &x86_64_int_parameter_registers
[cum
->regno
],
2656 /* For now, pass fp/complex values on the stack. */
2668 if (words
<= cum
->nregs
)
2670 int regno
= cum
->regno
;
2672 /* Fastcall allocates the first two DWORD (SImode) or
2673 smaller arguments to ECX and EDX. */
2676 if (mode
== BLKmode
|| mode
== DImode
)
2679 /* ECX not EAX is the first allocated register. */
2683 ret
= gen_rtx_REG (mode
, regno
);
2693 if (!type
|| !AGGREGATE_TYPE_P (type
))
2695 if (!TARGET_SSE
&& !warnedmmx
&& cum
->warn_sse
)
2698 warning ("SSE vector argument without SSE enabled "
2702 ret
= gen_rtx_REG (mode
, cum
->sse_regno
+ FIRST_SSE_REG
);
2709 if (!type
|| !AGGREGATE_TYPE_P (type
))
2711 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
2714 warning ("MMX vector argument without MMX enabled "
2718 ret
= gen_rtx_REG (mode
, cum
->mmx_regno
+ FIRST_MMX_REG
);
2723 if (TARGET_DEBUG_ARG
)
2726 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2727 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
2730 print_simple_rtl (stderr
, ret
);
2732 fprintf (stderr
, ", stack");
2734 fprintf (stderr
, " )\n");
2740 /* A C expression that indicates when an argument must be passed by
2741 reference. If nonzero for an argument, a copy of that argument is
2742 made in memory and a pointer to the argument is passed instead of
2743 the argument itself. The pointer is passed in whatever way is
2744 appropriate for passing a pointer to that type. */
2747 function_arg_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
2748 enum machine_mode mode ATTRIBUTE_UNUSED
,
2749 tree type
, int named ATTRIBUTE_UNUSED
)
2754 if (type
&& int_size_in_bytes (type
) == -1)
2756 if (TARGET_DEBUG_ARG
)
2757 fprintf (stderr
, "function_arg_pass_by_reference\n");
2764 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2767 contains_128bit_aligned_vector_p (tree type
)
2769 enum machine_mode mode
= TYPE_MODE (type
);
2770 if (SSE_REG_MODE_P (mode
)
2771 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
2773 if (TYPE_ALIGN (type
) < 128)
2776 if (AGGREGATE_TYPE_P (type
))
2778 /* Walk the aggregates recursively. */
2779 if (TREE_CODE (type
) == RECORD_TYPE
2780 || TREE_CODE (type
) == UNION_TYPE
2781 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
2785 if (TYPE_BINFO (type
) && BINFO_BASE_BINFOS (TYPE_BINFO (type
)))
2787 tree bases
= BINFO_BASE_BINFOS (TYPE_BINFO (type
));
2788 int n_bases
= BINFO_N_BASE_BINFOS (TYPE_BINFO (type
));
2791 for (i
= 0; i
< n_bases
; ++i
)
2793 tree binfo
= TREE_VEC_ELT (bases
, i
);
2794 tree type
= BINFO_TYPE (binfo
);
2796 if (contains_128bit_aligned_vector_p (type
))
2800 /* And now merge the fields of structure. */
2801 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
2803 if (TREE_CODE (field
) == FIELD_DECL
2804 && contains_128bit_aligned_vector_p (TREE_TYPE (field
)))
2808 /* Just for use if some languages passes arrays by value. */
2809 else if (TREE_CODE (type
) == ARRAY_TYPE
)
2811 if (contains_128bit_aligned_vector_p (TREE_TYPE (type
)))
2820 /* Gives the alignment boundary, in bits, of an argument with the
2821 specified mode and type. */
2824 ix86_function_arg_boundary (enum machine_mode mode
, tree type
)
2828 align
= TYPE_ALIGN (type
);
2830 align
= GET_MODE_ALIGNMENT (mode
);
2831 if (align
< PARM_BOUNDARY
)
2832 align
= PARM_BOUNDARY
;
2835 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2836 make an exception for SSE modes since these require 128bit
2839 The handling here differs from field_alignment. ICC aligns MMX
2840 arguments to 4 byte boundaries, while structure fields are aligned
2841 to 8 byte boundaries. */
2844 if (!SSE_REG_MODE_P (mode
))
2845 align
= PARM_BOUNDARY
;
2849 if (!contains_128bit_aligned_vector_p (type
))
2850 align
= PARM_BOUNDARY
;
2858 /* Return true if N is a possible register number of function value. */
2860 ix86_function_value_regno_p (int regno
)
2864 return ((regno
) == 0
2865 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
2866 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
));
2868 return ((regno
) == 0 || (regno
) == FIRST_FLOAT_REG
2869 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
)
2870 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
));
2873 /* Define how to find the value returned by a function.
2874 VALTYPE is the data type of the value (as a tree).
2875 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2876 otherwise, FUNC is 0. */
2878 ix86_function_value (tree valtype
)
2882 rtx ret
= construct_container (TYPE_MODE (valtype
), valtype
, 1,
2883 REGPARM_MAX
, SSE_REGPARM_MAX
,
2884 x86_64_int_return_registers
, 0);
2885 /* For zero sized structures, construct_container return NULL, but we need
2886 to keep rest of compiler happy by returning meaningful value. */
2888 ret
= gen_rtx_REG (TYPE_MODE (valtype
), 0);
2892 return gen_rtx_REG (TYPE_MODE (valtype
),
2893 ix86_value_regno (TYPE_MODE (valtype
)));
2896 /* Return false iff type is returned in memory. */
2898 ix86_return_in_memory (tree type
)
2900 int needed_intregs
, needed_sseregs
, size
;
2901 enum machine_mode mode
= TYPE_MODE (type
);
2904 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
2906 if (mode
== BLKmode
)
2909 size
= int_size_in_bytes (type
);
2911 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
2914 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
2916 /* User-created vectors small enough to fit in EAX. */
2920 /* MMX/3dNow values are returned on the stack, since we've
2921 got to EMMS/FEMMS before returning. */
2925 /* SSE values are returned in XMM0, except when it doesn't exist. */
2927 return (TARGET_SSE
? 0 : 1);
2938 /* When returning SSE vector types, we have a choice of either
2939 (1) being abi incompatible with a -march switch, or
2940 (2) generating an error.
2941 Given no good solution, I think the safest thing is one warning.
2942 The user won't be able to use -Werror, but....
2944 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
2945 called in response to actually generating a caller or callee that
2946 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
2947 via aggregate_value_p for general type probing from tree-ssa. */
2950 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
2954 if (!TARGET_SSE
&& type
&& !warned
)
2956 /* Look at the return type of the function, not the function type. */
2957 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
2960 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
2963 warning ("SSE vector return without SSE enabled changes the ABI");
2970 /* Define how to find the value returned by a library function
2971 assuming the value has mode MODE. */
2973 ix86_libcall_value (enum machine_mode mode
)
2984 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
2986 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
2991 return gen_rtx_REG (mode
, 0);
2995 return gen_rtx_REG (mode
, ix86_value_regno (mode
));
2998 /* Given a mode, return the register to use for a return value. */
3001 ix86_value_regno (enum machine_mode mode
)
3003 /* Floating point return values in %st(0). */
3004 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
&& TARGET_FLOAT_RETURNS_IN_80387
)
3005 return FIRST_FLOAT_REG
;
3006 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3007 we prevent this case when sse is not available. */
3008 if (mode
== TImode
|| (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
3009 return FIRST_SSE_REG
;
3010 /* Everything else in %eax. */
3014 /* Create the va_list data type. */
3017 ix86_build_builtin_va_list (void)
3019 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
3021 /* For i386 we use plain pointer to argument area. */
3023 return build_pointer_type (char_type_node
);
3025 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
3026 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
3028 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
3029 unsigned_type_node
);
3030 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
3031 unsigned_type_node
);
3032 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
3034 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
3037 DECL_FIELD_CONTEXT (f_gpr
) = record
;
3038 DECL_FIELD_CONTEXT (f_fpr
) = record
;
3039 DECL_FIELD_CONTEXT (f_ovf
) = record
;
3040 DECL_FIELD_CONTEXT (f_sav
) = record
;
3042 TREE_CHAIN (record
) = type_decl
;
3043 TYPE_NAME (record
) = type_decl
;
3044 TYPE_FIELDS (record
) = f_gpr
;
3045 TREE_CHAIN (f_gpr
) = f_fpr
;
3046 TREE_CHAIN (f_fpr
) = f_ovf
;
3047 TREE_CHAIN (f_ovf
) = f_sav
;
3049 layout_type (record
);
3051 /* The correct type is an array type of one element. */
3052 return build_array_type (record
, build_index_type (size_zero_node
));
3055 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3058 ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3059 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
3062 CUMULATIVE_ARGS next_cum
;
3063 rtx save_area
= NULL_RTX
, mem
;
3076 /* Indicate to allocate space on the stack for varargs save area. */
3077 ix86_save_varrargs_registers
= 1;
3079 cfun
->stack_alignment_needed
= 128;
3081 fntype
= TREE_TYPE (current_function_decl
);
3082 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
3083 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
3084 != void_type_node
));
3086 /* For varargs, we do not want to skip the dummy va_dcl argument.
3087 For stdargs, we do want to skip the last named argument. */
3090 function_arg_advance (&next_cum
, mode
, type
, 1);
3093 save_area
= frame_pointer_rtx
;
3095 set
= get_varargs_alias_set ();
3097 for (i
= next_cum
.regno
; i
< ix86_regparm
; i
++)
3099 mem
= gen_rtx_MEM (Pmode
,
3100 plus_constant (save_area
, i
* UNITS_PER_WORD
));
3101 set_mem_alias_set (mem
, set
);
3102 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
3103 x86_64_int_parameter_registers
[i
]));
3106 if (next_cum
.sse_nregs
)
3108 /* Now emit code to save SSE registers. The AX parameter contains number
3109 of SSE parameter registers used to call this function. We use
3110 sse_prologue_save insn template that produces computed jump across
3111 SSE saves. We need some preparation work to get this working. */
3113 label
= gen_label_rtx ();
3114 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
3116 /* Compute address to jump to :
3117 label - 5*eax + nnamed_sse_arguments*5 */
3118 tmp_reg
= gen_reg_rtx (Pmode
);
3119 nsse_reg
= gen_reg_rtx (Pmode
);
3120 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
3121 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
3122 gen_rtx_MULT (Pmode
, nsse_reg
,
3124 if (next_cum
.sse_regno
)
3127 gen_rtx_CONST (DImode
,
3128 gen_rtx_PLUS (DImode
,
3130 GEN_INT (next_cum
.sse_regno
* 4))));
3132 emit_move_insn (nsse_reg
, label_ref
);
3133 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
3135 /* Compute address of memory block we save into. We always use pointer
3136 pointing 127 bytes after first byte to store - this is needed to keep
3137 instruction size limited by 4 bytes. */
3138 tmp_reg
= gen_reg_rtx (Pmode
);
3139 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
3140 plus_constant (save_area
,
3141 8 * REGPARM_MAX
+ 127)));
3142 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
3143 set_mem_alias_set (mem
, set
);
3144 set_mem_align (mem
, BITS_PER_WORD
);
3146 /* And finally do the dirty job! */
3147 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
3148 GEN_INT (next_cum
.sse_regno
), label
));
3153 /* Implement va_start. */
3156 ix86_va_start (tree valist
, rtx nextarg
)
3158 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
3159 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
3160 tree gpr
, fpr
, ovf
, sav
, t
;
3162 /* Only 64bit target needs something special. */
3165 std_expand_builtin_va_start (valist
, nextarg
);
3169 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
3170 f_fpr
= TREE_CHAIN (f_gpr
);
3171 f_ovf
= TREE_CHAIN (f_fpr
);
3172 f_sav
= TREE_CHAIN (f_ovf
);
3174 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
3175 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
3176 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
3177 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
3178 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
3180 /* Count number of gp and fp argument registers used. */
3181 words
= current_function_args_info
.words
;
3182 n_gpr
= current_function_args_info
.regno
;
3183 n_fpr
= current_function_args_info
.sse_regno
;
3185 if (TARGET_DEBUG_ARG
)
3186 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3187 (int) words
, (int) n_gpr
, (int) n_fpr
);
3189 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
,
3190 build_int_2 (n_gpr
* 8, 0));
3191 TREE_SIDE_EFFECTS (t
) = 1;
3192 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3194 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
,
3195 build_int_2 (n_fpr
* 16 + 8*REGPARM_MAX
, 0));
3196 TREE_SIDE_EFFECTS (t
) = 1;
3197 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3199 /* Find the overflow area. */
3200 t
= make_tree (TREE_TYPE (ovf
), virtual_incoming_args_rtx
);
3202 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), t
,
3203 build_int_2 (words
* UNITS_PER_WORD
, 0));
3204 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
3205 TREE_SIDE_EFFECTS (t
) = 1;
3206 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3208 /* Find the register save area.
3209 Prologue of the function save it right above stack frame. */
3210 t
= make_tree (TREE_TYPE (sav
), frame_pointer_rtx
);
3211 t
= build (MODIFY_EXPR
, TREE_TYPE (sav
), sav
, t
);
3212 TREE_SIDE_EFFECTS (t
) = 1;
3213 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3216 /* Implement va_arg. */
3219 ix86_gimplify_va_arg (tree valist
, tree type
, tree
*pre_p
, tree
*post_p
)
3221 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
3222 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
3223 tree gpr
, fpr
, ovf
, sav
, t
;
3225 tree lab_false
, lab_over
= NULL_TREE
;
3231 /* Only 64bit target needs something special. */
3233 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
3235 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
3236 f_fpr
= TREE_CHAIN (f_gpr
);
3237 f_ovf
= TREE_CHAIN (f_fpr
);
3238 f_sav
= TREE_CHAIN (f_ovf
);
3240 valist
= build_fold_indirect_ref (valist
);
3241 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
3242 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
3243 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
3244 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
3246 size
= int_size_in_bytes (type
);
3249 /* Variable-size types are passed by reference. */
3251 type
= build_pointer_type (type
);
3252 size
= int_size_in_bytes (type
);
3254 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3256 container
= construct_container (TYPE_MODE (type
), type
, 0,
3257 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
3259 * Pull the value out of the saved registers ...
3262 addr
= create_tmp_var (ptr_type_node
, "addr");
3263 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
3267 int needed_intregs
, needed_sseregs
;
3269 tree int_addr
, sse_addr
;
3271 lab_false
= create_artificial_label ();
3272 lab_over
= create_artificial_label ();
3274 examine_argument (TYPE_MODE (type
), type
, 0,
3275 &needed_intregs
, &needed_sseregs
);
3277 need_temp
= (!REG_P (container
)
3278 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
3279 || TYPE_ALIGN (type
) > 128));
3281 /* In case we are passing structure, verify that it is consecutive block
3282 on the register save area. If not we need to do moves. */
3283 if (!need_temp
&& !REG_P (container
))
3285 /* Verify that all registers are strictly consecutive */
3286 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
3290 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
3292 rtx slot
= XVECEXP (container
, 0, i
);
3293 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
3294 || INTVAL (XEXP (slot
, 1)) != i
* 16)
3302 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
3304 rtx slot
= XVECEXP (container
, 0, i
);
3305 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
3306 || INTVAL (XEXP (slot
, 1)) != i
* 8)
3318 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
3319 DECL_POINTER_ALIAS_SET (int_addr
) = get_varargs_alias_set ();
3320 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
3321 DECL_POINTER_ALIAS_SET (sse_addr
) = get_varargs_alias_set ();
3323 /* First ensure that we fit completely in registers. */
3326 t
= build_int_2 ((REGPARM_MAX
- needed_intregs
+ 1) * 8, 0);
3327 TREE_TYPE (t
) = TREE_TYPE (gpr
);
3328 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
3329 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
3330 t
= build (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
3331 gimplify_and_add (t
, pre_p
);
3335 t
= build_int_2 ((SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
3336 + REGPARM_MAX
* 8, 0);
3337 TREE_TYPE (t
) = TREE_TYPE (fpr
);
3338 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
3339 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
3340 t
= build (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
3341 gimplify_and_add (t
, pre_p
);
3344 /* Compute index to start of area used for integer regs. */
3347 /* int_addr = gpr + sav; */
3348 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, gpr
);
3349 t
= build2 (MODIFY_EXPR
, void_type_node
, int_addr
, t
);
3350 gimplify_and_add (t
, pre_p
);
3354 /* sse_addr = fpr + sav; */
3355 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, fpr
);
3356 t
= build2 (MODIFY_EXPR
, void_type_node
, sse_addr
, t
);
3357 gimplify_and_add (t
, pre_p
);
3362 tree temp
= create_tmp_var (type
, "va_arg_tmp");
3365 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
3366 t
= build2 (MODIFY_EXPR
, void_type_node
, addr
, t
);
3367 gimplify_and_add (t
, pre_p
);
3369 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
3371 rtx slot
= XVECEXP (container
, 0, i
);
3372 rtx reg
= XEXP (slot
, 0);
3373 enum machine_mode mode
= GET_MODE (reg
);
3374 tree piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
3375 tree addr_type
= build_pointer_type (piece_type
);
3378 tree dest_addr
, dest
;
3380 if (SSE_REGNO_P (REGNO (reg
)))
3382 src_addr
= sse_addr
;
3383 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
3387 src_addr
= int_addr
;
3388 src_offset
= REGNO (reg
) * 8;
3390 src_addr
= fold_convert (addr_type
, src_addr
);
3391 src_addr
= fold (build2 (PLUS_EXPR
, addr_type
, src_addr
,
3392 size_int (src_offset
)));
3393 src
= build_fold_indirect_ref (src_addr
);
3395 dest_addr
= fold_convert (addr_type
, addr
);
3396 dest_addr
= fold (build2 (PLUS_EXPR
, addr_type
, dest_addr
,
3397 size_int (INTVAL (XEXP (slot
, 1)))));
3398 dest
= build_fold_indirect_ref (dest_addr
);
3400 t
= build2 (MODIFY_EXPR
, void_type_node
, dest
, src
);
3401 gimplify_and_add (t
, pre_p
);
3407 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
3408 build_int_2 (needed_intregs
* 8, 0));
3409 t
= build2 (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
, t
);
3410 gimplify_and_add (t
, pre_p
);
3415 build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
3416 build_int_2 (needed_sseregs
* 16, 0));
3417 t
= build2 (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
, t
);
3418 gimplify_and_add (t
, pre_p
);
3421 t
= build1 (GOTO_EXPR
, void_type_node
, lab_over
);
3422 gimplify_and_add (t
, pre_p
);
3424 t
= build1 (LABEL_EXPR
, void_type_node
, lab_false
);
3425 append_to_statement_list (t
, pre_p
);
3428 /* ... otherwise out of the overflow area. */
3430 /* Care for on-stack alignment if needed. */
3431 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64)
3435 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
3436 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
, build_int_2 (align
- 1, 0));
3437 t
= build (BIT_AND_EXPR
, TREE_TYPE (t
), t
, build_int_2 (-align
, -1));
3439 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
3441 t2
= build2 (MODIFY_EXPR
, void_type_node
, addr
, t
);
3442 gimplify_and_add (t2
, pre_p
);
3444 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
3445 build_int_2 (rsize
* UNITS_PER_WORD
, 0));
3446 t
= build2 (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
3447 gimplify_and_add (t
, pre_p
);
3451 t
= build1 (LABEL_EXPR
, void_type_node
, lab_over
);
3452 append_to_statement_list (t
, pre_p
);
3455 ptrtype
= build_pointer_type (type
);
3456 addr
= fold_convert (ptrtype
, addr
);
3459 addr
= build_fold_indirect_ref (addr
);
3460 return build_fold_indirect_ref (addr
);
3463 /* Return nonzero if OP is either a i387 or SSE fp register. */
3465 any_fp_register_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3467 return ANY_FP_REG_P (op
);
3470 /* Return nonzero if OP is an i387 fp register. */
3472 fp_register_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3474 return FP_REG_P (op
);
3477 /* Return nonzero if OP is a non-fp register_operand. */
3479 register_and_not_any_fp_reg_operand (rtx op
, enum machine_mode mode
)
3481 return register_operand (op
, mode
) && !ANY_FP_REG_P (op
);
3484 /* Return nonzero if OP is a register operand other than an
3485 i387 fp register. */
3487 register_and_not_fp_reg_operand (rtx op
, enum machine_mode mode
)
3489 return register_operand (op
, mode
) && !FP_REG_P (op
);
3492 /* Return nonzero if OP is general operand representable on x86_64. */
3495 x86_64_general_operand (rtx op
, enum machine_mode mode
)
3498 return general_operand (op
, mode
);
3499 if (nonimmediate_operand (op
, mode
))
3501 return x86_64_sign_extended_value (op
);
3504 /* Return nonzero if OP is general operand representable on x86_64
3505 as either sign extended or zero extended constant. */
3508 x86_64_szext_general_operand (rtx op
, enum machine_mode mode
)
3511 return general_operand (op
, mode
);
3512 if (nonimmediate_operand (op
, mode
))
3514 return x86_64_sign_extended_value (op
) || x86_64_zero_extended_value (op
);
3517 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3520 x86_64_nonmemory_operand (rtx op
, enum machine_mode mode
)
3523 return nonmemory_operand (op
, mode
);
3524 if (register_operand (op
, mode
))
3526 return x86_64_sign_extended_value (op
);
3529 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3532 x86_64_movabs_operand (rtx op
, enum machine_mode mode
)
3534 if (!TARGET_64BIT
|| !flag_pic
)
3535 return nonmemory_operand (op
, mode
);
3536 if (register_operand (op
, mode
) || x86_64_sign_extended_value (op
))
3538 if (CONSTANT_P (op
) && !symbolic_reference_mentioned_p (op
))
3543 /* Return nonzero if OPNUM's MEM should be matched
3544 in movabs* patterns. */
3547 ix86_check_movabs (rtx insn
, int opnum
)
3551 set
= PATTERN (insn
);
3552 if (GET_CODE (set
) == PARALLEL
)
3553 set
= XVECEXP (set
, 0, 0);
3554 if (GET_CODE (set
) != SET
)
3556 mem
= XEXP (set
, opnum
);
3557 while (GET_CODE (mem
) == SUBREG
)
3558 mem
= SUBREG_REG (mem
);
3559 if (GET_CODE (mem
) != MEM
)
3561 return (volatile_ok
|| !MEM_VOLATILE_P (mem
));
3564 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3567 x86_64_szext_nonmemory_operand (rtx op
, enum machine_mode mode
)
3570 return nonmemory_operand (op
, mode
);
3571 if (register_operand (op
, mode
))
3573 return x86_64_sign_extended_value (op
) || x86_64_zero_extended_value (op
);
3576 /* Return nonzero if OP is immediate operand representable on x86_64. */
3579 x86_64_immediate_operand (rtx op
, enum machine_mode mode
)
3582 return immediate_operand (op
, mode
);
3583 return x86_64_sign_extended_value (op
);
3586 /* Return nonzero if OP is immediate operand representable on x86_64. */
3589 x86_64_zext_immediate_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3591 return x86_64_zero_extended_value (op
);
3594 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3595 for shift & compare patterns, as shifting by 0 does not change flags),
3596 else return zero. */
3599 const_int_1_31_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3601 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 1 && INTVAL (op
) <= 31);
3604 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3605 reference and a constant. */
3608 symbolic_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3610 switch (GET_CODE (op
))
3618 if (GET_CODE (op
) == SYMBOL_REF
3619 || GET_CODE (op
) == LABEL_REF
3620 || (GET_CODE (op
) == UNSPEC
3621 && (XINT (op
, 1) == UNSPEC_GOT
3622 || XINT (op
, 1) == UNSPEC_GOTOFF
3623 || XINT (op
, 1) == UNSPEC_GOTPCREL
)))
3625 if (GET_CODE (op
) != PLUS
3626 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
3630 if (GET_CODE (op
) == SYMBOL_REF
3631 || GET_CODE (op
) == LABEL_REF
)
3633 /* Only @GOTOFF gets offsets. */
3634 if (GET_CODE (op
) != UNSPEC
3635 || XINT (op
, 1) != UNSPEC_GOTOFF
)
3638 op
= XVECEXP (op
, 0, 0);
3639 if (GET_CODE (op
) == SYMBOL_REF
3640 || GET_CODE (op
) == LABEL_REF
)
3649 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3652 pic_symbolic_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3654 if (GET_CODE (op
) != CONST
)
3659 if (GET_CODE (op
) == UNSPEC
3660 && XINT (op
, 1) == UNSPEC_GOTPCREL
)
3662 if (GET_CODE (op
) == PLUS
3663 && GET_CODE (XEXP (op
, 0)) == UNSPEC
3664 && XINT (XEXP (op
, 0), 1) == UNSPEC_GOTPCREL
)
3669 if (GET_CODE (op
) == UNSPEC
)
3671 if (GET_CODE (op
) != PLUS
3672 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
3675 if (GET_CODE (op
) == UNSPEC
)
3681 /* Return true if OP is a symbolic operand that resolves locally. */
3684 local_symbolic_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3686 if (GET_CODE (op
) == CONST
3687 && GET_CODE (XEXP (op
, 0)) == PLUS
3688 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
3689 op
= XEXP (XEXP (op
, 0), 0);
3691 if (GET_CODE (op
) == LABEL_REF
)
3694 if (GET_CODE (op
) != SYMBOL_REF
)
3697 if (SYMBOL_REF_LOCAL_P (op
))
3700 /* There is, however, a not insubstantial body of code in the rest of
3701 the compiler that assumes it can just stick the results of
3702 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3703 /* ??? This is a hack. Should update the body of the compiler to
3704 always create a DECL an invoke targetm.encode_section_info. */
3705 if (strncmp (XSTR (op
, 0), internal_label_prefix
,
3706 internal_label_prefix_len
) == 0)
3712 /* Test for various thread-local symbols. */
3715 tls_symbolic_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3717 if (GET_CODE (op
) != SYMBOL_REF
)
3719 return SYMBOL_REF_TLS_MODEL (op
);
3723 tls_symbolic_operand_1 (rtx op
, enum tls_model kind
)
3725 if (GET_CODE (op
) != SYMBOL_REF
)
3727 return SYMBOL_REF_TLS_MODEL (op
) == kind
;
3731 global_dynamic_symbolic_operand (rtx op
,
3732 enum machine_mode mode ATTRIBUTE_UNUSED
)
3734 return tls_symbolic_operand_1 (op
, TLS_MODEL_GLOBAL_DYNAMIC
);
3738 local_dynamic_symbolic_operand (rtx op
,
3739 enum machine_mode mode ATTRIBUTE_UNUSED
)
3741 return tls_symbolic_operand_1 (op
, TLS_MODEL_LOCAL_DYNAMIC
);
3745 initial_exec_symbolic_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3747 return tls_symbolic_operand_1 (op
, TLS_MODEL_INITIAL_EXEC
);
3751 local_exec_symbolic_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3753 return tls_symbolic_operand_1 (op
, TLS_MODEL_LOCAL_EXEC
);
3756 /* Test for a valid operand for a call instruction. Don't allow the
3757 arg pointer register or virtual regs since they may decay into
3758 reg + const, which the patterns can't handle. */
3761 call_insn_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3763 /* Disallow indirect through a virtual register. This leads to
3764 compiler aborts when trying to eliminate them. */
3765 if (GET_CODE (op
) == REG
3766 && (op
== arg_pointer_rtx
3767 || op
== frame_pointer_rtx
3768 || (REGNO (op
) >= FIRST_PSEUDO_REGISTER
3769 && REGNO (op
) <= LAST_VIRTUAL_REGISTER
)))
3772 /* Disallow `call 1234'. Due to varying assembler lameness this
3773 gets either rejected or translated to `call .+1234'. */
3774 if (GET_CODE (op
) == CONST_INT
)
3777 /* Explicitly allow SYMBOL_REF even if pic. */
3778 if (GET_CODE (op
) == SYMBOL_REF
)
3781 /* Otherwise we can allow any general_operand in the address. */
3782 return general_operand (op
, Pmode
);
3785 /* Test for a valid operand for a call instruction. Don't allow the
3786 arg pointer register or virtual regs since they may decay into
3787 reg + const, which the patterns can't handle. */
3790 sibcall_insn_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3792 /* Disallow indirect through a virtual register. This leads to
3793 compiler aborts when trying to eliminate them. */
3794 if (GET_CODE (op
) == REG
3795 && (op
== arg_pointer_rtx
3796 || op
== frame_pointer_rtx
3797 || (REGNO (op
) >= FIRST_PSEUDO_REGISTER
3798 && REGNO (op
) <= LAST_VIRTUAL_REGISTER
)))
3801 /* Explicitly allow SYMBOL_REF even if pic. */
3802 if (GET_CODE (op
) == SYMBOL_REF
)
3805 /* Otherwise we can only allow register operands. */
3806 return register_operand (op
, Pmode
);
3810 constant_call_address_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3812 if (GET_CODE (op
) == CONST
3813 && GET_CODE (XEXP (op
, 0)) == PLUS
3814 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
3815 op
= XEXP (XEXP (op
, 0), 0);
3816 return GET_CODE (op
) == SYMBOL_REF
;
3819 /* Match exactly zero and one. */
3822 const0_operand (rtx op
, enum machine_mode mode
)
3824 return op
== CONST0_RTX (mode
);
3828 const1_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3830 return op
== const1_rtx
;
3833 /* Match 2, 4, or 8. Used for leal multiplicands. */
3836 const248_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3838 return (GET_CODE (op
) == CONST_INT
3839 && (INTVAL (op
) == 2 || INTVAL (op
) == 4 || INTVAL (op
) == 8));
3843 const_0_to_3_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3845 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 0 && INTVAL (op
) < 4);
3849 const_0_to_7_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3851 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 0 && INTVAL (op
) < 8);
3855 const_0_to_15_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3857 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 0 && INTVAL (op
) < 16);
3861 const_0_to_255_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3863 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 0 && INTVAL (op
) < 256);
3867 /* True if this is a constant appropriate for an increment or decrement. */
3870 incdec_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3872 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3873 registers, since carry flag is not set. */
3874 if ((TARGET_PENTIUM4
|| TARGET_NOCONA
) && !optimize_size
)
3876 return op
== const1_rtx
|| op
== constm1_rtx
;
3879 /* Return nonzero if OP is acceptable as operand of DImode shift
3883 shiftdi_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3886 return nonimmediate_operand (op
, mode
);
3888 return register_operand (op
, mode
);
3891 /* Return false if this is the stack pointer, or any other fake
3892 register eliminable to the stack pointer. Otherwise, this is
3895 This is used to prevent esp from being used as an index reg.
3896 Which would only happen in pathological cases. */
3899 reg_no_sp_operand (rtx op
, enum machine_mode mode
)
3902 if (GET_CODE (t
) == SUBREG
)
3904 if (t
== stack_pointer_rtx
|| t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
)
3907 return register_operand (op
, mode
);
3911 mmx_reg_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3913 return MMX_REG_P (op
);
3916 /* Return false if this is any eliminable register. Otherwise
3920 general_no_elim_operand (rtx op
, enum machine_mode mode
)
3923 if (GET_CODE (t
) == SUBREG
)
3925 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
3926 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
3927 || t
== virtual_stack_dynamic_rtx
)
3930 && REGNO (t
) >= FIRST_VIRTUAL_REGISTER
3931 && REGNO (t
) <= LAST_VIRTUAL_REGISTER
)
3934 return general_operand (op
, mode
);
3937 /* Return false if this is any eliminable register. Otherwise
3938 register_operand or const_int. */
3941 nonmemory_no_elim_operand (rtx op
, enum machine_mode mode
)
3944 if (GET_CODE (t
) == SUBREG
)
3946 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
3947 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
3948 || t
== virtual_stack_dynamic_rtx
)
3951 return GET_CODE (op
) == CONST_INT
|| register_operand (op
, mode
);
3954 /* Return false if this is any eliminable register or stack register,
3955 otherwise work like register_operand. */
3958 index_register_operand (rtx op
, enum machine_mode mode
)
3961 if (GET_CODE (t
) == SUBREG
)
3965 if (t
== arg_pointer_rtx
3966 || t
== frame_pointer_rtx
3967 || t
== virtual_incoming_args_rtx
3968 || t
== virtual_stack_vars_rtx
3969 || t
== virtual_stack_dynamic_rtx
3970 || REGNO (t
) == STACK_POINTER_REGNUM
)
3973 return general_operand (op
, mode
);
3976 /* Return true if op is a Q_REGS class register. */
3979 q_regs_operand (rtx op
, enum machine_mode mode
)
3981 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3983 if (GET_CODE (op
) == SUBREG
)
3984 op
= SUBREG_REG (op
);
3985 return ANY_QI_REG_P (op
);
3988 /* Return true if op is an flags register. */
3991 flags_reg_operand (rtx op
, enum machine_mode mode
)
3993 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3995 return REG_P (op
) && REGNO (op
) == FLAGS_REG
&& GET_MODE (op
) != VOIDmode
;
3998 /* Return true if op is a NON_Q_REGS class register. */
4001 non_q_regs_operand (rtx op
, enum machine_mode mode
)
4003 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
4005 if (GET_CODE (op
) == SUBREG
)
4006 op
= SUBREG_REG (op
);
4007 return NON_QI_REG_P (op
);
4011 zero_extended_scalar_load_operand (rtx op
,
4012 enum machine_mode mode ATTRIBUTE_UNUSED
)
4015 if (GET_CODE (op
) != MEM
)
4017 op
= maybe_get_pool_constant (op
);
4020 if (GET_CODE (op
) != CONST_VECTOR
)
4023 (GET_MODE_SIZE (GET_MODE (op
)) /
4024 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op
))));
4025 for (n_elts
--; n_elts
> 0; n_elts
--)
4027 rtx elt
= CONST_VECTOR_ELT (op
, n_elts
);
4028 if (elt
!= CONST0_RTX (GET_MODE_INNER (GET_MODE (op
))))
4034 /* Return 1 when OP is operand acceptable for standard SSE move. */
4036 vector_move_operand (rtx op
, enum machine_mode mode
)
4038 if (nonimmediate_operand (op
, mode
))
4040 if (GET_MODE (op
) != mode
&& mode
!= VOIDmode
)
4042 return (op
== CONST0_RTX (GET_MODE (op
)));
4045 /* Return true if op if a valid address, and does not contain
4046 a segment override. */
4049 no_seg_address_operand (rtx op
, enum machine_mode mode
)
4051 struct ix86_address parts
;
4053 if (! address_operand (op
, mode
))
4056 if (! ix86_decompose_address (op
, &parts
))
4059 return parts
.seg
== SEG_DEFAULT
;
4062 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
4065 sse_comparison_operator (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
4067 enum rtx_code code
= GET_CODE (op
);
4070 /* Operations supported directly. */
4080 /* These are equivalent to ones above in non-IEEE comparisons. */
4087 return !TARGET_IEEE_FP
;
4092 /* Return 1 if OP is a valid comparison operator in valid mode. */
4094 ix86_comparison_operator (rtx op
, enum machine_mode mode
)
4096 enum machine_mode inmode
;
4097 enum rtx_code code
= GET_CODE (op
);
4098 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
4100 if (!COMPARISON_P (op
))
4102 inmode
= GET_MODE (XEXP (op
, 0));
4104 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
4106 enum rtx_code second_code
, bypass_code
;
4107 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
4108 return (bypass_code
== NIL
&& second_code
== NIL
);
4115 if (inmode
== CCmode
|| inmode
== CCGCmode
4116 || inmode
== CCGOCmode
|| inmode
== CCNOmode
)
4119 case LTU
: case GTU
: case LEU
: case ORDERED
: case UNORDERED
: case GEU
:
4120 if (inmode
== CCmode
)
4124 if (inmode
== CCmode
|| inmode
== CCGCmode
|| inmode
== CCNOmode
)
4132 /* Return 1 if OP is a valid comparison operator testing carry flag
4135 ix86_carry_flag_operator (rtx op
, enum machine_mode mode
)
4137 enum machine_mode inmode
;
4138 enum rtx_code code
= GET_CODE (op
);
4140 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
4142 if (!COMPARISON_P (op
))
4144 inmode
= GET_MODE (XEXP (op
, 0));
4145 if (GET_CODE (XEXP (op
, 0)) != REG
4146 || REGNO (XEXP (op
, 0)) != 17
4147 || XEXP (op
, 1) != const0_rtx
)
4150 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
4152 enum rtx_code second_code
, bypass_code
;
4154 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
4155 if (bypass_code
!= NIL
|| second_code
!= NIL
)
4157 code
= ix86_fp_compare_code_to_integer (code
);
4159 else if (inmode
!= CCmode
)
4164 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
4167 fcmov_comparison_operator (rtx op
, enum machine_mode mode
)
4169 enum machine_mode inmode
;
4170 enum rtx_code code
= GET_CODE (op
);
4172 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
4174 if (!COMPARISON_P (op
))
4176 inmode
= GET_MODE (XEXP (op
, 0));
4177 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
4179 enum rtx_code second_code
, bypass_code
;
4181 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
4182 if (bypass_code
!= NIL
|| second_code
!= NIL
)
4184 code
= ix86_fp_compare_code_to_integer (code
);
4186 /* i387 supports just limited amount of conditional codes. */
4189 case LTU
: case GTU
: case LEU
: case GEU
:
4190 if (inmode
== CCmode
|| inmode
== CCFPmode
|| inmode
== CCFPUmode
)
4193 case ORDERED
: case UNORDERED
:
4201 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4204 promotable_binary_operator (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
4206 switch (GET_CODE (op
))
4209 /* Modern CPUs have same latency for HImode and SImode multiply,
4210 but 386 and 486 do HImode multiply faster. */
4211 return ix86_tune
> PROCESSOR_I486
;
4223 /* Nearly general operand, but accept any const_double, since we wish
4224 to be able to drop them into memory rather than have them get pulled
4228 cmp_fp_expander_operand (rtx op
, enum machine_mode mode
)
4230 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
4232 if (GET_CODE (op
) == CONST_DOUBLE
)
4234 return general_operand (op
, mode
);
4237 /* Match an SI or HImode register for a zero_extract. */
4240 ext_register_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
4243 if ((!TARGET_64BIT
|| GET_MODE (op
) != DImode
)
4244 && GET_MODE (op
) != SImode
&& GET_MODE (op
) != HImode
)
4247 if (!register_operand (op
, VOIDmode
))
4250 /* Be careful to accept only registers having upper parts. */
4251 regno
= REG_P (op
) ? REGNO (op
) : REGNO (SUBREG_REG (op
));
4252 return (regno
> LAST_VIRTUAL_REGISTER
|| regno
< 4);
4255 /* Return 1 if this is a valid binary floating-point operation.
4256 OP is the expression matched, and MODE is its mode. */
4259 binary_fp_operator (rtx op
, enum machine_mode mode
)
4261 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
4264 switch (GET_CODE (op
))
4270 return GET_MODE_CLASS (GET_MODE (op
)) == MODE_FLOAT
;
4278 mult_operator (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
4280 return GET_CODE (op
) == MULT
;
4284 div_operator (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
4286 return GET_CODE (op
) == DIV
;
4290 arith_or_logical_operator (rtx op
, enum machine_mode mode
)
4292 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
4293 && ARITHMETIC_P (op
));
4296 /* Returns 1 if OP is memory operand with a displacement. */
4299 memory_displacement_operand (rtx op
, enum machine_mode mode
)
4301 struct ix86_address parts
;
4303 if (! memory_operand (op
, mode
))
4306 if (! ix86_decompose_address (XEXP (op
, 0), &parts
))
4309 return parts
.disp
!= NULL_RTX
;
4312 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4313 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4315 ??? It seems likely that this will only work because cmpsi is an
4316 expander, and no actual insns use this. */
4319 cmpsi_operand (rtx op
, enum machine_mode mode
)
4321 if (nonimmediate_operand (op
, mode
))
4324 if (GET_CODE (op
) == AND
4325 && GET_MODE (op
) == SImode
4326 && GET_CODE (XEXP (op
, 0)) == ZERO_EXTRACT
4327 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
4328 && GET_CODE (XEXP (XEXP (op
, 0), 2)) == CONST_INT
4329 && INTVAL (XEXP (XEXP (op
, 0), 1)) == 8
4330 && INTVAL (XEXP (XEXP (op
, 0), 2)) == 8
4331 && GET_CODE (XEXP (op
, 1)) == CONST_INT
)
4337 /* Returns 1 if OP is memory operand that can not be represented by the
4341 long_memory_operand (rtx op
, enum machine_mode mode
)
4343 if (! memory_operand (op
, mode
))
4346 return memory_address_length (op
) != 0;
4349 /* Return nonzero if the rtx is known aligned. */
4352 aligned_operand (rtx op
, enum machine_mode mode
)
4354 struct ix86_address parts
;
4356 if (!general_operand (op
, mode
))
4359 /* Registers and immediate operands are always "aligned". */
4360 if (GET_CODE (op
) != MEM
)
4363 /* Don't even try to do any aligned optimizations with volatiles. */
4364 if (MEM_VOLATILE_P (op
))
4369 /* Pushes and pops are only valid on the stack pointer. */
4370 if (GET_CODE (op
) == PRE_DEC
4371 || GET_CODE (op
) == POST_INC
)
4374 /* Decode the address. */
4375 if (! ix86_decompose_address (op
, &parts
))
4378 /* Look for some component that isn't known to be aligned. */
4382 && REGNO_POINTER_ALIGN (REGNO (parts
.index
)) < 32)
4387 if (REGNO_POINTER_ALIGN (REGNO (parts
.base
)) < 32)
4392 if (GET_CODE (parts
.disp
) != CONST_INT
4393 || (INTVAL (parts
.disp
) & 3) != 0)
4397 /* Didn't find one -- this must be an aligned address. */
4401 /* Initialize the table of extra 80387 mathematical constants. */
4404 init_ext_80387_constants (void)
4406 static const char * cst
[5] =
4408 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4409 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4410 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4411 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4412 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4416 for (i
= 0; i
< 5; i
++)
4418 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
4419 /* Ensure each constant is rounded to XFmode precision. */
4420 real_convert (&ext_80387_constants_table
[i
],
4421 XFmode
, &ext_80387_constants_table
[i
]);
4424 ext_80387_constants_init
= 1;
4427 /* Return true if the constant is something that can be loaded with
4428 a special instruction. */
4431 standard_80387_constant_p (rtx x
)
4433 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
4436 if (x
== CONST0_RTX (GET_MODE (x
)))
4438 if (x
== CONST1_RTX (GET_MODE (x
)))
4441 /* For XFmode constants, try to find a special 80387 instruction when
4442 optimizing for size or on those CPUs that benefit from them. */
4443 if (GET_MODE (x
) == XFmode
4444 && (optimize_size
|| x86_ext_80387_constants
& TUNEMASK
))
4449 if (! ext_80387_constants_init
)
4450 init_ext_80387_constants ();
4452 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
4453 for (i
= 0; i
< 5; i
++)
4454 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
4461 /* Return the opcode of the special instruction to be used to load
4465 standard_80387_constant_opcode (rtx x
)
4467 switch (standard_80387_constant_p (x
))
4487 /* Return the CONST_DOUBLE representing the 80387 constant that is
4488 loaded by the specified special instruction. The argument IDX
4489 matches the return value from standard_80387_constant_p. */
4492 standard_80387_constant_rtx (int idx
)
4496 if (! ext_80387_constants_init
)
4497 init_ext_80387_constants ();
4513 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
4517 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4520 standard_sse_constant_p (rtx x
)
4522 if (x
== const0_rtx
)
4524 return (x
== CONST0_RTX (GET_MODE (x
)));
4527 /* Returns 1 if OP contains a symbol reference */
4530 symbolic_reference_mentioned_p (rtx op
)
4535 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
4538 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
4539 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
4545 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
4546 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
4550 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
4557 /* Return 1 if it is appropriate to emit `ret' instructions in the
4558 body of a function. Do this only if the epilogue is simple, needing a
4559 couple of insns. Prior to reloading, we can't tell how many registers
4560 must be saved, so return 0 then. Return 0 if there is no frame
4561 marker to de-allocate.
4563 If NON_SAVING_SETJMP is defined and true, then it is not possible
4564 for the epilogue to be simple, so return 0. This is a special case
4565 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4566 until final, but jump_optimize may need to know sooner if a
4570 ix86_can_use_return_insn_p (void)
4572 struct ix86_frame frame
;
4574 #ifdef NON_SAVING_SETJMP
4575 if (NON_SAVING_SETJMP
&& current_function_calls_setjmp
)
4579 if (! reload_completed
|| frame_pointer_needed
)
4582 /* Don't allow more than 32 pop, since that's all we can do
4583 with one instruction. */
4584 if (current_function_pops_args
4585 && current_function_args_size
>= 32768)
4588 ix86_compute_frame_layout (&frame
);
4589 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
4592 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4594 x86_64_sign_extended_value (rtx value
)
4596 switch (GET_CODE (value
))
4598 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4599 to be at least 32 and this all acceptable constants are
4600 represented as CONST_INT. */
4602 if (HOST_BITS_PER_WIDE_INT
== 32)
4606 HOST_WIDE_INT val
= trunc_int_for_mode (INTVAL (value
), DImode
);
4607 return trunc_int_for_mode (val
, SImode
) == val
;
4611 /* For certain code models, the symbolic references are known to fit.
4612 in CM_SMALL_PIC model we know it fits if it is local to the shared
4613 library. Don't count TLS SYMBOL_REFs here, since they should fit
4614 only if inside of UNSPEC handled below. */
4616 /* TLS symbols are not constant. */
4617 if (tls_symbolic_operand (value
, Pmode
))
4619 return (ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_KERNEL
);
4621 /* For certain code models, the code is near as well. */
4623 return (ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
4624 || ix86_cmodel
== CM_KERNEL
);
4626 /* We also may accept the offsetted memory references in certain special
4629 if (GET_CODE (XEXP (value
, 0)) == UNSPEC
)
4630 switch (XINT (XEXP (value
, 0), 1))
4632 case UNSPEC_GOTPCREL
:
4634 case UNSPEC_GOTNTPOFF
:
4640 if (GET_CODE (XEXP (value
, 0)) == PLUS
)
4642 rtx op1
= XEXP (XEXP (value
, 0), 0);
4643 rtx op2
= XEXP (XEXP (value
, 0), 1);
4644 HOST_WIDE_INT offset
;
4646 if (ix86_cmodel
== CM_LARGE
)
4648 if (GET_CODE (op2
) != CONST_INT
)
4650 offset
= trunc_int_for_mode (INTVAL (op2
), DImode
);
4651 switch (GET_CODE (op1
))
4654 /* For CM_SMALL assume that latest object is 16MB before
4655 end of 31bits boundary. We may also accept pretty
4656 large negative constants knowing that all objects are
4657 in the positive half of address space. */
4658 if (ix86_cmodel
== CM_SMALL
4659 && offset
< 16*1024*1024
4660 && trunc_int_for_mode (offset
, SImode
) == offset
)
4662 /* For CM_KERNEL we know that all object resist in the
4663 negative half of 32bits address space. We may not
4664 accept negative offsets, since they may be just off
4665 and we may accept pretty large positive ones. */
4666 if (ix86_cmodel
== CM_KERNEL
4668 && trunc_int_for_mode (offset
, SImode
) == offset
)
4672 /* These conditions are similar to SYMBOL_REF ones, just the
4673 constraints for code models differ. */
4674 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
4675 && offset
< 16*1024*1024
4676 && trunc_int_for_mode (offset
, SImode
) == offset
)
4678 if (ix86_cmodel
== CM_KERNEL
4680 && trunc_int_for_mode (offset
, SImode
) == offset
)
4684 switch (XINT (op1
, 1))
4689 && trunc_int_for_mode (offset
, SImode
) == offset
)
4703 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4705 x86_64_zero_extended_value (rtx value
)
4707 switch (GET_CODE (value
))
4710 if (HOST_BITS_PER_WIDE_INT
== 32)
4711 return (GET_MODE (value
) == VOIDmode
4712 && !CONST_DOUBLE_HIGH (value
));
4716 if (HOST_BITS_PER_WIDE_INT
== 32)
4717 return INTVAL (value
) >= 0;
4719 return !(INTVAL (value
) & ~(HOST_WIDE_INT
) 0xffffffff);
4722 /* For certain code models, the symbolic references are known to fit. */
4724 /* TLS symbols are not constant. */
4725 if (tls_symbolic_operand (value
, Pmode
))
4727 return ix86_cmodel
== CM_SMALL
;
4729 /* For certain code models, the code is near as well. */
4731 return ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
;
4733 /* We also may accept the offsetted memory references in certain special
4736 if (GET_CODE (XEXP (value
, 0)) == PLUS
)
4738 rtx op1
= XEXP (XEXP (value
, 0), 0);
4739 rtx op2
= XEXP (XEXP (value
, 0), 1);
4741 if (ix86_cmodel
== CM_LARGE
)
4743 switch (GET_CODE (op1
))
4747 /* For small code model we may accept pretty large positive
4748 offsets, since one bit is available for free. Negative
4749 offsets are limited by the size of NULL pointer area
4750 specified by the ABI. */
4751 if (ix86_cmodel
== CM_SMALL
4752 && GET_CODE (op2
) == CONST_INT
4753 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
4754 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
4757 /* ??? For the kernel, we may accept adjustment of
4758 -0x10000000, since we know that it will just convert
4759 negative address space to positive, but perhaps this
4760 is not worthwhile. */
4763 /* These conditions are similar to SYMBOL_REF ones, just the
4764 constraints for code models differ. */
4765 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
4766 && GET_CODE (op2
) == CONST_INT
4767 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
4768 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
4782 /* Value should be nonzero if functions must have frame pointers.
4783 Zero means the frame pointer need not be set up (and parms may
4784 be accessed via the stack pointer) in functions that seem suitable. */
4787 ix86_frame_pointer_required (void)
4789 /* If we accessed previous frames, then the generated code expects
4790 to be able to access the saved ebp value in our frame. */
4791 if (cfun
->machine
->accesses_prev_frame
)
4794 /* Several x86 os'es need a frame pointer for other reasons,
4795 usually pertaining to setjmp. */
4796 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
4799 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4800 the frame pointer by default. Turn it back on now if we've not
4801 got a leaf function. */
4802 if (TARGET_OMIT_LEAF_FRAME_POINTER
4803 && (!current_function_is_leaf
))
4806 if (current_function_profile
)
4812 /* Record that the current function accesses previous call frames. */
4815 ix86_setup_frame_addresses (void)
4817 cfun
->machine
->accesses_prev_frame
= 1;
4820 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4821 # define USE_HIDDEN_LINKONCE 1
4823 # define USE_HIDDEN_LINKONCE 0
4826 static int pic_labels_used
;
4828 /* Fills in the label name that should be used for a pc thunk for
4829 the given register. */
4832 get_pc_thunk_name (char name
[32], unsigned int regno
)
4834 if (USE_HIDDEN_LINKONCE
)
4835 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
4837 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
4841 /* This function generates code for -fpic that loads %ebx with
4842 the return address of the caller and then returns. */
4845 ix86_file_end (void)
4850 for (regno
= 0; regno
< 8; ++regno
)
4854 if (! ((pic_labels_used
>> regno
) & 1))
4857 get_pc_thunk_name (name
, regno
);
4859 if (USE_HIDDEN_LINKONCE
)
4863 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
4865 TREE_PUBLIC (decl
) = 1;
4866 TREE_STATIC (decl
) = 1;
4867 DECL_ONE_ONLY (decl
) = 1;
4869 (*targetm
.asm_out
.unique_section
) (decl
, 0);
4870 named_section (decl
, NULL
, 0);
4872 (*targetm
.asm_out
.globalize_label
) (asm_out_file
, name
);
4873 fputs ("\t.hidden\t", asm_out_file
);
4874 assemble_name (asm_out_file
, name
);
4875 fputc ('\n', asm_out_file
);
4876 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
4881 ASM_OUTPUT_LABEL (asm_out_file
, name
);
4884 xops
[0] = gen_rtx_REG (SImode
, regno
);
4885 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
4886 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
4887 output_asm_insn ("ret", xops
);
4890 if (NEED_INDICATE_EXEC_STACK
)
4891 file_end_indicate_exec_stack ();
4894 /* Emit code for the SET_GOT patterns. */
4897 output_set_got (rtx dest
)
4902 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
4904 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
4906 xops
[2] = gen_rtx_LABEL_REF (Pmode
, gen_label_rtx ());
4909 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
4911 output_asm_insn ("call\t%a2", xops
);
4914 /* Output the "canonical" label name ("Lxx$pb") here too. This
4915 is what will be referred to by the Mach-O PIC subsystem. */
4916 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
4918 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
4919 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
4922 output_asm_insn ("pop{l}\t%0", xops
);
4927 get_pc_thunk_name (name
, REGNO (dest
));
4928 pic_labels_used
|= 1 << REGNO (dest
);
4930 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
4931 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
4932 output_asm_insn ("call\t%X2", xops
);
4935 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
4936 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
4937 else if (!TARGET_MACHO
)
4938 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops
);
4943 /* Generate an "push" pattern for input ARG. */
4948 return gen_rtx_SET (VOIDmode
,
4950 gen_rtx_PRE_DEC (Pmode
,
4951 stack_pointer_rtx
)),
4955 /* Return >= 0 if there is an unused call-clobbered register available
4956 for the entire function. */
4959 ix86_select_alt_pic_regnum (void)
4961 if (current_function_is_leaf
&& !current_function_profile
)
4964 for (i
= 2; i
>= 0; --i
)
4965 if (!regs_ever_live
[i
])
4969 return INVALID_REGNUM
;
4972 /* Return 1 if we need to save REGNO. */
4974 ix86_save_reg (unsigned int regno
, int maybe_eh_return
)
4976 if (pic_offset_table_rtx
4977 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
4978 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
4979 || current_function_profile
4980 || current_function_calls_eh_return
4981 || current_function_uses_const_pool
))
4983 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
4988 if (current_function_calls_eh_return
&& maybe_eh_return
)
4993 unsigned test
= EH_RETURN_DATA_REGNO (i
);
4994 if (test
== INVALID_REGNUM
)
5001 return (regs_ever_live
[regno
]
5002 && !call_used_regs
[regno
]
5003 && !fixed_regs
[regno
]
5004 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
5007 /* Return number of registers to be saved on the stack. */
5010 ix86_nsaved_regs (void)
5015 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
5016 if (ix86_save_reg (regno
, true))
5021 /* Return the offset between two registers, one to be eliminated, and the other
5022 its replacement, at the start of a routine. */
5025 ix86_initial_elimination_offset (int from
, int to
)
5027 struct ix86_frame frame
;
5028 ix86_compute_frame_layout (&frame
);
5030 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
5031 return frame
.hard_frame_pointer_offset
;
5032 else if (from
== FRAME_POINTER_REGNUM
5033 && to
== HARD_FRAME_POINTER_REGNUM
)
5034 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
5037 if (to
!= STACK_POINTER_REGNUM
)
5039 else if (from
== ARG_POINTER_REGNUM
)
5040 return frame
.stack_pointer_offset
;
5041 else if (from
!= FRAME_POINTER_REGNUM
)
5044 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
5048 /* Fill structure ix86_frame about frame of currently computed function. */
5051 ix86_compute_frame_layout (struct ix86_frame
*frame
)
5053 HOST_WIDE_INT total_size
;
5054 int stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
5055 HOST_WIDE_INT offset
;
5056 int preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
5057 HOST_WIDE_INT size
= get_frame_size ();
5059 frame
->nregs
= ix86_nsaved_regs ();
5062 /* During reload iteration the amount of registers saved can change.
5063 Recompute the value as needed. Do not recompute when amount of registers
5064 didn't change as reload does mutiple calls to the function and does not
5065 expect the decision to change within single iteration. */
5067 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
5069 int count
= frame
->nregs
;
5071 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
5072 /* The fast prologue uses move instead of push to save registers. This
5073 is significantly longer, but also executes faster as modern hardware
5074 can execute the moves in parallel, but can't do that for push/pop.
5076 Be careful about choosing what prologue to emit: When function takes
5077 many instructions to execute we may use slow version as well as in
5078 case function is known to be outside hot spot (this is known with
5079 feedback only). Weight the size of function by number of registers
5080 to save as it is cheap to use one or two push instructions but very
5081 slow to use many of them. */
5083 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
5084 if (cfun
->function_frequency
< FUNCTION_FREQUENCY_NORMAL
5085 || (flag_branch_probabilities
5086 && cfun
->function_frequency
< FUNCTION_FREQUENCY_HOT
))
5087 cfun
->machine
->use_fast_prologue_epilogue
= false;
5089 cfun
->machine
->use_fast_prologue_epilogue
5090 = !expensive_function_p (count
);
5092 if (TARGET_PROLOGUE_USING_MOVE
5093 && cfun
->machine
->use_fast_prologue_epilogue
)
5094 frame
->save_regs_using_mov
= true;
5096 frame
->save_regs_using_mov
= false;
5099 /* Skip return address and saved base pointer. */
5100 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
5102 frame
->hard_frame_pointer_offset
= offset
;
5104 /* Do some sanity checking of stack_alignment_needed and
5105 preferred_alignment, since i386 port is the only using those features
5106 that may break easily. */
5108 if (size
&& !stack_alignment_needed
)
5110 if (preferred_alignment
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
5112 if (preferred_alignment
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
5114 if (stack_alignment_needed
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
5117 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
5118 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
5120 /* Register save area */
5121 offset
+= frame
->nregs
* UNITS_PER_WORD
;
5124 if (ix86_save_varrargs_registers
)
5126 offset
+= X86_64_VARARGS_SIZE
;
5127 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
5130 frame
->va_arg_size
= 0;
5132 /* Align start of frame for local function. */
5133 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
5134 & -stack_alignment_needed
) - offset
;
5136 offset
+= frame
->padding1
;
5138 /* Frame pointer points here. */
5139 frame
->frame_pointer_offset
= offset
;
5143 /* Add outgoing arguments area. Can be skipped if we eliminated
5144 all the function calls as dead code.
5145 Skipping is however impossible when function calls alloca. Alloca
5146 expander assumes that last current_function_outgoing_args_size
5147 of stack frame are unused. */
5148 if (ACCUMULATE_OUTGOING_ARGS
5149 && (!current_function_is_leaf
|| current_function_calls_alloca
))
5151 offset
+= current_function_outgoing_args_size
;
5152 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
5155 frame
->outgoing_arguments_size
= 0;
5157 /* Align stack boundary. Only needed if we're calling another function
5159 if (!current_function_is_leaf
|| current_function_calls_alloca
)
5160 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
5161 & -preferred_alignment
) - offset
;
5163 frame
->padding2
= 0;
5165 offset
+= frame
->padding2
;
5167 /* We've reached end of stack frame. */
5168 frame
->stack_pointer_offset
= offset
;
5170 /* Size prologue needs to allocate. */
5171 frame
->to_allocate
=
5172 (size
+ frame
->padding1
+ frame
->padding2
5173 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
5175 if ((!frame
->to_allocate
&& frame
->nregs
<= 1)
5176 || (TARGET_64BIT
&& frame
->to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
5177 frame
->save_regs_using_mov
= false;
5179 if (TARGET_RED_ZONE
&& current_function_sp_is_unchanging
5180 && current_function_is_leaf
)
5182 frame
->red_zone_size
= frame
->to_allocate
;
5183 if (frame
->save_regs_using_mov
)
5184 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
5185 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
5186 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
5189 frame
->red_zone_size
= 0;
5190 frame
->to_allocate
-= frame
->red_zone_size
;
5191 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
5193 fprintf (stderr
, "nregs: %i\n", frame
->nregs
);
5194 fprintf (stderr
, "size: %i\n", size
);
5195 fprintf (stderr
, "alignment1: %i\n", stack_alignment_needed
);
5196 fprintf (stderr
, "padding1: %i\n", frame
->padding1
);
5197 fprintf (stderr
, "va_arg: %i\n", frame
->va_arg_size
);
5198 fprintf (stderr
, "padding2: %i\n", frame
->padding2
);
5199 fprintf (stderr
, "to_allocate: %i\n", frame
->to_allocate
);
5200 fprintf (stderr
, "red_zone_size: %i\n", frame
->red_zone_size
);
5201 fprintf (stderr
, "frame_pointer_offset: %i\n", frame
->frame_pointer_offset
);
5202 fprintf (stderr
, "hard_frame_pointer_offset: %i\n",
5203 frame
->hard_frame_pointer_offset
);
5204 fprintf (stderr
, "stack_pointer_offset: %i\n", frame
->stack_pointer_offset
);
5208 /* Emit code to save registers in the prologue. */
5211 ix86_emit_save_regs (void)
5216 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
5217 if (ix86_save_reg (regno
, true))
5219 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
5220 RTX_FRAME_RELATED_P (insn
) = 1;
5224 /* Emit code to save registers using MOV insns. First register
5225 is restored from POINTER + OFFSET. */
5227 ix86_emit_save_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
)
5232 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5233 if (ix86_save_reg (regno
, true))
5235 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
5237 gen_rtx_REG (Pmode
, regno
));
5238 RTX_FRAME_RELATED_P (insn
) = 1;
5239 offset
+= UNITS_PER_WORD
;
5243 /* Expand prologue or epilogue stack adjustment.
5244 The pattern exist to put a dependency on all ebp-based memory accesses.
5245 STYLE should be negative if instructions should be marked as frame related,
5246 zero if %r11 register is live and cannot be freely used and positive
5250 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
, int style
)
5255 insn
= emit_insn (gen_pro_epilogue_adjust_stack_1 (dest
, src
, offset
));
5256 else if (x86_64_immediate_operand (offset
, DImode
))
5257 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest
, src
, offset
));
5261 /* r11 is used by indirect sibcall return as well, set before the
5262 epilogue and used after the epilogue. ATM indirect sibcall
5263 shouldn't be used together with huge frame sizes in one
5264 function because of the frame_size check in sibcall.c. */
5267 r11
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
5268 insn
= emit_insn (gen_rtx_SET (DImode
, r11
, offset
));
5270 RTX_FRAME_RELATED_P (insn
) = 1;
5271 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest
, src
, r11
,
5275 RTX_FRAME_RELATED_P (insn
) = 1;
5278 /* Expand the prologue into a bunch of separate insns. */
5281 ix86_expand_prologue (void)
5285 struct ix86_frame frame
;
5286 HOST_WIDE_INT allocate
;
5288 ix86_compute_frame_layout (&frame
);
5290 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5291 slower on all targets. Also sdb doesn't like it. */
5293 if (frame_pointer_needed
)
5295 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
5296 RTX_FRAME_RELATED_P (insn
) = 1;
5298 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
5299 RTX_FRAME_RELATED_P (insn
) = 1;
5302 allocate
= frame
.to_allocate
;
5304 if (!frame
.save_regs_using_mov
)
5305 ix86_emit_save_regs ();
5307 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
5309 /* When using red zone we may start register saving before allocating
5310 the stack frame saving one cycle of the prologue. */
5311 if (TARGET_RED_ZONE
&& frame
.save_regs_using_mov
)
5312 ix86_emit_save_regs_using_mov (frame_pointer_needed
? hard_frame_pointer_rtx
5313 : stack_pointer_rtx
,
5314 -frame
.nregs
* UNITS_PER_WORD
);
5318 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
5319 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5320 GEN_INT (-allocate
), -1);
5323 /* Only valid for Win32. */
5324 rtx eax
= gen_rtx_REG (SImode
, 0);
5325 bool eax_live
= ix86_eax_live_at_start_p ();
5332 emit_insn (gen_push (eax
));
5336 insn
= emit_move_insn (eax
, GEN_INT (allocate
));
5337 RTX_FRAME_RELATED_P (insn
) = 1;
5339 insn
= emit_insn (gen_allocate_stack_worker (eax
));
5340 RTX_FRAME_RELATED_P (insn
) = 1;
5344 rtx t
= plus_constant (stack_pointer_rtx
, allocate
);
5345 emit_move_insn (eax
, gen_rtx_MEM (SImode
, t
));
5349 if (frame
.save_regs_using_mov
&& !TARGET_RED_ZONE
)
5351 if (!frame_pointer_needed
|| !frame
.to_allocate
)
5352 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
5354 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
5355 -frame
.nregs
* UNITS_PER_WORD
);
5358 pic_reg_used
= false;
5359 if (pic_offset_table_rtx
5360 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5361 || current_function_profile
))
5363 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
5365 if (alt_pic_reg_used
!= INVALID_REGNUM
)
5366 REGNO (pic_offset_table_rtx
) = alt_pic_reg_used
;
5368 pic_reg_used
= true;
5373 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
5375 /* Even with accurate pre-reload life analysis, we can wind up
5376 deleting all references to the pic register after reload.
5377 Consider if cross-jumping unifies two sides of a branch
5378 controlled by a comparison vs the only read from a global.
5379 In which case, allow the set_got to be deleted, though we're
5380 too late to do anything about the ebx save in the prologue. */
5381 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
5384 /* Prevent function calls from be scheduled before the call to mcount.
5385 In the pic_reg_used case, make sure that the got load isn't deleted. */
5386 if (current_function_profile
)
5387 emit_insn (gen_blockage (pic_reg_used
? pic_offset_table_rtx
: const0_rtx
));
5390 /* Emit code to restore saved registers using MOV insns. First register
5391 is restored from POINTER + OFFSET. */
5393 ix86_emit_restore_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
,
5394 int maybe_eh_return
)
5397 rtx base_address
= gen_rtx_MEM (Pmode
, pointer
);
5399 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5400 if (ix86_save_reg (regno
, maybe_eh_return
))
5402 /* Ensure that adjust_address won't be forced to produce pointer
5403 out of range allowed by x86-64 instruction set. */
5404 if (TARGET_64BIT
&& offset
!= trunc_int_for_mode (offset
, SImode
))
5408 r11
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
5409 emit_move_insn (r11
, GEN_INT (offset
));
5410 emit_insn (gen_adddi3 (r11
, r11
, pointer
));
5411 base_address
= gen_rtx_MEM (Pmode
, r11
);
5414 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
5415 adjust_address (base_address
, Pmode
, offset
));
5416 offset
+= UNITS_PER_WORD
;
5420 /* Restore function stack, frame, and registers. */
5423 ix86_expand_epilogue (int style
)
5426 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
5427 struct ix86_frame frame
;
5428 HOST_WIDE_INT offset
;
5430 ix86_compute_frame_layout (&frame
);
5432 /* Calculate start of saved registers relative to ebp. Special care
5433 must be taken for the normal return case of a function using
5434 eh_return: the eax and edx registers are marked as saved, but not
5435 restored along this path. */
5436 offset
= frame
.nregs
;
5437 if (current_function_calls_eh_return
&& style
!= 2)
5439 offset
*= -UNITS_PER_WORD
;
5441 /* If we're only restoring one register and sp is not valid then
5442 using a move instruction to restore the register since it's
5443 less work than reloading sp and popping the register.
5445 The default code result in stack adjustment using add/lea instruction,
5446 while this code results in LEAVE instruction (or discrete equivalent),
5447 so it is profitable in some other cases as well. Especially when there
5448 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5449 and there is exactly one register to pop. This heuristic may need some
5450 tuning in future. */
5451 if ((!sp_valid
&& frame
.nregs
<= 1)
5452 || (TARGET_EPILOGUE_USING_MOVE
5453 && cfun
->machine
->use_fast_prologue_epilogue
5454 && (frame
.nregs
> 1 || frame
.to_allocate
))
5455 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
5456 || (frame_pointer_needed
&& TARGET_USE_LEAVE
5457 && cfun
->machine
->use_fast_prologue_epilogue
5458 && frame
.nregs
== 1)
5459 || current_function_calls_eh_return
)
5461 /* Restore registers. We can use ebp or esp to address the memory
5462 locations. If both are available, default to ebp, since offsets
5463 are known to be small. Only exception is esp pointing directly to the
5464 end of block of saved registers, where we may simplify addressing
5467 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
5468 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
5469 frame
.to_allocate
, style
== 2);
5471 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
5472 offset
, style
== 2);
5474 /* eh_return epilogues need %ecx added to the stack pointer. */
5477 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
5479 if (frame_pointer_needed
)
5481 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
5482 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
5483 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
5485 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
5486 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
5488 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
5493 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
5494 tmp
= plus_constant (tmp
, (frame
.to_allocate
5495 + frame
.nregs
* UNITS_PER_WORD
));
5496 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
5499 else if (!frame_pointer_needed
)
5500 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5501 GEN_INT (frame
.to_allocate
5502 + frame
.nregs
* UNITS_PER_WORD
),
5504 /* If not an i386, mov & pop is faster than "leave". */
5505 else if (TARGET_USE_LEAVE
|| optimize_size
5506 || !cfun
->machine
->use_fast_prologue_epilogue
)
5507 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
5510 pro_epilogue_adjust_stack (stack_pointer_rtx
,
5511 hard_frame_pointer_rtx
,
5514 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
5516 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
5521 /* First step is to deallocate the stack frame so that we can
5522 pop the registers. */
5525 if (!frame_pointer_needed
)
5527 pro_epilogue_adjust_stack (stack_pointer_rtx
,
5528 hard_frame_pointer_rtx
,
5529 GEN_INT (offset
), style
);
5531 else if (frame
.to_allocate
)
5532 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5533 GEN_INT (frame
.to_allocate
), style
);
5535 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5536 if (ix86_save_reg (regno
, false))
5539 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
5541 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
5543 if (frame_pointer_needed
)
5545 /* Leave results in shorter dependency chains on CPUs that are
5546 able to grok it fast. */
5547 if (TARGET_USE_LEAVE
)
5548 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
5549 else if (TARGET_64BIT
)
5550 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
5552 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
5556 /* Sibcall epilogues don't want a return instruction. */
5560 if (current_function_pops_args
&& current_function_args_size
)
5562 rtx popc
= GEN_INT (current_function_pops_args
);
5564 /* i386 can only pop 64K bytes. If asked to pop more, pop
5565 return address, do explicit add, and jump indirectly to the
5568 if (current_function_pops_args
>= 65536)
5570 rtx ecx
= gen_rtx_REG (SImode
, 2);
5572 /* There is no "pascal" calling convention in 64bit ABI. */
5576 emit_insn (gen_popsi1 (ecx
));
5577 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
5578 emit_jump_insn (gen_return_indirect_internal (ecx
));
5581 emit_jump_insn (gen_return_pop_internal (popc
));
5584 emit_jump_insn (gen_return_internal ());
5587 /* Reset from the function's potential modifications. */
5590 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
5591 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
5593 if (pic_offset_table_rtx
)
5594 REGNO (pic_offset_table_rtx
) = REAL_PIC_OFFSET_TABLE_REGNUM
;
5597 /* Extract the parts of an RTL expression that is a valid memory address
5598 for an instruction. Return 0 if the structure of the address is
5599 grossly off. Return -1 if the address contains ASHIFT, so it is not
5600 strictly valid, but still used for computing length of lea instruction. */
5603 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
5605 rtx base
= NULL_RTX
;
5606 rtx index
= NULL_RTX
;
5607 rtx disp
= NULL_RTX
;
5608 HOST_WIDE_INT scale
= 1;
5609 rtx scale_rtx
= NULL_RTX
;
5611 enum ix86_address_seg seg
= SEG_DEFAULT
;
5613 if (GET_CODE (addr
) == REG
|| GET_CODE (addr
) == SUBREG
)
5615 else if (GET_CODE (addr
) == PLUS
)
5625 addends
[n
++] = XEXP (op
, 1);
5628 while (GET_CODE (op
) == PLUS
);
5633 for (i
= n
; i
>= 0; --i
)
5636 switch (GET_CODE (op
))
5641 index
= XEXP (op
, 0);
5642 scale_rtx
= XEXP (op
, 1);
5646 if (XINT (op
, 1) == UNSPEC_TP
5647 && TARGET_TLS_DIRECT_SEG_REFS
5648 && seg
== SEG_DEFAULT
)
5649 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
5678 else if (GET_CODE (addr
) == MULT
)
5680 index
= XEXP (addr
, 0); /* index*scale */
5681 scale_rtx
= XEXP (addr
, 1);
5683 else if (GET_CODE (addr
) == ASHIFT
)
5687 /* We're called for lea too, which implements ashift on occasion. */
5688 index
= XEXP (addr
, 0);
5689 tmp
= XEXP (addr
, 1);
5690 if (GET_CODE (tmp
) != CONST_INT
)
5692 scale
= INTVAL (tmp
);
5693 if ((unsigned HOST_WIDE_INT
) scale
> 3)
5699 disp
= addr
; /* displacement */
5701 /* Extract the integral value of scale. */
5704 if (GET_CODE (scale_rtx
) != CONST_INT
)
5706 scale
= INTVAL (scale_rtx
);
5709 /* Allow arg pointer and stack pointer as index if there is not scaling. */
5710 if (base
&& index
&& scale
== 1
5711 && (index
== arg_pointer_rtx
5712 || index
== frame_pointer_rtx
5713 || (REG_P (index
) && REGNO (index
) == STACK_POINTER_REGNUM
)))
5720 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5721 if ((base
== hard_frame_pointer_rtx
5722 || base
== frame_pointer_rtx
5723 || base
== arg_pointer_rtx
) && !disp
)
5726 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5727 Avoid this by transforming to [%esi+0]. */
5728 if (ix86_tune
== PROCESSOR_K6
&& !optimize_size
5729 && base
&& !index
&& !disp
5731 && REGNO_REG_CLASS (REGNO (base
)) == SIREG
)
5734 /* Special case: encode reg+reg instead of reg*2. */
5735 if (!base
&& index
&& scale
&& scale
== 2)
5736 base
= index
, scale
= 1;
5738 /* Special case: scaling cannot be encoded without base or displacement. */
5739 if (!base
&& !disp
&& index
&& scale
!= 1)
5751 /* Return cost of the memory address x.
5752 For i386, it is better to use a complex address than let gcc copy
5753 the address into a reg and make a new pseudo. But not if the address
5754 requires to two regs - that would mean more pseudos with longer
5757 ix86_address_cost (rtx x
)
5759 struct ix86_address parts
;
5762 if (!ix86_decompose_address (x
, &parts
))
5765 /* More complex memory references are better. */
5766 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
5768 if (parts
.seg
!= SEG_DEFAULT
)
5771 /* Attempt to minimize number of registers in the address. */
5773 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
5775 && (!REG_P (parts
.index
)
5776 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
5780 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
5782 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
5783 && parts
.base
!= parts
.index
)
5786 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5787 since it's predecode logic can't detect the length of instructions
5788 and it degenerates to vector decoded. Increase cost of such
5789 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5790 to split such addresses or even refuse such addresses at all.
5792 Following addressing modes are affected:
5797 The first and last case may be avoidable by explicitly coding the zero in
5798 memory address, but I don't have AMD-K6 machine handy to check this
5802 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
5803 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
5804 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
5810 /* If X is a machine specific address (i.e. a symbol or label being
5811 referenced as a displacement from the GOT implemented using an
5812 UNSPEC), then return the base term.  Otherwise return X.  */
/* FIND_BASE_TERM target hook for alias analysis: peel a
   (const (plus (unspec [sym] UNSPEC_GOTPCREL) offset)) wrapper down to
   the underlying SYMBOL_REF/LABEL_REF so the alias machinery can see
   the real base object.
   NOTE(review): lossy extraction -- the return-type line, braces and
   the early-return statements between the visible tests are missing.  */
5815 ix86_find_base_term (rtx x
)
/* Non-CONST addresses cannot carry the GOTPCREL wrapper.  */
5821 if (GET_CODE (x
) != CONST
)
/* Strip an outer (plus term const-offset) if present.  */
5824 if (GET_CODE (term
) == PLUS
5825 && (GET_CODE (XEXP (term
, 1)) == CONST_INT
5826 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
5827 term
= XEXP (term
, 0);
/* Only the UNSPEC_GOTPCREL wrapper is peeled here.  */
5828 if (GET_CODE (term
) != UNSPEC
5829 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
/* The wrapped operand is the actual symbol or label.  */
5832 term
= XVECEXP (term
, 0, 0);
5834 if (GET_CODE (term
) != SYMBOL_REF
5835 && GET_CODE (term
) != LABEL_REF
)
/* Fallback path: let the delegitimizer try to recover the symbol.  */
5841 term
= ix86_delegitimize_address (x
);
5843 if (GET_CODE (term
) != SYMBOL_REF
5844 && GET_CODE (term
) != LABEL_REF
)
5850 /* Determine if a given RTX is a valid constant.  We already know this
5851 satisfies CONSTANT_P.  */
/* Rejects TLS symbols (and offsets from them) as constants, and allows
   only specific UNSPEC wrappers inside a CONST; everything else is
   handled by the move patterns.
   NOTE(review): lossy extraction -- return type, braces, case labels
   and the return statements between the visible tests are missing.  */
5854 legitimate_constant_p (rtx x
)
5858 switch (GET_CODE (x
))
5861 /* TLS symbols are not constant.  */
5862 if (tls_symbolic_operand (x
, Pmode
))
/* CONST case: examine the wrapped expression.  */
5867 inner
= XEXP (x
, 0);
5869 /* Offsets of TLS symbols are never valid.
5870 Discourage CSE from creating them.  */
5871 if (GET_CODE (inner
) == PLUS
5872 && tls_symbolic_operand (XEXP (inner
, 0), Pmode
))
/* Strip a (plus/minus sym const-int) so the UNSPEC check below sees
   the inner term.  */
5875 if (GET_CODE (inner
) == PLUS
5876 || GET_CODE (inner
) == MINUS
)
5878 if (GET_CODE (XEXP (inner
, 1)) != CONST_INT
)
5880 inner
= XEXP (inner
, 0);
5883 /* Only some unspecs are valid as "constants".  */
5884 if (GET_CODE (inner
) == UNSPEC
)
5885 switch (XINT (inner
, 1))
/* NOTE(review): the case labels for these two arms (presumably the
   TPOFF/DTPOFF family) are missing from the visible text.  */
5889 return local_exec_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
5891 return local_dynamic_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
5901 /* Otherwise we handle everything else in the move patterns.  */
5905 /* Determine if it's legal to put X into the constant pool.  This
5906 is not possible for the address of thread-local symbols, which
5907 is checked above.  */
/* TARGET_CANNOT_FORCE_CONST_MEM hook: X may be forced into the
   constant pool exactly when legitimate_constant_p accepts it.  */
5910 ix86_cannot_force_const_mem (rtx x
)
5912 return !legitimate_constant_p (x
);
5915 /* Determine if a given RTX is a valid constant address.  */
/* A constant address must both satisfy CONSTANT_P and pass the strict
   form of legitimate_address_p (strict argument is 1).  */
5918 constant_address_p (rtx x
)
5920 return CONSTANT_P (x
) && legitimate_address_p (Pmode
, x
, 1);
5923 /* Nonzero if the constant value X is a legitimate general operand
5924 when generating PIC code.  It is given that flag_pic is on and
5925 that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
/* For CONST-wrapped UNSPECs only specific TLS forms are allowed;
   symbolic operands defer to legitimate_pic_address_disp_p.
   NOTE(review): lossy extraction -- return type, braces, case labels
   and default returns are missing from the visible text.  */
5928 legitimate_pic_operand_p (rtx x
)
5932 switch (GET_CODE (x
))
/* CONST case: look inside the wrapper.  */
5935 inner
= XEXP (x
, 0);
5937 /* Only some unspecs are valid as "constants".  */
5938 if (GET_CODE (inner
) == UNSPEC
)
5939 switch (XINT (inner
, 1))
/* NOTE(review): the case label for this arm is missing; presumably a
   local-exec TLS unspec -- confirm against the full source.  */
5942 return local_exec_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
/* Symbolic operands: validity is decided by the PIC displacement
   checker.  */
5950 return legitimate_pic_address_disp_p (x
);
5957 /* Determine if a given CONST RTX is a valid memory displacement
5961 legitimate_pic_address_disp_p (rtx disp
)
5965 /* In 64bit mode we can allow direct addresses of symbols and labels
5966 when they are not dynamic symbols. */
5969 /* TLS references should always be enclosed in UNSPEC. */
5970 if (tls_symbolic_operand (disp
, GET_MODE (disp
)))
5972 if (GET_CODE (disp
) == SYMBOL_REF
5973 && ix86_cmodel
== CM_SMALL_PIC
5974 && SYMBOL_REF_LOCAL_P (disp
))
5976 if (GET_CODE (disp
) == LABEL_REF
)
5978 if (GET_CODE (disp
) == CONST
5979 && GET_CODE (XEXP (disp
, 0)) == PLUS
)
5981 rtx op0
= XEXP (XEXP (disp
, 0), 0);
5982 rtx op1
= XEXP (XEXP (disp
, 0), 1);
5984 /* TLS references should always be enclosed in UNSPEC. */
5985 if (tls_symbolic_operand (op0
, GET_MODE (op0
)))
5987 if (((GET_CODE (op0
) == SYMBOL_REF
5988 && ix86_cmodel
== CM_SMALL_PIC
5989 && SYMBOL_REF_LOCAL_P (op0
))
5990 || GET_CODE (op0
) == LABEL_REF
)
5991 && GET_CODE (op1
) == CONST_INT
5992 && INTVAL (op1
) < 16*1024*1024
5993 && INTVAL (op1
) >= -16*1024*1024)
5997 if (GET_CODE (disp
) != CONST
)
5999 disp
= XEXP (disp
, 0);
6003 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6004 of GOT tables. We should not need these anyway. */
6005 if (GET_CODE (disp
) != UNSPEC
6006 || XINT (disp
, 1) != UNSPEC_GOTPCREL
)
6009 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
6010 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
6016 if (GET_CODE (disp
) == PLUS
)
6018 if (GET_CODE (XEXP (disp
, 1)) != CONST_INT
)
6020 disp
= XEXP (disp
, 0);
6024 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
6025 if (TARGET_MACHO
&& GET_CODE (disp
) == MINUS
)
6027 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
6028 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
6029 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
6031 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
6032 if (! strcmp (sym_name
, "<pic base>"))
6037 if (GET_CODE (disp
) != UNSPEC
)
6040 switch (XINT (disp
, 1))
6045 return GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
;
6047 if (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
6048 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
6049 return local_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
6051 case UNSPEC_GOTTPOFF
:
6052 case UNSPEC_GOTNTPOFF
:
6053 case UNSPEC_INDNTPOFF
:
6056 return initial_exec_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
6058 return local_exec_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
6060 return local_dynamic_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
6066 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6067 memory address for an instruction. The MODE argument is the machine mode
6068 for the MEM expression that wants to use this address.
6070 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6071 convert common non-canonical forms to canonical form so that they will
6075 legitimate_address_p (enum machine_mode mode
, rtx addr
, int strict
)
6077 struct ix86_address parts
;
6078 rtx base
, index
, disp
;
6079 HOST_WIDE_INT scale
;
6080 const char *reason
= NULL
;
6081 rtx reason_rtx
= NULL_RTX
;
6083 if (TARGET_DEBUG_ADDR
)
6086 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6087 GET_MODE_NAME (mode
), strict
);
6091 if (ix86_decompose_address (addr
, &parts
) <= 0)
6093 reason
= "decomposition failed";
6098 index
= parts
.index
;
6100 scale
= parts
.scale
;
6102 /* Validate base register.
6104 Don't allow SUBREG's here, it can lead to spill failures when the base
6105 is one word out of a two word structure, which is represented internally
6112 if (GET_CODE (base
) != REG
)
6114 reason
= "base is not a register";
6118 if (GET_MODE (base
) != Pmode
)
6120 reason
= "base is not in Pmode";
6124 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (base
))
6125 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (base
)))
6127 reason
= "base is not valid";
6132 /* Validate index register.
6134 Don't allow SUBREG's here, it can lead to spill failures when the index
6135 is one word out of a two word structure, which is represented internally
6142 if (GET_CODE (index
) != REG
)
6144 reason
= "index is not a register";
6148 if (GET_MODE (index
) != Pmode
)
6150 reason
= "index is not in Pmode";
6154 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (index
))
6155 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (index
)))
6157 reason
= "index is not valid";
6162 /* Validate scale factor. */
6165 reason_rtx
= GEN_INT (scale
);
6168 reason
= "scale without index";
6172 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
6174 reason
= "scale is not a valid multiplier";
6179 /* Validate displacement. */
6184 if (GET_CODE (disp
) == CONST
6185 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
6186 switch (XINT (XEXP (disp
, 0), 1))
6190 case UNSPEC_GOTPCREL
:
6193 goto is_legitimate_pic
;
6195 case UNSPEC_GOTTPOFF
:
6196 case UNSPEC_GOTNTPOFF
:
6197 case UNSPEC_INDNTPOFF
:
6203 reason
= "invalid address unspec";
6207 else if (flag_pic
&& (SYMBOLIC_CONST (disp
)
6209 && !machopic_operand_p (disp
)
6214 if (TARGET_64BIT
&& (index
|| base
))
6216 /* foo@dtpoff(%rX) is ok. */
6217 if (GET_CODE (disp
) != CONST
6218 || GET_CODE (XEXP (disp
, 0)) != PLUS
6219 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
6220 || GET_CODE (XEXP (XEXP (disp
, 0), 1)) != CONST_INT
6221 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
6222 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
6224 reason
= "non-constant pic memory reference";
6228 else if (! legitimate_pic_address_disp_p (disp
))
6230 reason
= "displacement is an invalid pic construct";
6234 /* This code used to verify that a symbolic pic displacement
6235 includes the pic_offset_table_rtx register.
6237 While this is good idea, unfortunately these constructs may
6238 be created by "adds using lea" optimization for incorrect
6247 This code is nonsensical, but results in addressing
6248 GOT table with pic_offset_table_rtx base. We can't
6249 just refuse it easily, since it gets matched by
6250 "addsi3" pattern, that later gets split to lea in the
6251 case output register differs from input. While this
6252 can be handled by separate addsi pattern for this case
6253 that never results in lea, this seems to be easier and
6254 correct fix for crash to disable this test. */
6256 else if (GET_CODE (disp
) != LABEL_REF
6257 && GET_CODE (disp
) != CONST_INT
6258 && (GET_CODE (disp
) != CONST
6259 || !legitimate_constant_p (disp
))
6260 && (GET_CODE (disp
) != SYMBOL_REF
6261 || !legitimate_constant_p (disp
)))
6263 reason
= "displacement is not constant";
6266 else if (TARGET_64BIT
&& !x86_64_sign_extended_value (disp
))
6268 reason
= "displacement is out of range";
6273 /* Everything looks valid. */
6274 if (TARGET_DEBUG_ADDR
)
6275 fprintf (stderr
, "Success.\n");
6279 if (TARGET_DEBUG_ADDR
)
6281 fprintf (stderr
, "Error: %s\n", reason
);
6282 debug_rtx (reason_rtx
);
6287 /* Return an unique alias set for the GOT.  */
/* Lazily allocates one alias set on first call and caches it in a
   function-local static, so all GOT loads share the same alias set.
   NOTE(review): lossy extraction -- the "if (set == -1)" guard and the
   "return set;" statement are not visible but are implied by the
   -1 sentinel initializer.  */
6289 static HOST_WIDE_INT
6290 ix86_GOT_alias_set (void)
6292 static HOST_WIDE_INT set
= -1;
6294 set
= new_alias_set ();
6298 /* Return a legitimate reference for ORIG (an address) using the
6299 register REG. If REG is 0, a new pseudo is generated.
6301 There are two types of references that must be handled:
6303 1. Global data references must load the address from the GOT, via
6304 the PIC reg. An insn is emitted to do this load, and the reg is
6307 2. Static data references, constant pool addresses, and code labels
6308 compute the address as an offset from the GOT, whose base is in
6309 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6310 differentiate them from global data objects. The returned
6311 address is the PIC reg + an unspec constant.
6313 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6314 reg also appears in the address. */
6317 legitimize_pic_address (rtx orig
, rtx reg
)
6325 reg
= gen_reg_rtx (Pmode
);
6326 /* Use the generic Mach-O PIC machinery. */
6327 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
6330 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
6332 else if (!TARGET_64BIT
&& local_symbolic_operand (addr
, Pmode
))
6334 /* This symbol may be referenced via a displacement from the PIC
6335 base address (@GOTOFF). */
6337 if (reload_in_progress
)
6338 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6339 if (GET_CODE (addr
) == CONST
)
6340 addr
= XEXP (addr
, 0);
6341 if (GET_CODE (addr
) == PLUS
)
6343 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
6344 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
6347 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
6348 new = gen_rtx_CONST (Pmode
, new);
6349 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6353 emit_move_insn (reg
, new);
6357 else if (GET_CODE (addr
) == SYMBOL_REF
)
6361 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
6362 new = gen_rtx_CONST (Pmode
, new);
6363 new = gen_rtx_MEM (Pmode
, new);
6364 RTX_UNCHANGING_P (new) = 1;
6365 set_mem_alias_set (new, ix86_GOT_alias_set ());
6368 reg
= gen_reg_rtx (Pmode
);
6369 /* Use directly gen_movsi, otherwise the address is loaded
6370 into register for CSE. We don't want to CSE this addresses,
6371 instead we CSE addresses from the GOT table, so skip this. */
6372 emit_insn (gen_movsi (reg
, new));
6377 /* This symbol must be referenced via a load from the
6378 Global Offset Table (@GOT). */
6380 if (reload_in_progress
)
6381 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6382 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
6383 new = gen_rtx_CONST (Pmode
, new);
6384 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6385 new = gen_rtx_MEM (Pmode
, new);
6386 RTX_UNCHANGING_P (new) = 1;
6387 set_mem_alias_set (new, ix86_GOT_alias_set ());
6390 reg
= gen_reg_rtx (Pmode
);
6391 emit_move_insn (reg
, new);
6397 if (GET_CODE (addr
) == CONST
)
6399 addr
= XEXP (addr
, 0);
6401 /* We must match stuff we generate before. Assume the only
6402 unspecs that can get here are ours. Not that we could do
6403 anything with them anyway.... */
6404 if (GET_CODE (addr
) == UNSPEC
6405 || (GET_CODE (addr
) == PLUS
6406 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
6408 if (GET_CODE (addr
) != PLUS
)
6411 if (GET_CODE (addr
) == PLUS
)
6413 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
6415 /* Check first to see if this is a constant offset from a @GOTOFF
6416 symbol reference. */
6417 if (local_symbolic_operand (op0
, Pmode
)
6418 && GET_CODE (op1
) == CONST_INT
)
6422 if (reload_in_progress
)
6423 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6424 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
6426 new = gen_rtx_PLUS (Pmode
, new, op1
);
6427 new = gen_rtx_CONST (Pmode
, new);
6428 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6432 emit_move_insn (reg
, new);
6438 if (INTVAL (op1
) < -16*1024*1024
6439 || INTVAL (op1
) >= 16*1024*1024)
6440 new = gen_rtx_PLUS (Pmode
, op0
, force_reg (Pmode
, op1
));
6445 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
6446 new = legitimize_pic_address (XEXP (addr
, 1),
6447 base
== reg
? NULL_RTX
: reg
);
6449 if (GET_CODE (new) == CONST_INT
)
6450 new = plus_constant (base
, INTVAL (new));
6453 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
6455 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
6456 new = XEXP (new, 1);
6458 new = gen_rtx_PLUS (Pmode
, base
, new);
6466 /* Load the thread pointer.  If TO_REG is true, force it into a register.  */
/* Builds the (unspec [const0] UNSPEC_TP) rtx representing the thread
   pointer; when TO_REG is set, emits a SET copying it into a fresh
   pseudo so callers get a register operand.
   NOTE(review): lossy extraction -- the return type, the declarations
   of reg/insn/tp, the TO_REG test and the return statements are not
   visible here.  */
6469 get_thread_pointer (int to_reg
)
/* The thread pointer is modeled as an UNSPEC_TP over const0_rtx.  */
6473 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
)
;
/* Copy into a fresh pseudo when a register operand is required.  */
6477 reg
= gen_reg_rtx (Pmode
);
6478 insn
= gen_rtx_SET (VOIDmode
, reg
, tp
);
6479 insn
= emit_insn (insn
);
6484 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6485 false if we expect this to be used for a memory address and true if
6486 we expect to load the address into a register. */
6489 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
6491 rtx dest
, base
, off
, pic
;
6496 case TLS_MODEL_GLOBAL_DYNAMIC
:
6497 dest
= gen_reg_rtx (Pmode
);
6500 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
6503 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
6504 insns
= get_insns ();
6507 emit_libcall_block (insns
, dest
, rax
, x
);
6510 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
6513 case TLS_MODEL_LOCAL_DYNAMIC
:
6514 base
= gen_reg_rtx (Pmode
);
6517 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
6520 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
6521 insns
= get_insns ();
6524 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
6525 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
6526 emit_libcall_block (insns
, base
, rax
, note
);
6529 emit_insn (gen_tls_local_dynamic_base_32 (base
));
6531 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
6532 off
= gen_rtx_CONST (Pmode
, off
);
6534 return gen_rtx_PLUS (Pmode
, base
, off
);
6536 case TLS_MODEL_INITIAL_EXEC
:
6540 type
= UNSPEC_GOTNTPOFF
;
6544 if (reload_in_progress
)
6545 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6546 pic
= pic_offset_table_rtx
;
6547 type
= TARGET_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
6549 else if (!TARGET_GNU_TLS
)
6551 pic
= gen_reg_rtx (Pmode
);
6552 emit_insn (gen_set_got (pic
));
6553 type
= UNSPEC_GOTTPOFF
;
6558 type
= UNSPEC_INDNTPOFF
;
6561 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
6562 off
= gen_rtx_CONST (Pmode
, off
);
6564 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
6565 off
= gen_rtx_MEM (Pmode
, off
);
6566 RTX_UNCHANGING_P (off
) = 1;
6567 set_mem_alias_set (off
, ix86_GOT_alias_set ());
6569 if (TARGET_64BIT
|| TARGET_GNU_TLS
)
6571 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
6572 off
= force_reg (Pmode
, off
);
6573 return gen_rtx_PLUS (Pmode
, base
, off
);
6577 base
= get_thread_pointer (true);
6578 dest
= gen_reg_rtx (Pmode
);
6579 emit_insn (gen_subsi3 (dest
, base
, off
));
6583 case TLS_MODEL_LOCAL_EXEC
:
6584 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
6585 (TARGET_64BIT
|| TARGET_GNU_TLS
)
6586 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
6587 off
= gen_rtx_CONST (Pmode
, off
);
6589 if (TARGET_64BIT
|| TARGET_GNU_TLS
)
6591 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
6592 return gen_rtx_PLUS (Pmode
, base
, off
);
6596 base
= get_thread_pointer (true);
6597 dest
= gen_reg_rtx (Pmode
);
6598 emit_insn (gen_subsi3 (dest
, base
, off
));
6609 /* Try machine-dependent ways of modifying an illegitimate address
6610 to be legitimate. If we find one, return the new, valid address.
6611 This macro is used in only one place: `memory_address' in explow.c.
6613 OLDX is the address as it was before break_out_memory_refs was called.
6614 In some cases it is useful to look at this to decide what needs to be done.
6616 MODE and WIN are passed so that this macro can use
6617 GO_IF_LEGITIMATE_ADDRESS.
6619 It is always safe for this macro to do nothing. It exists to recognize
6620 opportunities to optimize the output.
6622 For the 80386, we handle X+REG by loading X into a register R and
6623 using R+REG. R will go in a general reg and indexing will be used.
6624 However, if REG is a broken-out memory address or multiplication,
6625 nothing needs to be done because REG can certainly go in a general reg.
6627 When -fpic is used, special handling is needed for symbolic references.
6628 See comments by legitimize_pic_address in i386.c for details. */
6631 legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
, enum machine_mode mode
)
6636 if (TARGET_DEBUG_ADDR
)
6638 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6639 GET_MODE_NAME (mode
));
6643 log
= tls_symbolic_operand (x
, mode
);
6645 return legitimize_tls_address (x
, log
, false);
6646 if (GET_CODE (x
) == CONST
6647 && GET_CODE (XEXP (x
, 0)) == PLUS
6648 && (log
= tls_symbolic_operand (XEXP (XEXP (x
, 0), 0), Pmode
)))
6650 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0), log
, false);
6651 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
6654 if (flag_pic
&& SYMBOLIC_CONST (x
))
6655 return legitimize_pic_address (x
, 0);
6657 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6658 if (GET_CODE (x
) == ASHIFT
6659 && GET_CODE (XEXP (x
, 1)) == CONST_INT
6660 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (x
, 1)))) < 4)
6663 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
6664 GEN_INT (1 << log
));
6667 if (GET_CODE (x
) == PLUS
)
6669 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6671 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
6672 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
6673 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)))) < 4)
6676 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
6677 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
6678 GEN_INT (1 << log
));
6681 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
6682 && GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
6683 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x
, 1), 1)))) < 4)
6686 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
6687 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
6688 GEN_INT (1 << log
));
6691 /* Put multiply first if it isn't already. */
6692 if (GET_CODE (XEXP (x
, 1)) == MULT
)
6694 rtx tmp
= XEXP (x
, 0);
6695 XEXP (x
, 0) = XEXP (x
, 1);
6700 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6701 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6702 created by virtual register instantiation, register elimination, and
6703 similar optimizations. */
6704 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
6707 x
= gen_rtx_PLUS (Pmode
,
6708 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
6709 XEXP (XEXP (x
, 1), 0)),
6710 XEXP (XEXP (x
, 1), 1));
6714 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6715 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6716 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
6717 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
6718 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
6719 && CONSTANT_P (XEXP (x
, 1)))
6722 rtx other
= NULL_RTX
;
6724 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
6726 constant
= XEXP (x
, 1);
6727 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
6729 else if (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 1), 1)) == CONST_INT
)
6731 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
6732 other
= XEXP (x
, 1);
6740 x
= gen_rtx_PLUS (Pmode
,
6741 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
6742 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
6743 plus_constant (other
, INTVAL (constant
)));
6747 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
6750 if (GET_CODE (XEXP (x
, 0)) == MULT
)
6753 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
6756 if (GET_CODE (XEXP (x
, 1)) == MULT
)
6759 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
6763 && GET_CODE (XEXP (x
, 1)) == REG
6764 && GET_CODE (XEXP (x
, 0)) == REG
)
6767 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
6770 x
= legitimize_pic_address (x
, 0);
6773 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
6776 if (GET_CODE (XEXP (x
, 0)) == REG
)
6778 rtx temp
= gen_reg_rtx (Pmode
);
6779 rtx val
= force_operand (XEXP (x
, 1), temp
);
6781 emit_move_insn (temp
, val
);
6787 else if (GET_CODE (XEXP (x
, 1)) == REG
)
6789 rtx temp
= gen_reg_rtx (Pmode
);
6790 rtx val
= force_operand (XEXP (x
, 0), temp
);
6792 emit_move_insn (temp
, val
);
6802 /* Print an integer constant expression in assembler syntax. Addition
6803 and subtraction are the only arithmetic that may appear in these
6804 expressions. FILE is the stdio stream to write to, X is the rtx, and
6805 CODE is the operand print code from the output string. */
6808 output_pic_addr_const (FILE *file
, rtx x
, int code
)
6812 switch (GET_CODE (x
))
6822 /* Mark the decl as referenced so that cgraph will output the function. */
6823 if (SYMBOL_REF_DECL (x
))
6824 mark_decl_referenced (SYMBOL_REF_DECL (x
));
6826 assemble_name (file
, XSTR (x
, 0));
6827 if (!TARGET_MACHO
&& code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
6828 fputs ("@PLT", file
);
6835 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
6836 assemble_name (asm_out_file
, buf
);
6840 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
6844 /* This used to output parentheses around the expression,
6845 but that does not work on the 386 (either ATT or BSD assembler). */
6846 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6850 if (GET_MODE (x
) == VOIDmode
)
6852 /* We can use %d if the number is <32 bits and positive. */
6853 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
6854 fprintf (file
, "0x%lx%08lx",
6855 (unsigned long) CONST_DOUBLE_HIGH (x
),
6856 (unsigned long) CONST_DOUBLE_LOW (x
));
6858 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
6861 /* We can't handle floating point constants;
6862 PRINT_OPERAND must handle them. */
6863 output_operand_lossage ("floating constant misused");
6867 /* Some assemblers need integer constants to appear first. */
6868 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
)
6870 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6872 output_pic_addr_const (file
, XEXP (x
, 1), code
);
6874 else if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
6876 output_pic_addr_const (file
, XEXP (x
, 1), code
);
6878 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6886 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
6887 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6889 output_pic_addr_const (file
, XEXP (x
, 1), code
);
6891 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
6895 if (XVECLEN (x
, 0) != 1)
6897 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
6898 switch (XINT (x
, 1))
6901 fputs ("@GOT", file
);
6904 fputs ("@GOTOFF", file
);
6906 case UNSPEC_GOTPCREL
:
6907 fputs ("@GOTPCREL(%rip)", file
);
6909 case UNSPEC_GOTTPOFF
:
6910 /* FIXME: This might be @TPOFF in Sun ld too. */
6911 fputs ("@GOTTPOFF", file
);
6914 fputs ("@TPOFF", file
);
6918 fputs ("@TPOFF", file
);
6920 fputs ("@NTPOFF", file
);
6923 fputs ("@DTPOFF", file
);
6925 case UNSPEC_GOTNTPOFF
:
6927 fputs ("@GOTTPOFF(%rip)", file
);
6929 fputs ("@GOTNTPOFF", file
);
6931 case UNSPEC_INDNTPOFF
:
6932 fputs ("@INDNTPOFF", file
);
6935 output_operand_lossage ("invalid UNSPEC as operand");
6941 output_operand_lossage ("invalid expression as operand");
6945 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6946 We need to handle our special PIC relocations.  */
/* Emits the pointer-sized data directive (.quad on 64-bit, .long
   otherwise), then prints X -- through output_pic_addr_const when PIC
   relocations must be spelled out, otherwise through the generic
   output_addr_const.
   NOTE(review): lossy extraction -- the return type, braces and the
   flag_pic-style condition selecting between the two print paths are
   not visible.  */
6949 i386_dwarf_output_addr_const (FILE *file
, rtx x
)
/* 64-bit targets emit a quad-word pointer, 32-bit a long-word.  */
6952 fprintf (file
, "%s", TARGET_64BIT
? ASM_QUAD
: ASM_LONG
);
6956 fprintf (file
, "%s", ASM_LONG
);
/* PIC path: print with special @GOT-style relocation suffixes.  */
6959 output_pic_addr_const (file
, x
, '\0');
6961 output_addr_const (file
, x
);
6965 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6966 We need to emit DTP-relative relocations.  */
/* Writes a .long directive followed by the address expression with a
   @DTPOFF relocation suffix; the trailing ", 0" pads the value.
   NOTE(review): lossy extraction -- return type, braces and the size
   check that presumably guards the ", 0" padding are not visible.  */
6969 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
6971 fputs (ASM_LONG
, file
);
6972 output_addr_const (file
, x
);
/* DTP-relative relocation marker understood by the assembler.  */
6973 fputs ("@DTPOFF", file
);
6979 fputs (", 0", file
);
6986 /* In the name of slightly smaller debug output, and to cater to
6987 general assembler losage, recognize PIC+GOTOFF and turn it back
6988 into a direct symbol reference. */
6991 ix86_delegitimize_address (rtx orig_x
)
6995 if (GET_CODE (x
) == MEM
)
7000 if (GET_CODE (x
) != CONST
7001 || GET_CODE (XEXP (x
, 0)) != UNSPEC
7002 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
7003 || GET_CODE (orig_x
) != MEM
)
7005 return XVECEXP (XEXP (x
, 0), 0, 0);
7008 if (GET_CODE (x
) != PLUS
7009 || GET_CODE (XEXP (x
, 1)) != CONST
)
7012 if (GET_CODE (XEXP (x
, 0)) == REG
7013 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7014 /* %ebx + GOT/GOTOFF */
7016 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
7018 /* %ebx + %reg * scale + GOT/GOTOFF */
7020 if (GET_CODE (XEXP (y
, 0)) == REG
7021 && REGNO (XEXP (y
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7023 else if (GET_CODE (XEXP (y
, 1)) == REG
7024 && REGNO (XEXP (y
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
7028 if (GET_CODE (y
) != REG
7029 && GET_CODE (y
) != MULT
7030 && GET_CODE (y
) != ASHIFT
)
7036 x
= XEXP (XEXP (x
, 1), 0);
7037 if (GET_CODE (x
) == UNSPEC
7038 && ((XINT (x
, 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
7039 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& GET_CODE (orig_x
) != MEM
)))
7042 return gen_rtx_PLUS (Pmode
, y
, XVECEXP (x
, 0, 0));
7043 return XVECEXP (x
, 0, 0);
7046 if (GET_CODE (x
) == PLUS
7047 && GET_CODE (XEXP (x
, 0)) == UNSPEC
7048 && GET_CODE (XEXP (x
, 1)) == CONST_INT
7049 && ((XINT (XEXP (x
, 0), 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
7050 || (XINT (XEXP (x
, 0), 1) == UNSPEC_GOTOFF
7051 && GET_CODE (orig_x
) != MEM
)))
7053 x
= gen_rtx_PLUS (VOIDmode
, XVECEXP (XEXP (x
, 0), 0, 0), XEXP (x
, 1));
7055 return gen_rtx_PLUS (Pmode
, y
, x
);
7063 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
7068 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
7070 enum rtx_code second_code
, bypass_code
;
7071 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
7072 if (bypass_code
!= NIL
|| second_code
!= NIL
)
7074 code
= ix86_fp_compare_code_to_integer (code
);
7078 code
= reverse_condition (code
);
7089 if (mode
!= CCmode
&& mode
!= CCNOmode
&& mode
!= CCGCmode
)
7094 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
7095 Those same assemblers have the same but opposite losage on cmov. */
7098 suffix
= fp
? "nbe" : "a";
7101 if (mode
== CCNOmode
|| mode
== CCGOCmode
)
7103 else if (mode
== CCmode
|| mode
== CCGCmode
)
7114 if (mode
== CCNOmode
|| mode
== CCGOCmode
)
7116 else if (mode
== CCmode
|| mode
== CCGCmode
)
7125 suffix
= fp
? "nb" : "ae";
7128 if (mode
!= CCmode
&& mode
!= CCGCmode
&& mode
!= CCNOmode
)
7138 suffix
= fp
? "u" : "p";
7141 suffix
= fp
? "nu" : "np";
7146 fputs (suffix
, file
);
7149 /* Print the name of register X to FILE based on its machine mode and number.
7150 If CODE is 'w', pretend the mode is HImode.
7151 If CODE is 'b', pretend the mode is QImode.
7152 If CODE is 'k', pretend the mode is SImode.
7153 If CODE is 'q', pretend the mode is DImode.
7154 If CODE is 'h', pretend the reg is the `high' byte register.
7155 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7158 print_reg (rtx x
, int code
, FILE *file
)
7160 if (REGNO (x
) == ARG_POINTER_REGNUM
7161 || REGNO (x
) == FRAME_POINTER_REGNUM
7162 || REGNO (x
) == FLAGS_REG
7163 || REGNO (x
) == FPSR_REG
)
7166 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
7169 if (code
== 'w' || MMX_REG_P (x
))
7171 else if (code
== 'b')
7173 else if (code
== 'k')
7175 else if (code
== 'q')
7177 else if (code
== 'y')
7179 else if (code
== 'h')
7182 code
= GET_MODE_SIZE (GET_MODE (x
));
7184 /* Irritatingly, AMD extended registers use different naming convention
7185 from the normal registers. */
7186 if (REX_INT_REG_P (x
))
7193 error ("extended registers have no high halves");
7196 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7199 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7202 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7205 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7208 error ("unsupported operand size for extended register");
7216 if (STACK_TOP_P (x
))
7218 fputs ("st(0)", file
);
7225 if (! ANY_FP_REG_P (x
))
7226 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
7231 fputs (hi_reg_name
[REGNO (x
)], file
);
7234 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
7236 fputs (qi_reg_name
[REGNO (x
)], file
);
7239 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
7241 fputs (qi_high_reg_name
[REGNO (x
)], file
);
7248 /* Locate some local-dynamic symbol still in use by this function
7249 so that we can print its name in some tls_local_dynamic_base
7253 get_some_local_dynamic_name (void)
7257 if (cfun
->machine
->some_ld_name
)
7258 return cfun
->machine
->some_ld_name
;
7260 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
7262 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
7263 return cfun
->machine
->some_ld_name
;
7269 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
7273 if (GET_CODE (x
) == SYMBOL_REF
7274 && local_dynamic_symbolic_operand (x
, Pmode
))
7276 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
7284 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7285 C -- print opcode suffix for set/cmov insn.
7286 c -- like C, but print reversed condition
7287 F,f -- likewise, but for floating-point.
7288 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7290 R -- print the prefix for register names.
7291 z -- print the opcode suffix for the size of the current operand.
7292 * -- print a star (in certain assembler syntax)
7293 A -- print an absolute memory reference.
7294 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7295 s -- print a shift double count, followed by the assemblers argument
7297 b -- print the QImode name of the register for the indicated operand.
7298 %b0 would print %al if operands[0] is reg 0.
7299 w -- likewise, print the HImode name of the register.
7300 k -- likewise, print the SImode name of the register.
7301 q -- likewise, print the DImode name of the register.
7302 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7303 y -- print "st(0)" instead of "st" as a register.
7304 D -- print condition for SSE cmp instruction.
7305 P -- if PIC, print an @PLT suffix.
7306 X -- don't print any sort of PIC '@' suffix for a symbol.
7307 & -- print some in-use local-dynamic symbol name.
7311 print_operand (FILE *file
, rtx x
, int code
)
7318 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7323 assemble_name (file
, get_some_local_dynamic_name ());
7327 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7329 else if (ASSEMBLER_DIALECT
== ASM_INTEL
)
7331 /* Intel syntax. For absolute addresses, registers should not
7332 be surrounded by braces. */
7333 if (GET_CODE (x
) != REG
)
7336 PRINT_OPERAND (file
, x
, 0);
7344 PRINT_OPERAND (file
, x
, 0);
7349 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7354 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7359 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7364 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7369 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7374 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7379 /* 387 opcodes don't get size suffixes if the operands are
7381 if (STACK_REG_P (x
))
7384 /* Likewise if using Intel opcodes. */
7385 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
7388 /* This is the size of op from size of operand. */
7389 switch (GET_MODE_SIZE (GET_MODE (x
)))
7392 #ifdef HAVE_GAS_FILDS_FISTS
7398 if (GET_MODE (x
) == SFmode
)
7413 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
7415 #ifdef GAS_MNEMONICS
7441 if (GET_CODE (x
) == CONST_INT
|| ! SHIFT_DOUBLE_OMITS_COUNT
)
7443 PRINT_OPERAND (file
, x
, 0);
7449 /* Little bit of braindamage here. The SSE compare instructions
7450 does use completely different names for the comparisons that the
7451 fp conditional moves. */
7452 switch (GET_CODE (x
))
7467 fputs ("unord", file
);
7471 fputs ("neq", file
);
7475 fputs ("nlt", file
);
7479 fputs ("nle", file
);
7482 fputs ("ord", file
);
7490 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7491 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7493 switch (GET_MODE (x
))
7495 case HImode
: putc ('w', file
); break;
7497 case SFmode
: putc ('l', file
); break;
7499 case DFmode
: putc ('q', file
); break;
7507 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
7510 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7511 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7514 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
7517 /* Like above, but reverse condition */
7519 /* Check to see if argument to %c is really a constant
7520 and not a condition code which needs to be reversed. */
7521 if (!COMPARISON_P (x
))
7523 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7526 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
7529 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7530 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7533 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
7539 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
7542 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
7545 int pred_val
= INTVAL (XEXP (x
, 0));
7547 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
7548 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
7550 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
7551 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
7553 /* Emit hints only in the case default branch prediction
7554 heuristics would fail. */
7555 if (taken
!= cputaken
)
7557 /* We use 3e (DS) prefix for taken branches and
7558 2e (CS) prefix for not taken branches. */
7560 fputs ("ds ; ", file
);
7562 fputs ("cs ; ", file
);
7569 output_operand_lossage ("invalid operand code `%c'", code
);
7573 if (GET_CODE (x
) == REG
)
7574 print_reg (x
, code
, file
);
7576 else if (GET_CODE (x
) == MEM
)
7578 /* No `byte ptr' prefix for call instructions. */
7579 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
7582 switch (GET_MODE_SIZE (GET_MODE (x
)))
7584 case 1: size
= "BYTE"; break;
7585 case 2: size
= "WORD"; break;
7586 case 4: size
= "DWORD"; break;
7587 case 8: size
= "QWORD"; break;
7588 case 12: size
= "XWORD"; break;
7589 case 16: size
= "XMMWORD"; break;
7594 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7597 else if (code
== 'w')
7599 else if (code
== 'k')
7603 fputs (" PTR ", file
);
7607 /* Avoid (%rip) for call operands. */
7608 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
7609 && GET_CODE (x
) != CONST_INT
)
7610 output_addr_const (file
, x
);
7611 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
7612 output_operand_lossage ("invalid constraints for operand");
7617 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
7622 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
7623 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
7625 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7627 fprintf (file
, "0x%08lx", l
);
7630 /* These float cases don't actually occur as immediate operands. */
7631 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
7635 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
7636 fprintf (file
, "%s", dstr
);
7639 else if (GET_CODE (x
) == CONST_DOUBLE
7640 && GET_MODE (x
) == XFmode
)
7644 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
7645 fprintf (file
, "%s", dstr
);
7652 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
7654 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7657 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
7658 || GET_CODE (x
) == LABEL_REF
)
7660 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7663 fputs ("OFFSET FLAT:", file
);
7666 if (GET_CODE (x
) == CONST_INT
)
7667 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
7669 output_pic_addr_const (file
, x
, code
);
7671 output_addr_const (file
, x
);
7675 /* Print a memory operand whose address is ADDR. */
7678 print_operand_address (FILE *file
, rtx addr
)
7680 struct ix86_address parts
;
7681 rtx base
, index
, disp
;
7684 if (! ix86_decompose_address (addr
, &parts
))
7688 index
= parts
.index
;
7690 scale
= parts
.scale
;
7698 if (USER_LABEL_PREFIX
[0] == 0)
7700 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
7706 if (!base
&& !index
)
7708 /* Displacement only requires special attention. */
7710 if (GET_CODE (disp
) == CONST_INT
)
7712 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
7714 if (USER_LABEL_PREFIX
[0] == 0)
7716 fputs ("ds:", file
);
7718 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
7721 output_pic_addr_const (file
, disp
, 0);
7723 output_addr_const (file
, disp
);
7725 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7727 && ((GET_CODE (disp
) == SYMBOL_REF
7728 && ! tls_symbolic_operand (disp
, GET_MODE (disp
)))
7729 || GET_CODE (disp
) == LABEL_REF
7730 || (GET_CODE (disp
) == CONST
7731 && GET_CODE (XEXP (disp
, 0)) == PLUS
7732 && (GET_CODE (XEXP (XEXP (disp
, 0), 0)) == SYMBOL_REF
7733 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) == LABEL_REF
)
7734 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)))
7735 fputs ("(%rip)", file
);
7739 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7744 output_pic_addr_const (file
, disp
, 0);
7745 else if (GET_CODE (disp
) == LABEL_REF
)
7746 output_asm_label (disp
);
7748 output_addr_const (file
, disp
);
7753 print_reg (base
, 0, file
);
7757 print_reg (index
, 0, file
);
7759 fprintf (file
, ",%d", scale
);
7765 rtx offset
= NULL_RTX
;
7769 /* Pull out the offset of a symbol; print any symbol itself. */
7770 if (GET_CODE (disp
) == CONST
7771 && GET_CODE (XEXP (disp
, 0)) == PLUS
7772 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
7774 offset
= XEXP (XEXP (disp
, 0), 1);
7775 disp
= gen_rtx_CONST (VOIDmode
,
7776 XEXP (XEXP (disp
, 0), 0));
7780 output_pic_addr_const (file
, disp
, 0);
7781 else if (GET_CODE (disp
) == LABEL_REF
)
7782 output_asm_label (disp
);
7783 else if (GET_CODE (disp
) == CONST_INT
)
7786 output_addr_const (file
, disp
);
7792 print_reg (base
, 0, file
);
7795 if (INTVAL (offset
) >= 0)
7797 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
7801 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
7808 print_reg (index
, 0, file
);
7810 fprintf (file
, "*%d", scale
);
7818 output_addr_const_extra (FILE *file
, rtx x
)
7822 if (GET_CODE (x
) != UNSPEC
)
7825 op
= XVECEXP (x
, 0, 0);
7826 switch (XINT (x
, 1))
7828 case UNSPEC_GOTTPOFF
:
7829 output_addr_const (file
, op
);
7830 /* FIXME: This might be @TPOFF in Sun ld. */
7831 fputs ("@GOTTPOFF", file
);
7834 output_addr_const (file
, op
);
7835 fputs ("@TPOFF", file
);
7838 output_addr_const (file
, op
);
7840 fputs ("@TPOFF", file
);
7842 fputs ("@NTPOFF", file
);
7845 output_addr_const (file
, op
);
7846 fputs ("@DTPOFF", file
);
7848 case UNSPEC_GOTNTPOFF
:
7849 output_addr_const (file
, op
);
7851 fputs ("@GOTTPOFF(%rip)", file
);
7853 fputs ("@GOTNTPOFF", file
);
7855 case UNSPEC_INDNTPOFF
:
7856 output_addr_const (file
, op
);
7857 fputs ("@INDNTPOFF", file
);
7867 /* Split one or more DImode RTL references into pairs of SImode
7868 references. The RTL can be REG, offsettable MEM, integer constant, or
7869 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7870 split and "num" is its length. lo_half and hi_half are output arrays
7871 that parallel "operands". */
7874 split_di (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
7878 rtx op
= operands
[num
];
7880 /* simplify_subreg refuse to split volatile memory addresses,
7881 but we still have to handle it. */
7882 if (GET_CODE (op
) == MEM
)
7884 lo_half
[num
] = adjust_address (op
, SImode
, 0);
7885 hi_half
[num
] = adjust_address (op
, SImode
, 4);
7889 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
7890 GET_MODE (op
) == VOIDmode
7891 ? DImode
: GET_MODE (op
), 0);
7892 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
7893 GET_MODE (op
) == VOIDmode
7894 ? DImode
: GET_MODE (op
), 4);
7898 /* Split one or more TImode RTL references into pairs of SImode
7899 references. The RTL can be REG, offsettable MEM, integer constant, or
7900 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7901 split and "num" is its length. lo_half and hi_half are output arrays
7902 that parallel "operands". */
7905 split_ti (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
7909 rtx op
= operands
[num
];
7911 /* simplify_subreg refuse to split volatile memory addresses, but we
7912 still have to handle it. */
7913 if (GET_CODE (op
) == MEM
)
7915 lo_half
[num
] = adjust_address (op
, DImode
, 0);
7916 hi_half
[num
] = adjust_address (op
, DImode
, 8);
7920 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
7921 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
7926 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7927 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7928 is the expression of the binary operation. The output may either be
7929 emitted here, or returned to the caller, like all output_* functions.
7931 There is no guarantee that the operands are the same mode, as they
7932 might be within FLOAT or FLOAT_EXTEND expressions. */
7934 #ifndef SYSV386_COMPAT
7935 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7936 wants to fix the assemblers because that causes incompatibility
7937 with gcc. No-one wants to fix gcc because that causes
7938 incompatibility with assemblers... You can use the option of
7939 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7940 #define SYSV386_COMPAT 1
7944 output_387_binary_op (rtx insn
, rtx
*operands
)
7946 static char buf
[30];
7949 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]) | SSE_REG_P (operands
[2]);
7951 #ifdef ENABLE_CHECKING
7952 /* Even if we do not want to check the inputs, this documents input
7953 constraints. Which helps in understanding the following code. */
7954 if (STACK_REG_P (operands
[0])
7955 && ((REG_P (operands
[1])
7956 && REGNO (operands
[0]) == REGNO (operands
[1])
7957 && (STACK_REG_P (operands
[2]) || GET_CODE (operands
[2]) == MEM
))
7958 || (REG_P (operands
[2])
7959 && REGNO (operands
[0]) == REGNO (operands
[2])
7960 && (STACK_REG_P (operands
[1]) || GET_CODE (operands
[1]) == MEM
)))
7961 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
7967 switch (GET_CODE (operands
[3]))
7970 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7971 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7979 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7980 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7988 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7989 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7997 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7998 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8012 if (GET_MODE (operands
[0]) == SFmode
)
8013 strcat (buf
, "ss\t{%2, %0|%0, %2}");
8015 strcat (buf
, "sd\t{%2, %0|%0, %2}");
8020 switch (GET_CODE (operands
[3]))
8024 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
8026 rtx temp
= operands
[2];
8027 operands
[2] = operands
[1];
8031 /* know operands[0] == operands[1]. */
8033 if (GET_CODE (operands
[2]) == MEM
)
8039 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8041 if (STACK_TOP_P (operands
[0]))
8042 /* How is it that we are storing to a dead operand[2]?
8043 Well, presumably operands[1] is dead too. We can't
8044 store the result to st(0) as st(0) gets popped on this
8045 instruction. Instead store to operands[2] (which I
8046 think has to be st(1)). st(1) will be popped later.
8047 gcc <= 2.8.1 didn't have this check and generated
8048 assembly code that the Unixware assembler rejected. */
8049 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8051 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8055 if (STACK_TOP_P (operands
[0]))
8056 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8058 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8063 if (GET_CODE (operands
[1]) == MEM
)
8069 if (GET_CODE (operands
[2]) == MEM
)
8075 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8078 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8079 derived assemblers, confusingly reverse the direction of
8080 the operation for fsub{r} and fdiv{r} when the
8081 destination register is not st(0). The Intel assembler
8082 doesn't have this brain damage. Read !SYSV386_COMPAT to
8083 figure out what the hardware really does. */
8084 if (STACK_TOP_P (operands
[0]))
8085 p
= "{p\t%0, %2|rp\t%2, %0}";
8087 p
= "{rp\t%2, %0|p\t%0, %2}";
8089 if (STACK_TOP_P (operands
[0]))
8090 /* As above for fmul/fadd, we can't store to st(0). */
8091 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8093 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8098 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
8101 if (STACK_TOP_P (operands
[0]))
8102 p
= "{rp\t%0, %1|p\t%1, %0}";
8104 p
= "{p\t%1, %0|rp\t%0, %1}";
8106 if (STACK_TOP_P (operands
[0]))
8107 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8109 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8114 if (STACK_TOP_P (operands
[0]))
8116 if (STACK_TOP_P (operands
[1]))
8117 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8119 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8122 else if (STACK_TOP_P (operands
[1]))
8125 p
= "{\t%1, %0|r\t%0, %1}";
8127 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8133 p
= "{r\t%2, %0|\t%0, %2}";
8135 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8148 /* Output code to initialize control word copies used by
8149 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
8150 is set to control word rounding downwards. */
8152 emit_i387_cw_initialization (rtx normal
, rtx round_down
)
8154 rtx reg
= gen_reg_rtx (HImode
);
8156 emit_insn (gen_x86_fnstcw_1 (normal
));
8157 emit_move_insn (reg
, normal
);
8158 if (!TARGET_PARTIAL_REG_STALL
&& !optimize_size
8160 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
8162 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0xc00)));
8163 emit_move_insn (round_down
, reg
);
8166 /* Output code for INSN to convert a float to a signed int. OPERANDS
8167 are the insn operands. The output may be [HSD]Imode and the input
8168 operand may be [SDX]Fmode. */
8171 output_fix_trunc (rtx insn
, rtx
*operands
)
8173 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
8174 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
8176 /* Jump through a hoop or two for DImode, since the hardware has no
8177 non-popping instruction. We used to do this a different way, but
8178 that was somewhat fragile and broke with post-reload splitters. */
8179 if (dimode_p
&& !stack_top_dies
)
8180 output_asm_insn ("fld\t%y1", operands
);
8182 if (!STACK_TOP_P (operands
[1]))
8185 if (GET_CODE (operands
[0]) != MEM
)
8188 output_asm_insn ("fldcw\t%3", operands
);
8189 if (stack_top_dies
|| dimode_p
)
8190 output_asm_insn ("fistp%z0\t%0", operands
);
8192 output_asm_insn ("fist%z0\t%0", operands
);
8193 output_asm_insn ("fldcw\t%2", operands
);
8198 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8199 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8200 when fucom should be used. */
8203 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
8206 rtx cmp_op0
= operands
[0];
8207 rtx cmp_op1
= operands
[1];
8208 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]);
8213 cmp_op1
= operands
[2];
8217 if (GET_MODE (operands
[0]) == SFmode
)
8219 return "ucomiss\t{%1, %0|%0, %1}";
8221 return "comiss\t{%1, %0|%0, %1}";
8224 return "ucomisd\t{%1, %0|%0, %1}";
8226 return "comisd\t{%1, %0|%0, %1}";
8229 if (! STACK_TOP_P (cmp_op0
))
8232 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
8234 if (STACK_REG_P (cmp_op1
)
8236 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
8237 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
8239 /* If both the top of the 387 stack dies, and the other operand
8240 is also a stack register that dies, then this must be a
8241 `fcompp' float compare */
8245 /* There is no double popping fcomi variant. Fortunately,
8246 eflags is immune from the fstp's cc clobbering. */
8248 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
8250 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
8258 return "fucompp\n\tfnstsw\t%0";
8260 return "fcompp\n\tfnstsw\t%0";
8273 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8275 static const char * const alt
[24] =
8287 "fcomi\t{%y1, %0|%0, %y1}",
8288 "fcomip\t{%y1, %0|%0, %y1}",
8289 "fucomi\t{%y1, %0|%0, %y1}",
8290 "fucomip\t{%y1, %0|%0, %y1}",
8297 "fcom%z2\t%y2\n\tfnstsw\t%0",
8298 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8299 "fucom%z2\t%y2\n\tfnstsw\t%0",
8300 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8302 "ficom%z2\t%y2\n\tfnstsw\t%0",
8303 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8311 mask
= eflags_p
<< 3;
8312 mask
|= (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
) << 2;
8313 mask
|= unordered_p
<< 1;
8314 mask
|= stack_top_dies
;
8327 ix86_output_addr_vec_elt (FILE *file
, int value
)
8329 const char *directive
= ASM_LONG
;
8334 directive
= ASM_QUAD
;
8340 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
8344 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
8347 fprintf (file
, "%s%s%d-%s%d\n",
8348 ASM_LONG
, LPREFIX
, value
, LPREFIX
, rel
);
8349 else if (HAVE_AS_GOTOFF_IN_DATA
)
8350 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
8352 else if (TARGET_MACHO
)
8354 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
8355 machopic_output_function_base_name (file
);
8356 fprintf(file
, "\n");
8360 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
8361 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
8364 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8368 ix86_expand_clear (rtx dest
)
8372 /* We play register width games, which are only valid after reload. */
8373 if (!reload_completed
)
8376 /* Avoid HImode and its attendant prefix byte. */
8377 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
8378 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
8380 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
8382 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8383 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
8385 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, 17));
8386 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
8392 /* X is an unchanging MEM. If it is a constant pool reference, return
8393 the constant pool rtx, else NULL. */
8396 maybe_get_pool_constant (rtx x
)
8398 x
= ix86_delegitimize_address (XEXP (x
, 0));
8400 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8401 return get_pool_constant (x
);
8407 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
8409 int strict
= (reload_in_progress
|| reload_completed
);
8411 enum tls_model model
;
8416 model
= tls_symbolic_operand (op1
, Pmode
);
8419 op1
= legitimize_tls_address (op1
, model
, true);
8420 op1
= force_operand (op1
, op0
);
8425 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
8430 rtx temp
= ((reload_in_progress
8431 || ((op0
&& GET_CODE (op0
) == REG
)
8433 ? op0
: gen_reg_rtx (Pmode
));
8434 op1
= machopic_indirect_data_reference (op1
, temp
);
8435 op1
= machopic_legitimize_pic_address (op1
, mode
,
8436 temp
== op1
? 0 : temp
);
8438 else if (MACHOPIC_INDIRECT
)
8439 op1
= machopic_indirect_data_reference (op1
, 0);
8443 if (GET_CODE (op0
) == MEM
)
8444 op1
= force_reg (Pmode
, op1
);
8446 op1
= legitimize_address (op1
, op1
, Pmode
);
8447 #endif /* TARGET_MACHO */
8451 if (GET_CODE (op0
) == MEM
8452 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
8453 || !push_operand (op0
, mode
))
8454 && GET_CODE (op1
) == MEM
)
8455 op1
= force_reg (mode
, op1
);
8457 if (push_operand (op0
, mode
)
8458 && ! general_no_elim_operand (op1
, mode
))
8459 op1
= copy_to_mode_reg (mode
, op1
);
8461 /* Force large constants in 64bit compilation into register
8462 to get them CSEed. */
8463 if (TARGET_64BIT
&& mode
== DImode
8464 && immediate_operand (op1
, mode
)
8465 && !x86_64_zero_extended_value (op1
)
8466 && !register_operand (op0
, mode
)
8467 && optimize
&& !reload_completed
&& !reload_in_progress
)
8468 op1
= copy_to_mode_reg (mode
, op1
);
8470 if (FLOAT_MODE_P (mode
))
8472 /* If we are loading a floating point constant to a register,
8473 force the value to memory now, since we'll get better code
8474 out the back end. */
8478 else if (GET_CODE (op1
) == CONST_DOUBLE
)
8480 op1
= validize_mem (force_const_mem (mode
, op1
));
8481 if (!register_operand (op0
, mode
))
8483 rtx temp
= gen_reg_rtx (mode
);
8484 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
8485 emit_move_insn (op0
, temp
);
8492 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
8496 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
8498 /* Force constants other than zero into memory. We do not know how
8499 the instructions used to build constants modify the upper 64 bits
8500 of the register, once we have that information we may be able
8501 to handle some of them more efficiently. */
8502 if ((reload_in_progress
| reload_completed
) == 0
8503 && register_operand (operands
[0], mode
)
8504 && CONSTANT_P (operands
[1]) && operands
[1] != CONST0_RTX (mode
))
8505 operands
[1] = validize_mem (force_const_mem (mode
, operands
[1]));
8507 /* Make operand1 a register if it isn't already. */
8509 && !register_operand (operands
[0], mode
)
8510 && !register_operand (operands
[1], mode
))
8512 rtx temp
= force_reg (GET_MODE (operands
[1]), operands
[1]);
8513 emit_move_insn (operands
[0], temp
);
8517 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], operands
[1]));
8520 /* Attempt to expand a binary operator. Make the expansion closer to the
8521 actual machine, then just general_operand, which will allow 3 separate
8522 memory references (one output, two input) in a single insn. */
8525 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
8528 int matching_memory
;
8529 rtx src1
, src2
, dst
, op
, clob
;
8535 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8536 if (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
8537 && (rtx_equal_p (dst
, src2
)
8538 || immediate_operand (src1
, mode
)))
8545 /* If the destination is memory, and we do not have matching source
8546 operands, do things in registers. */
8547 matching_memory
= 0;
8548 if (GET_CODE (dst
) == MEM
)
8550 if (rtx_equal_p (dst
, src1
))
8551 matching_memory
= 1;
8552 else if (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
8553 && rtx_equal_p (dst
, src2
))
8554 matching_memory
= 2;
8556 dst
= gen_reg_rtx (mode
);
8559 /* Both source operands cannot be in memory. */
8560 if (GET_CODE (src1
) == MEM
&& GET_CODE (src2
) == MEM
)
8562 if (matching_memory
!= 2)
8563 src2
= force_reg (mode
, src2
);
8565 src1
= force_reg (mode
, src1
);
8568 /* If the operation is not commutable, source 1 cannot be a constant
8569 or non-matching memory. */
8570 if ((CONSTANT_P (src1
)
8571 || (!matching_memory
&& GET_CODE (src1
) == MEM
))
8572 && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
8573 src1
= force_reg (mode
, src1
);
8575 /* If optimizing, copy to regs to improve CSE */
8576 if (optimize
&& ! no_new_pseudos
)
8578 if (GET_CODE (dst
) == MEM
)
8579 dst
= gen_reg_rtx (mode
);
8580 if (GET_CODE (src1
) == MEM
)
8581 src1
= force_reg (mode
, src1
);
8582 if (GET_CODE (src2
) == MEM
)
8583 src2
= force_reg (mode
, src2
);
8586 /* Emit the instruction. */
8588 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
8589 if (reload_in_progress
)
8591 /* Reload doesn't know about the flags register, and doesn't know that
8592 it doesn't want to clobber it. We can only do this with PLUS. */
8599 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
8600 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
8603 /* Fix up the destination if needed. */
8604 if (dst
!= operands
[0])
8605 emit_move_insn (operands
[0], dst
);
8608 /* Return TRUE or FALSE depending on whether the binary operator meets the
8609 appropriate constraints. */
8612 ix86_binary_operator_ok (enum rtx_code code
,
8613 enum machine_mode mode ATTRIBUTE_UNUSED
,
8616 /* Both source operands cannot be in memory. */
8617 if (GET_CODE (operands
[1]) == MEM
&& GET_CODE (operands
[2]) == MEM
)
8619 /* If the operation is not commutable, source 1 cannot be a constant. */
8620 if (CONSTANT_P (operands
[1]) && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
8622 /* If the destination is memory, we must have a matching source operand. */
8623 if (GET_CODE (operands
[0]) == MEM
8624 && ! (rtx_equal_p (operands
[0], operands
[1])
8625 || (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
8626 && rtx_equal_p (operands
[0], operands
[2]))))
8628 /* If the operation is not commutable and the source 1 is memory, we must
8629 have a matching destination. */
8630 if (GET_CODE (operands
[1]) == MEM
8631 && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
8632 && ! rtx_equal_p (operands
[0], operands
[1]))
8637 /* Attempt to expand a unary operator. Make the expansion closer to the
8638 actual machine, then just general_operand, which will allow 2 separate
8639 memory references (one output, one input) in a single insn. */
8642 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
8645 int matching_memory
;
8646 rtx src
, dst
, op
, clob
;
8651 /* If the destination is memory, and we do not have matching source
8652 operands, do things in registers. */
8653 matching_memory
= 0;
8654 if (GET_CODE (dst
) == MEM
)
8656 if (rtx_equal_p (dst
, src
))
8657 matching_memory
= 1;
8659 dst
= gen_reg_rtx (mode
);
8662 /* When source operand is memory, destination must match. */
8663 if (!matching_memory
&& GET_CODE (src
) == MEM
)
8664 src
= force_reg (mode
, src
);
8666 /* If optimizing, copy to regs to improve CSE */
8667 if (optimize
&& ! no_new_pseudos
)
8669 if (GET_CODE (dst
) == MEM
)
8670 dst
= gen_reg_rtx (mode
);
8671 if (GET_CODE (src
) == MEM
)
8672 src
= force_reg (mode
, src
);
8675 /* Emit the instruction. */
8677 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
8678 if (reload_in_progress
|| code
== NOT
)
8680 /* Reload doesn't know about the flags register, and doesn't know that
8681 it doesn't want to clobber it. */
8688 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
8689 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
8692 /* Fix up the destination if needed. */
8693 if (dst
!= operands
[0])
8694 emit_move_insn (operands
[0], dst
);
8697 /* Return TRUE or FALSE depending on whether the unary operator meets the
8698 appropriate constraints. */
8701 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
8702 enum machine_mode mode ATTRIBUTE_UNUSED
,
8703 rtx operands
[2] ATTRIBUTE_UNUSED
)
8705 /* If one of operands is memory, source and destination must match. */
8706 if ((GET_CODE (operands
[0]) == MEM
8707 || GET_CODE (operands
[1]) == MEM
)
8708 && ! rtx_equal_p (operands
[0], operands
[1]))
8713 /* Return TRUE or FALSE depending on whether the first SET in INSN
8714 has source and destination with matching CC modes, and that the
8715 CC mode is at least as constrained as REQ_MODE. */
8718 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
8721 enum machine_mode set_mode
;
8723 set
= PATTERN (insn
);
8724 if (GET_CODE (set
) == PARALLEL
)
8725 set
= XVECEXP (set
, 0, 0);
8726 if (GET_CODE (set
) != SET
)
8728 if (GET_CODE (SET_SRC (set
)) != COMPARE
)
8731 set_mode
= GET_MODE (SET_DEST (set
));
8735 if (req_mode
!= CCNOmode
8736 && (req_mode
!= CCmode
8737 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
8741 if (req_mode
== CCGCmode
)
8745 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
8749 if (req_mode
== CCZmode
)
8759 return (GET_MODE (SET_SRC (set
)) == set_mode
);
8762 /* Generate insn patterns to do an integer compare of OPERANDS. */
8765 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
8767 enum machine_mode cmpmode
;
8770 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
8771 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
8773 /* This is very simple, but making the interface the same as in the
8774 FP case makes the rest of the code easier. */
8775 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
8776 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
8778 /* Return the test that should be put into the flags user, i.e.
8779 the bcc, scc, or cmov instruction. */
8780 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
8783 /* Figure out whether to use ordered or unordered fp comparisons.
8784 Return the appropriate mode to use. */
8787 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
8789 /* ??? In order to make all comparisons reversible, we do all comparisons
8790 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8791 all forms trapping and nontrapping comparisons, we can make inequality
8792 comparisons trapping again, since it results in better code when using
8793 FCOM based compares. */
8794 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
8798 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
8800 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
8801 return ix86_fp_compare_mode (code
);
8804 /* Only zero flag is needed. */
8806 case NE
: /* ZF!=0 */
8808 /* Codes needing carry flag. */
8809 case GEU
: /* CF=0 */
8810 case GTU
: /* CF=0 & ZF=0 */
8811 case LTU
: /* CF=1 */
8812 case LEU
: /* CF=1 | ZF=1 */
8814 /* Codes possibly doable only with sign flag when
8815 comparing against zero. */
8816 case GE
: /* SF=OF or SF=0 */
8817 case LT
: /* SF<>OF or SF=1 */
8818 if (op1
== const0_rtx
)
8821 /* For other cases Carry flag is not required. */
8823 /* Codes doable only with sign flag when comparing
8824 against zero, but we miss jump instruction for it
8825 so we need to use relational tests against overflow
8826 that thus needs to be zero. */
8827 case GT
: /* ZF=0 & SF=OF */
8828 case LE
: /* ZF=1 | SF<>OF */
8829 if (op1
== const0_rtx
)
8833 /* strcmp pattern do (use flags) and combine may ask us for proper
8842 /* Return the fixed registers used for condition codes. */
8845 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
8852 /* If two condition code modes are compatible, return a condition code
8853 mode which is compatible with both. Otherwise, return
8856 static enum machine_mode
8857 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
8862 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
8865 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
8866 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
8894 /* These are only compatible with themselves, which we already
8900 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8903 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED
)
8905 enum rtx_code swapped_code
= swap_condition (code
);
8906 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
8907 || (ix86_fp_comparison_cost (swapped_code
)
8908 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
8911 /* Swap, force into registers, or otherwise massage the two operands
8912 to a fp comparison. The operands are updated in place; the new
8913 comparison code is returned. */
8915 static enum rtx_code
8916 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
8918 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
8919 rtx op0
= *pop0
, op1
= *pop1
;
8920 enum machine_mode op_mode
= GET_MODE (op0
);
8921 int is_sse
= SSE_REG_P (op0
) | SSE_REG_P (op1
);
8923 /* All of the unordered compare instructions only work on registers.
8924 The same is true of the XFmode compare instructions. The same is
8925 true of the fcomi compare instructions. */
8928 && (fpcmp_mode
== CCFPUmode
8929 || op_mode
== XFmode
8930 || ix86_use_fcomi_compare (code
)))
8932 op0
= force_reg (op_mode
, op0
);
8933 op1
= force_reg (op_mode
, op1
);
8937 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8938 things around if they appear profitable, otherwise force op0
8941 if (standard_80387_constant_p (op0
) == 0
8942 || (GET_CODE (op0
) == MEM
8943 && ! (standard_80387_constant_p (op1
) == 0
8944 || GET_CODE (op1
) == MEM
)))
8947 tmp
= op0
, op0
= op1
, op1
= tmp
;
8948 code
= swap_condition (code
);
8951 if (GET_CODE (op0
) != REG
)
8952 op0
= force_reg (op_mode
, op0
);
8954 if (CONSTANT_P (op1
))
8956 if (standard_80387_constant_p (op1
))
8957 op1
= force_reg (op_mode
, op1
);
8959 op1
= validize_mem (force_const_mem (op_mode
, op1
));
8963 /* Try to rearrange the comparison to make it cheaper. */
8964 if (ix86_fp_comparison_cost (code
)
8965 > ix86_fp_comparison_cost (swap_condition (code
))
8966 && (GET_CODE (op1
) == REG
|| !no_new_pseudos
))
8969 tmp
= op0
, op0
= op1
, op1
= tmp
;
8970 code
= swap_condition (code
);
8971 if (GET_CODE (op0
) != REG
)
8972 op0
= force_reg (op_mode
, op0
);
8980 /* Convert comparison codes we use to represent FP comparison to integer
8981 code that will result in proper branch. Return UNKNOWN if no such code
8983 static enum rtx_code
8984 ix86_fp_compare_code_to_integer (enum rtx_code code
)
9013 /* Split comparison code CODE into comparisons we can do using branch
9014 instructions. BYPASS_CODE is comparison code for branch that will
9015 branch around FIRST_CODE and SECOND_CODE. If some of branches
9016 is not required, set value to NIL.
9017 We never require more than two branches. */
9019 ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*bypass_code
,
9020 enum rtx_code
*first_code
,
9021 enum rtx_code
*second_code
)
9027 /* The fcomi comparison sets flags as follows:
9037 case GT
: /* GTU - CF=0 & ZF=0 */
9038 case GE
: /* GEU - CF=0 */
9039 case ORDERED
: /* PF=0 */
9040 case UNORDERED
: /* PF=1 */
9041 case UNEQ
: /* EQ - ZF=1 */
9042 case UNLT
: /* LTU - CF=1 */
9043 case UNLE
: /* LEU - CF=1 | ZF=1 */
9044 case LTGT
: /* EQ - ZF=0 */
9046 case LT
: /* LTU - CF=1 - fails on unordered */
9048 *bypass_code
= UNORDERED
;
9050 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
9052 *bypass_code
= UNORDERED
;
9054 case EQ
: /* EQ - ZF=1 - fails on unordered */
9056 *bypass_code
= UNORDERED
;
9058 case NE
: /* NE - ZF=0 - fails on unordered */
9060 *second_code
= UNORDERED
;
9062 case UNGE
: /* GEU - CF=0 - fails on unordered */
9064 *second_code
= UNORDERED
;
9066 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
9068 *second_code
= UNORDERED
;
9073 if (!TARGET_IEEE_FP
)
9080 /* Return cost of comparison done fcom + arithmetics operations on AX.
9081 All following functions do use number of instructions as a cost metrics.
9082 In future this should be tweaked to compute bytes for optimize_size and
9083 take into account performance of various instructions on various CPUs. */
9085 ix86_fp_comparison_arithmetics_cost (enum rtx_code code
)
9087 if (!TARGET_IEEE_FP
)
9089 /* The cost of code output by ix86_expand_fp_compare. */
9117 /* Return cost of comparison done using fcomi operation.
9118 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9120 ix86_fp_comparison_fcomi_cost (enum rtx_code code
)
9122 enum rtx_code bypass_code
, first_code
, second_code
;
9123 /* Return arbitrarily high cost when instruction is not supported - this
9124 prevents gcc from using it. */
9127 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
9128 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 2;
9131 /* Return cost of comparison done using sahf operation.
9132 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9134 ix86_fp_comparison_sahf_cost (enum rtx_code code
)
9136 enum rtx_code bypass_code
, first_code
, second_code
;
9137 /* Return arbitrarily high cost when instruction is not preferred - this
9138 avoids gcc from using it. */
9139 if (!TARGET_USE_SAHF
&& !optimize_size
)
9141 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
9142 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 3;
9145 /* Compute cost of the comparison done using any method.
9146 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9148 ix86_fp_comparison_cost (enum rtx_code code
)
9150 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
9153 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
9154 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
9156 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
9157 if (min
> sahf_cost
)
9159 if (min
> fcomi_cost
)
9164 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9167 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
,
9168 rtx
*second_test
, rtx
*bypass_test
)
9170 enum machine_mode fpcmp_mode
, intcmp_mode
;
9172 int cost
= ix86_fp_comparison_cost (code
);
9173 enum rtx_code bypass_code
, first_code
, second_code
;
9175 fpcmp_mode
= ix86_fp_compare_mode (code
);
9176 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
9179 *second_test
= NULL_RTX
;
9181 *bypass_test
= NULL_RTX
;
9183 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
9185 /* Do fcomi/sahf based test when profitable. */
9186 if ((bypass_code
== NIL
|| bypass_test
)
9187 && (second_code
== NIL
|| second_test
)
9188 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
9192 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
9193 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
9199 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
9200 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
9202 scratch
= gen_reg_rtx (HImode
);
9203 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
9204 emit_insn (gen_x86_sahf_1 (scratch
));
9207 /* The FP codes work out to act like unsigned. */
9208 intcmp_mode
= fpcmp_mode
;
9210 if (bypass_code
!= NIL
)
9211 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
9212 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
9214 if (second_code
!= NIL
)
9215 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
9216 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
9221 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9222 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
9223 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
9225 scratch
= gen_reg_rtx (HImode
);
9226 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
9228 /* In the unordered case, we have to check C2 for NaN's, which
9229 doesn't happen to work out to anything nice combination-wise.
9230 So do some bit twiddling on the value we've got in AH to come
9231 up with an appropriate set of condition codes. */
9233 intcmp_mode
= CCNOmode
;
9238 if (code
== GT
|| !TARGET_IEEE_FP
)
9240 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
9245 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
9246 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
9247 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
9248 intcmp_mode
= CCmode
;
9254 if (code
== LT
&& TARGET_IEEE_FP
)
9256 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
9257 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
9258 intcmp_mode
= CCmode
;
9263 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
9269 if (code
== GE
|| !TARGET_IEEE_FP
)
9271 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
9276 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
9277 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
9284 if (code
== LE
&& TARGET_IEEE_FP
)
9286 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
9287 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
9288 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
9289 intcmp_mode
= CCmode
;
9294 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
9300 if (code
== EQ
&& TARGET_IEEE_FP
)
9302 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
9303 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
9304 intcmp_mode
= CCmode
;
9309 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
9316 if (code
== NE
&& TARGET_IEEE_FP
)
9318 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
9319 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
9325 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
9331 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
9335 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
9344 /* Return the test that should be put into the flags user, i.e.
9345 the bcc, scc, or cmov instruction. */
9346 return gen_rtx_fmt_ee (code
, VOIDmode
,
9347 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
9352 ix86_expand_compare (enum rtx_code code
, rtx
*second_test
, rtx
*bypass_test
)
9355 op0
= ix86_compare_op0
;
9356 op1
= ix86_compare_op1
;
9359 *second_test
= NULL_RTX
;
9361 *bypass_test
= NULL_RTX
;
9363 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
9364 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
9365 second_test
, bypass_test
);
9367 ret
= ix86_expand_int_compare (code
, op0
, op1
);
9372 /* Return true if the CODE will result in nontrivial jump sequence. */
9374 ix86_fp_jump_nontrivial_p (enum rtx_code code
)
9376 enum rtx_code bypass_code
, first_code
, second_code
;
9379 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
9380 return bypass_code
!= NIL
|| second_code
!= NIL
;
9384 ix86_expand_branch (enum rtx_code code
, rtx label
)
9388 switch (GET_MODE (ix86_compare_op0
))
9394 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
9395 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
9396 gen_rtx_LABEL_REF (VOIDmode
, label
),
9398 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
9407 enum rtx_code bypass_code
, first_code
, second_code
;
9409 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
9412 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
9414 /* Check whether we will use the natural sequence with one jump. If
9415 so, we can expand jump early. Otherwise delay expansion by
9416 creating compound insn to not confuse optimizers. */
9417 if (bypass_code
== NIL
&& second_code
== NIL
9420 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
9421 gen_rtx_LABEL_REF (VOIDmode
, label
),
9426 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
9427 ix86_compare_op0
, ix86_compare_op1
);
9428 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
9429 gen_rtx_LABEL_REF (VOIDmode
, label
),
9431 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
9433 use_fcomi
= ix86_use_fcomi_compare (code
);
9434 vec
= rtvec_alloc (3 + !use_fcomi
);
9435 RTVEC_ELT (vec
, 0) = tmp
;
9437 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
9439 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
9442 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
9444 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
9452 /* Expand DImode branch into multiple compare+branch. */
9454 rtx lo
[2], hi
[2], label2
;
9455 enum rtx_code code1
, code2
, code3
;
9457 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
9459 tmp
= ix86_compare_op0
;
9460 ix86_compare_op0
= ix86_compare_op1
;
9461 ix86_compare_op1
= tmp
;
9462 code
= swap_condition (code
);
9464 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
9465 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
9467 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9468 avoid two branches. This costs one extra insn, so disable when
9469 optimizing for size. */
9471 if ((code
== EQ
|| code
== NE
)
9473 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
9478 if (hi
[1] != const0_rtx
)
9479 xor1
= expand_binop (SImode
, xor_optab
, xor1
, hi
[1],
9480 NULL_RTX
, 0, OPTAB_WIDEN
);
9483 if (lo
[1] != const0_rtx
)
9484 xor0
= expand_binop (SImode
, xor_optab
, xor0
, lo
[1],
9485 NULL_RTX
, 0, OPTAB_WIDEN
);
9487 tmp
= expand_binop (SImode
, ior_optab
, xor1
, xor0
,
9488 NULL_RTX
, 0, OPTAB_WIDEN
);
9490 ix86_compare_op0
= tmp
;
9491 ix86_compare_op1
= const0_rtx
;
9492 ix86_expand_branch (code
, label
);
9496 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9497 op1 is a constant and the low word is zero, then we can just
9498 examine the high word. */
9500 if (GET_CODE (hi
[1]) == CONST_INT
&& lo
[1] == const0_rtx
)
9503 case LT
: case LTU
: case GE
: case GEU
:
9504 ix86_compare_op0
= hi
[0];
9505 ix86_compare_op1
= hi
[1];
9506 ix86_expand_branch (code
, label
);
9512 /* Otherwise, we need two or three jumps. */
9514 label2
= gen_label_rtx ();
9517 code2
= swap_condition (code
);
9518 code3
= unsigned_condition (code
);
9522 case LT
: case GT
: case LTU
: case GTU
:
9525 case LE
: code1
= LT
; code2
= GT
; break;
9526 case GE
: code1
= GT
; code2
= LT
; break;
9527 case LEU
: code1
= LTU
; code2
= GTU
; break;
9528 case GEU
: code1
= GTU
; code2
= LTU
; break;
9530 case EQ
: code1
= NIL
; code2
= NE
; break;
9531 case NE
: code2
= NIL
; break;
9539 * if (hi(a) < hi(b)) goto true;
9540 * if (hi(a) > hi(b)) goto false;
9541 * if (lo(a) < lo(b)) goto true;
9545 ix86_compare_op0
= hi
[0];
9546 ix86_compare_op1
= hi
[1];
9549 ix86_expand_branch (code1
, label
);
9551 ix86_expand_branch (code2
, label2
);
9553 ix86_compare_op0
= lo
[0];
9554 ix86_compare_op1
= lo
[1];
9555 ix86_expand_branch (code3
, label
);
9558 emit_label (label2
);
9567 /* Split branch based on floating point condition. */
9569 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
9570 rtx target1
, rtx target2
, rtx tmp
)
9573 rtx label
= NULL_RTX
;
9575 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
9578 if (target2
!= pc_rtx
)
9581 code
= reverse_condition_maybe_unordered (code
);
9586 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
9587 tmp
, &second
, &bypass
);
9589 if (split_branch_probability
>= 0)
9591 /* Distribute the probabilities across the jumps.
9592 Assume the BYPASS and SECOND to be always test
9594 probability
= split_branch_probability
;
9596 /* Value of 1 is low enough to make no need for probability
9597 to be updated. Later we may run some experiments and see
9598 if unordered values are more frequent in practice. */
9600 bypass_probability
= 1;
9602 second_probability
= 1;
9604 if (bypass
!= NULL_RTX
)
9606 label
= gen_label_rtx ();
9607 i
= emit_jump_insn (gen_rtx_SET
9609 gen_rtx_IF_THEN_ELSE (VOIDmode
,
9611 gen_rtx_LABEL_REF (VOIDmode
,
9614 if (bypass_probability
>= 0)
9616 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
9617 GEN_INT (bypass_probability
),
9620 i
= emit_jump_insn (gen_rtx_SET
9622 gen_rtx_IF_THEN_ELSE (VOIDmode
,
9623 condition
, target1
, target2
)));
9624 if (probability
>= 0)
9626 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
9627 GEN_INT (probability
),
9629 if (second
!= NULL_RTX
)
9631 i
= emit_jump_insn (gen_rtx_SET
9633 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
9635 if (second_probability
>= 0)
9637 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
9638 GEN_INT (second_probability
),
9641 if (label
!= NULL_RTX
)
9646 ix86_expand_setcc (enum rtx_code code
, rtx dest
)
9648 rtx ret
, tmp
, tmpreg
, equiv
;
9649 rtx second_test
, bypass_test
;
9651 if (GET_MODE (ix86_compare_op0
) == DImode
9653 return 0; /* FAIL */
9655 if (GET_MODE (dest
) != QImode
)
9658 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9659 PUT_MODE (ret
, QImode
);
9664 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
9665 if (bypass_test
|| second_test
)
9667 rtx test
= second_test
;
9669 rtx tmp2
= gen_reg_rtx (QImode
);
9676 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
9678 PUT_MODE (test
, QImode
);
9679 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
9682 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
9684 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
9687 /* Attach a REG_EQUAL note describing the comparison result. */
9688 equiv
= simplify_gen_relational (code
, QImode
,
9689 GET_MODE (ix86_compare_op0
),
9690 ix86_compare_op0
, ix86_compare_op1
);
9691 set_unique_reg_note (get_last_insn (), REG_EQUAL
, equiv
);
9693 return 1; /* DONE */
9696 /* Expand comparison setting or clearing carry flag. Return true when
9697 successful and set pop for the operation. */
9699 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
9701 enum machine_mode mode
=
9702 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
9704 /* Do not handle DImode compares that go trought special path. Also we can't
9705 deal with FP compares yet. This is possible to add. */
9706 if ((mode
== DImode
&& !TARGET_64BIT
))
9708 if (FLOAT_MODE_P (mode
))
9710 rtx second_test
= NULL
, bypass_test
= NULL
;
9711 rtx compare_op
, compare_seq
;
9713 /* Shortcut: following common codes never translate into carry flag compares. */
9714 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
9715 || code
== ORDERED
|| code
== UNORDERED
)
9718 /* These comparisons require zero flag; swap operands so they won't. */
9719 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
9725 code
= swap_condition (code
);
9728 /* Try to expand the comparison and verify that we end up with carry flag
9729 based comparison. This is fails to be true only when we decide to expand
9730 comparison using arithmetic that is not too common scenario. */
9732 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
9733 &second_test
, &bypass_test
);
9734 compare_seq
= get_insns ();
9737 if (second_test
|| bypass_test
)
9739 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
9740 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
9741 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
9743 code
= GET_CODE (compare_op
);
9744 if (code
!= LTU
&& code
!= GEU
)
9746 emit_insn (compare_seq
);
9750 if (!INTEGRAL_MODE_P (mode
))
9758 /* Convert a==0 into (unsigned)a<1. */
9761 if (op1
!= const0_rtx
)
9764 code
= (code
== EQ
? LTU
: GEU
);
9767 /* Convert a>b into b<a or a>=b-1. */
9770 if (GET_CODE (op1
) == CONST_INT
)
9772 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
9773 /* Bail out on overflow. We still can swap operands but that
9774 would force loading of the constant into register. */
9775 if (op1
== const0_rtx
9776 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
9778 code
= (code
== GTU
? GEU
: LTU
);
9785 code
= (code
== GTU
? LTU
: GEU
);
9789 /* Convert a>=0 into (unsigned)a<0x80000000. */
9792 if (mode
== DImode
|| op1
!= const0_rtx
)
9794 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
9795 code
= (code
== LT
? GEU
: LTU
);
9799 if (mode
== DImode
|| op1
!= constm1_rtx
)
9801 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
9802 code
= (code
== LE
? GEU
: LTU
);
9808 /* Swapping operands may cause constant to appear as first operand. */
9809 if (!nonimmediate_operand (op0
, VOIDmode
))
9813 op0
= force_reg (mode
, op0
);
9815 ix86_compare_op0
= op0
;
9816 ix86_compare_op1
= op1
;
9817 *pop
= ix86_expand_compare (code
, NULL
, NULL
);
9818 if (GET_CODE (*pop
) != LTU
&& GET_CODE (*pop
) != GEU
)
9824 ix86_expand_int_movcc (rtx operands
[])
9826 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
9827 rtx compare_seq
, compare_op
;
9828 rtx second_test
, bypass_test
;
9829 enum machine_mode mode
= GET_MODE (operands
[0]);
9830 bool sign_bit_compare_p
= false;;
9833 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9834 compare_seq
= get_insns ();
9837 compare_code
= GET_CODE (compare_op
);
9839 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
9840 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
9841 sign_bit_compare_p
= true;
9843 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9844 HImode insns, we'd be swallowed in word prefix ops. */
9846 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
9847 && (mode
!= DImode
|| TARGET_64BIT
)
9848 && GET_CODE (operands
[2]) == CONST_INT
9849 && GET_CODE (operands
[3]) == CONST_INT
)
9851 rtx out
= operands
[0];
9852 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
9853 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
9857 /* Sign bit compares are better done using shifts than we do by using
9859 if (sign_bit_compare_p
9860 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
9861 ix86_compare_op1
, &compare_op
))
9863 /* Detect overlap between destination and compare sources. */
9866 if (!sign_bit_compare_p
)
9870 compare_code
= GET_CODE (compare_op
);
9872 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
9873 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
9876 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
9879 /* To simplify rest of code, restrict to the GEU case. */
9880 if (compare_code
== LTU
)
9882 HOST_WIDE_INT tmp
= ct
;
9885 compare_code
= reverse_condition (compare_code
);
9886 code
= reverse_condition (code
);
9891 PUT_CODE (compare_op
,
9892 reverse_condition_maybe_unordered
9893 (GET_CODE (compare_op
)));
9895 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
9899 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
9900 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
9901 tmp
= gen_reg_rtx (mode
);
9904 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
9906 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
9910 if (code
== GT
|| code
== GE
)
9911 code
= reverse_condition (code
);
9914 HOST_WIDE_INT tmp
= ct
;
9919 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
9920 ix86_compare_op1
, VOIDmode
, 0, -1);
9933 tmp
= expand_simple_binop (mode
, PLUS
,
9935 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
9946 tmp
= expand_simple_binop (mode
, IOR
,
9948 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
9950 else if (diff
== -1 && ct
)
9960 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
9962 tmp
= expand_simple_binop (mode
, PLUS
,
9963 copy_rtx (tmp
), GEN_INT (cf
),
9964 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
9972 * andl cf - ct, dest
9982 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
9985 tmp
= expand_simple_binop (mode
, AND
,
9987 gen_int_mode (cf
- ct
, mode
),
9988 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
9990 tmp
= expand_simple_binop (mode
, PLUS
,
9991 copy_rtx (tmp
), GEN_INT (ct
),
9992 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
9995 if (!rtx_equal_p (tmp
, out
))
9996 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
9998 return 1; /* DONE */
10004 tmp
= ct
, ct
= cf
, cf
= tmp
;
10006 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
10008 /* We may be reversing unordered compare to normal compare, that
10009 is not valid in general (we may convert non-trapping condition
10010 to trapping one), however on i386 we currently emit all
10011 comparisons unordered. */
10012 compare_code
= reverse_condition_maybe_unordered (compare_code
);
10013 code
= reverse_condition_maybe_unordered (code
);
10017 compare_code
= reverse_condition (compare_code
);
10018 code
= reverse_condition (code
);
10022 compare_code
= NIL
;
10023 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
10024 && GET_CODE (ix86_compare_op1
) == CONST_INT
)
10026 if (ix86_compare_op1
== const0_rtx
10027 && (code
== LT
|| code
== GE
))
10028 compare_code
= code
;
10029 else if (ix86_compare_op1
== constm1_rtx
)
10033 else if (code
== GT
)
10038 /* Optimize dest = (op0 < 0) ? -1 : cf. */
10039 if (compare_code
!= NIL
10040 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
10041 && (cf
== -1 || ct
== -1))
10043 /* If lea code below could be used, only optimize
10044 if it results in a 2 insn sequence. */
10046 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
10047 || diff
== 3 || diff
== 5 || diff
== 9)
10048 || (compare_code
== LT
&& ct
== -1)
10049 || (compare_code
== GE
&& cf
== -1))
10052 * notl op1 (if necessary)
10060 code
= reverse_condition (code
);
10063 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
10064 ix86_compare_op1
, VOIDmode
, 0, -1);
10066 out
= expand_simple_binop (mode
, IOR
,
10068 out
, 1, OPTAB_DIRECT
);
10069 if (out
!= operands
[0])
10070 emit_move_insn (operands
[0], out
);
10072 return 1; /* DONE */
10077 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
10078 || diff
== 3 || diff
== 5 || diff
== 9)
10079 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
10080 && (mode
!= DImode
|| x86_64_sign_extended_value (GEN_INT (cf
))))
10086 * lea cf(dest*(ct-cf)),dest
10090 * This also catches the degenerate setcc-only case.
10096 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
10097 ix86_compare_op1
, VOIDmode
, 0, 1);
10100 /* On x86_64 the lea instruction operates on Pmode, so we need
10101 to get arithmetics done in proper mode to match. */
10103 tmp
= copy_rtx (out
);
10107 out1
= copy_rtx (out
);
10108 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
10112 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
10118 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
10121 if (!rtx_equal_p (tmp
, out
))
10124 out
= force_operand (tmp
, copy_rtx (out
));
10126 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
10128 if (!rtx_equal_p (out
, operands
[0]))
10129 emit_move_insn (operands
[0], copy_rtx (out
));
10131 return 1; /* DONE */
10135 * General case: Jumpful:
10136 * xorl dest,dest cmpl op1, op2
10137 * cmpl op1, op2 movl ct, dest
10138 * setcc dest jcc 1f
10139 * decl dest movl cf, dest
10140 * andl (cf-ct),dest 1:
10143 * Size 20. Size 14.
10145 * This is reasonably steep, but branch mispredict costs are
10146 * high on modern cpus, so consider failing only if optimizing
10150 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
10151 && BRANCH_COST
>= 2)
10157 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
10158 /* We may be reversing unordered compare to normal compare,
10159 that is not valid in general (we may convert non-trapping
10160 condition to trapping one), however on i386 we currently
10161 emit all comparisons unordered. */
10162 code
= reverse_condition_maybe_unordered (code
);
10165 code
= reverse_condition (code
);
10166 if (compare_code
!= NIL
)
10167 compare_code
= reverse_condition (compare_code
);
10171 if (compare_code
!= NIL
)
10173 /* notl op1 (if needed)
10178 For x < 0 (resp. x <= -1) there will be no notl,
10179 so if possible swap the constants to get rid of the
10181 True/false will be -1/0 while code below (store flag
10182 followed by decrement) is 0/-1, so the constants need
10183 to be exchanged once more. */
10185 if (compare_code
== GE
|| !cf
)
10187 code
= reverse_condition (code
);
10192 HOST_WIDE_INT tmp
= cf
;
10197 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
10198 ix86_compare_op1
, VOIDmode
, 0, -1);
10202 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
10203 ix86_compare_op1
, VOIDmode
, 0, 1);
10205 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
10206 copy_rtx (out
), 1, OPTAB_DIRECT
);
10209 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
10210 gen_int_mode (cf
- ct
, mode
),
10211 copy_rtx (out
), 1, OPTAB_DIRECT
);
10213 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
10214 copy_rtx (out
), 1, OPTAB_DIRECT
);
10215 if (!rtx_equal_p (out
, operands
[0]))
10216 emit_move_insn (operands
[0], copy_rtx (out
));
10218 return 1; /* DONE */
10222 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
10224 /* Try a few things more with specific constants and a variable. */
10227 rtx var
, orig_out
, out
, tmp
;
10229 if (BRANCH_COST
<= 2)
10230 return 0; /* FAIL */
10232 /* If one of the two operands is an interesting constant, load a
10233 constant with the above and mask it in with a logical operation. */
10235 if (GET_CODE (operands
[2]) == CONST_INT
)
10238 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
10239 operands
[3] = constm1_rtx
, op
= and_optab
;
10240 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
10241 operands
[3] = const0_rtx
, op
= ior_optab
;
10243 return 0; /* FAIL */
10245 else if (GET_CODE (operands
[3]) == CONST_INT
)
10248 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
10249 operands
[2] = constm1_rtx
, op
= and_optab
;
10250 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
10251 operands
[2] = const0_rtx
, op
= ior_optab
;
10253 return 0; /* FAIL */
10256 return 0; /* FAIL */
10258 orig_out
= operands
[0];
10259 tmp
= gen_reg_rtx (mode
);
10262 /* Recurse to get the constant loaded. */
10263 if (ix86_expand_int_movcc (operands
) == 0)
10264 return 0; /* FAIL */
10266 /* Mask in the interesting variable. */
10267 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
10269 if (!rtx_equal_p (out
, orig_out
))
10270 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
10272 return 1; /* DONE */
10276 * For comparison with above,
10286 if (! nonimmediate_operand (operands
[2], mode
))
10287 operands
[2] = force_reg (mode
, operands
[2]);
10288 if (! nonimmediate_operand (operands
[3], mode
))
10289 operands
[3] = force_reg (mode
, operands
[3]);
10291 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
10293 rtx tmp
= gen_reg_rtx (mode
);
10294 emit_move_insn (tmp
, operands
[3]);
10297 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
10299 rtx tmp
= gen_reg_rtx (mode
);
10300 emit_move_insn (tmp
, operands
[2]);
10304 if (! register_operand (operands
[2], VOIDmode
)
10306 || ! register_operand (operands
[3], VOIDmode
)))
10307 operands
[2] = force_reg (mode
, operands
[2]);
10310 && ! register_operand (operands
[3], VOIDmode
))
10311 operands
[3] = force_reg (mode
, operands
[3]);
10313 emit_insn (compare_seq
);
10314 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
10315 gen_rtx_IF_THEN_ELSE (mode
,
10316 compare_op
, operands
[2],
10319 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
10320 gen_rtx_IF_THEN_ELSE (mode
,
10322 copy_rtx (operands
[3]),
10323 copy_rtx (operands
[0]))));
10325 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
10326 gen_rtx_IF_THEN_ELSE (mode
,
10328 copy_rtx (operands
[2]),
10329 copy_rtx (operands
[0]))));
10331 return 1; /* DONE */
10335 ix86_expand_fp_movcc (rtx operands
[])
10337 enum rtx_code code
;
10339 rtx compare_op
, second_test
, bypass_test
;
10341 /* For SF/DFmode conditional moves based on comparisons
10342 in same mode, we may want to use SSE min/max instructions. */
10343 if (((TARGET_SSE_MATH
&& GET_MODE (operands
[0]) == SFmode
)
10344 || (TARGET_SSE2
&& TARGET_SSE_MATH
&& GET_MODE (operands
[0]) == DFmode
))
10345 && GET_MODE (ix86_compare_op0
) == GET_MODE (operands
[0])
10346 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
10347 && (!TARGET_IEEE_FP
10348 || (GET_CODE (operands
[1]) != LTGT
&& GET_CODE (operands
[1]) != UNEQ
))
10349 /* We may be called from the post-reload splitter. */
10350 && (!REG_P (operands
[0])
10351 || SSE_REG_P (operands
[0])
10352 || REGNO (operands
[0]) >= FIRST_PSEUDO_REGISTER
))
10354 rtx op0
= ix86_compare_op0
, op1
= ix86_compare_op1
;
10355 code
= GET_CODE (operands
[1]);
10357 /* See if we have (cross) match between comparison operands and
10358 conditional move operands. */
10359 if (rtx_equal_p (operands
[2], op1
))
10364 code
= reverse_condition_maybe_unordered (code
);
10366 if (rtx_equal_p (operands
[2], op0
) && rtx_equal_p (operands
[3], op1
))
10368 /* Check for min operation. */
10369 if (code
== LT
|| code
== UNLE
)
10377 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
10378 if (memory_operand (op0
, VOIDmode
))
10379 op0
= force_reg (GET_MODE (operands
[0]), op0
);
10380 if (GET_MODE (operands
[0]) == SFmode
)
10381 emit_insn (gen_minsf3 (operands
[0], op0
, op1
));
10383 emit_insn (gen_mindf3 (operands
[0], op0
, op1
));
10386 /* Check for max operation. */
10387 if (code
== GT
|| code
== UNGE
)
10395 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
10396 if (memory_operand (op0
, VOIDmode
))
10397 op0
= force_reg (GET_MODE (operands
[0]), op0
);
10398 if (GET_MODE (operands
[0]) == SFmode
)
10399 emit_insn (gen_maxsf3 (operands
[0], op0
, op1
));
10401 emit_insn (gen_maxdf3 (operands
[0], op0
, op1
));
10405 /* Manage condition to be sse_comparison_operator. In case we are
10406 in non-ieee mode, try to canonicalize the destination operand
10407 to be first in the comparison - this helps reload to avoid extra
10409 if (!sse_comparison_operator (operands
[1], VOIDmode
)
10410 || (rtx_equal_p (operands
[0], ix86_compare_op1
) && !TARGET_IEEE_FP
))
10412 rtx tmp
= ix86_compare_op0
;
10413 ix86_compare_op0
= ix86_compare_op1
;
10414 ix86_compare_op1
= tmp
;
10415 operands
[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands
[1])),
10416 VOIDmode
, ix86_compare_op0
,
10419 /* Similarly try to manage result to be first operand of conditional
10420 move. We also don't support the NE comparison on SSE, so try to
10422 if ((rtx_equal_p (operands
[0], operands
[3])
10423 && (!TARGET_IEEE_FP
|| GET_CODE (operands
[1]) != EQ
))
10424 || (GET_CODE (operands
[1]) == NE
&& TARGET_IEEE_FP
))
10426 rtx tmp
= operands
[2];
10427 operands
[2] = operands
[3];
10429 operands
[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10430 (GET_CODE (operands
[1])),
10431 VOIDmode
, ix86_compare_op0
,
10434 if (GET_MODE (operands
[0]) == SFmode
)
10435 emit_insn (gen_sse_movsfcc (operands
[0], operands
[1],
10436 operands
[2], operands
[3],
10437 ix86_compare_op0
, ix86_compare_op1
));
10439 emit_insn (gen_sse_movdfcc (operands
[0], operands
[1],
10440 operands
[2], operands
[3],
10441 ix86_compare_op0
, ix86_compare_op1
));
10445 /* The floating point conditional move instructions don't directly
10446 support conditions resulting from a signed integer comparison. */
10448 code
= GET_CODE (operands
[1]);
10449 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
10451 /* The floating point conditional move instructions don't directly
10452 support signed integer comparisons. */
10454 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
10456 if (second_test
!= NULL
|| bypass_test
!= NULL
)
10458 tmp
= gen_reg_rtx (QImode
);
10459 ix86_expand_setcc (code
, tmp
);
10461 ix86_compare_op0
= tmp
;
10462 ix86_compare_op1
= const0_rtx
;
10463 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
10465 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
10467 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
10468 emit_move_insn (tmp
, operands
[3]);
10471 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
10473 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
10474 emit_move_insn (tmp
, operands
[2]);
10478 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
10479 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
10484 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
10485 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
10490 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
10491 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
10499 /* Expand conditional increment or decrement using adb/sbb instructions.
10500 The default case using setcc followed by the conditional move can be
10501 done by generic code. */
10503 ix86_expand_int_addcc (rtx operands
[])
10505 enum rtx_code code
= GET_CODE (operands
[1]);
10507 rtx val
= const0_rtx
;
10508 bool fpcmp
= false;
10509 enum machine_mode mode
= GET_MODE (operands
[0]);
10511 if (operands
[3] != const1_rtx
10512 && operands
[3] != constm1_rtx
)
10514 if (!ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
10515 ix86_compare_op1
, &compare_op
))
10517 code
= GET_CODE (compare_op
);
10519 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
10520 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
10523 code
= ix86_fp_compare_code_to_integer (code
);
10530 PUT_CODE (compare_op
,
10531 reverse_condition_maybe_unordered
10532 (GET_CODE (compare_op
)));
10534 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
10536 PUT_MODE (compare_op
, mode
);
10538 /* Construct either adc or sbb insn. */
10539 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
10541 switch (GET_MODE (operands
[0]))
10544 emit_insn (gen_subqi3_carry (operands
[0], operands
[2], val
, compare_op
));
10547 emit_insn (gen_subhi3_carry (operands
[0], operands
[2], val
, compare_op
));
10550 emit_insn (gen_subsi3_carry (operands
[0], operands
[2], val
, compare_op
));
10553 emit_insn (gen_subdi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
10561 switch (GET_MODE (operands
[0]))
10564 emit_insn (gen_addqi3_carry (operands
[0], operands
[2], val
, compare_op
));
10567 emit_insn (gen_addhi3_carry (operands
[0], operands
[2], val
, compare_op
));
10570 emit_insn (gen_addsi3_carry (operands
[0], operands
[2], val
, compare_op
));
10573 emit_insn (gen_adddi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
10579 return 1; /* DONE */
10583 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10584 works for floating pointer parameters and nonoffsetable memories.
10585 For pushes, it returns just stack offsets; the values will be saved
10586 in the right order. Maximally three parts are generated. */
10589 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
10594 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
10596 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
10598 if (GET_CODE (operand
) == REG
&& MMX_REGNO_P (REGNO (operand
)))
10600 if (size
< 2 || size
> 3)
10603 /* Optimize constant pool reference to immediates. This is used by fp
10604 moves, that force all constants to memory to allow combining. */
10605 if (GET_CODE (operand
) == MEM
&& RTX_UNCHANGING_P (operand
))
10607 rtx tmp
= maybe_get_pool_constant (operand
);
10612 if (GET_CODE (operand
) == MEM
&& !offsettable_memref_p (operand
))
10614 /* The only non-offsetable memories we handle are pushes. */
10615 if (! push_operand (operand
, VOIDmode
))
10618 operand
= copy_rtx (operand
);
10619 PUT_MODE (operand
, Pmode
);
10620 parts
[0] = parts
[1] = parts
[2] = operand
;
10622 else if (!TARGET_64BIT
)
10624 if (mode
== DImode
)
10625 split_di (&operand
, 1, &parts
[0], &parts
[1]);
10628 if (REG_P (operand
))
10630 if (!reload_completed
)
10632 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
10633 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
10635 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
10637 else if (offsettable_memref_p (operand
))
10639 operand
= adjust_address (operand
, SImode
, 0);
10640 parts
[0] = operand
;
10641 parts
[1] = adjust_address (operand
, SImode
, 4);
10643 parts
[2] = adjust_address (operand
, SImode
, 8);
10645 else if (GET_CODE (operand
) == CONST_DOUBLE
)
10650 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
10654 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
10655 parts
[2] = gen_int_mode (l
[2], SImode
);
10658 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
10663 parts
[1] = gen_int_mode (l
[1], SImode
);
10664 parts
[0] = gen_int_mode (l
[0], SImode
);
10672 if (mode
== TImode
)
10673 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
10674 if (mode
== XFmode
|| mode
== TFmode
)
10676 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
10677 if (REG_P (operand
))
10679 if (!reload_completed
)
10681 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
10682 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
10684 else if (offsettable_memref_p (operand
))
10686 operand
= adjust_address (operand
, DImode
, 0);
10687 parts
[0] = operand
;
10688 parts
[1] = adjust_address (operand
, upper_mode
, 8);
10690 else if (GET_CODE (operand
) == CONST_DOUBLE
)
10695 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
10696 real_to_target (l
, &r
, mode
);
10697 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10698 if (HOST_BITS_PER_WIDE_INT
>= 64)
10701 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
10702 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
10705 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
10706 if (upper_mode
== SImode
)
10707 parts
[1] = gen_int_mode (l
[2], SImode
);
10708 else if (HOST_BITS_PER_WIDE_INT
>= 64)
10711 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
10712 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
10715 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
10725 /* Emit insns to perform a move or push of DI, DF, and XF values.
10726 Return false when normal moves are needed; true when all required
10727 insns have been emitted. Operands 2-4 contain the input values
10728 int the correct order; operands 5-7 contain the output values. */
10731 ix86_split_long_move (rtx operands
[])
10736 int collisions
= 0;
10737 enum machine_mode mode
= GET_MODE (operands
[0]);
10739 /* The DFmode expanders may ask us to move double.
10740 For 64bit target this is single move. By hiding the fact
10741 here we simplify i386.md splitters. */
10742 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
10744 /* Optimize constant pool reference to immediates. This is used by
10745 fp moves, that force all constants to memory to allow combining. */
10747 if (GET_CODE (operands
[1]) == MEM
10748 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
10749 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
10750 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
10751 if (push_operand (operands
[0], VOIDmode
))
10753 operands
[0] = copy_rtx (operands
[0]);
10754 PUT_MODE (operands
[0], Pmode
);
10757 operands
[0] = gen_lowpart (DImode
, operands
[0]);
10758 operands
[1] = gen_lowpart (DImode
, operands
[1]);
10759 emit_move_insn (operands
[0], operands
[1]);
10763 /* The only non-offsettable memory we handle is push. */
10764 if (push_operand (operands
[0], VOIDmode
))
10766 else if (GET_CODE (operands
[0]) == MEM
10767 && ! offsettable_memref_p (operands
[0]))
10770 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
10771 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
10773 /* When emitting push, take care for source operands on the stack. */
10774 if (push
&& GET_CODE (operands
[1]) == MEM
10775 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
10778 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
10779 XEXP (part
[1][2], 0));
10780 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
10781 XEXP (part
[1][1], 0));
10784 /* We need to do copy in the right order in case an address register
10785 of the source overlaps the destination. */
10786 if (REG_P (part
[0][0]) && GET_CODE (part
[1][0]) == MEM
)
10788 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
10790 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
10793 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
10796 /* Collision in the middle part can be handled by reordering. */
10797 if (collisions
== 1 && nparts
== 3
10798 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
10801 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
10802 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
10805 /* If there are more collisions, we can't handle it by reordering.
10806 Do an lea to the last part and use only one colliding move. */
10807 else if (collisions
> 1)
10813 base
= part
[0][nparts
- 1];
10815 /* Handle the case when the last part isn't valid for lea.
10816 Happens in 64-bit mode storing the 12-byte XFmode. */
10817 if (GET_MODE (base
) != Pmode
)
10818 base
= gen_rtx_REG (Pmode
, REGNO (base
));
10820 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
10821 part
[1][0] = replace_equiv_address (part
[1][0], base
);
10822 part
[1][1] = replace_equiv_address (part
[1][1],
10823 plus_constant (base
, UNITS_PER_WORD
));
10825 part
[1][2] = replace_equiv_address (part
[1][2],
10826 plus_constant (base
, 8));
10836 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
10837 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, GEN_INT (-4)));
10838 emit_move_insn (part
[0][2], part
[1][2]);
10843 /* In 64bit mode we don't have 32bit push available. In case this is
10844 register, it is OK - we will just use larger counterpart. We also
10845 retype memory - these comes from attempt to avoid REX prefix on
10846 moving of second half of TFmode value. */
10847 if (GET_MODE (part
[1][1]) == SImode
)
10849 if (GET_CODE (part
[1][1]) == MEM
)
10850 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
10851 else if (REG_P (part
[1][1]))
10852 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
10855 if (GET_MODE (part
[1][0]) == SImode
)
10856 part
[1][0] = part
[1][1];
10859 emit_move_insn (part
[0][1], part
[1][1]);
10860 emit_move_insn (part
[0][0], part
[1][0]);
10864 /* Choose correct order to not overwrite the source before it is copied. */
10865 if ((REG_P (part
[0][0])
10866 && REG_P (part
[1][1])
10867 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
10869 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
10871 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
10875 operands
[2] = part
[0][2];
10876 operands
[3] = part
[0][1];
10877 operands
[4] = part
[0][0];
10878 operands
[5] = part
[1][2];
10879 operands
[6] = part
[1][1];
10880 operands
[7] = part
[1][0];
10884 operands
[2] = part
[0][1];
10885 operands
[3] = part
[0][0];
10886 operands
[5] = part
[1][1];
10887 operands
[6] = part
[1][0];
10894 operands
[2] = part
[0][0];
10895 operands
[3] = part
[0][1];
10896 operands
[4] = part
[0][2];
10897 operands
[5] = part
[1][0];
10898 operands
[6] = part
[1][1];
10899 operands
[7] = part
[1][2];
10903 operands
[2] = part
[0][0];
10904 operands
[3] = part
[0][1];
10905 operands
[5] = part
[1][0];
10906 operands
[6] = part
[1][1];
10909 emit_move_insn (operands
[2], operands
[5]);
10910 emit_move_insn (operands
[3], operands
[6]);
10912 emit_move_insn (operands
[4], operands
[7]);
10918 ix86_split_ashldi (rtx
*operands
, rtx scratch
)
10920 rtx low
[2], high
[2];
10923 if (GET_CODE (operands
[2]) == CONST_INT
)
10925 split_di (operands
, 2, low
, high
);
10926 count
= INTVAL (operands
[2]) & 63;
10930 emit_move_insn (high
[0], low
[1]);
10931 emit_move_insn (low
[0], const0_rtx
);
10934 emit_insn (gen_ashlsi3 (high
[0], high
[0], GEN_INT (count
- 32)));
10938 if (!rtx_equal_p (operands
[0], operands
[1]))
10939 emit_move_insn (operands
[0], operands
[1]);
10940 emit_insn (gen_x86_shld_1 (high
[0], low
[0], GEN_INT (count
)));
10941 emit_insn (gen_ashlsi3 (low
[0], low
[0], GEN_INT (count
)));
10946 if (!rtx_equal_p (operands
[0], operands
[1]))
10947 emit_move_insn (operands
[0], operands
[1]);
10949 split_di (operands
, 1, low
, high
);
10951 emit_insn (gen_x86_shld_1 (high
[0], low
[0], operands
[2]));
10952 emit_insn (gen_ashlsi3 (low
[0], low
[0], operands
[2]));
10954 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
10956 if (! no_new_pseudos
)
10957 scratch
= force_reg (SImode
, const0_rtx
);
10959 emit_move_insn (scratch
, const0_rtx
);
10961 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2],
10965 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
10970 ix86_split_ashrdi (rtx
*operands
, rtx scratch
)
10972 rtx low
[2], high
[2];
10975 if (GET_CODE (operands
[2]) == CONST_INT
)
10977 split_di (operands
, 2, low
, high
);
10978 count
= INTVAL (operands
[2]) & 63;
10982 emit_move_insn (high
[0], high
[1]);
10983 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (31)));
10984 emit_move_insn (low
[0], high
[0]);
10987 else if (count
>= 32)
10989 emit_move_insn (low
[0], high
[1]);
10991 if (! reload_completed
)
10992 emit_insn (gen_ashrsi3 (high
[0], low
[0], GEN_INT (31)));
10995 emit_move_insn (high
[0], low
[0]);
10996 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (31)));
11000 emit_insn (gen_ashrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
11004 if (!rtx_equal_p (operands
[0], operands
[1]))
11005 emit_move_insn (operands
[0], operands
[1]);
11006 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
11007 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (count
)));
11012 if (!rtx_equal_p (operands
[0], operands
[1]))
11013 emit_move_insn (operands
[0], operands
[1]);
11015 split_di (operands
, 1, low
, high
);
11017 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
11018 emit_insn (gen_ashrsi3 (high
[0], high
[0], operands
[2]));
11020 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
11022 if (! no_new_pseudos
)
11023 scratch
= gen_reg_rtx (SImode
);
11024 emit_move_insn (scratch
, high
[0]);
11025 emit_insn (gen_ashrsi3 (scratch
, scratch
, GEN_INT (31)));
11026 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
11030 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
11035 ix86_split_lshrdi (rtx
*operands
, rtx scratch
)
11037 rtx low
[2], high
[2];
11040 if (GET_CODE (operands
[2]) == CONST_INT
)
11042 split_di (operands
, 2, low
, high
);
11043 count
= INTVAL (operands
[2]) & 63;
11047 emit_move_insn (low
[0], high
[1]);
11048 emit_move_insn (high
[0], const0_rtx
);
11051 emit_insn (gen_lshrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
11055 if (!rtx_equal_p (operands
[0], operands
[1]))
11056 emit_move_insn (operands
[0], operands
[1]);
11057 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
11058 emit_insn (gen_lshrsi3 (high
[0], high
[0], GEN_INT (count
)));
11063 if (!rtx_equal_p (operands
[0], operands
[1]))
11064 emit_move_insn (operands
[0], operands
[1]);
11066 split_di (operands
, 1, low
, high
);
11068 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
11069 emit_insn (gen_lshrsi3 (high
[0], high
[0], operands
[2]));
11071 /* Heh. By reversing the arguments, we can reuse this pattern. */
11072 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
11074 if (! no_new_pseudos
)
11075 scratch
= force_reg (SImode
, const0_rtx
);
11077 emit_move_insn (scratch
, const0_rtx
);
11079 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
11083 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
11087 /* Helper function for the string operations below. Dest VARIABLE whether
11088 it is aligned to VALUE bytes. If true, jump to the label. */
11090 ix86_expand_aligntest (rtx variable
, int value
)
11092 rtx label
= gen_label_rtx ();
11093 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
11094 if (GET_MODE (variable
) == DImode
)
11095 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
11097 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
11098 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
11103 /* Adjust COUNTER by the VALUE. */
11105 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
11107 if (GET_MODE (countreg
) == DImode
)
11108 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
11110 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
11113 /* Zero extend possibly SImode EXP to Pmode register. */
11115 ix86_zero_extend_to_Pmode (rtx exp
)
11118 if (GET_MODE (exp
) == VOIDmode
)
11119 return force_reg (Pmode
, exp
);
11120 if (GET_MODE (exp
) == Pmode
)
11121 return copy_to_mode_reg (Pmode
, exp
);
11122 r
= gen_reg_rtx (Pmode
);
11123 emit_insn (gen_zero_extendsidi2 (r
, exp
));
11127 /* Expand string move (memcpy) operation. Use i386 string operations when
11128 profitable. expand_clrmem contains similar code. */
11130 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
)
11132 rtx srcreg
, destreg
, countreg
, srcexp
, destexp
;
11133 enum machine_mode counter_mode
;
11134 HOST_WIDE_INT align
= 0;
11135 unsigned HOST_WIDE_INT count
= 0;
11137 if (GET_CODE (align_exp
) == CONST_INT
)
11138 align
= INTVAL (align_exp
);
11140 /* Can't use any of this if the user has appropriated esi or edi. */
11141 if (global_regs
[4] || global_regs
[5])
11144 /* This simple hack avoids all inlining code and simplifies code below. */
11145 if (!TARGET_ALIGN_STRINGOPS
)
11148 if (GET_CODE (count_exp
) == CONST_INT
)
11150 count
= INTVAL (count_exp
);
11151 if (!TARGET_INLINE_ALL_STRINGOPS
&& count
> 64)
11155 /* Figure out proper mode for counter. For 32bits it is always SImode,
11156 for 64bits use SImode when possible, otherwise DImode.
11157 Set count to number of bytes copied when known at compile time. */
11158 if (!TARGET_64BIT
|| GET_MODE (count_exp
) == SImode
11159 || x86_64_zero_extended_value (count_exp
))
11160 counter_mode
= SImode
;
11162 counter_mode
= DImode
;
11164 if (counter_mode
!= SImode
&& counter_mode
!= DImode
)
11167 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
11168 if (destreg
!= XEXP (dst
, 0))
11169 dst
= replace_equiv_address_nv (dst
, destreg
);
11170 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
11171 if (srcreg
!= XEXP (src
, 0))
11172 src
= replace_equiv_address_nv (src
, srcreg
);
11174 /* When optimizing for size emit simple rep ; movsb instruction for
11175 counts not divisible by 4. */
11177 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
11179 emit_insn (gen_cld ());
11180 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
11181 destexp
= gen_rtx_PLUS (Pmode
, destreg
, countreg
);
11182 srcexp
= gen_rtx_PLUS (Pmode
, srcreg
, countreg
);
11183 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
, countreg
,
11187 /* For constant aligned (or small unaligned) copies use rep movsl
11188 followed by code copying the rest. For PentiumPro ensure 8 byte
11189 alignment to allow rep movsl acceleration. */
11191 else if (count
!= 0
11193 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
11194 || optimize_size
|| count
< (unsigned int) 64))
11196 unsigned HOST_WIDE_INT offset
= 0;
11197 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
11198 rtx srcmem
, dstmem
;
11200 emit_insn (gen_cld ());
11201 if (count
& ~(size
- 1))
11203 countreg
= copy_to_mode_reg (counter_mode
,
11204 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
11205 & (TARGET_64BIT
? -1 : 0x3fffffff)));
11206 countreg
= ix86_zero_extend_to_Pmode (countreg
);
11208 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
11209 GEN_INT (size
== 4 ? 2 : 3));
11210 srcexp
= gen_rtx_PLUS (Pmode
, destexp
, srcreg
);
11211 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
11213 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
,
11214 countreg
, destexp
, srcexp
));
11215 offset
= count
& ~(size
- 1);
11217 if (size
== 8 && (count
& 0x04))
11219 srcmem
= adjust_automodify_address_nv (src
, SImode
, srcreg
,
11221 dstmem
= adjust_automodify_address_nv (dst
, SImode
, destreg
,
11223 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11228 srcmem
= adjust_automodify_address_nv (src
, HImode
, srcreg
,
11230 dstmem
= adjust_automodify_address_nv (dst
, HImode
, destreg
,
11232 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11237 srcmem
= adjust_automodify_address_nv (src
, QImode
, srcreg
,
11239 dstmem
= adjust_automodify_address_nv (dst
, QImode
, destreg
,
11241 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11244 /* The generic code based on the glibc implementation:
11245 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11246 allowing accelerated copying there)
11247 - copy the data using rep movsl
11248 - copy the rest. */
11253 rtx srcmem
, dstmem
;
11254 int desired_alignment
= (TARGET_PENTIUMPRO
11255 && (count
== 0 || count
>= (unsigned int) 260)
11256 ? 8 : UNITS_PER_WORD
);
11257 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11258 dst
= change_address (dst
, BLKmode
, destreg
);
11259 src
= change_address (src
, BLKmode
, srcreg
);
11261 /* In case we don't know anything about the alignment, default to
11262 library version, since it is usually equally fast and result in
11265 Also emit call when we know that the count is large and call overhead
11266 will not be important. */
11267 if (!TARGET_INLINE_ALL_STRINGOPS
11268 && (align
< UNITS_PER_WORD
|| !TARGET_REP_MOVL_OPTIMAL
))
11271 if (TARGET_SINGLE_STRINGOP
)
11272 emit_insn (gen_cld ());
11274 countreg2
= gen_reg_rtx (Pmode
);
11275 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
11277 /* We don't use loops to align destination and to copy parts smaller
11278 than 4 bytes, because gcc is able to optimize such code better (in
11279 the case the destination or the count really is aligned, gcc is often
11280 able to predict the branches) and also it is friendlier to the
11281 hardware branch prediction.
11283 Using loops is beneficial for generic case, because we can
11284 handle small counts using the loops. Many CPUs (such as Athlon)
11285 have large REP prefix setup costs.
11287 This is quite costly. Maybe we can revisit this decision later or
11288 add some customizability to this code. */
11290 if (count
== 0 && align
< desired_alignment
)
11292 label
= gen_label_rtx ();
11293 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
11294 LEU
, 0, counter_mode
, 1, label
);
11298 rtx label
= ix86_expand_aligntest (destreg
, 1);
11299 srcmem
= change_address (src
, QImode
, srcreg
);
11300 dstmem
= change_address (dst
, QImode
, destreg
);
11301 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11302 ix86_adjust_counter (countreg
, 1);
11303 emit_label (label
);
11304 LABEL_NUSES (label
) = 1;
11308 rtx label
= ix86_expand_aligntest (destreg
, 2);
11309 srcmem
= change_address (src
, HImode
, srcreg
);
11310 dstmem
= change_address (dst
, HImode
, destreg
);
11311 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11312 ix86_adjust_counter (countreg
, 2);
11313 emit_label (label
);
11314 LABEL_NUSES (label
) = 1;
11316 if (align
<= 4 && desired_alignment
> 4)
11318 rtx label
= ix86_expand_aligntest (destreg
, 4);
11319 srcmem
= change_address (src
, SImode
, srcreg
);
11320 dstmem
= change_address (dst
, SImode
, destreg
);
11321 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11322 ix86_adjust_counter (countreg
, 4);
11323 emit_label (label
);
11324 LABEL_NUSES (label
) = 1;
11327 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
11329 emit_label (label
);
11330 LABEL_NUSES (label
) = 1;
11333 if (!TARGET_SINGLE_STRINGOP
)
11334 emit_insn (gen_cld ());
11337 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
11339 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, GEN_INT (3));
11343 emit_insn (gen_lshrsi3 (countreg2
, countreg
, const2_rtx
));
11344 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, const2_rtx
);
11346 srcexp
= gen_rtx_PLUS (Pmode
, destexp
, srcreg
);
11347 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
11348 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
,
11349 countreg2
, destexp
, srcexp
));
11353 emit_label (label
);
11354 LABEL_NUSES (label
) = 1;
11356 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
11358 srcmem
= change_address (src
, SImode
, srcreg
);
11359 dstmem
= change_address (dst
, SImode
, destreg
);
11360 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11362 if ((align
<= 4 || count
== 0) && TARGET_64BIT
)
11364 rtx label
= ix86_expand_aligntest (countreg
, 4);
11365 srcmem
= change_address (src
, SImode
, srcreg
);
11366 dstmem
= change_address (dst
, SImode
, destreg
);
11367 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11368 emit_label (label
);
11369 LABEL_NUSES (label
) = 1;
11371 if (align
> 2 && count
!= 0 && (count
& 2))
11373 srcmem
= change_address (src
, HImode
, srcreg
);
11374 dstmem
= change_address (dst
, HImode
, destreg
);
11375 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11377 if (align
<= 2 || count
== 0)
11379 rtx label
= ix86_expand_aligntest (countreg
, 2);
11380 srcmem
= change_address (src
, HImode
, srcreg
);
11381 dstmem
= change_address (dst
, HImode
, destreg
);
11382 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11383 emit_label (label
);
11384 LABEL_NUSES (label
) = 1;
11386 if (align
> 1 && count
!= 0 && (count
& 1))
11388 srcmem
= change_address (src
, QImode
, srcreg
);
11389 dstmem
= change_address (dst
, QImode
, destreg
);
11390 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11392 if (align
<= 1 || count
== 0)
11394 rtx label
= ix86_expand_aligntest (countreg
, 1);
11395 srcmem
= change_address (src
, QImode
, srcreg
);
11396 dstmem
= change_address (dst
, QImode
, destreg
);
11397 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11398 emit_label (label
);
11399 LABEL_NUSES (label
) = 1;
11406 /* Expand string clear operation (bzero). Use i386 string operations when
11407 profitable. expand_movmem contains similar code. */
/* Expand a memory-clear (bzero-style) operation: zero COUNT_EXP bytes at DST
   with known alignment ALIGN_EXP, using i386 string ops (rep stos / strset)
   and alignment prologue/epilogue code.  Mirrors the movmem expander above.
   NOTE(review): this text is a corrupted extraction -- statements are split
   across lines, original line numbers are fused in, and several original
   lines (braces, returns, else branches) are missing.  Code left byte
   identical; recover the pristine definition from version control.  */
11409 ix86_expand_clrmem (rtx dst
, rtx count_exp
, rtx align_exp
)
11411 rtx destreg
, zeroreg
, countreg
, destexp
;
11412 enum machine_mode counter_mode
;
11413 HOST_WIDE_INT align
= 0;
11414 unsigned HOST_WIDE_INT count
= 0;
11416 if (GET_CODE (align_exp
) == CONST_INT
)
11417 align
= INTVAL (align_exp
);
11419 /* Can't use any of this if the user has appropriated esi. */
11420 if (global_regs
[4])
11423 /* This simple hack avoids all inlining code and simplifies code below. */
11424 if (!TARGET_ALIGN_STRINGOPS
)
11427 if (GET_CODE (count_exp
) == CONST_INT
)
11429 count
= INTVAL (count_exp
);
11430 if (!TARGET_INLINE_ALL_STRINGOPS
&& count
> 64)
11433 /* Figure out proper mode for counter. For 32bits it is always SImode,
11434 for 64bits use SImode when possible, otherwise DImode.
11435 Set count to number of bytes copied when known at compile time. */
11436 if (!TARGET_64BIT
|| GET_MODE (count_exp
) == SImode
11437 || x86_64_zero_extended_value (count_exp
))
11438 counter_mode
= SImode
;
11440 counter_mode
= DImode
;
11442 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
11443 if (destreg
!= XEXP (dst
, 0))
11444 dst
= replace_equiv_address_nv (dst
, destreg
);
11446 emit_insn (gen_cld ());
11448 /* When optimizing for size emit simple rep ; movsb instruction for
11449 counts not divisible by 4. */
11451 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
11453 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
11454 zeroreg
= copy_to_mode_reg (QImode
, const0_rtx
);
11455 destexp
= gen_rtx_PLUS (Pmode
, destreg
, countreg
);
11456 emit_insn (gen_rep_stos (destreg
, countreg
, dst
, zeroreg
, destexp
));
11458 else if (count
!= 0
11460 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
11461 || optimize_size
|| count
< (unsigned int) 64))
11463 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
11464 unsigned HOST_WIDE_INT offset
= 0;
11466 zeroreg
= copy_to_mode_reg (size
== 4 ? SImode
: DImode
, const0_rtx
);
11467 if (count
& ~(size
- 1))
11469 countreg
= copy_to_mode_reg (counter_mode
,
11470 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
11471 & (TARGET_64BIT
? -1 : 0x3fffffff)));
11472 countreg
= ix86_zero_extend_to_Pmode (countreg
);
11473 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
, GEN_INT (size
== 4 ? 2 : 3));
11474 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
11475 emit_insn (gen_rep_stos (destreg
, countreg
, dst
, zeroreg
, destexp
));
11476 offset
= count
& ~(size
- 1);
11478 if (size
== 8 && (count
& 0x04))
11480 rtx mem
= adjust_automodify_address_nv (dst
, SImode
, destreg
,
11482 emit_insn (gen_strset (destreg
, mem
,
11483 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
11488 rtx mem
= adjust_automodify_address_nv (dst
, HImode
, destreg
,
11490 emit_insn (gen_strset (destreg
, mem
,
11491 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
11496 rtx mem
= adjust_automodify_address_nv (dst
, QImode
, destreg
,
11498 emit_insn (gen_strset (destreg
, mem
,
11499 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
11506 /* Compute desired alignment of the string operation. */
11507 int desired_alignment
= (TARGET_PENTIUMPRO
11508 && (count
== 0 || count
>= (unsigned int) 260)
11509 ? 8 : UNITS_PER_WORD
);
11511 /* In case we don't know anything about the alignment, default to
11512 library version, since it is usually equally fast and result in
11515 Also emit call when we know that the count is large and call overhead
11516 will not be important. */
11517 if (!TARGET_INLINE_ALL_STRINGOPS
11518 && (align
< UNITS_PER_WORD
|| !TARGET_REP_MOVL_OPTIMAL
))
11521 if (TARGET_SINGLE_STRINGOP
)
11522 emit_insn (gen_cld ());
11524 countreg2
= gen_reg_rtx (Pmode
);
11525 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
11526 zeroreg
= copy_to_mode_reg (Pmode
, const0_rtx
);
11527 /* Get rid of MEM_OFFSET, it won't be accurate. */
11528 dst
= change_address (dst
, BLKmode
, destreg
);
11530 if (count
== 0 && align
< desired_alignment
)
11532 label
= gen_label_rtx ();
11533 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
11534 LEU
, 0, counter_mode
, 1, label
);
11538 rtx label
= ix86_expand_aligntest (destreg
, 1);
11539 emit_insn (gen_strset (destreg
, dst
,
11540 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
11541 ix86_adjust_counter (countreg
, 1);
11542 emit_label (label
);
11543 LABEL_NUSES (label
) = 1;
11547 rtx label
= ix86_expand_aligntest (destreg
, 2);
11548 emit_insn (gen_strset (destreg
, dst
,
11549 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
11550 ix86_adjust_counter (countreg
, 2);
11551 emit_label (label
);
11552 LABEL_NUSES (label
) = 1;
11554 if (align
<= 4 && desired_alignment
> 4)
11556 rtx label
= ix86_expand_aligntest (destreg
, 4);
11557 emit_insn (gen_strset (destreg
, dst
,
11559 ? gen_rtx_SUBREG (SImode
, zeroreg
, 0)
11561 ix86_adjust_counter (countreg
, 4);
11562 emit_label (label
);
11563 LABEL_NUSES (label
) = 1;
11566 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
11568 emit_label (label
);
11569 LABEL_NUSES (label
) = 1;
11573 if (!TARGET_SINGLE_STRINGOP
)
11574 emit_insn (gen_cld ());
11577 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
11579 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, GEN_INT (3));
11583 emit_insn (gen_lshrsi3 (countreg2
, countreg
, const2_rtx
));
11584 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, const2_rtx
);
11586 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
11587 emit_insn (gen_rep_stos (destreg
, countreg2
, dst
, zeroreg
, destexp
));
11591 emit_label (label
);
11592 LABEL_NUSES (label
) = 1;
11595 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
11596 emit_insn (gen_strset (destreg
, dst
,
11597 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
11598 if (TARGET_64BIT
&& (align
<= 4 || count
== 0))
11600 rtx label
= ix86_expand_aligntest (countreg
, 4);
11601 emit_insn (gen_strset (destreg
, dst
,
11602 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
11603 emit_label (label
);
11604 LABEL_NUSES (label
) = 1;
11606 if (align
> 2 && count
!= 0 && (count
& 2))
11607 emit_insn (gen_strset (destreg
, dst
,
11608 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
11609 if (align
<= 2 || count
== 0)
11611 rtx label
= ix86_expand_aligntest (countreg
, 2);
11612 emit_insn (gen_strset (destreg
, dst
,
11613 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
11614 emit_label (label
);
11615 LABEL_NUSES (label
) = 1;
11617 if (align
> 1 && count
!= 0 && (count
& 1))
11618 emit_insn (gen_strset (destreg
, dst
,
11619 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
11620 if (align
<= 1 || count
== 0)
11622 rtx label
= ix86_expand_aligntest (countreg
, 1);
11623 emit_insn (gen_strset (destreg
, dst
,
11624 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
11625 emit_label (label
);
11626 LABEL_NUSES (label
) = 1;
11632 /* Expand strlen. */
/* Expand strlen: compute into OUT the length of the string at SRC with
   terminator EOSCHAR and alignment ALIGN.  Chooses between an unrolled
   word-at-a-time scan (ix86_expand_strlensi_unroll_1) and a repnz-scasb
   sequence (gen_strlenqi_1 over an UNSPEC_SCAS).
   NOTE(review): text corrupted by extraction (lines dropped, statements
   split); code left byte-identical.  */
11634 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
11636 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
11638 /* The generic case of strlen expander is long. Avoid it's
11639 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11641 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
11642 && !TARGET_INLINE_ALL_STRINGOPS
11644 && (GET_CODE (align
) != CONST_INT
|| INTVAL (align
) < 4))
11647 addr
= force_reg (Pmode
, XEXP (src
, 0));
11648 scratch1
= gen_reg_rtx (Pmode
);
11650 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
11653 /* Well it seems that some optimizer does not combine a call like
11654 foo(strlen(bar), strlen(bar));
11655 when the move and the subtraction is done here. It does calculate
11656 the length just once when these instructions are done inside of
11657 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11658 often used and I use one fewer register for the lifetime of
11659 output_strlen_unroll() this is better. */
11661 emit_move_insn (out
, addr
);
11663 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
11665 /* strlensi_unroll_1 returns the address of the zero at the end of
11666 the string, like memchr(), so compute the length by subtracting
11667 the start address. */
11669 emit_insn (gen_subdi3 (out
, out
, addr
));
11671 emit_insn (gen_subsi3 (out
, out
, addr
));
11676 scratch2
= gen_reg_rtx (Pmode
);
11677 scratch3
= gen_reg_rtx (Pmode
);
11678 scratch4
= force_reg (Pmode
, constm1_rtx
);
11680 emit_move_insn (scratch3
, addr
);
11681 eoschar
= force_reg (QImode
, eoschar
);
11683 emit_insn (gen_cld ());
11684 src
= replace_equiv_address_nv (src
, scratch3
);
11686 /* If .md starts supporting :P, this can be done in .md. */
11687 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
11688 scratch4
), UNSPEC_SCAS
);
11689 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
11692 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
11693 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
11697 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
11698 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
11704 /* Expand the appropriate insns for doing strlen if not just doing
11707 out = result, initialized with the start address
11708 align_rtx = alignment of the address.
11709 scratch = scratch register, initialized with the startaddress when
11710 not aligned, otherwise undefined
11712 This is just the body. It needs the initializations mentioned above and
11713 some address computing at the end. These things are done in i386.md. */
/* Body of the unrolled strlen expander: byte-scan up to 3 bytes to reach
   4-byte alignment, then loop reading SImode words and use the classic
   (x - 0x01010101) & ~x & 0x80808080 zero-byte test; finally fix OUT to
   point at the terminating zero.  Initialization and final address math
   are done in i386.md (see comment above in the original file).
   NOTE(review): text corrupted by extraction (dropped lines, split
   statements); code left byte-identical.  */
11716 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
11720 rtx align_2_label
= NULL_RTX
;
11721 rtx align_3_label
= NULL_RTX
;
11722 rtx align_4_label
= gen_label_rtx ();
11723 rtx end_0_label
= gen_label_rtx ();
11725 rtx tmpreg
= gen_reg_rtx (SImode
);
11726 rtx scratch
= gen_reg_rtx (SImode
);
11730 if (GET_CODE (align_rtx
) == CONST_INT
)
11731 align
= INTVAL (align_rtx
);
11733 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11735 /* Is there a known alignment and is it less than 4? */
11738 rtx scratch1
= gen_reg_rtx (Pmode
);
11739 emit_move_insn (scratch1
, out
);
11740 /* Is there a known alignment and is it not 2? */
11743 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
11744 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
11746 /* Leave just the 3 lower bits. */
11747 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
11748 NULL_RTX
, 0, OPTAB_WIDEN
);
11750 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
11751 Pmode
, 1, align_4_label
);
11752 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
11753 Pmode
, 1, align_2_label
);
11754 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
11755 Pmode
, 1, align_3_label
);
11759 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11760 check if is aligned to 4 - byte. */
11762 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
11763 NULL_RTX
, 0, OPTAB_WIDEN
);
11765 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
11766 Pmode
, 1, align_4_label
);
11769 mem
= change_address (src
, QImode
, out
);
11771 /* Now compare the bytes. */
11773 /* Compare the first n unaligned byte on a byte per byte basis. */
11774 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
11775 QImode
, 1, end_0_label
);
11777 /* Increment the address. */
11779 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
11781 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
11783 /* Not needed with an alignment of 2 */
11786 emit_label (align_2_label
);
11788 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
11792 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
11794 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
11796 emit_label (align_3_label
);
11799 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
11803 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
11805 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
11808 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11809 align this loop. It gives only huge programs, but does not help to
11811 emit_label (align_4_label
);
11813 mem
= change_address (src
, SImode
, out
);
11814 emit_move_insn (scratch
, mem
);
11816 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
11818 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
11820 /* This formula yields a nonzero result iff one of the bytes is zero.
11821 This saves three branches inside loop and many cycles. */
11823 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
11824 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
11825 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
11826 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
11827 gen_int_mode (0x80808080, SImode
)));
11828 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
11833 rtx reg
= gen_reg_rtx (SImode
);
11834 rtx reg2
= gen_reg_rtx (Pmode
);
11835 emit_move_insn (reg
, tmpreg
);
11836 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
11838 /* If zero is not in the first two bytes, move two bytes forward. */
11839 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
11840 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
11841 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
11842 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
11843 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
11846 /* Emit lea manually to avoid clobbering of flags. */
11847 emit_insn (gen_rtx_SET (SImode
, reg2
,
11848 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
11850 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
11851 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
11852 emit_insn (gen_rtx_SET (VOIDmode
, out
,
11853 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
11860 rtx end_2_label
= gen_label_rtx ();
11861 /* Is zero in the first two bytes? */
11863 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
11864 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
11865 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
11866 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11867 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
11869 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
11870 JUMP_LABEL (tmp
) = end_2_label
;
11872 /* Not in the first two. Move two bytes forward. */
11873 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
11875 emit_insn (gen_adddi3 (out
, out
, const2_rtx
));
11877 emit_insn (gen_addsi3 (out
, out
, const2_rtx
));
11879 emit_label (end_2_label
);
11883 /* Avoid branch in fixing the byte. */
11884 tmpreg
= gen_lowpart (QImode
, tmpreg
);
11885 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
11886 cmp
= gen_rtx_LTU (Pmode
, gen_rtx_REG (CCmode
, 17), const0_rtx
);
11888 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3), cmp
));
11890 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3), cmp
));
11892 emit_label (end_0_label
);
/* Emit a call (or sibcall when SIBCALL) to FNADDR with argument usage
   CALLARG1/CALLARG2, optional return value RETVAL, and POP bytes popped by
   the callee.  Builds the CALL rtx, records register uses (PIC register,
   AL for varargs SSE count on 64-bit), and forces indirect sibcall targets
   into R11.  NOTE(review): corrupted extraction -- conditional/brace lines
   and some #if/#endif partners are missing (an unmatched "#endif
   TARGET_MACHO" is visible); code left byte-identical.  */
11896 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
11897 rtx callarg2 ATTRIBUTE_UNUSED
,
11898 rtx pop
, int sibcall
)
11900 rtx use
= NULL
, call
;
11902 if (pop
== const0_rtx
)
11904 if (TARGET_64BIT
&& pop
)
11908 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
11909 fnaddr
= machopic_indirect_call_target (fnaddr
);
11911 /* Static functions and indirect calls don't need the pic register. */
11912 if (! TARGET_64BIT
&& flag_pic
11913 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
11914 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
11915 use_reg (&use
, pic_offset_table_rtx
);
11917 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
11919 rtx al
= gen_rtx_REG (QImode
, 0);
11920 emit_move_insn (al
, callarg2
);
11921 use_reg (&use
, al
);
11923 #endif /* TARGET_MACHO */
11925 if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
11927 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
11928 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
11930 if (sibcall
&& TARGET_64BIT
11931 && !constant_call_address_operand (XEXP (fnaddr
, 0), Pmode
))
11934 addr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
11935 fnaddr
= gen_rtx_REG (Pmode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
11936 emit_move_insn (fnaddr
, addr
);
11937 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
11940 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
11942 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
11945 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
11946 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
11947 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
11950 call
= emit_call_insn (call
);
11952 CALL_INSN_FUNCTION_USAGE (call
) = use
;
11956 /* Clear stack slot assignments remembered from previous functions.
11957 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocate and zero a per-function machine_function record; mark the
   fast-prologue register count as not-yet-computed (-1).
   NOTE(review): the extracted text is missing the function's braces and
   its "return f;" line; code left byte-identical.  */
11960 static struct machine_function
*
11961 ix86_init_machine_status (void)
11963 struct machine_function
*f
;
11965 f
= ggc_alloc_cleared (sizeof (struct machine_function
));
11966 f
->use_fast_prologue_epilogue_nregs
= -1;
11971 /* Return a MEM corresponding to a stack slot with mode MODE.
11972 Allocate a new slot if necessary.
11974 The RTL for a function can have several slots available: N is
11975 which slot to use. */
/* Return a MEM for stack slot N of mode MODE, reusing an existing entry in
   the ix86_stack_locals cache or allocating a new GC'd entry and a fresh
   stack slot.  NOTE(review): corrupted extraction -- braces, the abort()
   for out-of-range N, and return statements are missing from the text;
   code left byte-identical.  */
11978 assign_386_stack_local (enum machine_mode mode
, int n
)
11980 struct stack_local_entry
*s
;
11982 if (n
< 0 || n
>= MAX_386_STACK_LOCALS
)
11985 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
11986 if (s
->mode
== mode
&& s
->n
== n
)
11989 s
= (struct stack_local_entry
*)
11990 ggc_alloc (sizeof (struct stack_local_entry
));
11993 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
11995 s
->next
= ix86_stack_locals
;
11996 ix86_stack_locals
= s
;
12000 /* Construct the SYMBOL_REF for the tls_get_addr function. */
/* Cached SYMBOL_REF for the TLS helper; built lazily by
   ix86_tls_get_addr.  GNU TLS on 32-bit uses "___tls_get_addr",
   otherwise "__tls_get_addr".
   NOTE(review): corrupted extraction (missing braces); code left
   byte-identical.  */
12002 static GTY(()) rtx ix86_tls_symbol
;
12004 ix86_tls_get_addr (void)
12007 if (!ix86_tls_symbol
)
12009 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
12010 (TARGET_GNU_TLS
&& !TARGET_64BIT
)
12011 ? "___tls_get_addr"
12012 : "__tls_get_addr");
12015 return ix86_tls_symbol
;
12018 /* Calculate the length of the memory address in the instruction
12019 encoding. Does not include the one-byte modrm, opcode, or prefix. */
/* Compute the encoded length (in bytes) of memory address ADDR, excluding
   the modrm byte, opcode, and prefixes: decomposes the address and counts
   SIB/displacement bytes per the i386 addressing rules noted inline.
   NOTE(review): corrupted extraction -- the "len" accumulator lines,
   aborts, and returns are missing from the text; code left
   byte-identical.  */
12022 memory_address_length (rtx addr
)
12024 struct ix86_address parts
;
12025 rtx base
, index
, disp
;
12028 if (GET_CODE (addr
) == PRE_DEC
12029 || GET_CODE (addr
) == POST_INC
12030 || GET_CODE (addr
) == PRE_MODIFY
12031 || GET_CODE (addr
) == POST_MODIFY
)
12034 if (! ix86_decompose_address (addr
, &parts
))
12038 index
= parts
.index
;
12043 - esp as the base always wants an index,
12044 - ebp as the base always wants a displacement. */
12046 /* Register Indirect. */
12047 if (base
&& !index
&& !disp
)
12049 /* esp (for its index) and ebp (for its displacement) need
12050 the two-byte modrm form. */
12051 if (addr
== stack_pointer_rtx
12052 || addr
== arg_pointer_rtx
12053 || addr
== frame_pointer_rtx
12054 || addr
== hard_frame_pointer_rtx
)
12058 /* Direct Addressing. */
12059 else if (disp
&& !base
&& !index
)
12064 /* Find the length of the displacement constant. */
12067 if (GET_CODE (disp
) == CONST_INT
12068 && CONST_OK_FOR_LETTER_P (INTVAL (disp
), 'K')
12074 /* ebp always wants a displacement. */
12075 else if (base
== hard_frame_pointer_rtx
)
12078 /* An index requires the two-byte modrm form.... */
12080 /* ...like esp, which always wants an index. */
12081 || base
== stack_pointer_rtx
12082 || base
== arg_pointer_rtx
12083 || base
== frame_pointer_rtx
)
12090 /* Compute default value for "length_immediate" attribute. When SHORTFORM
12091 is set, expect that insn have 8bit immediate alternative. */
/* Default value of the "length_immediate" insn attribute: scan INSN's
   operands for a constant immediate; with SHORTFORM, a CONST_INT fitting
   constraint 'K' (signed 8-bit) uses the short encoding.  DImode
   immediates are encoded as 32-bit sign-extended values.
   NOTE(review): corrupted extraction -- the switch's case/return lines
   are missing; code left byte-identical.  */
12093 ix86_attr_length_immediate_default (rtx insn
, int shortform
)
12097 extract_insn_cached (insn
);
12098 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
12099 if (CONSTANT_P (recog_data
.operand
[i
]))
12104 && GET_CODE (recog_data
.operand
[i
]) == CONST_INT
12105 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data
.operand
[i
]), 'K'))
12109 switch (get_attr_mode (insn
))
12120 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
12125 fatal_insn ("unknown insn mode", insn
);
12131 /* Compute default value for "length_address" attribute. */
/* Default value of the "length_address" insn attribute: for an LEA,
   measure the address in SET_SRC of its pattern; otherwise measure the
   first MEM operand found; presumably 0 when no MEM exists (the final
   return line is not visible -- TODO confirm against upstream).
   NOTE(review): corrupted extraction; code left byte-identical.  */
12133 ix86_attr_length_address_default (rtx insn
)
12137 if (get_attr_type (insn
) == TYPE_LEA
)
12139 rtx set
= PATTERN (insn
);
12140 if (GET_CODE (set
) == SET
)
12142 else if (GET_CODE (set
) == PARALLEL
12143 && GET_CODE (XVECEXP (set
, 0, 0)) == SET
)
12144 set
= XVECEXP (set
, 0, 0);
12147 #ifdef ENABLE_CHECKING
12153 return memory_address_length (SET_SRC (set
));
12156 extract_insn_cached (insn
);
12157 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
12158 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
12160 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
12166 /* Return the maximum number of instructions a cpu can issue. */
/* Maximum number of instructions the scheduled CPU can issue per cycle,
   switched on the tuning target.
   NOTE(review): corrupted extraction -- the switch header, per-case
   return values, and default are missing; only the case labels remain.
   Code left byte-identical.  */
12169 ix86_issue_rate (void)
12173 case PROCESSOR_PENTIUM
:
12177 case PROCESSOR_PENTIUMPRO
:
12178 case PROCESSOR_PENTIUM4
:
12179 case PROCESSOR_ATHLON
:
12181 case PROCESSOR_NOCONA
:
12189 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
12190 by DEP_INSN and nothing set by DEP_INSN. */
/* Subroutine of ix86_adjust_cost: nonzero iff INSN (a setcc/cmov/fcmov/
   branch) reads the flags set by DEP_INSN and nothing else DEP_INSN sets.
   NOTE(review): corrupted extraction; code left byte-identical.  */
12193 ix86_flags_dependant (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
12197 /* Simplify the test for uninteresting insns. */
12198 if (insn_type
!= TYPE_SETCC
12199 && insn_type
!= TYPE_ICMOV
12200 && insn_type
!= TYPE_FCMOV
12201 && insn_type
!= TYPE_IBR
)
12204 if ((set
= single_set (dep_insn
)) != 0)
12206 set
= SET_DEST (set
);
12209 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
12210 && XVECLEN (PATTERN (dep_insn
), 0) == 2
12211 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
12212 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
12214 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
/* NOTE(review): set2 below is taken from PARALLEL element 0, identical to
   'set' above; upstream GCC reads element 1 here.  Looks like a copy or
   transcription bug -- confirm against the pristine source.  */
12215 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
12220 if (GET_CODE (set
) != REG
|| REGNO (set
) != FLAGS_REG
)
12223 /* This test is true if the dependent insn reads the flags but
12224 not any other potentially set register. */
12225 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
12228 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
12234 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12235 address with operands set by DEP_INSN. */
/* Subroutine of ix86_adjust_cost: nonzero iff INSN has a memory address
   (or, for LEA, an address expression) whose registers are modified by
   DEP_INSN -- an address-generation interlock.
   NOTE(review): corrupted extraction (missing braces/early returns);
   code left byte-identical.  */
12238 ix86_agi_dependant (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
12242 if (insn_type
== TYPE_LEA
12245 addr
= PATTERN (insn
);
12246 if (GET_CODE (addr
) == SET
)
12248 else if (GET_CODE (addr
) == PARALLEL
12249 && GET_CODE (XVECEXP (addr
, 0, 0)) == SET
)
12250 addr
= XVECEXP (addr
, 0, 0);
12253 addr
= SET_SRC (addr
);
12258 extract_insn_cached (insn
);
12259 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
12260 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
12262 addr
= XEXP (recog_data
.operand
[i
], 0);
12269 return modified_in_p (addr
, dep_insn
);
/* Scheduler hook: adjust the latency COST of the dependence LINK between
   DEP_INSN and INSN, per tuning target -- AGI stalls on Pentium, extra
   INT->FP conversion latency, and load-latency hiding on PPro/Athlon.
   NOTE(review): corrupted extraction -- switch header, cost adjustments
   ("cost += ...", "cost = ..."), breaks, and the final return are missing
   from the text; code left byte-identical.  */
12273 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
12275 enum attr_type insn_type
, dep_insn_type
;
12276 enum attr_memory memory
;
12278 int dep_insn_code_number
;
12280 /* Anti and output dependencies have zero cost on all CPUs. */
12281 if (REG_NOTE_KIND (link
) != 0)
12284 dep_insn_code_number
= recog_memoized (dep_insn
);
12286 /* If we can't recognize the insns, we can't really do anything. */
12287 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
12290 insn_type
= get_attr_type (insn
);
12291 dep_insn_type
= get_attr_type (dep_insn
);
12295 case PROCESSOR_PENTIUM
:
12296 /* Address Generation Interlock adds a cycle of latency. */
12297 if (ix86_agi_dependant (insn
, dep_insn
, insn_type
))
12300 /* ??? Compares pair with jump/setcc. */
12301 if (ix86_flags_dependant (insn
, dep_insn
, insn_type
))
12304 /* Floating point stores require value to be ready one cycle earlier. */
12305 if (insn_type
== TYPE_FMOV
12306 && get_attr_memory (insn
) == MEMORY_STORE
12307 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
12311 case PROCESSOR_PENTIUMPRO
:
12312 memory
= get_attr_memory (insn
);
12314 /* INT->FP conversion is expensive. */
12315 if (get_attr_fp_int_src (dep_insn
))
12318 /* There is one cycle extra latency between an FP op and a store. */
12319 if (insn_type
== TYPE_FMOV
12320 && (set
= single_set (dep_insn
)) != NULL_RTX
12321 && (set2
= single_set (insn
)) != NULL_RTX
12322 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
12323 && GET_CODE (SET_DEST (set2
)) == MEM
)
12326 /* Show ability of reorder buffer to hide latency of load by executing
12327 in parallel with previous instruction in case
12328 previous instruction is not needed to compute the address. */
12329 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
12330 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
12332 /* Claim moves to take one cycle, as core can issue one load
12333 at time and the next load can start cycle later. */
12334 if (dep_insn_type
== TYPE_IMOV
12335 || dep_insn_type
== TYPE_FMOV
)
12343 memory
= get_attr_memory (insn
);
12345 /* The esp dependency is resolved before the instruction is really
12347 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
12348 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
12351 /* INT->FP conversion is expensive. */
12352 if (get_attr_fp_int_src (dep_insn
))
12355 /* Show ability of reorder buffer to hide latency of load by executing
12356 in parallel with previous instruction in case
12357 previous instruction is not needed to compute the address. */
12358 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
12359 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
12361 /* Claim moves to take one cycle, as core can issue one load
12362 at time and the next load can start cycle later. */
12363 if (dep_insn_type
== TYPE_IMOV
12364 || dep_insn_type
== TYPE_FMOV
)
12373 case PROCESSOR_ATHLON
:
12375 memory
= get_attr_memory (insn
);
12377 /* Show ability of reorder buffer to hide latency of load by executing
12378 in parallel with previous instruction in case
12379 previous instruction is not needed to compute the address. */
12380 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
12381 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
12383 enum attr_unit unit
= get_attr_unit (insn
);
12386 /* Because of the difference between the length of integer and
12387 floating unit pipeline preparation stages, the memory operands
12388 for floating point are cheaper.
12390 ??? For Athlon it the difference is most probably 2. */
12391 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
12394 loadcost
= TARGET_ATHLON
? 2 : 0;
12396 if (cost
>= loadcost
)
12409 /* How many alternative schedules to try. This should be as wide as the
12410 scheduling freedom in the DFA, but no wider. Making this value too
12411 large results extra work for the scheduler. */
/* Number of alternative schedules the DFA scheduler should try -- wider
   for Pentium/PPro/K6 tunings.
   NOTE(review): corrupted extraction -- the return statements are
   missing; code left byte-identical.  */
12414 ia32_multipass_dfa_lookahead (void)
12416 if (ix86_tune
== PROCESSOR_PENTIUM
)
12419 if (ix86_tune
== PROCESSOR_PENTIUMPRO
12420 || ix86_tune
== PROCESSOR_K6
)
12428 /* Compute the alignment given to a constant that is being placed in memory.
12429 EXP is the constant and ALIGN is the alignment that the object would
12431 The value of this function is used instead of that alignment to align
/* Alignment (in bits) for constant EXP placed in memory, given default
   ALIGN: bumps doubles to 64, 128-bit modes to 128, and long string
   constants to word alignment unless optimizing for size.
   NOTE(review): corrupted extraction -- braces and several return lines
   are missing; code left byte-identical.  */
12435 ix86_constant_alignment (tree exp
, int align
)
12437 if (TREE_CODE (exp
) == REAL_CST
)
12439 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
12441 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
12444 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
12445 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
12446 return BITS_PER_WORD
;
12451 /* Compute the alignment for a static variable.
12452 TYPE is the data type, and ALIGN is the alignment that
12453 the object would ordinarily have. The value of this function is used
12454 instead of that alignment to align the object. */
/* Alignment (in bits) for a static variable of TYPE with default ALIGN:
   large aggregates get 256, x86-64 ABI arrays >= 16 bytes get 128, and
   DFmode/128-bit element types get 64/128 respectively.
   NOTE(review): corrupted extraction -- braces and return lines are
   missing throughout; code left byte-identical.  */
12457 ix86_data_alignment (tree type
, int align
)
12459 if (AGGREGATE_TYPE_P (type
)
12460 && TYPE_SIZE (type
)
12461 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
12462 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 256
12463 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 256)
12466 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12467 to 16byte boundary. */
12470 if (AGGREGATE_TYPE_P (type
)
12471 && TYPE_SIZE (type
)
12472 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
12473 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
12474 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
12478 if (TREE_CODE (type
) == ARRAY_TYPE
)
12480 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
12482 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
12485 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
12488 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
12490 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
12493 else if ((TREE_CODE (type
) == RECORD_TYPE
12494 || TREE_CODE (type
) == UNION_TYPE
12495 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
12496 && TYPE_FIELDS (type
))
12498 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
12500 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
12503 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
12504 || TREE_CODE (type
) == INTEGER_TYPE
)
12506 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
12508 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
12515 /* Compute the alignment for a local variable.
12516 TYPE is the data type, and ALIGN is the alignment that
12517 the object would ordinarily have. The value of this macro is used
12518 instead of that alignment to align the object. */
/* Alignment (in bits) for a local variable of TYPE with default ALIGN;
   same shape as ix86_data_alignment but with the x86-64 aggregate
   threshold at 16 bytes.
   NOTE(review): corrupted extraction -- braces and return lines are
   missing throughout; code left byte-identical.  */
12521 ix86_local_alignment (tree type
, int align
)
12523 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12524 to 16byte boundary. */
12527 if (AGGREGATE_TYPE_P (type
)
12528 && TYPE_SIZE (type
)
12529 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
12530 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
12531 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
12534 if (TREE_CODE (type
) == ARRAY_TYPE
)
12536 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
12538 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
12541 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
12543 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
12545 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
12548 else if ((TREE_CODE (type
) == RECORD_TYPE
12549 || TREE_CODE (type
) == UNION_TYPE
12550 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
12551 && TYPE_FIELDS (type
))
12553 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
12555 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
12558 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
12559 || TREE_CODE (type
) == INTEGER_TYPE
)
12562 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
12564 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
12570 /* Emit RTL insns to initialize the variable parts of a trampoline.
12571 FNADDR is an RTX for the address of the function's pure code.
12572 CXT is an RTX for the static chain value for the function. */
12574 x86_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
12578 /* Compute offset from the end of the jmp to the target function. */
12579 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
12580 plus_constant (tramp
, 10),
12581 NULL_RTX
, 1, OPTAB_DIRECT
);
12582 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
12583 gen_int_mode (0xb9, QImode
));
12584 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
12585 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
12586 gen_int_mode (0xe9, QImode
));
12587 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
12592 /* Try to load address using shorter movl instead of movabs.
12593 We may want to support movq for kernel mode, but kernel does not use
12594 trampolines at the moment. */
12595 if (x86_64_zero_extended_value (fnaddr
))
12597 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
12598 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
12599 gen_int_mode (0xbb41, HImode
));
12600 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
12601 gen_lowpart (SImode
, fnaddr
));
12606 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
12607 gen_int_mode (0xbb49, HImode
));
12608 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
12612 /* Load static chain using movabs to r10. */
12613 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
12614 gen_int_mode (0xba49, HImode
));
12615 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
12618 /* Jump to the r11 */
12619 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
12620 gen_int_mode (0xff49, HImode
));
12621 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
12622 gen_int_mode (0xe3, QImode
));
12624 if (offset
> TRAMPOLINE_SIZE
)
12628 #ifdef TRANSFER_FROM_TRAMPOLINE
12629 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
12630 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
/* Register builtin NAME with type TYPE and code CODE, but only when
   every option bit in MASK is enabled in target_flags (and, for
   MASK_64BIT builtins, only when actually compiling for 64-bit).
   Wrapped in do { } while (0) so the macro expands to exactly one
   statement and is safe in unbraced if/else bodies.  */
#define def_builtin(MASK, NAME, TYPE, CODE)				\
do {									\
  if ((MASK) & target_flags						\
      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))			\
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,		\
		      NULL, NULL_TREE);					\
} while (0)
12642 struct builtin_description
12644 const unsigned int mask
;
12645 const enum insn_code icode
;
12646 const char *const name
;
12647 const enum ix86_builtins code
;
12648 const enum rtx_code comparison
;
12649 const unsigned int flag
;
12652 static const struct builtin_description bdesc_comi
[] =
12654 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
12655 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
12656 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
12657 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
12658 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
12659 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
12660 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
12661 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
12662 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
12663 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
12664 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
12665 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
12666 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
12667 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
12668 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
12669 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
12670 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
12671 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
12672 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
12673 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
12674 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
12675 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
12676 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
12677 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
12680 static const struct builtin_description bdesc_2arg
[] =
12683 { MASK_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
12684 { MASK_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
12685 { MASK_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
12686 { MASK_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
12687 { MASK_SSE
, CODE_FOR_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
12688 { MASK_SSE
, CODE_FOR_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
12689 { MASK_SSE
, CODE_FOR_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
12690 { MASK_SSE
, CODE_FOR_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
12692 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
12693 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
12694 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
12695 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, 1 },
12696 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, 1 },
12697 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
12698 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, EQ
, 0 },
12699 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, LT
, 0 },
12700 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, LE
, 0 },
12701 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, LT
, 1 },
12702 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, LE
, 1 },
12703 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, UNORDERED
, 0 },
12704 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
12705 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
12706 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
12707 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
12708 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, EQ
, 0 },
12709 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, LT
, 0 },
12710 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, LE
, 0 },
12711 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
12713 { MASK_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
12714 { MASK_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
12715 { MASK_SSE
, CODE_FOR_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
12716 { MASK_SSE
, CODE_FOR_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
12718 { MASK_SSE
, CODE_FOR_sse_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
12719 { MASK_SSE
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
12720 { MASK_SSE
, CODE_FOR_sse_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
12721 { MASK_SSE
, CODE_FOR_sse_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
12723 { MASK_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
12724 { MASK_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
12725 { MASK_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
12726 { MASK_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
12727 { MASK_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
12730 { MASK_MMX
, CODE_FOR_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
12731 { MASK_MMX
, CODE_FOR_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
12732 { MASK_MMX
, CODE_FOR_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
12733 { MASK_MMX
, CODE_FOR_mmx_adddi3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, 0, 0 },
12734 { MASK_MMX
, CODE_FOR_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
12735 { MASK_MMX
, CODE_FOR_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
12736 { MASK_MMX
, CODE_FOR_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
12737 { MASK_MMX
, CODE_FOR_mmx_subdi3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, 0, 0 },
12739 { MASK_MMX
, CODE_FOR_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
12740 { MASK_MMX
, CODE_FOR_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
12741 { MASK_MMX
, CODE_FOR_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
12742 { MASK_MMX
, CODE_FOR_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
12743 { MASK_MMX
, CODE_FOR_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
12744 { MASK_MMX
, CODE_FOR_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
12745 { MASK_MMX
, CODE_FOR_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
12746 { MASK_MMX
, CODE_FOR_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
12748 { MASK_MMX
, CODE_FOR_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
12749 { MASK_MMX
, CODE_FOR_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
12750 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
12752 { MASK_MMX
, CODE_FOR_mmx_anddi3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
12753 { MASK_MMX
, CODE_FOR_mmx_nanddi3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
12754 { MASK_MMX
, CODE_FOR_mmx_iordi3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
12755 { MASK_MMX
, CODE_FOR_mmx_xordi3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
12757 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
12758 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
12760 { MASK_MMX
, CODE_FOR_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
12761 { MASK_MMX
, CODE_FOR_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
12762 { MASK_MMX
, CODE_FOR_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
12763 { MASK_MMX
, CODE_FOR_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
12764 { MASK_MMX
, CODE_FOR_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
12765 { MASK_MMX
, CODE_FOR_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
12767 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
12768 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
12769 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
12770 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
12772 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
12773 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
12774 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
12775 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
12776 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
12777 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
12780 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
12781 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
12782 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
12784 { MASK_SSE
, CODE_FOR_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
12785 { MASK_SSE
, CODE_FOR_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
12786 { MASK_SSE
| MASK_64BIT
, CODE_FOR_cvtsi2ssq
, 0, IX86_BUILTIN_CVTSI642SS
, 0, 0 },
12788 { MASK_MMX
, CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
12789 { MASK_MMX
, CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
12790 { MASK_MMX
, CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
12791 { MASK_MMX
, CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
12792 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
12793 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
12795 { MASK_MMX
, CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
12796 { MASK_MMX
, CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
12797 { MASK_MMX
, CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
12798 { MASK_MMX
, CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
12799 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
12800 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
12802 { MASK_MMX
, CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
12803 { MASK_MMX
, CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
12804 { MASK_MMX
, CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
12805 { MASK_MMX
, CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
12807 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
12808 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
12811 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
12812 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
12813 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
12814 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
12815 { MASK_SSE2
, CODE_FOR_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
12816 { MASK_SSE2
, CODE_FOR_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
12817 { MASK_SSE2
, CODE_FOR_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
12818 { MASK_SSE2
, CODE_FOR_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
12820 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
12821 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
12822 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
12823 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, 1 },
12824 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, 1 },
12825 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
12826 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, EQ
, 0 },
12827 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, LT
, 0 },
12828 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, LE
, 0 },
12829 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, LT
, 1 },
12830 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, LE
, 1 },
12831 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, UNORDERED
, 0 },
12832 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
12833 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
12834 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
12835 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
12836 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, EQ
, 0 },
12837 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, LT
, 0 },
12838 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, LE
, 0 },
12839 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, UNORDERED
, 0 },
12841 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
12842 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
12843 { MASK_SSE2
, CODE_FOR_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
12844 { MASK_SSE2
, CODE_FOR_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
12846 { MASK_SSE2
, CODE_FOR_sse2_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
12847 { MASK_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
12848 { MASK_SSE2
, CODE_FOR_sse2_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
12849 { MASK_SSE2
, CODE_FOR_sse2_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
12851 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
12852 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
12853 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
12856 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
12857 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
12858 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
12859 { MASK_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
12860 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
12861 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
12862 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
12863 { MASK_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
12865 { MASK_MMX
, CODE_FOR_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
12866 { MASK_MMX
, CODE_FOR_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
12867 { MASK_MMX
, CODE_FOR_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
12868 { MASK_MMX
, CODE_FOR_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
12869 { MASK_MMX
, CODE_FOR_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
12870 { MASK_MMX
, CODE_FOR_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
12871 { MASK_MMX
, CODE_FOR_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
12872 { MASK_MMX
, CODE_FOR_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
12874 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
12875 { MASK_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
12877 { MASK_SSE2
, CODE_FOR_sse2_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
12878 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
12879 { MASK_SSE2
, CODE_FOR_sse2_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
12880 { MASK_SSE2
, CODE_FOR_sse2_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
12882 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
12883 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
12885 { MASK_SSE2
, CODE_FOR_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
12886 { MASK_SSE2
, CODE_FOR_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
12887 { MASK_SSE2
, CODE_FOR_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
12888 { MASK_SSE2
, CODE_FOR_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
12889 { MASK_SSE2
, CODE_FOR_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
12890 { MASK_SSE2
, CODE_FOR_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
12892 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
12893 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
12894 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
12895 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
12897 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
12898 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
12899 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
12900 { MASK_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, 0, 0 },
12901 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
12902 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
12903 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
12904 { MASK_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, 0, 0 },
12906 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
12907 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
12908 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
12910 { MASK_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
12911 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
12913 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, 0, IX86_BUILTIN_PMULUDQ
, 0, 0 },
12914 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, 0, IX86_BUILTIN_PMULUDQ128
, 0, 0 },
12916 { MASK_SSE2
, CODE_FOR_ashlv8hi3_ti
, 0, IX86_BUILTIN_PSLLW128
, 0, 0 },
12917 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
12918 { MASK_SSE2
, CODE_FOR_ashlv4si3_ti
, 0, IX86_BUILTIN_PSLLD128
, 0, 0 },
12919 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
12920 { MASK_SSE2
, CODE_FOR_ashlv2di3_ti
, 0, IX86_BUILTIN_PSLLQ128
, 0, 0 },
12921 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
12923 { MASK_SSE2
, CODE_FOR_lshrv8hi3_ti
, 0, IX86_BUILTIN_PSRLW128
, 0, 0 },
12924 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
12925 { MASK_SSE2
, CODE_FOR_lshrv4si3_ti
, 0, IX86_BUILTIN_PSRLD128
, 0, 0 },
12926 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
12927 { MASK_SSE2
, CODE_FOR_lshrv2di3_ti
, 0, IX86_BUILTIN_PSRLQ128
, 0, 0 },
12928 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
12930 { MASK_SSE2
, CODE_FOR_ashrv8hi3_ti
, 0, IX86_BUILTIN_PSRAW128
, 0, 0 },
12931 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
12932 { MASK_SSE2
, CODE_FOR_ashrv4si3_ti
, 0, IX86_BUILTIN_PSRAD128
, 0, 0 },
12933 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
12935 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
12937 { MASK_SSE2
, CODE_FOR_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
12938 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_cvtsi2sdq
, 0, IX86_BUILTIN_CVTSI642SD
, 0, 0 },
12939 { MASK_SSE2
, CODE_FOR_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
12940 { MASK_SSE2
, CODE_FOR_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 },
12943 { MASK_SSE3
, CODE_FOR_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, 0, 0 },
12944 { MASK_SSE3
, CODE_FOR_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, 0, 0 },
12945 { MASK_SSE3
, CODE_FOR_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, 0, 0 },
12946 { MASK_SSE3
, CODE_FOR_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, 0, 0 },
12947 { MASK_SSE3
, CODE_FOR_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, 0, 0 },
12948 { MASK_SSE3
, CODE_FOR_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, 0, 0 }
12951 static const struct builtin_description bdesc_1arg
[] =
12953 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
12954 { MASK_SSE
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
12956 { MASK_SSE
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
12957 { MASK_SSE
, CODE_FOR_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
12958 { MASK_SSE
, CODE_FOR_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
12960 { MASK_SSE
, CODE_FOR_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
12961 { MASK_SSE
, CODE_FOR_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
12962 { MASK_SSE
| MASK_64BIT
, CODE_FOR_cvtss2siq
, 0, IX86_BUILTIN_CVTSS2SI64
, 0, 0 },
12963 { MASK_SSE
, CODE_FOR_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
12964 { MASK_SSE
, CODE_FOR_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 },
12965 { MASK_SSE
| MASK_64BIT
, CODE_FOR_cvttss2siq
, 0, IX86_BUILTIN_CVTTSS2SI64
, 0, 0 },
12967 { MASK_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, 0, 0 },
12968 { MASK_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, 0, 0 },
12969 { MASK_SSE2
, CODE_FOR_sse2_movq2dq
, 0, IX86_BUILTIN_MOVQ2DQ
, 0, 0 },
12970 { MASK_SSE2
, CODE_FOR_sse2_movdq2q
, 0, IX86_BUILTIN_MOVDQ2Q
, 0, 0 },
12972 { MASK_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, 0, 0 },
12974 { MASK_SSE2
, CODE_FOR_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, 0, 0 },
12975 { MASK_SSE2
, CODE_FOR_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, 0, 0 },
12977 { MASK_SSE2
, CODE_FOR_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, 0, 0 },
12978 { MASK_SSE2
, CODE_FOR_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, 0, 0 },
12979 { MASK_SSE2
, CODE_FOR_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, 0, 0 },
12980 { MASK_SSE2
, CODE_FOR_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, 0, 0 },
12981 { MASK_SSE2
, CODE_FOR_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, 0, 0 },
12983 { MASK_SSE2
, CODE_FOR_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, 0, 0 },
12985 { MASK_SSE2
, CODE_FOR_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, 0, 0 },
12986 { MASK_SSE2
, CODE_FOR_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, 0, 0 },
12987 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_cvtsd2siq
, 0, IX86_BUILTIN_CVTSD2SI64
, 0, 0 },
12988 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_cvttsd2siq
, 0, IX86_BUILTIN_CVTTSD2SI64
, 0, 0 },
12990 { MASK_SSE2
, CODE_FOR_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, 0, 0 },
12991 { MASK_SSE2
, CODE_FOR_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, 0, 0 },
12992 { MASK_SSE2
, CODE_FOR_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, 0, 0 },
12994 { MASK_SSE2
, CODE_FOR_sse2_movq
, 0, IX86_BUILTIN_MOVQ
, 0, 0 },
12997 { MASK_SSE3
, CODE_FOR_movshdup
, 0, IX86_BUILTIN_MOVSHDUP
, 0, 0 },
12998 { MASK_SSE3
, CODE_FOR_movsldup
, 0, IX86_BUILTIN_MOVSLDUP
, 0, 0 },
12999 { MASK_SSE3
, CODE_FOR_movddup
, 0, IX86_BUILTIN_MOVDDUP
, 0, 0 }
13003 ix86_init_builtins (void)
13006 ix86_init_mmx_sse_builtins ();
13009 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13010 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
13013 ix86_init_mmx_sse_builtins (void)
13015 const struct builtin_description
* d
;
13018 tree V16QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V16QImode
);
13019 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
13020 tree V2SF_type_node
= build_vector_type_for_mode (float_type_node
, V2SFmode
);
13021 tree V2DI_type_node
= build_vector_type_for_mode (intDI_type_node
, V2DImode
);
13022 tree V2DF_type_node
= build_vector_type_for_mode (double_type_node
, V2DFmode
);
13023 tree V4SF_type_node
= build_vector_type_for_mode (float_type_node
, V4SFmode
);
13024 tree V4SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V4SImode
);
13025 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
13026 tree V8QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V8QImode
);
13027 tree V8HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V8HImode
);
13029 tree pchar_type_node
= build_pointer_type (char_type_node
);
13030 tree pcchar_type_node
= build_pointer_type (
13031 build_type_variant (char_type_node
, 1, 0));
13032 tree pfloat_type_node
= build_pointer_type (float_type_node
);
13033 tree pcfloat_type_node
= build_pointer_type (
13034 build_type_variant (float_type_node
, 1, 0));
13035 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
13036 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
13037 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
13040 tree int_ftype_v4sf_v4sf
13041 = build_function_type_list (integer_type_node
,
13042 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
13043 tree v4si_ftype_v4sf_v4sf
13044 = build_function_type_list (V4SI_type_node
,
13045 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
13046 /* MMX/SSE/integer conversions. */
13047 tree int_ftype_v4sf
13048 = build_function_type_list (integer_type_node
,
13049 V4SF_type_node
, NULL_TREE
);
13050 tree int64_ftype_v4sf
13051 = build_function_type_list (long_long_integer_type_node
,
13052 V4SF_type_node
, NULL_TREE
);
13053 tree int_ftype_v8qi
13054 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
13055 tree v4sf_ftype_v4sf_int
13056 = build_function_type_list (V4SF_type_node
,
13057 V4SF_type_node
, integer_type_node
, NULL_TREE
);
13058 tree v4sf_ftype_v4sf_int64
13059 = build_function_type_list (V4SF_type_node
,
13060 V4SF_type_node
, long_long_integer_type_node
,
13062 tree v4sf_ftype_v4sf_v2si
13063 = build_function_type_list (V4SF_type_node
,
13064 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
13065 tree int_ftype_v4hi_int
13066 = build_function_type_list (integer_type_node
,
13067 V4HI_type_node
, integer_type_node
, NULL_TREE
);
13068 tree v4hi_ftype_v4hi_int_int
13069 = build_function_type_list (V4HI_type_node
, V4HI_type_node
,
13070 integer_type_node
, integer_type_node
,
13072 /* Miscellaneous. */
13073 tree v8qi_ftype_v4hi_v4hi
13074 = build_function_type_list (V8QI_type_node
,
13075 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
13076 tree v4hi_ftype_v2si_v2si
13077 = build_function_type_list (V4HI_type_node
,
13078 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
13079 tree v4sf_ftype_v4sf_v4sf_int
13080 = build_function_type_list (V4SF_type_node
,
13081 V4SF_type_node
, V4SF_type_node
,
13082 integer_type_node
, NULL_TREE
);
13083 tree v2si_ftype_v4hi_v4hi
13084 = build_function_type_list (V2SI_type_node
,
13085 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
13086 tree v4hi_ftype_v4hi_int
13087 = build_function_type_list (V4HI_type_node
,
13088 V4HI_type_node
, integer_type_node
, NULL_TREE
);
13089 tree v4hi_ftype_v4hi_di
13090 = build_function_type_list (V4HI_type_node
,
13091 V4HI_type_node
, long_long_unsigned_type_node
,
13093 tree v2si_ftype_v2si_di
13094 = build_function_type_list (V2SI_type_node
,
13095 V2SI_type_node
, long_long_unsigned_type_node
,
13097 tree void_ftype_void
13098 = build_function_type (void_type_node
, void_list_node
);
13099 tree void_ftype_unsigned
13100 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
13101 tree void_ftype_unsigned_unsigned
13102 = build_function_type_list (void_type_node
, unsigned_type_node
,
13103 unsigned_type_node
, NULL_TREE
);
13104 tree void_ftype_pcvoid_unsigned_unsigned
13105 = build_function_type_list (void_type_node
, const_ptr_type_node
,
13106 unsigned_type_node
, unsigned_type_node
,
13108 tree unsigned_ftype_void
13109 = build_function_type (unsigned_type_node
, void_list_node
);
13111 = build_function_type (long_long_unsigned_type_node
, void_list_node
);
13112 tree v4sf_ftype_void
13113 = build_function_type (V4SF_type_node
, void_list_node
);
13114 tree v2si_ftype_v4sf
13115 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
13116 /* Loads/stores. */
13117 tree void_ftype_v8qi_v8qi_pchar
13118 = build_function_type_list (void_type_node
,
13119 V8QI_type_node
, V8QI_type_node
,
13120 pchar_type_node
, NULL_TREE
);
13121 tree v4sf_ftype_pcfloat
13122 = build_function_type_list (V4SF_type_node
, pcfloat_type_node
, NULL_TREE
);
13123 /* @@@ the type is bogus */
13124 tree v4sf_ftype_v4sf_pv2si
13125 = build_function_type_list (V4SF_type_node
,
13126 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
13127 tree void_ftype_pv2si_v4sf
13128 = build_function_type_list (void_type_node
,
13129 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
13130 tree void_ftype_pfloat_v4sf
13131 = build_function_type_list (void_type_node
,
13132 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
13133 tree void_ftype_pdi_di
13134 = build_function_type_list (void_type_node
,
13135 pdi_type_node
, long_long_unsigned_type_node
,
13137 tree void_ftype_pv2di_v2di
13138 = build_function_type_list (void_type_node
,
13139 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
13140 /* Normal vector unops. */
13141 tree v4sf_ftype_v4sf
13142 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
13144 /* Normal vector binops. */
13145 tree v4sf_ftype_v4sf_v4sf
13146 = build_function_type_list (V4SF_type_node
,
13147 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
13148 tree v8qi_ftype_v8qi_v8qi
13149 = build_function_type_list (V8QI_type_node
,
13150 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
13151 tree v4hi_ftype_v4hi_v4hi
13152 = build_function_type_list (V4HI_type_node
,
13153 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
13154 tree v2si_ftype_v2si_v2si
13155 = build_function_type_list (V2SI_type_node
,
13156 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
13157 tree di_ftype_di_di
13158 = build_function_type_list (long_long_unsigned_type_node
,
13159 long_long_unsigned_type_node
,
13160 long_long_unsigned_type_node
, NULL_TREE
);
13162 tree v2si_ftype_v2sf
13163 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
13164 tree v2sf_ftype_v2si
13165 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
13166 tree v2si_ftype_v2si
13167 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
13168 tree v2sf_ftype_v2sf
13169 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
13170 tree v2sf_ftype_v2sf_v2sf
13171 = build_function_type_list (V2SF_type_node
,
13172 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
13173 tree v2si_ftype_v2sf_v2sf
13174 = build_function_type_list (V2SI_type_node
,
13175 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
13176 tree pint_type_node
= build_pointer_type (integer_type_node
);
13177 tree pcint_type_node
= build_pointer_type (
13178 build_type_variant (integer_type_node
, 1, 0));
13179 tree pdouble_type_node
= build_pointer_type (double_type_node
);
13180 tree pcdouble_type_node
= build_pointer_type (
13181 build_type_variant (double_type_node
, 1, 0));
13182 tree int_ftype_v2df_v2df
13183 = build_function_type_list (integer_type_node
,
13184 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
13187 = build_function_type (intTI_type_node
, void_list_node
);
13188 tree v2di_ftype_void
13189 = build_function_type (V2DI_type_node
, void_list_node
);
13190 tree ti_ftype_ti_ti
13191 = build_function_type_list (intTI_type_node
,
13192 intTI_type_node
, intTI_type_node
, NULL_TREE
);
13193 tree void_ftype_pcvoid
13194 = build_function_type_list (void_type_node
, const_ptr_type_node
, NULL_TREE
);
13196 = build_function_type_list (V2DI_type_node
,
13197 long_long_unsigned_type_node
, NULL_TREE
);
13199 = build_function_type_list (long_long_unsigned_type_node
,
13200 V2DI_type_node
, NULL_TREE
);
13201 tree v4sf_ftype_v4si
13202 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
13203 tree v4si_ftype_v4sf
13204 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
13205 tree v2df_ftype_v4si
13206 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
13207 tree v4si_ftype_v2df
13208 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
13209 tree v2si_ftype_v2df
13210 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
13211 tree v4sf_ftype_v2df
13212 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
13213 tree v2df_ftype_v2si
13214 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
13215 tree v2df_ftype_v4sf
13216 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
13217 tree int_ftype_v2df
13218 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
13219 tree int64_ftype_v2df
13220 = build_function_type_list (long_long_integer_type_node
,
13221 V2DF_type_node
, NULL_TREE
);
13222 tree v2df_ftype_v2df_int
13223 = build_function_type_list (V2DF_type_node
,
13224 V2DF_type_node
, integer_type_node
, NULL_TREE
);
13225 tree v2df_ftype_v2df_int64
13226 = build_function_type_list (V2DF_type_node
,
13227 V2DF_type_node
, long_long_integer_type_node
,
13229 tree v4sf_ftype_v4sf_v2df
13230 = build_function_type_list (V4SF_type_node
,
13231 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
13232 tree v2df_ftype_v2df_v4sf
13233 = build_function_type_list (V2DF_type_node
,
13234 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
13235 tree v2df_ftype_v2df_v2df_int
13236 = build_function_type_list (V2DF_type_node
,
13237 V2DF_type_node
, V2DF_type_node
,
13240 tree v2df_ftype_v2df_pv2si
13241 = build_function_type_list (V2DF_type_node
,
13242 V2DF_type_node
, pv2si_type_node
, NULL_TREE
);
13243 tree void_ftype_pv2si_v2df
13244 = build_function_type_list (void_type_node
,
13245 pv2si_type_node
, V2DF_type_node
, NULL_TREE
);
13246 tree void_ftype_pdouble_v2df
13247 = build_function_type_list (void_type_node
,
13248 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
13249 tree void_ftype_pint_int
13250 = build_function_type_list (void_type_node
,
13251 pint_type_node
, integer_type_node
, NULL_TREE
);
13252 tree void_ftype_v16qi_v16qi_pchar
13253 = build_function_type_list (void_type_node
,
13254 V16QI_type_node
, V16QI_type_node
,
13255 pchar_type_node
, NULL_TREE
);
13256 tree v2df_ftype_pcdouble
13257 = build_function_type_list (V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
13258 tree v2df_ftype_v2df_v2df
13259 = build_function_type_list (V2DF_type_node
,
13260 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
13261 tree v16qi_ftype_v16qi_v16qi
13262 = build_function_type_list (V16QI_type_node
,
13263 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
13264 tree v8hi_ftype_v8hi_v8hi
13265 = build_function_type_list (V8HI_type_node
,
13266 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
13267 tree v4si_ftype_v4si_v4si
13268 = build_function_type_list (V4SI_type_node
,
13269 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
13270 tree v2di_ftype_v2di_v2di
13271 = build_function_type_list (V2DI_type_node
,
13272 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
13273 tree v2di_ftype_v2df_v2df
13274 = build_function_type_list (V2DI_type_node
,
13275 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
13276 tree v2df_ftype_v2df
13277 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
13278 tree v2df_ftype_double
13279 = build_function_type_list (V2DF_type_node
, double_type_node
, NULL_TREE
);
13280 tree v2df_ftype_double_double
13281 = build_function_type_list (V2DF_type_node
,
13282 double_type_node
, double_type_node
, NULL_TREE
);
13283 tree int_ftype_v8hi_int
13284 = build_function_type_list (integer_type_node
,
13285 V8HI_type_node
, integer_type_node
, NULL_TREE
);
13286 tree v8hi_ftype_v8hi_int_int
13287 = build_function_type_list (V8HI_type_node
,
13288 V8HI_type_node
, integer_type_node
,
13289 integer_type_node
, NULL_TREE
);
13290 tree v2di_ftype_v2di_int
13291 = build_function_type_list (V2DI_type_node
,
13292 V2DI_type_node
, integer_type_node
, NULL_TREE
);
13293 tree v4si_ftype_v4si_int
13294 = build_function_type_list (V4SI_type_node
,
13295 V4SI_type_node
, integer_type_node
, NULL_TREE
);
13296 tree v8hi_ftype_v8hi_int
13297 = build_function_type_list (V8HI_type_node
,
13298 V8HI_type_node
, integer_type_node
, NULL_TREE
);
13299 tree v8hi_ftype_v8hi_v2di
13300 = build_function_type_list (V8HI_type_node
,
13301 V8HI_type_node
, V2DI_type_node
, NULL_TREE
);
13302 tree v4si_ftype_v4si_v2di
13303 = build_function_type_list (V4SI_type_node
,
13304 V4SI_type_node
, V2DI_type_node
, NULL_TREE
);
13305 tree v4si_ftype_v8hi_v8hi
13306 = build_function_type_list (V4SI_type_node
,
13307 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
13308 tree di_ftype_v8qi_v8qi
13309 = build_function_type_list (long_long_unsigned_type_node
,
13310 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
13311 tree di_ftype_v2si_v2si
13312 = build_function_type_list (long_long_unsigned_type_node
,
13313 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
13314 tree v2di_ftype_v16qi_v16qi
13315 = build_function_type_list (V2DI_type_node
,
13316 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
13317 tree v2di_ftype_v4si_v4si
13318 = build_function_type_list (V2DI_type_node
,
13319 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
13320 tree int_ftype_v16qi
13321 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
13322 tree v16qi_ftype_pcchar
13323 = build_function_type_list (V16QI_type_node
, pcchar_type_node
, NULL_TREE
);
13324 tree void_ftype_pchar_v16qi
13325 = build_function_type_list (void_type_node
,
13326 pchar_type_node
, V16QI_type_node
, NULL_TREE
);
13327 tree v4si_ftype_pcint
13328 = build_function_type_list (V4SI_type_node
, pcint_type_node
, NULL_TREE
);
13329 tree void_ftype_pcint_v4si
13330 = build_function_type_list (void_type_node
,
13331 pcint_type_node
, V4SI_type_node
, NULL_TREE
);
13332 tree v2di_ftype_v2di
13333 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
13336 tree float128_type
;
13338 /* The __float80 type. */
13339 if (TYPE_MODE (long_double_type_node
) == XFmode
)
13340 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
13344 /* The __float80 type. */
13345 float80_type
= make_node (REAL_TYPE
);
13346 TYPE_PRECISION (float80_type
) = 96;
13347 layout_type (float80_type
);
13348 (*lang_hooks
.types
.register_builtin_type
) (float80_type
, "__float80");
13351 float128_type
= make_node (REAL_TYPE
);
13352 TYPE_PRECISION (float128_type
) = 128;
13353 layout_type (float128_type
);
13354 (*lang_hooks
.types
.register_builtin_type
) (float128_type
, "__float128");
13356 /* Add all builtins that are more or less simple operations on two
13358 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
13360 /* Use one of the operands; the target can have a different mode for
13361 mask-generating compares. */
13362 enum machine_mode mode
;
13367 mode
= insn_data
[d
->icode
].operand
[1].mode
;
13372 type
= v16qi_ftype_v16qi_v16qi
;
13375 type
= v8hi_ftype_v8hi_v8hi
;
13378 type
= v4si_ftype_v4si_v4si
;
13381 type
= v2di_ftype_v2di_v2di
;
13384 type
= v2df_ftype_v2df_v2df
;
13387 type
= ti_ftype_ti_ti
;
13390 type
= v4sf_ftype_v4sf_v4sf
;
13393 type
= v8qi_ftype_v8qi_v8qi
;
13396 type
= v4hi_ftype_v4hi_v4hi
;
13399 type
= v2si_ftype_v2si_v2si
;
13402 type
= di_ftype_di_di
;
13409 /* Override for comparisons. */
13410 if (d
->icode
== CODE_FOR_maskcmpv4sf3
13411 || d
->icode
== CODE_FOR_maskncmpv4sf3
13412 || d
->icode
== CODE_FOR_vmmaskcmpv4sf3
13413 || d
->icode
== CODE_FOR_vmmaskncmpv4sf3
)
13414 type
= v4si_ftype_v4sf_v4sf
;
13416 if (d
->icode
== CODE_FOR_maskcmpv2df3
13417 || d
->icode
== CODE_FOR_maskncmpv2df3
13418 || d
->icode
== CODE_FOR_vmmaskcmpv2df3
13419 || d
->icode
== CODE_FOR_vmmaskncmpv2df3
)
13420 type
= v2di_ftype_v2df_v2df
;
13422 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
13425 /* Add the remaining MMX insns with somewhat more complicated types. */
13426 def_builtin (MASK_MMX
, "__builtin_ia32_mmx_zero", di_ftype_void
, IX86_BUILTIN_MMX_ZERO
);
13427 def_builtin (MASK_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
13428 def_builtin (MASK_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
13429 def_builtin (MASK_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
13430 def_builtin (MASK_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
13432 def_builtin (MASK_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
13433 def_builtin (MASK_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
13434 def_builtin (MASK_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
13436 def_builtin (MASK_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
13437 def_builtin (MASK_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
13439 def_builtin (MASK_MMX
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
13440 def_builtin (MASK_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
13442 /* comi/ucomi insns. */
13443 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
13444 if (d
->mask
== MASK_SSE2
)
13445 def_builtin (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
13447 def_builtin (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
13449 def_builtin (MASK_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
13450 def_builtin (MASK_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
13451 def_builtin (MASK_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
13453 def_builtin (MASK_SSE
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
13454 def_builtin (MASK_SSE
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
13455 def_builtin (MASK_SSE
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
13456 def_builtin (MASK_SSE
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
13457 def_builtin (MASK_SSE
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
13458 def_builtin (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64
, IX86_BUILTIN_CVTSI642SS
);
13459 def_builtin (MASK_SSE
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
13460 def_builtin (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI64
);
13461 def_builtin (MASK_SSE
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
13462 def_builtin (MASK_SSE
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
13463 def_builtin (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI64
);
13465 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pextrw", int_ftype_v4hi_int
, IX86_BUILTIN_PEXTRW
);
13466 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int
, IX86_BUILTIN_PINSRW
);
13468 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
13470 def_builtin (MASK_SSE
, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADAPS
);
13471 def_builtin (MASK_SSE
, "__builtin_ia32_loadups", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADUPS
);
13472 def_builtin (MASK_SSE
, "__builtin_ia32_loadss", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADSS
);
13473 def_builtin (MASK_SSE
, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREAPS
);
13474 def_builtin (MASK_SSE
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
13475 def_builtin (MASK_SSE
, "__builtin_ia32_storess", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STORESS
);
13477 def_builtin (MASK_SSE
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
13478 def_builtin (MASK_SSE
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
13479 def_builtin (MASK_SSE
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
13480 def_builtin (MASK_SSE
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
13482 def_builtin (MASK_SSE
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
13483 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
13484 def_builtin (MASK_SSE
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
13485 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
13487 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
13489 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
13491 def_builtin (MASK_SSE
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
13492 def_builtin (MASK_SSE
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
13493 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
13494 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
13495 def_builtin (MASK_SSE
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
13496 def_builtin (MASK_SSE
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
13498 def_builtin (MASK_SSE
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
13500 /* Original 3DNow! */
13501 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
13502 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
13503 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
13504 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
13505 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
13506 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
13507 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
13508 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
13509 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
13510 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
13511 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
13512 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
13513 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
13514 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
13515 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
13516 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
13517 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
13518 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
13519 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
13520 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
13522 /* 3DNow! extension as used in the Athlon CPU. */
13523 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
13524 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
13525 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
13526 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
13527 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
13528 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
13530 def_builtin (MASK_SSE
, "__builtin_ia32_setzerops", v4sf_ftype_void
, IX86_BUILTIN_SSE_ZERO
);
13533 def_builtin (MASK_SSE2
, "__builtin_ia32_pextrw128", int_ftype_v8hi_int
, IX86_BUILTIN_PEXTRW128
);
13534 def_builtin (MASK_SSE2
, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int
, IX86_BUILTIN_PINSRW128
);
13536 def_builtin (MASK_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
13537 def_builtin (MASK_SSE2
, "__builtin_ia32_movq2dq", v2di_ftype_di
, IX86_BUILTIN_MOVQ2DQ
);
13538 def_builtin (MASK_SSE2
, "__builtin_ia32_movdq2q", di_ftype_v2di
, IX86_BUILTIN_MOVDQ2Q
);
13540 def_builtin (MASK_SSE2
, "__builtin_ia32_loadapd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADAPD
);
13541 def_builtin (MASK_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADUPD
);
13542 def_builtin (MASK_SSE2
, "__builtin_ia32_loadsd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADSD
);
13543 def_builtin (MASK_SSE2
, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREAPD
);
13544 def_builtin (MASK_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
13545 def_builtin (MASK_SSE2
, "__builtin_ia32_storesd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STORESD
);
13547 def_builtin (MASK_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si
, IX86_BUILTIN_LOADHPD
);
13548 def_builtin (MASK_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si
, IX86_BUILTIN_LOADLPD
);
13549 def_builtin (MASK_SSE2
, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df
, IX86_BUILTIN_STOREHPD
);
13550 def_builtin (MASK_SSE2
, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df
, IX86_BUILTIN_STORELPD
);
13552 def_builtin (MASK_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
13553 def_builtin (MASK_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
13554 def_builtin (MASK_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
13555 def_builtin (MASK_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
13556 def_builtin (MASK_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
13558 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
13559 def_builtin (MASK_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
13560 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
13561 def_builtin (MASK_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
13563 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
13564 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
13566 def_builtin (MASK_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
13568 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
13569 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
13571 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
13572 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
13573 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
13574 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
13575 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
13577 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
13579 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
13580 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
13581 def_builtin (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTSD2SI64
);
13582 def_builtin (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI64
);
13584 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
13585 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
13586 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
13588 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
13589 def_builtin (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64
, IX86_BUILTIN_CVTSI642SD
);
13590 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
13591 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
13593 def_builtin (MASK_SSE2
, "__builtin_ia32_setpd1", v2df_ftype_double
, IX86_BUILTIN_SETPD1
);
13594 def_builtin (MASK_SSE2
, "__builtin_ia32_setpd", v2df_ftype_double_double
, IX86_BUILTIN_SETPD
);
13595 def_builtin (MASK_SSE2
, "__builtin_ia32_setzeropd", ti_ftype_void
, IX86_BUILTIN_CLRPD
);
13596 def_builtin (MASK_SSE2
, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADPD1
);
13597 def_builtin (MASK_SSE2
, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADRPD
);
13598 def_builtin (MASK_SSE2
, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREPD1
);
13599 def_builtin (MASK_SSE2
, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STORERPD
);
13601 def_builtin (MASK_SSE2
, "__builtin_ia32_clflush", void_ftype_pcvoid
, IX86_BUILTIN_CLFLUSH
);
13602 def_builtin (MASK_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
13603 def_builtin (MASK_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
13605 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQA
);
13606 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQU
);
13607 def_builtin (MASK_SSE2
, "__builtin_ia32_loadd", v4si_ftype_pcint
, IX86_BUILTIN_LOADD
);
13608 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQA
);
13609 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQU
);
13610 def_builtin (MASK_SSE2
, "__builtin_ia32_stored", void_ftype_pcint_v4si
, IX86_BUILTIN_STORED
);
13611 def_builtin (MASK_SSE2
, "__builtin_ia32_movq", v2di_ftype_v2di
, IX86_BUILTIN_MOVQ
);
13613 def_builtin (MASK_SSE
, "__builtin_ia32_setzero128", v2di_ftype_void
, IX86_BUILTIN_CLRTI
);
13615 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si
, IX86_BUILTIN_PMULUDQ
);
13616 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si
, IX86_BUILTIN_PMULUDQ128
);
13618 def_builtin (MASK_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSLLW128
);
13619 def_builtin (MASK_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSLLD128
);
13620 def_builtin (MASK_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
13622 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRLW128
);
13623 def_builtin (MASK_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRLD128
);
13624 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
13626 def_builtin (MASK_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRAW128
);
13627 def_builtin (MASK_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRAD128
);
13629 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLDQI128
);
13630 def_builtin (MASK_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
13631 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
13632 def_builtin (MASK_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
13634 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
13635 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
13636 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
13637 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
13639 def_builtin (MASK_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
13640 def_builtin (MASK_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
13642 def_builtin (MASK_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
13644 /* Prescott New Instructions. */
13645 def_builtin (MASK_SSE3
, "__builtin_ia32_monitor",
13646 void_ftype_pcvoid_unsigned_unsigned
,
13647 IX86_BUILTIN_MONITOR
);
13648 def_builtin (MASK_SSE3
, "__builtin_ia32_mwait",
13649 void_ftype_unsigned_unsigned
,
13650 IX86_BUILTIN_MWAIT
);
13651 def_builtin (MASK_SSE3
, "__builtin_ia32_movshdup",
13653 IX86_BUILTIN_MOVSHDUP
);
13654 def_builtin (MASK_SSE3
, "__builtin_ia32_movsldup",
13656 IX86_BUILTIN_MOVSLDUP
);
13657 def_builtin (MASK_SSE3
, "__builtin_ia32_lddqu",
13658 v16qi_ftype_pcchar
, IX86_BUILTIN_LDDQU
);
13659 def_builtin (MASK_SSE3
, "__builtin_ia32_loadddup",
13660 v2df_ftype_pcdouble
, IX86_BUILTIN_LOADDDUP
);
13661 def_builtin (MASK_SSE3
, "__builtin_ia32_movddup",
13662 v2df_ftype_v2df
, IX86_BUILTIN_MOVDDUP
);
13665 /* Errors in the source file can cause expand_expr to return const0_rtx
13666 where we expect a vector. To avoid crashing, use one of the vector
13667 clear instructions. */
13669 safe_vector_operand (rtx x
, enum machine_mode mode
)
13671 if (x
!= const0_rtx
)
13673 x
= gen_reg_rtx (mode
);
13675 if (VALID_MMX_REG_MODE (mode
) || VALID_MMX_REG_MODE_3DNOW (mode
))
13676 emit_insn (gen_mmx_clrdi (mode
== DImode
? x
13677 : gen_rtx_SUBREG (DImode
, x
, 0)));
13679 emit_insn (gen_sse_clrv4sf (mode
== V4SFmode
? x
13680 : gen_rtx_SUBREG (V4SFmode
, x
, 0),
13681 CONST0_RTX (V4SFmode
)));
13685 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13688 ix86_expand_binop_builtin (enum insn_code icode
, tree arglist
, rtx target
)
13691 tree arg0
= TREE_VALUE (arglist
);
13692 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13693 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13694 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13695 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
13696 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
13697 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
13699 if (VECTOR_MODE_P (mode0
))
13700 op0
= safe_vector_operand (op0
, mode0
);
13701 if (VECTOR_MODE_P (mode1
))
13702 op1
= safe_vector_operand (op1
, mode1
);
13705 || GET_MODE (target
) != tmode
13706 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13707 target
= gen_reg_rtx (tmode
);
13709 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
13711 rtx x
= gen_reg_rtx (V4SImode
);
13712 emit_insn (gen_sse2_loadd (x
, op1
));
13713 op1
= gen_lowpart (TImode
, x
);
13716 /* In case the insn wants input operands in modes different from
13717 the result, abort. */
13718 if ((GET_MODE (op0
) != mode0
&& GET_MODE (op0
) != VOIDmode
)
13719 || (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
))
13722 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13723 op0
= copy_to_mode_reg (mode0
, op0
);
13724 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13725 op1
= copy_to_mode_reg (mode1
, op1
);
13727 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13728 yet one of the two must not be a memory. This is normally enforced
13729 by expanders, but we didn't bother to create one here. */
13730 if (GET_CODE (op0
) == MEM
&& GET_CODE (op1
) == MEM
)
13731 op0
= copy_to_mode_reg (mode0
, op0
);
13733 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13740 /* Subroutine of ix86_expand_builtin to take care of stores. */
13743 ix86_expand_store_builtin (enum insn_code icode
, tree arglist
)
13746 tree arg0
= TREE_VALUE (arglist
);
13747 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13748 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13749 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13750 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
13751 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
13753 if (VECTOR_MODE_P (mode1
))
13754 op1
= safe_vector_operand (op1
, mode1
);
13756 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
13757 op1
= copy_to_mode_reg (mode1
, op1
);
13759 pat
= GEN_FCN (icode
) (op0
, op1
);
13765 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13768 ix86_expand_unop_builtin (enum insn_code icode
, tree arglist
,
13769 rtx target
, int do_load
)
13772 tree arg0
= TREE_VALUE (arglist
);
13773 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13774 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
13775 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
13778 || GET_MODE (target
) != tmode
13779 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13780 target
= gen_reg_rtx (tmode
);
13782 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
13785 if (VECTOR_MODE_P (mode0
))
13786 op0
= safe_vector_operand (op0
, mode0
);
13788 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13789 op0
= copy_to_mode_reg (mode0
, op0
);
13792 pat
= GEN_FCN (icode
) (target
, op0
);
13799 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13800 sqrtss, rsqrtss, rcpss. */
13803 ix86_expand_unop1_builtin (enum insn_code icode
, tree arglist
, rtx target
)
13806 tree arg0
= TREE_VALUE (arglist
);
13807 rtx op1
, op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13808 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
13809 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
13812 || GET_MODE (target
) != tmode
13813 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13814 target
= gen_reg_rtx (tmode
);
13816 if (VECTOR_MODE_P (mode0
))
13817 op0
= safe_vector_operand (op0
, mode0
);
13819 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13820 op0
= copy_to_mode_reg (mode0
, op0
);
13823 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
13824 op1
= copy_to_mode_reg (mode0
, op1
);
13826 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13833 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13836 ix86_expand_sse_compare (const struct builtin_description
*d
, tree arglist
,
13840 tree arg0
= TREE_VALUE (arglist
);
13841 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13842 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13843 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13845 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
13846 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
13847 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
13848 enum rtx_code comparison
= d
->comparison
;
13850 if (VECTOR_MODE_P (mode0
))
13851 op0
= safe_vector_operand (op0
, mode0
);
13852 if (VECTOR_MODE_P (mode1
))
13853 op1
= safe_vector_operand (op1
, mode1
);
13855 /* Swap operands if we have a comparison that isn't available in
13859 rtx tmp
= gen_reg_rtx (mode1
);
13860 emit_move_insn (tmp
, op1
);
13866 || GET_MODE (target
) != tmode
13867 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
13868 target
= gen_reg_rtx (tmode
);
13870 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
13871 op0
= copy_to_mode_reg (mode0
, op0
);
13872 if (! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
13873 op1
= copy_to_mode_reg (mode1
, op1
);
13875 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
13876 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
13883 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13886 ix86_expand_sse_comi (const struct builtin_description
*d
, tree arglist
,
13890 tree arg0
= TREE_VALUE (arglist
);
13891 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13892 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13893 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13895 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
13896 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
13897 enum rtx_code comparison
= d
->comparison
;
13899 if (VECTOR_MODE_P (mode0
))
13900 op0
= safe_vector_operand (op0
, mode0
);
13901 if (VECTOR_MODE_P (mode1
))
13902 op1
= safe_vector_operand (op1
, mode1
);
13904 /* Swap operands if we have a comparison that isn't available in
13913 target
= gen_reg_rtx (SImode
);
13914 emit_move_insn (target
, const0_rtx
);
13915 target
= gen_rtx_SUBREG (QImode
, target
, 0);
13917 if (! (*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
13918 op0
= copy_to_mode_reg (mode0
, op0
);
13919 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
13920 op1
= copy_to_mode_reg (mode1
, op1
);
13922 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
13923 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
13927 emit_insn (gen_rtx_SET (VOIDmode
,
13928 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
13929 gen_rtx_fmt_ee (comparison
, QImode
,
13933 return SUBREG_REG (target
);
13936 /* Expand an expression EXP that calls a built-in function,
13937 with result going to TARGET if that's convenient
13938 (and in mode MODE if that's convenient).
13939 SUBTARGET may be used as the target for computing one of EXP's operands.
13940 IGNORE is nonzero if the value is to be ignored. */
13943 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
13944 enum machine_mode mode ATTRIBUTE_UNUSED
,
13945 int ignore ATTRIBUTE_UNUSED
)
13947 const struct builtin_description
*d
;
13949 enum insn_code icode
;
13950 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
13951 tree arglist
= TREE_OPERAND (exp
, 1);
13952 tree arg0
, arg1
, arg2
;
13953 rtx op0
, op1
, op2
, pat
;
13954 enum machine_mode tmode
, mode0
, mode1
, mode2
;
13955 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
13959 case IX86_BUILTIN_EMMS
:
13960 emit_insn (gen_emms ());
13963 case IX86_BUILTIN_SFENCE
:
13964 emit_insn (gen_sfence ());
13967 case IX86_BUILTIN_PEXTRW
:
13968 case IX86_BUILTIN_PEXTRW128
:
13969 icode
= (fcode
== IX86_BUILTIN_PEXTRW
13970 ? CODE_FOR_mmx_pextrw
13971 : CODE_FOR_sse2_pextrw
);
13972 arg0
= TREE_VALUE (arglist
);
13973 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13974 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13975 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13976 tmode
= insn_data
[icode
].operand
[0].mode
;
13977 mode0
= insn_data
[icode
].operand
[1].mode
;
13978 mode1
= insn_data
[icode
].operand
[2].mode
;
13980 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13981 op0
= copy_to_mode_reg (mode0
, op0
);
13982 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13984 error ("selector must be an integer constant in the range 0..%i",
13985 fcode
== IX86_BUILTIN_PEXTRW
? 3:7);
13986 return gen_reg_rtx (tmode
);
13989 || GET_MODE (target
) != tmode
13990 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13991 target
= gen_reg_rtx (tmode
);
13992 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13998 case IX86_BUILTIN_PINSRW
:
13999 case IX86_BUILTIN_PINSRW128
:
14000 icode
= (fcode
== IX86_BUILTIN_PINSRW
14001 ? CODE_FOR_mmx_pinsrw
14002 : CODE_FOR_sse2_pinsrw
);
14003 arg0
= TREE_VALUE (arglist
);
14004 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14005 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
14006 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14007 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14008 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
14009 tmode
= insn_data
[icode
].operand
[0].mode
;
14010 mode0
= insn_data
[icode
].operand
[1].mode
;
14011 mode1
= insn_data
[icode
].operand
[2].mode
;
14012 mode2
= insn_data
[icode
].operand
[3].mode
;
14014 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
14015 op0
= copy_to_mode_reg (mode0
, op0
);
14016 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
14017 op1
= copy_to_mode_reg (mode1
, op1
);
14018 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
14020 error ("selector must be an integer constant in the range 0..%i",
14021 fcode
== IX86_BUILTIN_PINSRW
? 15:255);
14025 || GET_MODE (target
) != tmode
14026 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14027 target
= gen_reg_rtx (tmode
);
14028 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
14034 case IX86_BUILTIN_MASKMOVQ
:
14035 case IX86_BUILTIN_MASKMOVDQU
:
14036 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
14037 ? (TARGET_64BIT
? CODE_FOR_mmx_maskmovq_rex
: CODE_FOR_mmx_maskmovq
)
14038 : (TARGET_64BIT
? CODE_FOR_sse2_maskmovdqu_rex64
14039 : CODE_FOR_sse2_maskmovdqu
));
14040 /* Note the arg order is different from the operand order. */
14041 arg1
= TREE_VALUE (arglist
);
14042 arg2
= TREE_VALUE (TREE_CHAIN (arglist
));
14043 arg0
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
14044 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14045 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14046 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
14047 mode0
= insn_data
[icode
].operand
[0].mode
;
14048 mode1
= insn_data
[icode
].operand
[1].mode
;
14049 mode2
= insn_data
[icode
].operand
[2].mode
;
14051 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
14052 op0
= copy_to_mode_reg (mode0
, op0
);
14053 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
14054 op1
= copy_to_mode_reg (mode1
, op1
);
14055 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
14056 op2
= copy_to_mode_reg (mode2
, op2
);
14057 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
14063 case IX86_BUILTIN_SQRTSS
:
14064 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2
, arglist
, target
);
14065 case IX86_BUILTIN_RSQRTSS
:
14066 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2
, arglist
, target
);
14067 case IX86_BUILTIN_RCPSS
:
14068 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2
, arglist
, target
);
14070 case IX86_BUILTIN_LOADAPS
:
14071 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps
, arglist
, target
, 1);
14073 case IX86_BUILTIN_LOADUPS
:
14074 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, arglist
, target
, 1);
14076 case IX86_BUILTIN_STOREAPS
:
14077 return ix86_expand_store_builtin (CODE_FOR_sse_movaps
, arglist
);
14079 case IX86_BUILTIN_STOREUPS
:
14080 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, arglist
);
14082 case IX86_BUILTIN_LOADSS
:
14083 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss
, arglist
, target
, 1);
14085 case IX86_BUILTIN_STORESS
:
14086 return ix86_expand_store_builtin (CODE_FOR_sse_storess
, arglist
);
14088 case IX86_BUILTIN_LOADHPS
:
14089 case IX86_BUILTIN_LOADLPS
:
14090 case IX86_BUILTIN_LOADHPD
:
14091 case IX86_BUILTIN_LOADLPD
:
14092 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_movhps
14093 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_movlps
14094 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_movhpd
14095 : CODE_FOR_sse2_movsd
);
14096 arg0
= TREE_VALUE (arglist
);
14097 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14098 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14099 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14100 tmode
= insn_data
[icode
].operand
[0].mode
;
14101 mode0
= insn_data
[icode
].operand
[1].mode
;
14102 mode1
= insn_data
[icode
].operand
[2].mode
;
14104 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
14105 op0
= copy_to_mode_reg (mode0
, op0
);
14106 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
14108 || GET_MODE (target
) != tmode
14109 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14110 target
= gen_reg_rtx (tmode
);
14111 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
14117 case IX86_BUILTIN_STOREHPS
:
14118 case IX86_BUILTIN_STORELPS
:
14119 case IX86_BUILTIN_STOREHPD
:
14120 case IX86_BUILTIN_STORELPD
:
14121 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_movhps
14122 : fcode
== IX86_BUILTIN_STORELPS
? CODE_FOR_sse_movlps
14123 : fcode
== IX86_BUILTIN_STOREHPD
? CODE_FOR_sse2_movhpd
14124 : CODE_FOR_sse2_movsd
);
14125 arg0
= TREE_VALUE (arglist
);
14126 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14127 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14128 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14129 mode0
= insn_data
[icode
].operand
[1].mode
;
14130 mode1
= insn_data
[icode
].operand
[2].mode
;
14132 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
14133 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
14134 op1
= copy_to_mode_reg (mode1
, op1
);
14136 pat
= GEN_FCN (icode
) (op0
, op0
, op1
);
14142 case IX86_BUILTIN_MOVNTPS
:
14143 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, arglist
);
14144 case IX86_BUILTIN_MOVNTQ
:
14145 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, arglist
);
14147 case IX86_BUILTIN_LDMXCSR
:
14148 op0
= expand_expr (TREE_VALUE (arglist
), NULL_RTX
, VOIDmode
, 0);
14149 target
= assign_386_stack_local (SImode
, 0);
14150 emit_move_insn (target
, op0
);
14151 emit_insn (gen_ldmxcsr (target
));
14154 case IX86_BUILTIN_STMXCSR
:
14155 target
= assign_386_stack_local (SImode
, 0);
14156 emit_insn (gen_stmxcsr (target
));
14157 return copy_to_mode_reg (SImode
, target
);
14159 case IX86_BUILTIN_SHUFPS
:
14160 case IX86_BUILTIN_SHUFPD
:
14161 icode
= (fcode
== IX86_BUILTIN_SHUFPS
14162 ? CODE_FOR_sse_shufps
14163 : CODE_FOR_sse2_shufpd
);
14164 arg0
= TREE_VALUE (arglist
);
14165 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14166 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
14167 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14168 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14169 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
14170 tmode
= insn_data
[icode
].operand
[0].mode
;
14171 mode0
= insn_data
[icode
].operand
[1].mode
;
14172 mode1
= insn_data
[icode
].operand
[2].mode
;
14173 mode2
= insn_data
[icode
].operand
[3].mode
;
14175 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
14176 op0
= copy_to_mode_reg (mode0
, op0
);
14177 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
14178 op1
= copy_to_mode_reg (mode1
, op1
);
14179 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
14181 /* @@@ better error message */
14182 error ("mask must be an immediate");
14183 return gen_reg_rtx (tmode
);
14186 || GET_MODE (target
) != tmode
14187 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14188 target
= gen_reg_rtx (tmode
);
14189 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
14195 case IX86_BUILTIN_PSHUFW
:
14196 case IX86_BUILTIN_PSHUFD
:
14197 case IX86_BUILTIN_PSHUFHW
:
14198 case IX86_BUILTIN_PSHUFLW
:
14199 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
14200 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
14201 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
14202 : CODE_FOR_mmx_pshufw
);
14203 arg0
= TREE_VALUE (arglist
);
14204 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14205 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14206 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14207 tmode
= insn_data
[icode
].operand
[0].mode
;
14208 mode1
= insn_data
[icode
].operand
[1].mode
;
14209 mode2
= insn_data
[icode
].operand
[2].mode
;
14211 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
14212 op0
= copy_to_mode_reg (mode1
, op0
);
14213 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
14215 /* @@@ better error message */
14216 error ("mask must be an immediate");
14220 || GET_MODE (target
) != tmode
14221 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14222 target
= gen_reg_rtx (tmode
);
14223 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
14229 case IX86_BUILTIN_PSLLDQI128
:
14230 case IX86_BUILTIN_PSRLDQI128
:
14231 icode
= ( fcode
== IX86_BUILTIN_PSLLDQI128
? CODE_FOR_sse2_ashlti3
14232 : CODE_FOR_sse2_lshrti3
);
14233 arg0
= TREE_VALUE (arglist
);
14234 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14235 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14236 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14237 tmode
= insn_data
[icode
].operand
[0].mode
;
14238 mode1
= insn_data
[icode
].operand
[1].mode
;
14239 mode2
= insn_data
[icode
].operand
[2].mode
;
14241 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
14243 op0
= copy_to_reg (op0
);
14244 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
14246 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
14248 error ("shift must be an immediate");
14251 target
= gen_reg_rtx (V2DImode
);
14252 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, V2DImode
, 0), op0
, op1
);
14258 case IX86_BUILTIN_FEMMS
:
14259 emit_insn (gen_femms ());
14262 case IX86_BUILTIN_PAVGUSB
:
14263 return ix86_expand_binop_builtin (CODE_FOR_pavgusb
, arglist
, target
);
14265 case IX86_BUILTIN_PF2ID
:
14266 return ix86_expand_unop_builtin (CODE_FOR_pf2id
, arglist
, target
, 0);
14268 case IX86_BUILTIN_PFACC
:
14269 return ix86_expand_binop_builtin (CODE_FOR_pfacc
, arglist
, target
);
14271 case IX86_BUILTIN_PFADD
:
14272 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3
, arglist
, target
);
14274 case IX86_BUILTIN_PFCMPEQ
:
14275 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3
, arglist
, target
);
14277 case IX86_BUILTIN_PFCMPGE
:
14278 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3
, arglist
, target
);
14280 case IX86_BUILTIN_PFCMPGT
:
14281 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3
, arglist
, target
);
14283 case IX86_BUILTIN_PFMAX
:
14284 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3
, arglist
, target
);
14286 case IX86_BUILTIN_PFMIN
:
14287 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3
, arglist
, target
);
14289 case IX86_BUILTIN_PFMUL
:
14290 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3
, arglist
, target
);
14292 case IX86_BUILTIN_PFRCP
:
14293 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2
, arglist
, target
, 0);
14295 case IX86_BUILTIN_PFRCPIT1
:
14296 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3
, arglist
, target
);
14298 case IX86_BUILTIN_PFRCPIT2
:
14299 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3
, arglist
, target
);
14301 case IX86_BUILTIN_PFRSQIT1
:
14302 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3
, arglist
, target
);
14304 case IX86_BUILTIN_PFRSQRT
:
14305 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2
, arglist
, target
, 0);
14307 case IX86_BUILTIN_PFSUB
:
14308 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3
, arglist
, target
);
14310 case IX86_BUILTIN_PFSUBR
:
14311 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3
, arglist
, target
);
14313 case IX86_BUILTIN_PI2FD
:
14314 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2
, arglist
, target
, 0);
14316 case IX86_BUILTIN_PMULHRW
:
14317 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3
, arglist
, target
);
14319 case IX86_BUILTIN_PF2IW
:
14320 return ix86_expand_unop_builtin (CODE_FOR_pf2iw
, arglist
, target
, 0);
14322 case IX86_BUILTIN_PFNACC
:
14323 return ix86_expand_binop_builtin (CODE_FOR_pfnacc
, arglist
, target
);
14325 case IX86_BUILTIN_PFPNACC
:
14326 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc
, arglist
, target
);
14328 case IX86_BUILTIN_PI2FW
:
14329 return ix86_expand_unop_builtin (CODE_FOR_pi2fw
, arglist
, target
, 0);
14331 case IX86_BUILTIN_PSWAPDSI
:
14332 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2
, arglist
, target
, 0);
14334 case IX86_BUILTIN_PSWAPDSF
:
14335 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2
, arglist
, target
, 0);
14337 case IX86_BUILTIN_SSE_ZERO
:
14338 target
= gen_reg_rtx (V4SFmode
);
14339 emit_insn (gen_sse_clrv4sf (target
, CONST0_RTX (V4SFmode
)));
14342 case IX86_BUILTIN_MMX_ZERO
:
14343 target
= gen_reg_rtx (DImode
);
14344 emit_insn (gen_mmx_clrdi (target
));
14347 case IX86_BUILTIN_CLRTI
:
14348 target
= gen_reg_rtx (V2DImode
);
14349 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode
, target
, V2DImode
, 0)));
14353 case IX86_BUILTIN_SQRTSD
:
14354 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2
, arglist
, target
);
14355 case IX86_BUILTIN_LOADAPD
:
14356 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd
, arglist
, target
, 1);
14357 case IX86_BUILTIN_LOADUPD
:
14358 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, arglist
, target
, 1);
14360 case IX86_BUILTIN_STOREAPD
:
14361 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
14362 case IX86_BUILTIN_STOREUPD
:
14363 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, arglist
);
14365 case IX86_BUILTIN_LOADSD
:
14366 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd
, arglist
, target
, 1);
14368 case IX86_BUILTIN_STORESD
:
14369 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd
, arglist
);
14371 case IX86_BUILTIN_SETPD1
:
14372 target
= assign_386_stack_local (DFmode
, 0);
14373 arg0
= TREE_VALUE (arglist
);
14374 emit_move_insn (adjust_address (target
, DFmode
, 0),
14375 expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0));
14376 op0
= gen_reg_rtx (V2DFmode
);
14377 emit_insn (gen_sse2_loadsd (op0
, adjust_address (target
, V2DFmode
, 0)));
14378 emit_insn (gen_sse2_shufpd (op0
, op0
, op0
, const0_rtx
));
14381 case IX86_BUILTIN_SETPD
:
14382 target
= assign_386_stack_local (V2DFmode
, 0);
14383 arg0
= TREE_VALUE (arglist
);
14384 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14385 emit_move_insn (adjust_address (target
, DFmode
, 0),
14386 expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0));
14387 emit_move_insn (adjust_address (target
, DFmode
, 8),
14388 expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0));
14389 op0
= gen_reg_rtx (V2DFmode
);
14390 emit_insn (gen_sse2_movapd (op0
, target
));
14393 case IX86_BUILTIN_LOADRPD
:
14394 target
= ix86_expand_unop_builtin (CODE_FOR_sse2_movapd
, arglist
,
14395 gen_reg_rtx (V2DFmode
), 1);
14396 emit_insn (gen_sse2_shufpd (target
, target
, target
, const1_rtx
));
14399 case IX86_BUILTIN_LOADPD1
:
14400 target
= ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd
, arglist
,
14401 gen_reg_rtx (V2DFmode
), 1);
14402 emit_insn (gen_sse2_shufpd (target
, target
, target
, const0_rtx
));
14405 case IX86_BUILTIN_STOREPD1
:
14406 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
14407 case IX86_BUILTIN_STORERPD
:
14408 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
14410 case IX86_BUILTIN_CLRPD
:
14411 target
= gen_reg_rtx (V2DFmode
);
14412 emit_insn (gen_sse_clrv2df (target
));
14415 case IX86_BUILTIN_MFENCE
:
14416 emit_insn (gen_sse2_mfence ());
14418 case IX86_BUILTIN_LFENCE
:
14419 emit_insn (gen_sse2_lfence ());
14422 case IX86_BUILTIN_CLFLUSH
:
14423 arg0
= TREE_VALUE (arglist
);
14424 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14425 icode
= CODE_FOR_sse2_clflush
;
14426 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, Pmode
))
14427 op0
= copy_to_mode_reg (Pmode
, op0
);
14429 emit_insn (gen_sse2_clflush (op0
));
14432 case IX86_BUILTIN_MOVNTPD
:
14433 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, arglist
);
14434 case IX86_BUILTIN_MOVNTDQ
:
14435 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, arglist
);
14436 case IX86_BUILTIN_MOVNTI
:
14437 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, arglist
);
14439 case IX86_BUILTIN_LOADDQA
:
14440 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa
, arglist
, target
, 1);
14441 case IX86_BUILTIN_LOADDQU
:
14442 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu
, arglist
, target
, 1);
14443 case IX86_BUILTIN_LOADD
:
14444 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd
, arglist
, target
, 1);
14446 case IX86_BUILTIN_STOREDQA
:
14447 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa
, arglist
);
14448 case IX86_BUILTIN_STOREDQU
:
14449 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu
, arglist
);
14450 case IX86_BUILTIN_STORED
:
14451 return ix86_expand_store_builtin (CODE_FOR_sse2_stored
, arglist
);
14453 case IX86_BUILTIN_MONITOR
:
14454 arg0
= TREE_VALUE (arglist
);
14455 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14456 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
14457 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14458 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14459 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
14461 op0
= copy_to_mode_reg (SImode
, op0
);
14463 op1
= copy_to_mode_reg (SImode
, op1
);
14465 op2
= copy_to_mode_reg (SImode
, op2
);
14466 emit_insn (gen_monitor (op0
, op1
, op2
));
14469 case IX86_BUILTIN_MWAIT
:
14470 arg0
= TREE_VALUE (arglist
);
14471 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14472 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14473 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14475 op0
= copy_to_mode_reg (SImode
, op0
);
14477 op1
= copy_to_mode_reg (SImode
, op1
);
14478 emit_insn (gen_mwait (op0
, op1
));
14481 case IX86_BUILTIN_LOADDDUP
:
14482 return ix86_expand_unop_builtin (CODE_FOR_loadddup
, arglist
, target
, 1);
14484 case IX86_BUILTIN_LDDQU
:
14485 return ix86_expand_unop_builtin (CODE_FOR_lddqu
, arglist
, target
,
14492 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
14493 if (d
->code
== fcode
)
14495 /* Compares are treated specially. */
14496 if (d
->icode
== CODE_FOR_maskcmpv4sf3
14497 || d
->icode
== CODE_FOR_vmmaskcmpv4sf3
14498 || d
->icode
== CODE_FOR_maskncmpv4sf3
14499 || d
->icode
== CODE_FOR_vmmaskncmpv4sf3
14500 || d
->icode
== CODE_FOR_maskcmpv2df3
14501 || d
->icode
== CODE_FOR_vmmaskcmpv2df3
14502 || d
->icode
== CODE_FOR_maskncmpv2df3
14503 || d
->icode
== CODE_FOR_vmmaskncmpv2df3
)
14504 return ix86_expand_sse_compare (d
, arglist
, target
);
14506 return ix86_expand_binop_builtin (d
->icode
, arglist
, target
);
14509 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
14510 if (d
->code
== fcode
)
14511 return ix86_expand_unop_builtin (d
->icode
, arglist
, target
, 0);
14513 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
14514 if (d
->code
== fcode
)
14515 return ix86_expand_sse_comi (d
, arglist
, target
);
14517 /* @@@ Should really do something sensible here. */
14521 /* Store OPERAND to the memory after reload is completed. This means
14522 that we can't easily use assign_stack_local. */
14524 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
14527 if (!reload_completed
)
14529 if (TARGET_RED_ZONE
)
14531 result
= gen_rtx_MEM (mode
,
14532 gen_rtx_PLUS (Pmode
,
14534 GEN_INT (-RED_ZONE_SIZE
)));
14535 emit_move_insn (result
, operand
);
14537 else if (!TARGET_RED_ZONE
&& TARGET_64BIT
)
14543 operand
= gen_lowpart (DImode
, operand
);
14547 gen_rtx_SET (VOIDmode
,
14548 gen_rtx_MEM (DImode
,
14549 gen_rtx_PRE_DEC (DImode
,
14550 stack_pointer_rtx
)),
14556 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
14565 split_di (&operand
, 1, operands
, operands
+ 1);
14567 gen_rtx_SET (VOIDmode
,
14568 gen_rtx_MEM (SImode
,
14569 gen_rtx_PRE_DEC (Pmode
,
14570 stack_pointer_rtx
)),
14573 gen_rtx_SET (VOIDmode
,
14574 gen_rtx_MEM (SImode
,
14575 gen_rtx_PRE_DEC (Pmode
,
14576 stack_pointer_rtx
)),
14581 /* It is better to store HImodes as SImodes. */
14582 if (!TARGET_PARTIAL_REG_STALL
)
14583 operand
= gen_lowpart (SImode
, operand
);
14587 gen_rtx_SET (VOIDmode
,
14588 gen_rtx_MEM (GET_MODE (operand
),
14589 gen_rtx_PRE_DEC (SImode
,
14590 stack_pointer_rtx
)),
14596 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
14601 /* Free operand from the memory. */
14603 ix86_free_from_memory (enum machine_mode mode
)
14605 if (!TARGET_RED_ZONE
)
14609 if (mode
== DImode
|| TARGET_64BIT
)
14611 else if (mode
== HImode
&& TARGET_PARTIAL_REG_STALL
)
14615 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14616 to pop or add instruction if registers are available. */
14617 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
14618 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
14623 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14624 QImode must go into class Q_REGS.
14625 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14626 movdf to do mem-to-mem moves through integer regs. */
14628 ix86_preferred_reload_class (rtx x
, enum reg_class
class)
14630 if (GET_CODE (x
) == CONST_VECTOR
&& x
!= CONST0_RTX (GET_MODE (x
)))
14632 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
14634 /* SSE can't load any constant directly yet. */
14635 if (SSE_CLASS_P (class))
14637 /* Floats can load 0 and 1. */
14638 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x
))
14640 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14641 if (MAYBE_SSE_CLASS_P (class))
14642 return (reg_class_subset_p (class, GENERAL_REGS
)
14643 ? GENERAL_REGS
: FLOAT_REGS
);
14647 /* General regs can load everything. */
14648 if (reg_class_subset_p (class, GENERAL_REGS
))
14649 return GENERAL_REGS
;
14650 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14651 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14654 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x
))
14656 if (GET_MODE (x
) == QImode
&& ! reg_class_subset_p (class, Q_REGS
))
14661 /* If we are copying between general and FP registers, we need a memory
14662 location. The same is true for SSE and MMX registers.
14664 The macro can't work reliably when one of the CLASSES is class containing
14665 registers from multiple units (SSE, MMX, integer). We avoid this by never
14666 combining those units in single alternative in the machine description.
14667 Ensure that this constraint holds to avoid unexpected surprises.
14669 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14670 enforce these sanity checks. */
14672 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
14673 enum machine_mode mode
, int strict
)
14675 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
14676 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
14677 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
14678 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
14679 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
14680 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
14687 return (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
)
14688 || ((SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
)
14689 || MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
14690 && ((mode
!= SImode
&& (mode
!= DImode
|| !TARGET_64BIT
))
14691 || (!TARGET_INTER_UNIT_MOVES
&& !optimize_size
))));
14693 /* Return the cost of moving data from a register in class CLASS1 to
14694 one in class CLASS2.
14696 It is not required that the cost always equal 2 when FROM is the same as TO;
14697 on some machines it is expensive to move between registers if they are not
14698 general registers. */
14700 ix86_register_move_cost (enum machine_mode mode
, enum reg_class class1
,
14701 enum reg_class class2
)
14703 /* In case we require secondary memory, compute cost of the store followed
14704 by load. In order to avoid bad register allocation choices, we need
14705 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14707 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
14711 cost
+= MAX (MEMORY_MOVE_COST (mode
, class1
, 0),
14712 MEMORY_MOVE_COST (mode
, class1
, 1));
14713 cost
+= MAX (MEMORY_MOVE_COST (mode
, class2
, 0),
14714 MEMORY_MOVE_COST (mode
, class2
, 1));
14716 /* In case of copying from general_purpose_register we may emit multiple
14717 stores followed by single load causing memory size mismatch stall.
14718 Count this as arbitrarily high cost of 20. */
14719 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
14722 /* In the case of FP/MMX moves, the registers actually overlap, and we
14723 have to switch modes in order to treat them differently. */
14724 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
14725 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
14731 /* Moves between SSE/MMX and integer unit are expensive. */
14732 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
14733 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
14734 return ix86_cost
->mmxsse_to_integer
;
14735 if (MAYBE_FLOAT_CLASS_P (class1
))
14736 return ix86_cost
->fp_move
;
14737 if (MAYBE_SSE_CLASS_P (class1
))
14738 return ix86_cost
->sse_move
;
14739 if (MAYBE_MMX_CLASS_P (class1
))
14740 return ix86_cost
->mmx_move
;
14744 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14746 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
14748 /* Flags and only flags can only hold CCmode values. */
14749 if (CC_REGNO_P (regno
))
14750 return GET_MODE_CLASS (mode
) == MODE_CC
;
14751 if (GET_MODE_CLASS (mode
) == MODE_CC
14752 || GET_MODE_CLASS (mode
) == MODE_RANDOM
14753 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
14755 if (FP_REGNO_P (regno
))
14756 return VALID_FP_MODE_P (mode
);
14757 if (SSE_REGNO_P (regno
))
14758 return (TARGET_SSE
? VALID_SSE_REG_MODE (mode
) : 0);
14759 if (MMX_REGNO_P (regno
))
14761 ? VALID_MMX_REG_MODE (mode
) || VALID_MMX_REG_MODE_3DNOW (mode
) : 0);
14762 /* We handle both integer and floats in the general purpose registers.
14763 In future we should be able to handle vector modes as well. */
14764 if (!VALID_INT_MODE_P (mode
) && !VALID_FP_MODE_P (mode
))
14766 /* Take care for QImode values - they can be in non-QI regs, but then
14767 they do cause partial register stalls. */
14768 if (regno
< 4 || mode
!= QImode
|| TARGET_64BIT
)
14770 return reload_in_progress
|| reload_completed
|| !TARGET_PARTIAL_REG_STALL
;
14773 /* Return the cost of moving data of mode M between a
14774 register and memory. A value of 2 is the default; this cost is
14775 relative to those in `REGISTER_MOVE_COST'.
14777 If moving between registers and memory is more expensive than
14778 between two registers, you should define this macro to express the
14781 Model also increased moving costs of QImode registers in non
14785 ix86_memory_move_cost (enum machine_mode mode
, enum reg_class
class, int in
)
14787 if (FLOAT_CLASS_P (class))
14804 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
14806 if (SSE_CLASS_P (class))
14809 switch (GET_MODE_SIZE (mode
))
14823 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
14825 if (MMX_CLASS_P (class))
14828 switch (GET_MODE_SIZE (mode
))
14839 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
14841 switch (GET_MODE_SIZE (mode
))
14845 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
14846 : ix86_cost
->movzbl_load
);
14848 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
14849 : ix86_cost
->int_store
[0] + 4);
14852 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
14854 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14855 if (mode
== TFmode
)
14857 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
14858 * (((int) GET_MODE_SIZE (mode
)
14859 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
14863 /* Compute a (partial) cost for rtx X. Return true if the complete
14864 cost has been computed, and false if subexpressions should be
14865 scanned. In either case, *TOTAL contains the cost result. */
14868 ix86_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
14870 enum machine_mode mode
= GET_MODE (x
);
14878 if (TARGET_64BIT
&& !x86_64_sign_extended_value (x
))
14880 else if (TARGET_64BIT
&& !x86_64_zero_extended_value (x
))
14882 else if (flag_pic
&& SYMBOLIC_CONST (x
)
14884 || (!GET_CODE (x
) != LABEL_REF
14885 && (GET_CODE (x
) != SYMBOL_REF
14886 || !SYMBOL_REF_LOCAL_P (x
)))))
14893 if (mode
== VOIDmode
)
14896 switch (standard_80387_constant_p (x
))
14901 default: /* Other constants */
14906 /* Start with (MEM (SYMBOL_REF)), since that's where
14907 it'll probably end up. Add a penalty for size. */
14908 *total
= (COSTS_N_INSNS (1)
14909 + (flag_pic
!= 0 && !TARGET_64BIT
)
14910 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
14916 /* The zero extensions is often completely free on x86_64, so make
14917 it as cheap as possible. */
14918 if (TARGET_64BIT
&& mode
== DImode
14919 && GET_MODE (XEXP (x
, 0)) == SImode
)
14921 else if (TARGET_ZERO_EXTEND_WITH_AND
)
14922 *total
= COSTS_N_INSNS (ix86_cost
->add
);
14924 *total
= COSTS_N_INSNS (ix86_cost
->movzx
);
14928 *total
= COSTS_N_INSNS (ix86_cost
->movsx
);
14932 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
14933 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
14935 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
14938 *total
= COSTS_N_INSNS (ix86_cost
->add
);
14941 if ((value
== 2 || value
== 3)
14942 && ix86_cost
->lea
<= ix86_cost
->shift_const
)
14944 *total
= COSTS_N_INSNS (ix86_cost
->lea
);
14954 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
14956 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
14958 if (INTVAL (XEXP (x
, 1)) > 32)
14959 *total
= COSTS_N_INSNS(ix86_cost
->shift_const
+ 2);
14961 *total
= COSTS_N_INSNS(ix86_cost
->shift_const
* 2);
14965 if (GET_CODE (XEXP (x
, 1)) == AND
)
14966 *total
= COSTS_N_INSNS(ix86_cost
->shift_var
* 2);
14968 *total
= COSTS_N_INSNS(ix86_cost
->shift_var
* 6 + 2);
14973 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
14974 *total
= COSTS_N_INSNS (ix86_cost
->shift_const
);
14976 *total
= COSTS_N_INSNS (ix86_cost
->shift_var
);
14981 if (FLOAT_MODE_P (mode
))
14983 *total
= COSTS_N_INSNS (ix86_cost
->fmul
);
14988 rtx op0
= XEXP (x
, 0);
14989 rtx op1
= XEXP (x
, 1);
14991 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
14993 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
14994 for (nbits
= 0; value
!= 0; value
&= value
- 1)
14998 /* This is arbitrary. */
15001 /* Compute costs correctly for widening multiplication. */
15002 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op1
) == ZERO_EXTEND
)
15003 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
15004 == GET_MODE_SIZE (mode
))
15006 int is_mulwiden
= 0;
15007 enum machine_mode inner_mode
= GET_MODE (op0
);
15009 if (GET_CODE (op0
) == GET_CODE (op1
))
15010 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
15011 else if (GET_CODE (op1
) == CONST_INT
)
15013 if (GET_CODE (op0
) == SIGN_EXTEND
)
15014 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
15017 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
15021 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
15024 *total
= COSTS_N_INSNS (ix86_cost
->mult_init
[MODE_INDEX (mode
)]
15025 + nbits
* ix86_cost
->mult_bit
)
15026 + rtx_cost (op0
, outer_code
) + rtx_cost (op1
, outer_code
);
15035 if (FLOAT_MODE_P (mode
))
15036 *total
= COSTS_N_INSNS (ix86_cost
->fdiv
);
15038 *total
= COSTS_N_INSNS (ix86_cost
->divide
[MODE_INDEX (mode
)]);
15042 if (FLOAT_MODE_P (mode
))
15043 *total
= COSTS_N_INSNS (ix86_cost
->fadd
);
15044 else if (GET_MODE_CLASS (mode
) == MODE_INT
15045 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
15047 if (GET_CODE (XEXP (x
, 0)) == PLUS
15048 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
15049 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == CONST_INT
15050 && CONSTANT_P (XEXP (x
, 1)))
15052 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
15053 if (val
== 2 || val
== 4 || val
== 8)
15055 *total
= COSTS_N_INSNS (ix86_cost
->lea
);
15056 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
15057 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
15059 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
15063 else if (GET_CODE (XEXP (x
, 0)) == MULT
15064 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)
15066 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
15067 if (val
== 2 || val
== 4 || val
== 8)
15069 *total
= COSTS_N_INSNS (ix86_cost
->lea
);
15070 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
15071 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
15075 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
15077 *total
= COSTS_N_INSNS (ix86_cost
->lea
);
15078 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
15079 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
15080 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
15087 if (FLOAT_MODE_P (mode
))
15089 *total
= COSTS_N_INSNS (ix86_cost
->fadd
);
15097 if (!TARGET_64BIT
&& mode
== DImode
)
15099 *total
= (COSTS_N_INSNS (ix86_cost
->add
) * 2
15100 + (rtx_cost (XEXP (x
, 0), outer_code
)
15101 << (GET_MODE (XEXP (x
, 0)) != DImode
))
15102 + (rtx_cost (XEXP (x
, 1), outer_code
)
15103 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
15109 if (FLOAT_MODE_P (mode
))
15111 *total
= COSTS_N_INSNS (ix86_cost
->fchs
);
15117 if (!TARGET_64BIT
&& mode
== DImode
)
15118 *total
= COSTS_N_INSNS (ix86_cost
->add
* 2);
15120 *total
= COSTS_N_INSNS (ix86_cost
->add
);
15124 if (!TARGET_SSE_MATH
|| !VALID_SSE_REG_MODE (mode
))
15129 if (FLOAT_MODE_P (mode
))
15130 *total
= COSTS_N_INSNS (ix86_cost
->fabs
);
15134 if (FLOAT_MODE_P (mode
))
15135 *total
= COSTS_N_INSNS (ix86_cost
->fsqrt
);
15139 if (XINT (x
, 1) == UNSPEC_TP
)
15148 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
15150 ix86_svr3_asm_out_constructor (rtx symbol
, int priority ATTRIBUTE_UNUSED
)
15153 fputs ("\tpushl $", asm_out_file
);
15154 assemble_name (asm_out_file
, XSTR (symbol
, 0));
15155 fputc ('\n', asm_out_file
);
15161 static int current_machopic_label_num
;
15163 /* Given a symbol name and its associated stub, write out the
15164 definition of the stub. */
15167 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
15169 unsigned int length
;
15170 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
15171 int label
= ++current_machopic_label_num
;
15173 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15174 symb
= (*targetm
.strip_name_encoding
) (symb
);
15176 length
= strlen (stub
);
15177 binder_name
= alloca (length
+ 32);
15178 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
15180 length
= strlen (symb
);
15181 symbol_name
= alloca (length
+ 32);
15182 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
15184 sprintf (lazy_ptr_name
, "L%d$lz", label
);
15187 machopic_picsymbol_stub_section ();
15189 machopic_symbol_stub_section ();
15191 fprintf (file
, "%s:\n", stub
);
15192 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
15196 fprintf (file
, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label
, label
);
15197 fprintf (file
, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name
, label
);
15198 fprintf (file
, "\tjmp %%edx\n");
15201 fprintf (file
, "\tjmp *%s\n", lazy_ptr_name
);
15203 fprintf (file
, "%s:\n", binder_name
);
15207 fprintf (file
, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name
, label
);
15208 fprintf (file
, "\tpushl %%eax\n");
15211 fprintf (file
, "\t pushl $%s\n", lazy_ptr_name
);
15213 fprintf (file
, "\tjmp dyld_stub_binding_helper\n");
15215 machopic_lazy_symbol_ptr_section ();
15216 fprintf (file
, "%s:\n", lazy_ptr_name
);
15217 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
15218 fprintf (file
, "\t.long %s\n", binder_name
);
15220 #endif /* TARGET_MACHO */
15222 /* Order the registers for register allocator. */
15225 x86_order_regs_for_local_alloc (void)
15230 /* First allocate the local general purpose registers. */
15231 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
15232 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
15233 reg_alloc_order
[pos
++] = i
;
15235 /* Global general purpose registers. */
15236 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
15237 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
15238 reg_alloc_order
[pos
++] = i
;
15240 /* x87 registers come first in case we are doing FP math
15242 if (!TARGET_SSE_MATH
)
15243 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
15244 reg_alloc_order
[pos
++] = i
;
15246 /* SSE registers. */
15247 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
15248 reg_alloc_order
[pos
++] = i
;
15249 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
15250 reg_alloc_order
[pos
++] = i
;
15252 /* x87 registers. */
15253 if (TARGET_SSE_MATH
)
15254 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
15255 reg_alloc_order
[pos
++] = i
;
15257 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
15258 reg_alloc_order
[pos
++] = i
;
15260 /* Initialize the rest of array as we do not allocate some registers
15262 while (pos
< FIRST_PSEUDO_REGISTER
)
15263 reg_alloc_order
[pos
++] = 0;
15266 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15267 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15270 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15271 struct attribute_spec.handler. */
15273 ix86_handle_struct_attribute (tree
*node
, tree name
,
15274 tree args ATTRIBUTE_UNUSED
,
15275 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
15278 if (DECL_P (*node
))
15280 if (TREE_CODE (*node
) == TYPE_DECL
)
15281 type
= &TREE_TYPE (*node
);
15286 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
15287 || TREE_CODE (*type
) == UNION_TYPE
)))
15289 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name
));
15290 *no_add_attrs
= true;
15293 else if ((is_attribute_p ("ms_struct", name
)
15294 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
15295 || ((is_attribute_p ("gcc_struct", name
)
15296 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
15298 warning ("`%s' incompatible attribute ignored",
15299 IDENTIFIER_POINTER (name
));
15300 *no_add_attrs
= true;
15307 ix86_ms_bitfield_layout_p (tree record_type
)
15309 return (TARGET_USE_MS_BITFIELD_LAYOUT
&&
15310 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
15311 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
15314 /* Returns an expression indicating where the this parameter is
15315 located on entry to the FUNCTION. */
15318 x86_this_parameter (tree function
)
15320 tree type
= TREE_TYPE (function
);
15324 int n
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
15325 return gen_rtx_REG (DImode
, x86_64_int_parameter_registers
[n
]);
15328 if (ix86_function_regparm (type
, function
) > 0)
15332 parm
= TYPE_ARG_TYPES (type
);
15333 /* Figure out whether or not the function has a variable number of
15335 for (; parm
; parm
= TREE_CHAIN (parm
))
15336 if (TREE_VALUE (parm
) == void_type_node
)
15338 /* If not, the this parameter is in the first argument. */
15342 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
15344 return gen_rtx_REG (SImode
, regno
);
15348 if (aggregate_value_p (TREE_TYPE (type
), type
))
15349 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 8));
15351 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 4));
15354 /* Determine whether x86_output_mi_thunk can succeed. */
15357 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED
,
15358 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
15359 HOST_WIDE_INT vcall_offset
, tree function
)
15361 /* 64-bit can handle anything. */
15365 /* For 32-bit, everything's fine if we have one free register. */
15366 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
15369 /* Need a free register for vcall_offset. */
15373 /* Need a free register for GOT references. */
15374 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
15377 /* Otherwise ok. */
15381 /* Output the assembler code for a thunk function. THUNK_DECL is the
15382 declaration for the thunk function itself, FUNCTION is the decl for
15383 the target function. DELTA is an immediate constant offset to be
15384 added to THIS. If VCALL_OFFSET is nonzero, the word at
15385 *(*this + vcall_offset) should be added to THIS. */
15388 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED
,
15389 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
15390 HOST_WIDE_INT vcall_offset
, tree function
)
15393 rtx
this = x86_this_parameter (function
);
15396 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15397 pull it in now and let DELTA benefit. */
15400 else if (vcall_offset
)
15402 /* Put the this parameter into %eax. */
15404 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, 0);
15405 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
15408 this_reg
= NULL_RTX
;
15410 /* Adjust the this parameter by a fixed constant. */
15413 xops
[0] = GEN_INT (delta
);
15414 xops
[1] = this_reg
? this_reg
: this;
15417 if (!x86_64_general_operand (xops
[0], DImode
))
15419 tmp
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 2 /* R10 */);
15421 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
15425 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
15428 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
15431 /* Adjust the this parameter by a value stored in the vtable. */
15435 tmp
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 2 /* R10 */);
15438 int tmp_regno
= 2 /* ECX */;
15439 if (lookup_attribute ("fastcall",
15440 TYPE_ATTRIBUTES (TREE_TYPE (function
))))
15441 tmp_regno
= 0 /* EAX */;
15442 tmp
= gen_rtx_REG (SImode
, tmp_regno
);
15445 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
15448 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
15450 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
15452 /* Adjust the this parameter. */
15453 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
15454 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
15456 rtx tmp2
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
15457 xops
[0] = GEN_INT (vcall_offset
);
15459 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
15460 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
15462 xops
[1] = this_reg
;
15464 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
15466 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
15469 /* If necessary, drop THIS back to its stack slot. */
15470 if (this_reg
&& this_reg
!= this)
15472 xops
[0] = this_reg
;
15474 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
15477 xops
[0] = XEXP (DECL_RTL (function
), 0);
15480 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
15481 output_asm_insn ("jmp\t%P0", xops
);
15484 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, xops
[0]), UNSPEC_GOTPCREL
);
15485 tmp
= gen_rtx_CONST (Pmode
, tmp
);
15486 tmp
= gen_rtx_MEM (QImode
, tmp
);
15488 output_asm_insn ("jmp\t%A0", xops
);
15493 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
15494 output_asm_insn ("jmp\t%P0", xops
);
15499 const char *ip
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function
));
15500 tmp
= gen_rtx_SYMBOL_REF (Pmode
, machopic_stub_name (ip
));
15501 tmp
= gen_rtx_MEM (QImode
, tmp
);
15503 output_asm_insn ("jmp\t%0", xops
);
15506 #endif /* TARGET_MACHO */
15508 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
15509 output_set_got (tmp
);
15512 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
15513 output_asm_insn ("jmp\t{*}%1", xops
);
15519 x86_file_start (void)
15521 default_file_start ();
15522 if (X86_FILE_START_VERSION_DIRECTIVE
)
15523 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
15524 if (X86_FILE_START_FLTUSED
)
15525 fputs ("\t.global\t__fltused\n", asm_out_file
);
15526 if (ix86_asm_dialect
== ASM_INTEL
)
15527 fputs ("\t.intel_syntax\n", asm_out_file
);
15531 x86_field_alignment (tree field
, int computed
)
15533 enum machine_mode mode
;
15534 tree type
= TREE_TYPE (field
);
15536 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
15538 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
15539 ? get_inner_array_type (type
) : type
);
15540 if (mode
== DFmode
|| mode
== DCmode
15541 || GET_MODE_CLASS (mode
) == MODE_INT
15542 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
15543 return MIN (32, computed
);
15547 /* Output assembler code to FILE to increment profiler label # LABELNO
15548 for profiling a function entry. */
15550 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
15555 #ifndef NO_PROFILE_COUNTERS
15556 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
15558 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
15562 #ifndef NO_PROFILE_COUNTERS
15563 fprintf (file
, "\tmovq\t$%sP%d,%%r11\n", LPREFIX
, labelno
);
15565 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
15569 #ifndef NO_PROFILE_COUNTERS
15570 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15571 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
15573 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
15577 #ifndef NO_PROFILE_COUNTERS
15578 fprintf (file
, "\tmovl\t$%sP%d,%%%s\n", LPREFIX
, labelno
,
15579 PROFILE_COUNT_REGISTER
);
15581 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
15585 /* We don't have exact information about the insn sizes, but we may assume
15586 quite safely that we are informed about all 1 byte insns and memory
15587 address sizes. This is enough to eliminate unnecessary padding in
15591 min_insn_size (rtx insn
)
15595 if (!INSN_P (insn
) || !active_insn_p (insn
))
15598 /* Discard alignments we've emit and jump instructions. */
15599 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
15600 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
15602 if (GET_CODE (insn
) == JUMP_INSN
15603 && (GET_CODE (PATTERN (insn
)) == ADDR_VEC
15604 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
))
15607 /* Important case - calls are always 5 bytes.
15608 It is common to have many calls in the row. */
15609 if (GET_CODE (insn
) == CALL_INSN
15610 && symbolic_reference_mentioned_p (PATTERN (insn
))
15611 && !SIBLING_CALL_P (insn
))
15613 if (get_attr_length (insn
) <= 1)
15616 /* For normal instructions we may rely on the sizes of addresses
15617 and the presence of symbol to require 4 bytes of encoding.
15618 This is not the case for jumps where references are PC relative. */
15619 if (GET_CODE (insn
) != JUMP_INSN
)
15621 l
= get_attr_length_address (insn
);
15622 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
15631 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
15635 ix86_avoid_jump_misspredicts (void)
15637 rtx insn
, start
= get_insns ();
15638 int nbytes
= 0, njumps
= 0;
15641 /* Look for all minimal intervals of instructions containing 4 jumps.
15642 The intervals are bounded by START and INSN. NBYTES is the total
15643 size of instructions in the interval including INSN and not including
15644 START. When the NBYTES is smaller than 16 bytes, it is possible
15645 that the end of START and INSN ends up in the same 16byte page.
15647 The smallest offset in the page INSN can start is the case where START
15648 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
15649 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
15651 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
15654 nbytes
+= min_insn_size (insn
);
15656 fprintf(dump_file
, "Insn %i estimated to %i bytes\n",
15657 INSN_UID (insn
), min_insn_size (insn
));
15658 if ((GET_CODE (insn
) == JUMP_INSN
15659 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
15660 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
15661 || GET_CODE (insn
) == CALL_INSN
)
15668 start
= NEXT_INSN (start
);
15669 if ((GET_CODE (start
) == JUMP_INSN
15670 && GET_CODE (PATTERN (start
)) != ADDR_VEC
15671 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
15672 || GET_CODE (start
) == CALL_INSN
)
15673 njumps
--, isjump
= 1;
15676 nbytes
-= min_insn_size (start
);
15681 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
15682 INSN_UID (start
), INSN_UID (insn
), nbytes
);
15684 if (njumps
== 3 && isjump
&& nbytes
< 16)
15686 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
15689 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
15690 INSN_UID (insn
), padsize
);
15691 emit_insn_before (gen_align (GEN_INT (padsize
)), insn
);
15696 /* AMD Athlon works faster
15697 when RET is not destination of conditional jump or directly preceded
15698 by other jump instruction. We avoid the penalty by inserting NOP just
15699 before the RET instructions in such cases. */
15701 ix86_pad_returns (void)
15705 for (e
= EXIT_BLOCK_PTR
->pred
; e
; e
= e
->pred_next
)
15707 basic_block bb
= e
->src
;
15708 rtx ret
= BB_END (bb
);
15710 bool replace
= false;
15712 if (GET_CODE (ret
) != JUMP_INSN
|| GET_CODE (PATTERN (ret
)) != RETURN
15713 || !maybe_hot_bb_p (bb
))
15715 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
15716 if (active_insn_p (prev
) || GET_CODE (prev
) == CODE_LABEL
)
15718 if (prev
&& GET_CODE (prev
) == CODE_LABEL
)
15721 for (e
= bb
->pred
; e
; e
= e
->pred_next
)
15722 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
15723 && !(e
->flags
& EDGE_FALLTHRU
))
15728 prev
= prev_active_insn (ret
);
15730 && ((GET_CODE (prev
) == JUMP_INSN
&& any_condjump_p (prev
))
15731 || GET_CODE (prev
) == CALL_INSN
))
15733 /* Empty functions get branch mispredict even when the jump destination
15734 is not visible to us. */
15735 if (!prev
&& cfun
->function_frequency
> FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
15740 emit_insn_before (gen_return_internal_long (), ret
);
15746 /* Implement machine specific optimizations. We implement padding of returns
15747 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
15751 if (TARGET_ATHLON_K8
&& optimize
&& !optimize_size
)
15752 ix86_pad_returns ();
15753 if (TARGET_FOUR_JUMP_LIMIT
&& optimize
&& !optimize_size
)
15754 ix86_avoid_jump_misspredicts ();
15757 /* Return nonzero when QImode register that must be represented via REX prefix
15760 x86_extended_QIreg_mentioned_p (rtx insn
)
15763 extract_insn_cached (insn
);
15764 for (i
= 0; i
< recog_data
.n_operands
; i
++)
15765 if (REG_P (recog_data
.operand
[i
])
15766 && REGNO (recog_data
.operand
[i
]) >= 4)
15771 /* Return nonzero when P points to register encoded via REX prefix.
15772 Called via for_each_rtx. */
15774 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
15776 unsigned int regno
;
15779 regno
= REGNO (*p
);
15780 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
15783 /* Return true when INSN mentions register that must be encoded using REX
15786 x86_extended_reg_mentioned_p (rtx insn
)
15788 return for_each_rtx (&PATTERN (insn
), extended_reg_mentioned_1
, NULL
);
15791 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15792 optabs would emit if we didn't have TFmode patterns. */
15795 x86_emit_floatuns (rtx operands
[2])
15797 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
15798 enum machine_mode mode
, inmode
;
15800 inmode
= GET_MODE (operands
[1]);
15801 if (inmode
!= SImode
15802 && inmode
!= DImode
)
15806 in
= force_reg (inmode
, operands
[1]);
15807 mode
= GET_MODE (out
);
15808 neglab
= gen_label_rtx ();
15809 donelab
= gen_label_rtx ();
15810 i1
= gen_reg_rtx (Pmode
);
15811 f0
= gen_reg_rtx (mode
);
15813 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, Pmode
, 0, neglab
);
15815 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_FLOAT (mode
, in
)));
15816 emit_jump_insn (gen_jump (donelab
));
15819 emit_label (neglab
);
15821 i0
= expand_simple_binop (Pmode
, LSHIFTRT
, in
, const1_rtx
, NULL
, 1, OPTAB_DIRECT
);
15822 i1
= expand_simple_binop (Pmode
, AND
, in
, const1_rtx
, NULL
, 1, OPTAB_DIRECT
);
15823 i0
= expand_simple_binop (Pmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
15824 expand_float (f0
, i0
, 0);
15825 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
15827 emit_label (donelab
);
15830 /* Return if we do not know how to pass TYPE solely in registers. */
15832 ix86_must_pass_in_stack (enum machine_mode mode
, tree type
)
15834 if (default_must_pass_in_stack (mode
, type
))
15836 return (!TARGET_64BIT
&& type
&& mode
== TImode
);
15839 /* Initialize vector TARGET via VALS. */
15841 ix86_expand_vector_init (rtx target
, rtx vals
)
15843 enum machine_mode mode
= GET_MODE (target
);
15844 int elt_size
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
15845 int n_elts
= (GET_MODE_SIZE (mode
) / elt_size
);
15848 for (i
= n_elts
- 1; i
>= 0; i
--)
15849 if (GET_CODE (XVECEXP (vals
, 0, i
)) != CONST_INT
15850 && GET_CODE (XVECEXP (vals
, 0, i
)) != CONST_DOUBLE
)
15853 /* Few special cases first...
15854 ... constants are best loaded from constant pool. */
15857 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
15861 /* ... values where only first field is non-constant are best loaded
15862 from the pool and overwritten via move later. */
15865 rtx op
= simplify_gen_subreg (mode
, XVECEXP (vals
, 0, 0),
15866 GET_MODE_INNER (mode
), 0);
15868 op
= force_reg (mode
, op
);
15869 XVECEXP (vals
, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode
));
15870 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
15871 switch (GET_MODE (target
))
15874 emit_insn (gen_sse2_movsd (target
, target
, op
));
15877 emit_insn (gen_sse_movss (target
, target
, op
));
15885 /* And the busy sequence doing rotations. */
15886 switch (GET_MODE (target
))
15891 simplify_gen_subreg (V2DFmode
, XVECEXP (vals
, 0, 0), DFmode
, 0);
15893 simplify_gen_subreg (V2DFmode
, XVECEXP (vals
, 0, 1), DFmode
, 0);
15895 vecop0
= force_reg (V2DFmode
, vecop0
);
15896 vecop1
= force_reg (V2DFmode
, vecop1
);
15897 emit_insn (gen_sse2_unpcklpd (target
, vecop0
, vecop1
));
15903 simplify_gen_subreg (V4SFmode
, XVECEXP (vals
, 0, 0), SFmode
, 0);
15905 simplify_gen_subreg (V4SFmode
, XVECEXP (vals
, 0, 1), SFmode
, 0);
15907 simplify_gen_subreg (V4SFmode
, XVECEXP (vals
, 0, 2), SFmode
, 0);
15909 simplify_gen_subreg (V4SFmode
, XVECEXP (vals
, 0, 3), SFmode
, 0);
15910 rtx tmp1
= gen_reg_rtx (V4SFmode
);
15911 rtx tmp2
= gen_reg_rtx (V4SFmode
);
15913 vecop0
= force_reg (V4SFmode
, vecop0
);
15914 vecop1
= force_reg (V4SFmode
, vecop1
);
15915 vecop2
= force_reg (V4SFmode
, vecop2
);
15916 vecop3
= force_reg (V4SFmode
, vecop3
);
15917 emit_insn (gen_sse_unpcklps (tmp1
, vecop1
, vecop3
));
15918 emit_insn (gen_sse_unpcklps (tmp2
, vecop0
, vecop2
));
15919 emit_insn (gen_sse_unpcklps (target
, tmp2
, tmp1
));
15927 /* Worker function for TARGET_MD_ASM_CLOBBERS.
15929 We do this in the new i386 backend to maintain source compatibility
15930 with the old cc0-based compiler. */
15933 ix86_md_asm_clobbers (tree clobbers
)
15935 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
15937 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
15939 clobbers
= tree_cons (NULL_TREE
, build_string (7, "dirflag"),
15944 /* Worker function for REVERSE_CONDITION. */
15947 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
15949 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
15950 ? reverse_condition (code
)
15951 : reverse_condition_maybe_unordered (code
));
15954 /* Output code to perform an x87 FP register move, from OPERANDS[1]
15958 output_387_reg_move (rtx insn
, rtx
*operands
)
15960 if (REG_P (operands
[1])
15961 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
15963 if (REGNO (operands
[0]) == FIRST_STACK_REG
15964 && TARGET_USE_FFREEP
)
15965 return "ffreep\t%y0";
15966 return "fstp\t%y0";
15968 if (STACK_TOP_P (operands
[0]))
15969 return "fld%z1\t%y1";
15973 /* Output code to perform a conditional jump to LABEL, if C2 flag in
15974 FP status register is set. */
15977 ix86_emit_fp_unordered_jump (rtx label
)
15979 rtx reg
= gen_reg_rtx (HImode
);
15982 emit_insn (gen_x86_fnstsw_1 (reg
));
15984 if (TARGET_USE_SAHF
)
15986 emit_insn (gen_x86_sahf_1 (reg
));
15988 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
15989 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
15993 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
15995 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
15996 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
15999 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
16000 gen_rtx_LABEL_REF (VOIDmode
, label
),
16002 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
16003 emit_jump_insn (temp
);
16006 /* Output code to perform a log1p XFmode calculation. */
16008 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
16010 rtx label1
= gen_label_rtx ();
16011 rtx label2
= gen_label_rtx ();
16013 rtx tmp
= gen_reg_rtx (XFmode
);
16014 rtx tmp2
= gen_reg_rtx (XFmode
);
16016 emit_insn (gen_absxf2 (tmp
, op1
));
16017 emit_insn (gen_cmpxf (tmp
,
16018 CONST_DOUBLE_FROM_REAL_VALUE (
16019 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
16021 emit_jump_insn (gen_bge (label1
));
16023 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
16024 emit_insn (gen_fyl2xp1_xf3 (op0
, tmp2
, op1
));
16025 emit_jump (label2
);
16027 emit_label (label1
);
16028 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
16029 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
16030 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
16031 emit_insn (gen_fyl2x_xf3 (op0
, tmp2
, tmp
));
16033 emit_label (label2
);
16036 #include "gt-i386.h"