1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
/* Fallback definition when the target headers do not provide one.
   NOTE(review): the matching #endif is not visible in this excerpt -- verify
   it survives in the full file.  */
50 #ifndef CHECK_STACK_LIMIT
51 #define CHECK_STACK_LIMIT (-1)
54 /* Return index of given mode in mult and division cost tables. */
/* Indexes the 5-entry mult/divide arrays in struct processor_costs below.
   NOTE(review): the final fallthrough arm (": 4)" for wider modes) appears
   to have been dropped from this excerpt -- confirm against the full file.  */
55 #define MODE_INDEX(mode) \
56 ((mode) == QImode ? 0 \
57 : (mode) == HImode ? 1 \
58 : (mode) == SImode ? 2 \
59 : (mode) == DImode ? 3 \
62 /* Processor costs (relative to an add) */
/* Cost table used when optimizing for size rather than speed.
   NOTE(review): several fields present in the full i386.c (e.g. "large insn",
   MOVE_RATIO, branch cost) appear dropped from this excerpt.  */
64 struct processor_costs size_cost
= { /* costs for tuning for size */
65 2, /* cost of an add instruction */
66 3, /* cost of a lea instruction */
67 2, /* variable shift costs */
68 3, /* constant shift costs */
69 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
70 0, /* cost of multiply per each bit set */
71 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
72 3, /* cost of movsx */
73 3, /* cost of movzx */
76 2, /* cost for loading QImode using movzbl */
77 {2, 2, 2}, /* cost of loading integer registers
78 in QImode, HImode and SImode.
79 Relative to reg-reg move (2). */
80 {2, 2, 2}, /* cost of storing integer registers */
81 2, /* cost of reg,reg fld/fst */
82 {2, 2, 2}, /* cost of loading fp registers
83 in SFmode, DFmode and XFmode */
84 {2, 2, 2}, /* cost of storing fp registers
   in SFmode, DFmode and XFmode */
85 3, /* cost of moving MMX register */
86 {3, 3}, /* cost of loading MMX registers
87 in SImode and DImode */
88 {3, 3}, /* cost of storing MMX registers
89 in SImode and DImode */
90 3, /* cost of moving SSE register */
91 {3, 3, 3}, /* cost of loading SSE registers
92 in SImode, DImode and TImode */
93 {3, 3, 3}, /* cost of storing SSE registers
94 in SImode, DImode and TImode */
95 3, /* MMX or SSE register to integer */
96 0, /* size of prefetch block */
97 0, /* number of parallel prefetches */
99 2, /* cost of FADD and FSUB insns. */
100 2, /* cost of FMUL instruction. */
101 2, /* cost of FDIV instruction. */
102 2, /* cost of FABS instruction. */
103 2, /* cost of FCHS instruction. */
104 2, /* cost of FSQRT instruction. */
107 /* Processor costs (relative to an add) */
107 /* Processor costs (relative to an add) */
109 struct processor_costs i386_cost
= { /* 386 specific costs */
110 1, /* cost of an add instruction */
111 1, /* cost of a lea instruction */
112 3, /* variable shift costs */
113 2, /* constant shift costs */
114 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
115 1, /* cost of multiply per each bit set */
116 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
117 3, /* cost of movsx */
118 2, /* cost of movzx */
119 15, /* "large" insn */
121 4, /* cost for loading QImode using movzbl */
122 {2, 4, 2}, /* cost of loading integer registers
123 in QImode, HImode and SImode.
124 Relative to reg-reg move (2). */
125 {2, 4, 2}, /* cost of storing integer registers */
126 2, /* cost of reg,reg fld/fst */
127 {8, 8, 8}, /* cost of loading fp registers
128 in SFmode, DFmode and XFmode */
129 {8, 8, 8}, /* cost of storing fp registers
    in SFmode, DFmode and XFmode */
130 2, /* cost of moving MMX register */
131 {4, 8}, /* cost of loading MMX registers
132 in SImode and DImode */
133 {4, 8}, /* cost of storing MMX registers
134 in SImode and DImode */
135 2, /* cost of moving SSE register */
136 {4, 8, 16}, /* cost of loading SSE registers
137 in SImode, DImode and TImode */
138 {4, 8, 16}, /* cost of storing SSE registers
139 in SImode, DImode and TImode */
140 3, /* MMX or SSE register to integer */
141 0, /* size of prefetch block */
142 0, /* number of parallel prefetches */
144 23, /* cost of FADD and FSUB insns. */
145 27, /* cost of FMUL instruction. */
146 88, /* cost of FDIV instruction. */
147 22, /* cost of FABS instruction. */
148 24, /* cost of FCHS instruction. */
149 122, /* cost of FSQRT instruction. */
/* Cost table for the 80486.  */
153 struct processor_costs i486_cost
= { /* 486 specific costs */
154 1, /* cost of an add instruction */
155 1, /* cost of a lea instruction */
156 3, /* variable shift costs */
157 2, /* constant shift costs */
158 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
159 1, /* cost of multiply per each bit set */
160 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
161 3, /* cost of movsx */
162 2, /* cost of movzx */
163 15, /* "large" insn */
165 4, /* cost for loading QImode using movzbl */
166 {2, 4, 2}, /* cost of loading integer registers
167 in QImode, HImode and SImode.
168 Relative to reg-reg move (2). */
169 {2, 4, 2}, /* cost of storing integer registers */
170 2, /* cost of reg,reg fld/fst */
171 {8, 8, 8}, /* cost of loading fp registers
172 in SFmode, DFmode and XFmode */
173 {8, 8, 8}, /* cost of storing fp registers
    in SFmode, DFmode and XFmode */
174 2, /* cost of moving MMX register */
175 {4, 8}, /* cost of loading MMX registers
176 in SImode and DImode */
177 {4, 8}, /* cost of storing MMX registers
178 in SImode and DImode */
179 2, /* cost of moving SSE register */
180 {4, 8, 16}, /* cost of loading SSE registers
181 in SImode, DImode and TImode */
182 {4, 8, 16}, /* cost of storing SSE registers
183 in SImode, DImode and TImode */
184 3, /* MMX or SSE register to integer */
185 0, /* size of prefetch block */
186 0, /* number of parallel prefetches */
188 8, /* cost of FADD and FSUB insns. */
189 16, /* cost of FMUL instruction. */
190 73, /* cost of FDIV instruction. */
191 3, /* cost of FABS instruction. */
192 3, /* cost of FCHS instruction. */
193 83, /* cost of FSQRT instruction. */
/* Cost table for the Pentium (P5).  */
197 struct processor_costs pentium_cost
= {
198 1, /* cost of an add instruction */
199 1, /* cost of a lea instruction */
200 4, /* variable shift costs */
201 1, /* constant shift costs */
202 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
203 0, /* cost of multiply per each bit set */
204 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
205 3, /* cost of movsx */
206 2, /* cost of movzx */
207 8, /* "large" insn */
209 6, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {2, 2, 6}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {4, 4, 6}, /* cost of storing fp registers
    in SFmode, DFmode and XFmode */
218 8, /* cost of moving MMX register */
219 {8, 8}, /* cost of loading MMX registers
220 in SImode and DImode */
221 {8, 8}, /* cost of storing MMX registers
222 in SImode and DImode */
223 2, /* cost of moving SSE register */
224 {4, 8, 16}, /* cost of loading SSE registers
225 in SImode, DImode and TImode */
226 {4, 8, 16}, /* cost of storing SSE registers
227 in SImode, DImode and TImode */
228 3, /* MMX or SSE register to integer */
229 0, /* size of prefetch block */
230 0, /* number of parallel prefetches */
232 3, /* cost of FADD and FSUB insns. */
233 3, /* cost of FMUL instruction. */
234 39, /* cost of FDIV instruction. */
235 1, /* cost of FABS instruction. */
236 1, /* cost of FCHS instruction. */
237 70, /* cost of FSQRT instruction. */
/* Cost table for the Pentium Pro / P6 family.  */
241 struct processor_costs pentiumpro_cost
= {
242 1, /* cost of an add instruction */
243 1, /* cost of a lea instruction */
244 1, /* variable shift costs */
245 1, /* constant shift costs */
246 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
247 0, /* cost of multiply per each bit set */
248 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
249 1, /* cost of movsx */
250 1, /* cost of movzx */
251 8, /* "large" insn */
253 2, /* cost for loading QImode using movzbl */
254 {4, 4, 4}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 2, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261 {4, 4, 6}, /* cost of storing fp registers
    in SFmode, DFmode and XFmode */
262 2, /* cost of moving MMX register */
263 {2, 2}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {2, 2}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {2, 2, 8}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {2, 2, 8}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 3, /* MMX or SSE register to integer */
273 32, /* size of prefetch block */
274 6, /* number of parallel prefetches */
276 3, /* cost of FADD and FSUB insns. */
277 5, /* cost of FMUL instruction. */
278 56, /* cost of FDIV instruction. */
279 2, /* cost of FABS instruction. */
280 2, /* cost of FCHS instruction. */
281 56, /* cost of FSQRT instruction. */
/* Cost table for the AMD K6.  */
285 struct processor_costs k6_cost
= {
286 1, /* cost of an add instruction */
287 2, /* cost of a lea instruction */
288 1, /* variable shift costs */
289 1, /* constant shift costs */
290 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
291 0, /* cost of multiply per each bit set */
292 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
293 2, /* cost of movsx */
294 2, /* cost of movzx */
295 8, /* "large" insn */
297 3, /* cost for loading QImode using movzbl */
298 {4, 5, 4}, /* cost of loading integer registers
299 in QImode, HImode and SImode.
300 Relative to reg-reg move (2). */
301 {2, 3, 2}, /* cost of storing integer registers */
302 4, /* cost of reg,reg fld/fst */
303 {6, 6, 6}, /* cost of loading fp registers
304 in SFmode, DFmode and XFmode */
305 {4, 4, 4}, /* cost of storing fp registers
    in SFmode, DFmode and XFmode */
306 2, /* cost of moving MMX register */
307 {2, 2}, /* cost of loading MMX registers
308 in SImode and DImode */
309 {2, 2}, /* cost of storing MMX registers
310 in SImode and DImode */
311 2, /* cost of moving SSE register */
312 {2, 2, 8}, /* cost of loading SSE registers
313 in SImode, DImode and TImode */
314 {2, 2, 8}, /* cost of storing SSE registers
315 in SImode, DImode and TImode */
316 6, /* MMX or SSE register to integer */
317 32, /* size of prefetch block */
318 1, /* number of parallel prefetches */
320 2, /* cost of FADD and FSUB insns. */
321 2, /* cost of FMUL instruction. */
322 56, /* cost of FDIV instruction. */
323 2, /* cost of FABS instruction. */
324 2, /* cost of FCHS instruction. */
325 56, /* cost of FSQRT instruction. */
/* Cost table for the AMD Athlon.  Note the divide/mod costs vary by mode
   (QI/HI/SI/DI index via MODE_INDEX).  */
329 struct processor_costs athlon_cost
= {
330 1, /* cost of an add instruction */
331 2, /* cost of a lea instruction */
332 1, /* variable shift costs */
333 1, /* constant shift costs */
334 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
335 0, /* cost of multiply per each bit set */
336 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
337 1, /* cost of movsx */
338 1, /* cost of movzx */
339 8, /* "large" insn */
341 4, /* cost for loading QImode using movzbl */
342 {3, 4, 3}, /* cost of loading integer registers
343 in QImode, HImode and SImode.
344 Relative to reg-reg move (2). */
345 {3, 4, 3}, /* cost of storing integer registers */
346 4, /* cost of reg,reg fld/fst */
347 {4, 4, 12}, /* cost of loading fp registers
348 in SFmode, DFmode and XFmode */
349 {6, 6, 8}, /* cost of storing fp registers
    in SFmode, DFmode and XFmode */
350 2, /* cost of moving MMX register */
351 {4, 4}, /* cost of loading MMX registers
352 in SImode and DImode */
353 {4, 4}, /* cost of storing MMX registers
354 in SImode and DImode */
355 2, /* cost of moving SSE register */
356 {4, 4, 6}, /* cost of loading SSE registers
357 in SImode, DImode and TImode */
358 {4, 4, 5}, /* cost of storing SSE registers
359 in SImode, DImode and TImode */
360 5, /* MMX or SSE register to integer */
361 64, /* size of prefetch block */
362 6, /* number of parallel prefetches */
364 4, /* cost of FADD and FSUB insns. */
365 4, /* cost of FMUL instruction. */
366 24, /* cost of FDIV instruction. */
367 2, /* cost of FABS instruction. */
368 2, /* cost of FCHS instruction. */
369 35, /* cost of FSQRT instruction. */
/* Cost table for the AMD K8 (Opteron/Athlon 64).  */
373 struct processor_costs k8_cost
= {
374 1, /* cost of an add instruction */
375 2, /* cost of a lea instruction */
376 1, /* variable shift costs */
377 1, /* constant shift costs */
378 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
379 0, /* cost of multiply per each bit set */
380 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
381 1, /* cost of movsx */
382 1, /* cost of movzx */
383 8, /* "large" insn */
385 4, /* cost for loading QImode using movzbl */
386 {3, 4, 3}, /* cost of loading integer registers
387 in QImode, HImode and SImode.
388 Relative to reg-reg move (2). */
389 {3, 4, 3}, /* cost of storing integer registers */
390 4, /* cost of reg,reg fld/fst */
391 {4, 4, 12}, /* cost of loading fp registers
392 in SFmode, DFmode and XFmode */
393 {6, 6, 8}, /* cost of storing fp registers
    in SFmode, DFmode and XFmode */
394 2, /* cost of moving MMX register */
395 {3, 3}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {4, 4}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 3, 6}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 4, 5}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 5, /* MMX or SSE register to integer */
405 64, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 4, /* cost of FADD and FSUB insns. */
409 4, /* cost of FMUL instruction. */
410 19, /* cost of FDIV instruction. */
411 2, /* cost of FABS instruction. */
412 2, /* cost of FCHS instruction. */
413 35, /* cost of FSQRT instruction. */
/* Cost table for the Pentium 4 (NetBurst).  */
417 struct processor_costs pentium4_cost
= {
418 1, /* cost of an add instruction */
419 1, /* cost of a lea instruction */
420 4, /* variable shift costs */
421 4, /* constant shift costs */
422 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
423 0, /* cost of multiply per each bit set */
424 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
425 1, /* cost of movsx */
426 1, /* cost of movzx */
427 16, /* "large" insn */
429 2, /* cost for loading QImode using movzbl */
430 {4, 5, 4}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 3, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
437 {4, 4, 6}, /* cost of storing fp registers
    in SFmode, DFmode and XFmode */
438 2, /* cost of moving MMX register */
439 {2, 2}, /* cost of loading MMX registers
440 in SImode and DImode */
441 {2, 2}, /* cost of storing MMX registers
442 in SImode and DImode */
443 12, /* cost of moving SSE register */
444 {12, 12, 12}, /* cost of loading SSE registers
445 in SImode, DImode and TImode */
446 {2, 2, 8}, /* cost of storing SSE registers
447 in SImode, DImode and TImode */
448 10, /* MMX or SSE register to integer */
449 64, /* size of prefetch block */
450 6, /* number of parallel prefetches */
452 5, /* cost of FADD and FSUB insns. */
453 7, /* cost of FMUL instruction. */
454 43, /* cost of FDIV instruction. */
455 2, /* cost of FABS instruction. */
456 2, /* cost of FCHS instruction. */
457 43, /* cost of FSQRT instruction. */
/* Active cost table; defaults to the Pentium table until overridden by
   option processing (not visible in this excerpt).  */
460 const struct processor_costs
*ix86_cost
= &pentium_cost
;
462 /* Processor feature/optimization bitmasks.  One bit per processor; the
   x86_* tuning flags below are OR-combinations of these.  */
463 #define m_386 (1<<PROCESSOR_I386)
464 #define m_486 (1<<PROCESSOR_I486)
465 #define m_PENT (1<<PROCESSOR_PENTIUM)
466 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
467 #define m_K6 (1<<PROCESSOR_K6)
468 #define m_ATHLON (1<<PROCESSOR_ATHLON)
469 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
470 #define m_K8 (1<<PROCESSOR_K8)
471 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
/* Per-processor tuning flags.  Each flag is a bitmask of the m_* processor
   bits above; a set bit means the optimization is enabled when tuning for
   that processor.  Flags built with ~ enable for all processors except
   those listed.  */
473 const int x86_use_leave
= m_386
| m_K6
| m_ATHLON_K8
;
474 const int x86_push_memory
= m_386
| m_K6
| m_ATHLON_K8
| m_PENT4
;
475 const int x86_zero_extend_with_and
= m_486
| m_PENT
;
476 const int x86_movx
= m_ATHLON_K8
| m_PPRO
| m_PENT4
/* m_386 | m_K6 */;
477 const int x86_double_with_add
= ~m_386
;
478 const int x86_use_bit_test
= m_386
;
479 const int x86_unroll_strlen
= m_486
| m_PENT
| m_PPRO
| m_ATHLON_K8
| m_K6
;
/* Processors with a usable conditional-move instruction.  */
480 const int x86_cmove
= m_PPRO
| m_ATHLON_K8
| m_PENT4
;
481 const int x86_3dnow_a
= m_ATHLON_K8
;
482 const int x86_deep_branch
= m_PPRO
| m_K6
| m_ATHLON_K8
| m_PENT4
;
483 const int x86_branch_hints
= m_PENT4
;
484 const int x86_use_sahf
= m_PPRO
| m_K6
| m_PENT4
;
485 const int x86_partial_reg_stall
= m_PPRO
;
486 const int x86_use_loop
= m_K6
;
487 const int x86_use_fiop
= ~(m_PPRO
| m_ATHLON_K8
| m_PENT
);
488 const int x86_use_mov0
= m_K6
;
489 const int x86_use_cltd
= ~(m_PENT
| m_K6
);
490 const int x86_read_modify_write
= ~m_PENT
;
491 const int x86_read_modify
= ~(m_PENT
| m_PPRO
);
492 const int x86_split_long_moves
= m_PPRO
;
493 const int x86_promote_QImode
= m_K6
| m_PENT
| m_386
| m_486
| m_ATHLON_K8
;
494 const int x86_fast_prefix
= ~(m_PENT
| m_486
| m_386
);
495 const int x86_single_stringop
= m_386
| m_PENT4
;
/* QImode arithmetic is allowed everywhere.  */
496 const int x86_qimode_math
= ~(0);
497 const int x86_promote_qi_regs
= 0;
498 const int x86_himode_math
= ~(m_PPRO
);
499 const int x86_promote_hi_regs
= m_PPRO
;
500 const int x86_sub_esp_4
= m_ATHLON_K8
| m_PPRO
| m_PENT4
;
501 const int x86_sub_esp_8
= m_ATHLON_K8
| m_PPRO
| m_386
| m_486
| m_PENT4
;
502 const int x86_add_esp_4
= m_ATHLON_K8
| m_K6
| m_PENT4
;
503 const int x86_add_esp_8
= m_ATHLON_K8
| m_PPRO
| m_K6
| m_386
| m_486
| m_PENT4
;
504 const int x86_integer_DFmode_moves
= ~(m_ATHLON_K8
| m_PENT4
| m_PPRO
);
505 const int x86_partial_reg_dependency
= m_ATHLON_K8
| m_PENT4
;
506 const int x86_memory_mismatch_stall
= m_ATHLON_K8
| m_PENT4
;
507 const int x86_accumulate_outgoing_args
= m_ATHLON_K8
| m_PENT4
| m_PPRO
;
508 const int x86_prologue_using_move
= m_ATHLON_K8
| m_PENT4
| m_PPRO
;
509 const int x86_epilogue_using_move
= m_ATHLON_K8
| m_PENT4
| m_PPRO
;
510 const int x86_decompose_lea
= m_PENT4
;
511 const int x86_shift1
= ~m_486
;
512 const int x86_arch_always_fancy_math_387
= m_PENT
| m_PPRO
| m_ATHLON_K8
| m_PENT4
;
513 const int x86_sse_partial_reg_dependency
= m_PENT4
| m_PPRO
;
514 /* Set for machines where the type and dependencies are resolved on SSE register
515 parts instead of whole registers, so we may maintain just lower part of
516 scalar values in proper format leaving the upper part undefined. */
517 const int x86_sse_partial_regs
= m_ATHLON_K8
;
518 /* Athlon optimizes partial-register FPS special case, thus avoiding the
519 need for extra instructions beforehand */
520 const int x86_sse_partial_regs_for_cvtsd2ss
= 0;
521 const int x86_sse_typeless_stores
= m_ATHLON_K8
;
522 const int x86_sse_load0_by_pxor
= m_PPRO
| m_PENT4
;
523 const int x86_use_ffreep
= m_ATHLON_K8
;
524 const int x86_rep_movl_optimal
= m_386
| m_PENT
| m_PPRO
| m_K6
;
525 const int x86_inter_unit_moves
= ~(m_ATHLON_K8
);
526 const int x86_ext_80387_constants
= m_K6
| m_ATHLON
| m_PENT4
| m_PPRO
;
528 /* In case the average insn count for single function invocation is
529 lower than this constant, emit fast (but longer) prologue and
   epilogue code.  NOTE(review): the tail of this comment appears cut in
   this excerpt.  */
531 #define FAST_PROLOGUE_INSN_COUNT 20
533 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
/* The *_REGISTER_NAMES initializer macros come from the target headers.  */
534 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
535 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
536 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
538 /* Array of the smallest class containing reg number REGNO, indexed by
539 REGNO. Used by REGNO_REG_CLASS in i386.h.
   NOTE(review): some entries (and the surrounding braces) appear to have
   been dropped from this excerpt -- entry count should equal
   FIRST_PSEUDO_REGISTER in the full file.  */
541 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
/* ax, dx, cx, bx */
544 AREG
, DREG
, CREG
, BREG
,
/* si, di, then two non-QImode-capable registers */
546 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
/* FP stack registers: st(0), st(1), then the rest */
548 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
549 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
552 /* flags, fpsr, dirflag, frame */
553 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
/* SSE registers */
554 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
/* MMX registers */
556 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
/* presumably the extended (REX) integer registers -- matches the layout
   documented for dbx_register_map below; verify against the full file */
558 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
559 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
/* presumably the extended SSE registers */
560 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
564 /* The "default" register map used in 32bit mode.  Maps gcc register
   numbers to debugger (DBX/stabs) register numbers; -1 = no debugger
   equivalent.  */
566 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
568 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
569 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
570 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
571 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
572 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
573 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
574 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
/* x86-64 ABI integer argument registers, in argument order, as gcc
   register numbers (ax=0, dx=1, cx=2, bx=3, si=4, di=5 -- see the SVR4
   numbering comment below).  */
577 static int const x86_64_int_parameter_registers
[6] =
579 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
580 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
/* x86-64 ABI integer value-return registers.  gcc regno 1 is %rdx, not
   %rdi (regno 5) -- the original comment here was wrong.  */
583 static int const x86_64_int_return_registers
[4] =
585 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
588 /* The "default" register map used in 64bit mode.  Same role as
   dbx_register_map above, but with the 64-bit debugger numbering.  */
589 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
591 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
592 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
593 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
594 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
595 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
596 8,9,10,11,12,13,14,15, /* extended integer registers */
597 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
600 /* Define the register numbers to be used in Dwarf debugging information.
601 The SVR4 reference port C compiler uses the following register numbers
602 in its Dwarf output code:
603 0 for %eax (gcc regno = 0)
604 1 for %ecx (gcc regno = 2)
605 2 for %edx (gcc regno = 1)
606 3 for %ebx (gcc regno = 3)
607 4 for %esp (gcc regno = 7)
608 5 for %ebp (gcc regno = 6)
609 6 for %esi (gcc regno = 4)
610 7 for %edi (gcc regno = 5)
611 The following three DWARF register numbers are never generated by
612 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
613 believes these numbers have these meanings.
614 8 for %eip (no gcc equivalent)
615 9 for %eflags (gcc regno = 17)
616 10 for %trapno (no gcc equivalent)
617 It is not at all clear how we should number the FP stack registers
618 for the x86 architecture. If the version of SDB on x86/svr4 were
619 a bit less brain dead with respect to floating-point then we would
620 have a precedent to follow with respect to DWARF register numbers
621 for x86 FP registers, but the SDB on x86/svr4 is so completely
622 broken with respect to FP registers that it is hardly worth thinking
623 of it as something to strive for compatibility with.
624 The version of x86/svr4 SDB I have at the moment does (partially)
625 seem to believe that DWARF register number 11 is associated with
626 the x86 register %st(0), but that's about all. Higher DWARF
627 register numbers don't seem to be associated with anything in
628 particular, and even for DWARF regno 11, SDB only seems to under-
629 stand that it should say that a variable lives in %st(0) (when
630 asked via an `=' command) if we said it was in DWARF regno 11,
631 but SDB still prints garbage when asked for the value of the
632 variable in question (via a `/' command).
633 (Also note that the labels SDB prints for various FP stack regs
634 when doing an `x' command are all wrong.)
635 Note that these problems generally don't affect the native SVR4
636 C compiler because it doesn't allow the use of -O with -g and
637 because when it is *not* optimizing, it allocates a memory
638 location for each floating-point variable, and the memory
639 location is what gets described in the DWARF AT_location
640 attribute for the variable in question.
641 Regardless of the severe mental illness of the x86/svr4 SDB, we
642 do something sensible here and we use the following DWARF
643 register numbers. Note that these are all stack-top-relative
645 11 for %st(0) (gcc regno = 8)
646 12 for %st(1) (gcc regno = 9)
647 13 for %st(2) (gcc regno = 10)
648 14 for %st(3) (gcc regno = 11)
649 15 for %st(4) (gcc regno = 12)
650 16 for %st(5) (gcc regno = 13)
651 17 for %st(6) (gcc regno = 14)
652 18 for %st(7) (gcc regno = 15)
/* SVR4 DWARF register numbering (rationale in the long comment above).
   Note flags maps to DWARF regno 9 here, unlike dbx_register_map.  */
654 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
656 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
657 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
658 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
659 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
660 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
661 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
662 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
665 /* Test and compare insns in i386.md store the information needed to
666 generate branch and scc insns here. */
668 rtx ix86_compare_op0
= NULL_RTX
;
669 rtx ix86_compare_op1
= NULL_RTX
;
/* Number of stack slots reserved for internal use per function.  */
671 #define MAX_386_STACK_LOCALS 3
672 /* Size of the register save area. */
673 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
675 /* Define the structure for the machine field in struct function.
   NOTE(review): the bodies of struct stack_local_entry and struct
   ix86_frame are visibly truncated in this excerpt (fields and braces
   missing) -- do not edit structurally without the full file.  */
677 struct stack_local_entry
GTY(())
682 struct stack_local_entry
*next
;
685 /* Structure describing stack frame layout.
686 Stack grows downward:
692 saved frame pointer if frame_pointer_needed
693 <- HARD_FRAME_POINTER
699 > to_allocate <- FRAME_POINTER
711 int outgoing_arguments_size
;
714 HOST_WIDE_INT to_allocate
;
715 /* The offsets relative to ARG_POINTER. */
716 HOST_WIDE_INT frame_pointer_offset
;
717 HOST_WIDE_INT hard_frame_pointer_offset
;
718 HOST_WIDE_INT stack_pointer_offset
;
720 /* When save_regs_using_mov is set, emit prologue using
721 move instead of push instructions. */
722 bool save_regs_using_mov
;
725 /* Used to enable/disable debugging features. */
726 const char *ix86_debug_arg_string
, *ix86_debug_addr_string
;
727 /* Code model option as passed by user. */
728 const char *ix86_cmodel_string
;
/* Parsed code model.  */
730 enum cmodel ix86_cmodel
;
/* Assembler dialect option as passed by the user (-masm=).  */
732 const char *ix86_asm_string
;
733 enum asm_dialect ix86_asm_dialect
= ASM_ATT
;
/* TLS dialect option string and its parsed value; defaults to GNU.  */
735 const char *ix86_tls_dialect_string
;
736 enum tls_dialect ix86_tls_dialect
= TLS_DIALECT_GNU
;
738 /* Which unit we are generating floating point math for. */
739 enum fpmath_unit ix86_fpmath
;
741 /* Which cpu are we scheduling for. */
742 enum processor_type ix86_tune
;
743 /* Which instruction set architecture to use. */
744 enum processor_type ix86_arch
;
746 /* Strings to hold which cpu and instruction set architecture to use. */
747 const char *ix86_tune_string
; /* for -mtune=<xxx> */
748 const char *ix86_arch_string
; /* for -march=<xxx> */
749 const char *ix86_fpmath_string
; /* for -mfpmath=<xxx> */
751 /* # of registers to use to pass arguments. */
752 const char *ix86_regparm_string
;
754 /* true if sse prefetch instruction is not NOOP. */
755 int x86_prefetch_sse
;
757 /* ix86_regparm_string as a number */
760 /* Alignment to use for loops and jumps: */
762 /* Power of two alignment for loops. */
763 const char *ix86_align_loops_string
;
765 /* Power of two alignment for non-loop jumps. */
766 const char *ix86_align_jumps_string
;
768 /* Power of two alignment for stack boundary in bytes. */
769 const char *ix86_preferred_stack_boundary_string
;
771 /* Preferred alignment for stack boundary in bits. */
772 int ix86_preferred_stack_boundary
;
774 /* Values 1-5: see jump.c */
775 int ix86_branch_cost
;
776 const char *ix86_branch_cost_string
;
778 /* Power of two alignment for functions. */
779 const char *ix86_align_funcs_string
;
781 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
782 static char internal_label_prefix
[16];
783 static int internal_label_prefix_len
;
/* Forward declarations for the static helpers defined later in this file.
   NOTE(review): several prototypes below are visibly missing trailing
   parameter lines in this excerpt (e.g. put_condition_code,
   ix86_prepare_fp_compare_args, ix86_expand_sse_comi) -- verify against
   the full file before editing.  */
785 static int local_symbolic_operand (rtx
, enum machine_mode
);
786 static int tls_symbolic_operand_1 (rtx
, enum tls_model
);
787 static void output_pic_addr_const (FILE *, rtx
, int);
788 static void put_condition_code (enum rtx_code
, enum machine_mode
,
790 static const char *get_some_local_dynamic_name (void);
791 static int get_some_local_dynamic_name_1 (rtx
*, void *);
792 static rtx
maybe_get_pool_constant (rtx
);
793 static rtx
ix86_expand_int_compare (enum rtx_code
, rtx
, rtx
);
794 static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code
, rtx
*,
796 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
797 static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode
,
799 static rtx
get_thread_pointer (int);
800 static rtx
legitimize_tls_address (rtx
, enum tls_model
, int);
801 static void get_pc_thunk_name (char [32], unsigned int);
802 static rtx
gen_push (rtx
);
803 static int memory_address_length (rtx addr
);
804 static int ix86_flags_dependant (rtx
, rtx
, enum attr_type
);
805 static int ix86_agi_dependant (rtx
, rtx
, enum attr_type
);
806 static enum attr_ppro_uops
ix86_safe_ppro_uops (rtx
);
807 static void ix86_dump_ppro_packet (FILE *);
808 static void ix86_reorder_insn (rtx
*, rtx
*);
809 static struct machine_function
* ix86_init_machine_status (void);
810 static int ix86_split_to_parts (rtx
, rtx
*, enum machine_mode
);
811 static int ix86_nsaved_regs (void);
812 static void ix86_emit_save_regs (void);
813 static void ix86_emit_save_regs_using_mov (rtx
, HOST_WIDE_INT
);
814 static void ix86_emit_restore_regs_using_mov (rtx
, HOST_WIDE_INT
, int);
815 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT
);
816 static void ix86_sched_reorder_ppro (rtx
*, rtx
*);
817 static HOST_WIDE_INT
ix86_GOT_alias_set (void);
818 static void ix86_adjust_counter (rtx
, HOST_WIDE_INT
);
819 static rtx
ix86_expand_aligntest (rtx
, int);
820 static void ix86_expand_strlensi_unroll_1 (rtx
, rtx
, rtx
);
821 static int ix86_issue_rate (void);
822 static int ix86_adjust_cost (rtx
, rtx
, rtx
, int);
823 static void ix86_sched_init (FILE *, int, int);
824 static int ix86_sched_reorder (FILE *, int, rtx
*, int *, int);
825 static int ix86_variable_issue (FILE *, int, rtx
, int);
826 static int ia32_use_dfa_pipeline_interface (void);
827 static int ia32_multipass_dfa_lookahead (void);
828 static void ix86_init_mmx_sse_builtins (void);
829 static rtx
x86_this_parameter (tree
);
830 static void x86_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
,
831 HOST_WIDE_INT
, tree
);
832 static bool x86_can_output_mi_thunk (tree
, HOST_WIDE_INT
, HOST_WIDE_INT
, tree
);
833 static void x86_file_start (void);
834 static void ix86_reorg (void);
835 static bool ix86_expand_carry_flag_compare (enum rtx_code
, rtx
, rtx
, rtx
*);
836 static tree
ix86_build_builtin_va_list (void);
/* NOTE(review): the lines below look like the interior of struct
   ix86_address (base/index/disp + segment) with its enclosing braces
   dropped by the excerpt.  */
840 rtx base
, index
, disp
;
842 enum ix86_address_seg
{ SEG_DEFAULT
, SEG_FS
, SEG_GS
} seg
;
845 static int ix86_decompose_address (rtx
, struct ix86_address
*);
846 static int ix86_address_cost (rtx
);
847 static bool ix86_cannot_force_const_mem (rtx
);
848 static rtx
ix86_delegitimize_address (rtx
);
850 struct builtin_description
;
851 static rtx
ix86_expand_sse_comi (const struct builtin_description
*,
853 static rtx
ix86_expand_sse_compare (const struct builtin_description
*,
855 static rtx
ix86_expand_unop1_builtin (enum insn_code
, tree
, rtx
);
856 static rtx
ix86_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
857 static rtx
ix86_expand_binop_builtin (enum insn_code
, tree
, rtx
);
858 static rtx
ix86_expand_store_builtin (enum insn_code
, tree
);
859 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
860 static enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code
);
861 static void ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*,
862 enum rtx_code
*, enum rtx_code
*);
863 static rtx
ix86_expand_fp_compare (enum rtx_code
, rtx
, rtx
, rtx
, rtx
*, rtx
*);
864 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code
);
865 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code
);
866 static int ix86_fp_comparison_sahf_cost (enum rtx_code code
);
867 static int ix86_fp_comparison_cost (enum rtx_code code
);
868 static unsigned int ix86_select_alt_pic_regnum (void);
869 static int ix86_save_reg (unsigned int, int);
870 static void ix86_compute_frame_layout (struct ix86_frame
*);
871 static int ix86_comp_type_attributes (tree
, tree
);
872 static int ix86_function_regparm (tree
, tree
);
873 const struct attribute_spec ix86_attribute_table
[];
874 static bool ix86_function_ok_for_sibcall (tree
, tree
);
875 static tree
ix86_handle_cdecl_attribute (tree
*, tree
, tree
, int, bool *);
876 static tree
ix86_handle_regparm_attribute (tree
*, tree
, tree
, int, bool *);
877 static int ix86_value_regno (enum machine_mode
);
878 static bool contains_128bit_aligned_vector_p (tree
);
879 static bool ix86_ms_bitfield_layout_p (tree
);
880 static tree
ix86_handle_struct_attribute (tree
*, tree
, tree
, int, bool *);
881 static int extended_reg_mentioned_1 (rtx
*, void *);
882 static bool ix86_rtx_costs (rtx
, int, int, int *);
883 static int min_insn_size (rtx
);
884 static void k8_avoid_jump_misspredicts (void);
886 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
887 static void ix86_svr3_asm_out_constructor (rtx
, int);
890 /* Register class used for passing given 64bit part of the argument.
891 These represent classes as documented by the PS ABI, with the exception
892 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
893 use SF or DFmode move instead of DImode to avoid reformatting penalties.
895 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
896 whenever possible (upper half does contain padding).
898 enum x86_64_reg_class
901 X86_64_INTEGER_CLASS
,
902 X86_64_INTEGERSI_CLASS
,
911 static const char * const x86_64_reg_class_name
[] =
912 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
914 #define MAX_CLASSES 4
915 static int classify_argument (enum machine_mode
, tree
,
916 enum x86_64_reg_class
[MAX_CLASSES
], int);
917 static int examine_argument (enum machine_mode
, tree
, int, int *, int *);
918 static rtx
construct_container (enum machine_mode
, tree
, int, int, int,
920 static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class
,
921 enum x86_64_reg_class
);
923 /* Table of constants used by fldpi, fldln2, etc.... */
924 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
925 static bool ext_80387_constants_init
= 0;
926 static void init_ext_80387_constants (void);
928 /* Initialize the GCC target structure. */
929 #undef TARGET_ATTRIBUTE_TABLE
930 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
931 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
932 # undef TARGET_MERGE_DECL_ATTRIBUTES
933 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
936 #undef TARGET_COMP_TYPE_ATTRIBUTES
937 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
939 #undef TARGET_INIT_BUILTINS
940 #define TARGET_INIT_BUILTINS ix86_init_builtins
942 #undef TARGET_EXPAND_BUILTIN
943 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
945 #undef TARGET_ASM_FUNCTION_EPILOGUE
946 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
948 #undef TARGET_ASM_OPEN_PAREN
949 #define TARGET_ASM_OPEN_PAREN ""
950 #undef TARGET_ASM_CLOSE_PAREN
951 #define TARGET_ASM_CLOSE_PAREN ""
953 #undef TARGET_ASM_ALIGNED_HI_OP
954 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
955 #undef TARGET_ASM_ALIGNED_SI_OP
956 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
958 #undef TARGET_ASM_ALIGNED_DI_OP
959 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
962 #undef TARGET_ASM_UNALIGNED_HI_OP
963 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
964 #undef TARGET_ASM_UNALIGNED_SI_OP
965 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
966 #undef TARGET_ASM_UNALIGNED_DI_OP
967 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
969 #undef TARGET_SCHED_ADJUST_COST
970 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
971 #undef TARGET_SCHED_ISSUE_RATE
972 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
973 #undef TARGET_SCHED_VARIABLE_ISSUE
974 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
975 #undef TARGET_SCHED_INIT
976 #define TARGET_SCHED_INIT ix86_sched_init
977 #undef TARGET_SCHED_REORDER
978 #define TARGET_SCHED_REORDER ix86_sched_reorder
979 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
980 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
981 ia32_use_dfa_pipeline_interface
982 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
983 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
984 ia32_multipass_dfa_lookahead
986 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
987 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
990 #undef TARGET_HAVE_TLS
991 #define TARGET_HAVE_TLS true
993 #undef TARGET_CANNOT_FORCE_CONST_MEM
994 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
996 #undef TARGET_DELEGITIMIZE_ADDRESS
997 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
999 #undef TARGET_MS_BITFIELD_LAYOUT_P
1000 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1002 #undef TARGET_ASM_OUTPUT_MI_THUNK
1003 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1004 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1005 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1007 #undef TARGET_ASM_FILE_START
1008 #define TARGET_ASM_FILE_START x86_file_start
1010 #undef TARGET_RTX_COSTS
1011 #define TARGET_RTX_COSTS ix86_rtx_costs
1012 #undef TARGET_ADDRESS_COST
1013 #define TARGET_ADDRESS_COST ix86_address_cost
1015 #undef TARGET_FIXED_CONDITION_CODE_REGS
1016 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1017 #undef TARGET_CC_MODES_COMPATIBLE
1018 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1020 #undef TARGET_MACHINE_DEPENDENT_REORG
1021 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1023 #undef TARGET_BUILD_BUILTIN_VA_LIST
1024 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1026 struct gcc_target targetm
= TARGET_INITIALIZER
;
1028 /* The svr4 ABI for the i386 says that records and unions are returned
1030 #ifndef DEFAULT_PCC_STRUCT_RETURN
1031 #define DEFAULT_PCC_STRUCT_RETURN 1
1034 /* Sometimes certain combinations of command options do not make
1035 sense on a particular target machine. You can define a macro
1036 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1037 defined, is executed once just after all the command options have
1040 Don't use this macro to turn on various extra optimizations for
1041 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1044 override_options (void)
1047 /* Comes from final.c -- no real reason to change it. */
1048 #define MAX_CODE_ALIGN 16
1052 const struct processor_costs
*cost
; /* Processor costs */
1053 const int target_enable
; /* Target flags to enable. */
1054 const int target_disable
; /* Target flags to disable. */
1055 const int align_loop
; /* Default alignments. */
1056 const int align_loop_max_skip
;
1057 const int align_jump
;
1058 const int align_jump_max_skip
;
1059 const int align_func
;
1061 const processor_target_table
[PROCESSOR_max
] =
1063 {&i386_cost
, 0, 0, 4, 3, 4, 3, 4},
1064 {&i486_cost
, 0, 0, 16, 15, 16, 15, 16},
1065 {&pentium_cost
, 0, 0, 16, 7, 16, 7, 16},
1066 {&pentiumpro_cost
, 0, 0, 16, 15, 16, 7, 16},
1067 {&k6_cost
, 0, 0, 32, 7, 32, 7, 32},
1068 {&athlon_cost
, 0, 0, 16, 7, 16, 7, 16},
1069 {&pentium4_cost
, 0, 0, 0, 0, 0, 0, 0},
1070 {&k8_cost
, 0, 0, 16, 7, 16, 7, 16}
1073 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
1076 const char *const name
; /* processor name or nickname. */
1077 const enum processor_type processor
;
1078 const enum pta_flags
1083 PTA_PREFETCH_SSE
= 8,
1089 const processor_alias_table
[] =
1091 {"i386", PROCESSOR_I386
, 0},
1092 {"i486", PROCESSOR_I486
, 0},
1093 {"i586", PROCESSOR_PENTIUM
, 0},
1094 {"pentium", PROCESSOR_PENTIUM
, 0},
1095 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
1096 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
1097 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1098 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1099 {"c3-2", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_SSE
},
1100 {"i686", PROCESSOR_PENTIUMPRO
, 0},
1101 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
1102 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
1103 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1104 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
|
1105 PTA_MMX
| PTA_PREFETCH_SSE
},
1106 {"k6", PROCESSOR_K6
, PTA_MMX
},
1107 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1108 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1109 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1111 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
1112 | PTA_3DNOW
| PTA_3DNOW_A
},
1113 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1114 | PTA_3DNOW_A
| PTA_SSE
},
1115 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1116 | PTA_3DNOW_A
| PTA_SSE
},
1117 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1118 | PTA_3DNOW_A
| PTA_SSE
},
1119 {"x86-64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_64BIT
1120 | PTA_SSE
| PTA_SSE2
},
1121 {"k8", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1122 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1123 {"opteron", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1124 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1125 {"athlon64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1126 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1127 {"athlon-fx", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1128 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1131 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
1133 /* Set the default values for switches whose default depends on TARGET_64BIT
1134 in case they weren't overwritten by command line options. */
1137 if (flag_omit_frame_pointer
== 2)
1138 flag_omit_frame_pointer
= 1;
1139 if (flag_asynchronous_unwind_tables
== 2)
1140 flag_asynchronous_unwind_tables
= 1;
1141 if (flag_pcc_struct_return
== 2)
1142 flag_pcc_struct_return
= 0;
1146 if (flag_omit_frame_pointer
== 2)
1147 flag_omit_frame_pointer
= 0;
1148 if (flag_asynchronous_unwind_tables
== 2)
1149 flag_asynchronous_unwind_tables
= 0;
1150 if (flag_pcc_struct_return
== 2)
1151 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
1154 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1155 SUBTARGET_OVERRIDE_OPTIONS
;
1158 if (!ix86_tune_string
&& ix86_arch_string
)
1159 ix86_tune_string
= ix86_arch_string
;
1160 if (!ix86_tune_string
)
1161 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
1162 if (!ix86_arch_string
)
1163 ix86_arch_string
= TARGET_64BIT
? "x86-64" : "i386";
1165 if (ix86_cmodel_string
!= 0)
1167 if (!strcmp (ix86_cmodel_string
, "small"))
1168 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1170 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string
);
1171 else if (!strcmp (ix86_cmodel_string
, "32"))
1172 ix86_cmodel
= CM_32
;
1173 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
1174 ix86_cmodel
= CM_KERNEL
;
1175 else if (!strcmp (ix86_cmodel_string
, "medium") && !flag_pic
)
1176 ix86_cmodel
= CM_MEDIUM
;
1177 else if (!strcmp (ix86_cmodel_string
, "large") && !flag_pic
)
1178 ix86_cmodel
= CM_LARGE
;
1180 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
1184 ix86_cmodel
= CM_32
;
1186 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1188 if (ix86_asm_string
!= 0)
1190 if (!strcmp (ix86_asm_string
, "intel"))
1191 ix86_asm_dialect
= ASM_INTEL
;
1192 else if (!strcmp (ix86_asm_string
, "att"))
1193 ix86_asm_dialect
= ASM_ATT
;
1195 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
1197 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
1198 error ("code model `%s' not supported in the %s bit mode",
1199 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
1200 if (ix86_cmodel
== CM_LARGE
)
1201 sorry ("code model `large' not supported yet");
1202 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
1203 sorry ("%i-bit mode not compiled in",
1204 (target_flags
& MASK_64BIT
) ? 64 : 32);
1206 for (i
= 0; i
< pta_size
; i
++)
1207 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
1209 ix86_arch
= processor_alias_table
[i
].processor
;
1210 /* Default cpu tuning to the architecture. */
1211 ix86_tune
= ix86_arch
;
1212 if (processor_alias_table
[i
].flags
& PTA_MMX
1213 && !(target_flags_explicit
& MASK_MMX
))
1214 target_flags
|= MASK_MMX
;
1215 if (processor_alias_table
[i
].flags
& PTA_3DNOW
1216 && !(target_flags_explicit
& MASK_3DNOW
))
1217 target_flags
|= MASK_3DNOW
;
1218 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
1219 && !(target_flags_explicit
& MASK_3DNOW_A
))
1220 target_flags
|= MASK_3DNOW_A
;
1221 if (processor_alias_table
[i
].flags
& PTA_SSE
1222 && !(target_flags_explicit
& MASK_SSE
))
1223 target_flags
|= MASK_SSE
;
1224 if (processor_alias_table
[i
].flags
& PTA_SSE2
1225 && !(target_flags_explicit
& MASK_SSE2
))
1226 target_flags
|= MASK_SSE2
;
1227 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1228 x86_prefetch_sse
= true;
1229 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1230 error ("CPU you selected does not support x86-64 instruction set");
1235 error ("bad value (%s) for -march= switch", ix86_arch_string
);
1237 for (i
= 0; i
< pta_size
; i
++)
1238 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
1240 ix86_tune
= processor_alias_table
[i
].processor
;
1241 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1242 error ("CPU you selected does not support x86-64 instruction set");
1245 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1246 x86_prefetch_sse
= true;
1248 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
1251 ix86_cost
= &size_cost
;
1253 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
1254 target_flags
|= processor_target_table
[ix86_tune
].target_enable
;
1255 target_flags
&= ~processor_target_table
[ix86_tune
].target_disable
;
1257 /* Arrange to set up i386_stack_locals for all functions. */
1258 init_machine_status
= ix86_init_machine_status
;
1260 /* Validate -mregparm= value. */
1261 if (ix86_regparm_string
)
1263 i
= atoi (ix86_regparm_string
);
1264 if (i
< 0 || i
> REGPARM_MAX
)
1265 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
1271 ix86_regparm
= REGPARM_MAX
;
1273 /* If the user has provided any of the -malign-* options,
1274 warn and use that value only if -falign-* is not set.
1275 Remove this code in GCC 3.2 or later. */
1276 if (ix86_align_loops_string
)
1278 warning ("-malign-loops is obsolete, use -falign-loops");
1279 if (align_loops
== 0)
1281 i
= atoi (ix86_align_loops_string
);
1282 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1283 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1285 align_loops
= 1 << i
;
1289 if (ix86_align_jumps_string
)
1291 warning ("-malign-jumps is obsolete, use -falign-jumps");
1292 if (align_jumps
== 0)
1294 i
= atoi (ix86_align_jumps_string
);
1295 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1296 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1298 align_jumps
= 1 << i
;
1302 if (ix86_align_funcs_string
)
1304 warning ("-malign-functions is obsolete, use -falign-functions");
1305 if (align_functions
== 0)
1307 i
= atoi (ix86_align_funcs_string
);
1308 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1309 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1311 align_functions
= 1 << i
;
1315 /* Default align_* from the processor table. */
1316 if (align_loops
== 0)
1318 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
1319 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
1321 if (align_jumps
== 0)
1323 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
1324 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
1326 if (align_functions
== 0)
1328 align_functions
= processor_target_table
[ix86_tune
].align_func
;
1331 /* Validate -mpreferred-stack-boundary= value, or provide default.
1332 The default of 128 bits is for Pentium III's SSE __m128, but we
1333 don't want additional code to keep the stack aligned when
1334 optimizing for code size. */
1335 ix86_preferred_stack_boundary
= (optimize_size
1336 ? TARGET_64BIT
? 128 : 32
1338 if (ix86_preferred_stack_boundary_string
)
1340 i
= atoi (ix86_preferred_stack_boundary_string
);
1341 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
1342 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
1343 TARGET_64BIT
? 4 : 2);
1345 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
1348 /* Validate -mbranch-cost= value, or provide default. */
1349 ix86_branch_cost
= processor_target_table
[ix86_tune
].cost
->branch_cost
;
1350 if (ix86_branch_cost_string
)
1352 i
= atoi (ix86_branch_cost_string
);
1354 error ("-mbranch-cost=%d is not between 0 and 5", i
);
1356 ix86_branch_cost
= i
;
1359 if (ix86_tls_dialect_string
)
1361 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
1362 ix86_tls_dialect
= TLS_DIALECT_GNU
;
1363 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
1364 ix86_tls_dialect
= TLS_DIALECT_SUN
;
1366 error ("bad value (%s) for -mtls-dialect= switch",
1367 ix86_tls_dialect_string
);
1370 /* Keep nonleaf frame pointers. */
1371 if (TARGET_OMIT_LEAF_FRAME_POINTER
)
1372 flag_omit_frame_pointer
= 1;
1374 /* If we're doing fast math, we don't care about comparison order
1375 wrt NaNs. This lets us use a shorter comparison sequence. */
1376 if (flag_unsafe_math_optimizations
)
1377 target_flags
&= ~MASK_IEEE_FP
;
1379 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1380 since the insns won't need emulation. */
1381 if (x86_arch_always_fancy_math_387
& (1 << ix86_arch
))
1382 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
1384 /* Turn on SSE2 builtins for -msse3. */
1386 target_flags
|= MASK_SSE2
;
1388 /* Turn on SSE builtins for -msse2. */
1390 target_flags
|= MASK_SSE
;
1394 if (TARGET_ALIGN_DOUBLE
)
1395 error ("-malign-double makes no sense in the 64bit mode");
1397 error ("-mrtd calling convention not supported in the 64bit mode");
1398 /* Enable by default the SSE and MMX builtins. */
1399 target_flags
|= (MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
);
1400 ix86_fpmath
= FPMATH_SSE
;
1404 ix86_fpmath
= FPMATH_387
;
1405 /* i386 ABI does not specify red zone. It still makes sense to use it
1406 when the programmer takes care to keep the stack from being destroyed. */
1407 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
1408 target_flags
|= MASK_NO_RED_ZONE
;
1411 if (ix86_fpmath_string
!= 0)
1413 if (! strcmp (ix86_fpmath_string
, "387"))
1414 ix86_fpmath
= FPMATH_387
;
1415 else if (! strcmp (ix86_fpmath_string
, "sse"))
1419 warning ("SSE instruction set disabled, using 387 arithmetics");
1420 ix86_fpmath
= FPMATH_387
;
1423 ix86_fpmath
= FPMATH_SSE
;
1425 else if (! strcmp (ix86_fpmath_string
, "387,sse")
1426 || ! strcmp (ix86_fpmath_string
, "sse,387"))
1430 warning ("SSE instruction set disabled, using 387 arithmetics");
1431 ix86_fpmath
= FPMATH_387
;
1433 else if (!TARGET_80387
)
1435 warning ("387 instruction set disabled, using SSE arithmetics");
1436 ix86_fpmath
= FPMATH_SSE
;
1439 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
1442 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
1445 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1449 target_flags
|= MASK_MMX
;
1450 x86_prefetch_sse
= true;
1453 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1456 target_flags
|= MASK_MMX
;
1457 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1458 extensions it adds. */
1459 if (x86_3dnow_a
& (1 << ix86_arch
))
1460 target_flags
|= MASK_3DNOW_A
;
1462 if ((x86_accumulate_outgoing_args
& TUNEMASK
)
1463 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
1465 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
1467 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1470 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
1471 p
= strchr (internal_label_prefix
, 'X');
1472 internal_label_prefix_len
= p
- internal_label_prefix
;
1478 optimization_options (int level
, int size ATTRIBUTE_UNUSED
)
1480 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1481 make the problem with not enough registers even worse. */
1482 #ifdef INSN_SCHEDULING
1484 flag_schedule_insns
= 0;
1487 /* The default values of these switches depend on the TARGET_64BIT
1488 that is not known at this moment. Mark these values with 2 and
1489 let user the to override these. In case there is no command line option
1490 specifying them, we will set the defaults in override_options. */
1492 flag_omit_frame_pointer
= 2;
1493 flag_pcc_struct_return
= 2;
1494 flag_asynchronous_unwind_tables
= 2;
1497 /* Table of valid machine attributes. */
1498 const struct attribute_spec ix86_attribute_table
[] =
1500 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1501 /* Stdcall attribute says callee is responsible for popping arguments
1502 if they are not variable. */
1503 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1504 /* Fastcall attribute says callee is responsible for popping arguments
1505 if they are not variable. */
1506 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1507 /* Cdecl attribute says the callee is a normal C declaration */
1508 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1509 /* Regparm attribute specifies how many integer arguments are to be
1510 passed in registers. */
1511 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute
},
1512 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1513 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute
},
1514 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute
},
1515 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
1517 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
1518 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
1519 { NULL
, 0, 0, false, false, false, NULL
}
1522 /* Decide whether we can make a sibling call to a function. DECL is the
1523 declaration of the function being targeted by the call and EXP is the
1524 CALL_EXPR representing the call. */
1527 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
1529 /* If we are generating position-independent code, we cannot sibcall
1530 optimize any indirect call, or a direct call to a global function,
1531 as the PLT requires %ebx be live. */
1532 if (!TARGET_64BIT
&& flag_pic
&& (!decl
|| TREE_PUBLIC (decl
)))
1535 /* If we are returning floats on the 80387 register stack, we cannot
1536 make a sibcall from a function that doesn't return a float to a
1537 function that does or, conversely, from a function that does return
1538 a float to a function that doesn't; the necessary stack adjustment
1539 would not be executed. */
1540 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp
)))
1541 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)))))
1544 /* If this call is indirect, we'll need to be able to use a call-clobbered
1545 register for the address of the target function. Make sure that all
1546 such registers are not used for passing parameters. */
1547 if (!decl
&& !TARGET_64BIT
)
1551 /* We're looking at the CALL_EXPR, we need the type of the function. */
1552 type
= TREE_OPERAND (exp
, 0); /* pointer expression */
1553 type
= TREE_TYPE (type
); /* pointer type */
1554 type
= TREE_TYPE (type
); /* function type */
1556 if (ix86_function_regparm (type
, NULL
) >= 3)
1558 /* ??? Need to count the actual number of registers to be used,
1559 not the possible number of registers. Fix later. */
1564 /* Otherwise okay. That also includes certain types of indirect calls. */
1568 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1569 arguments as in struct attribute_spec.handler. */
1571 ix86_handle_cdecl_attribute (tree
*node
, tree name
,
1572 tree args ATTRIBUTE_UNUSED
,
1573 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
1575 if (TREE_CODE (*node
) != FUNCTION_TYPE
1576 && TREE_CODE (*node
) != METHOD_TYPE
1577 && TREE_CODE (*node
) != FIELD_DECL
1578 && TREE_CODE (*node
) != TYPE_DECL
)
1580 warning ("`%s' attribute only applies to functions",
1581 IDENTIFIER_POINTER (name
));
1582 *no_add_attrs
= true;
1586 if (is_attribute_p ("fastcall", name
))
1588 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
1590 error ("fastcall and stdcall attributes are not compatible");
1592 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
1594 error ("fastcall and regparm attributes are not compatible");
1597 else if (is_attribute_p ("stdcall", name
))
1599 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
1601 error ("fastcall and stdcall attributes are not compatible");
1608 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name
));
1609 *no_add_attrs
= true;
1615 /* Handle a "regparm" attribute;
1616 arguments as in struct attribute_spec.handler. */
1618 ix86_handle_regparm_attribute (tree
*node
, tree name
, tree args
,
1619 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
1621 if (TREE_CODE (*node
) != FUNCTION_TYPE
1622 && TREE_CODE (*node
) != METHOD_TYPE
1623 && TREE_CODE (*node
) != FIELD_DECL
1624 && TREE_CODE (*node
) != TYPE_DECL
)
1626 warning ("`%s' attribute only applies to functions",
1627 IDENTIFIER_POINTER (name
));
1628 *no_add_attrs
= true;
1634 cst
= TREE_VALUE (args
);
1635 if (TREE_CODE (cst
) != INTEGER_CST
)
1637 warning ("`%s' attribute requires an integer constant argument",
1638 IDENTIFIER_POINTER (name
));
1639 *no_add_attrs
= true;
1641 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
1643 warning ("argument to `%s' attribute larger than %d",
1644 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
1645 *no_add_attrs
= true;
1648 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
1650 error ("fastcall and regparm attributes are not compatible");
1657 /* Return 0 if the attributes for two types are incompatible, 1 if they
1658 are compatible, and 2 if they are nearly compatible (which causes a
1659 warning to be generated). */
1662 ix86_comp_type_attributes (tree type1
, tree type2
)
1664 /* Check for mismatch of non-default calling convention. */
1665 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
1667 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
1670 /* Check for mismatched fastcall types */
1671 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1
))
1672 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2
)))
1675 /* Check for mismatched return types (cdecl vs stdcall). */
1676 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
1677 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
1679 if (ix86_function_regparm (type1
, NULL
)
1680 != ix86_function_regparm (type2
, NULL
))
1685 /* Return the regparm value for a function with the indicated TYPE and DECL.
1686 DECL may be NULL when calling function indirectly
1687 or considering a libcall. */
1690 ix86_function_regparm (tree type
, tree decl
)
1693 int regparm
= ix86_regparm
;
1694 bool user_convention
= false;
1698 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
1701 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
1702 user_convention
= true;
1705 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
1708 user_convention
= true;
1711 /* Use register calling convention for local functions when possible. */
1712 if (!TARGET_64BIT
&& !user_convention
&& decl
1713 && flag_unit_at_a_time
&& !profile_flag
)
1715 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
1718 /* We can't use regparm(3) for nested functions as these use
1719 static chain pointer in third argument. */
1720 if (DECL_CONTEXT (decl
) && !DECL_NO_STATIC_CHAIN (decl
))
1730 /* Return true if EAX is live at the start of the function. Used by
1731 ix86_expand_prologue to determine if we need special help before
1732 calling allocate_stack_worker. */
1735 ix86_eax_live_at_start_p (void)
1737 /* Cheat. Don't bother working forward from ix86_function_regparm
1738 to the function type to whether an actual argument is located in
1739 eax. Instead just look at cfg info, which is still close enough
1740 to correct at this point. This gives false positives for broken
1741 functions that might use uninitialized data that happens to be
1742 allocated in eax, but who cares? */
1743 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR
->global_live_at_end
, 0);
1746 /* Value is the number of bytes of arguments automatically
1747 popped when returning from a subroutine call.
1748 FUNDECL is the declaration node of the function (as a tree),
1749 FUNTYPE is the data type of the function (as a tree),
1750 or for a library call it is an identifier node for the subroutine name.
1751 SIZE is the number of bytes of arguments passed on the stack.
1753 On the 80386, the RTD insn may be used to pop them if the number
1754 of args is fixed, but if the number is variable then the caller
1755 must pop them all. RTD can't be used for library calls now
1756 because the library is compiled with the Unix compiler.
1757 Use of RTD is a selectable option, since it is incompatible with
1758 standard Unix calling sequences. If the option is not selected,
1759 the caller must always pop the args.
1761 The attribute stdcall is equivalent to RTD on a per module basis. */
1764 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
1766 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
1768 /* Cdecl functions override -mrtd, and never pop the stack. */
1769 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
1771 /* Stdcall and fastcall functions will pop the stack if not
1773 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
))
1774 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype
)))
1778 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
1779 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
1780 == void_type_node
)))
1784 /* Lose any fake structure return argument if it is passed on the stack. */
1785 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
1788 int nregs
= ix86_function_regparm (funtype
, fundecl
);
1791 return GET_MODE_SIZE (Pmode
);
1797 /* Argument support functions. */
1799 /* Return true when register may be used to pass function parameters. */
1801 ix86_function_arg_regno_p (int regno
)
1805 return (regno
< REGPARM_MAX
1806 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
1807 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
1809 /* RAX is used as hidden argument to va_arg functions. */
1812 for (i
= 0; i
< REGPARM_MAX
; i
++)
1813 if (regno
== x86_64_int_parameter_registers
[i
])
1818 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1819 for a call to a function whose data type is FNTYPE.
1820 For a library call, FNTYPE is 0. */
1823 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
1824 tree fntype
, /* tree ptr for function decl */
1825 rtx libname
, /* SYMBOL_REF of library name or 0 */
1828 static CUMULATIVE_ARGS zero_cum
;
1829 tree param
, next_param
;
1831 if (TARGET_DEBUG_ARG
)
1833 fprintf (stderr
, "\ninit_cumulative_args (");
1835 fprintf (stderr
, "fntype code = %s, ret code = %s",
1836 tree_code_name
[(int) TREE_CODE (fntype
)],
1837 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
1839 fprintf (stderr
, "no fntype");
1842 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
1847 /* Set up the number of registers to use for passing arguments. */
1849 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
1851 cum
->nregs
= ix86_regparm
;
1852 cum
->sse_nregs
= SSE_REGPARM_MAX
;
1853 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
1854 cum
->warn_sse
= true;
1855 cum
->warn_mmx
= true;
1856 cum
->maybe_vaarg
= false;
1858 /* Use ecx and edx registers if function has fastcall attribute */
1859 if (fntype
&& !TARGET_64BIT
)
1861 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
1869 /* Determine if this function has variable arguments. This is
1870 indicated by the last argument being 'void_type_mode' if there
1871 are no variable arguments. If there are variable arguments, then
1872 we won't pass anything in registers */
1874 if (cum
->nregs
|| !TARGET_MMX
|| !TARGET_SSE
)
1876 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
1877 param
!= 0; param
= next_param
)
1879 next_param
= TREE_CHAIN (param
);
1880 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
1891 cum
->maybe_vaarg
= true;
1895 if ((!fntype
&& !libname
)
1896 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
1897 cum
->maybe_vaarg
= 1;
1899 if (TARGET_DEBUG_ARG
)
1900 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
1905 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1906 of this code is to classify each 8bytes of incoming argument by the register
1907 class and assign registers accordingly. */
1909 /* Return the union class of CLASS1 and CLASS2.
1910 See the x86-64 PS ABI for details. */
1912 static enum x86_64_reg_class
1913 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
1915 /* Rule #1: If both classes are equal, this is the resulting class. */
1916 if (class1
== class2
)
1919 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1921 if (class1
== X86_64_NO_CLASS
)
1923 if (class2
== X86_64_NO_CLASS
)
1926 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1927 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
1928 return X86_64_MEMORY_CLASS
;
1930 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1931 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
1932 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
1933 return X86_64_INTEGERSI_CLASS
;
1934 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
1935 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
1936 return X86_64_INTEGER_CLASS
;
1938 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1939 if (class1
== X86_64_X87_CLASS
|| class1
== X86_64_X87UP_CLASS
1940 || class2
== X86_64_X87_CLASS
|| class2
== X86_64_X87UP_CLASS
)
1941 return X86_64_MEMORY_CLASS
;
1943 /* Rule #6: Otherwise class SSE is used. */
1944 return X86_64_SSE_CLASS
;
1947 /* Classify the argument of type TYPE and mode MODE.
1948 CLASSES will be filled by the register class used to pass each word
1949 of the operand. The number of words is returned. In case the parameter
1950 should be passed in memory, 0 is returned. As a special case for zero
1951 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1953 BIT_OFFSET is used internally for handling records and specifies offset
1954 of the offset in bits modulo 256 to avoid overflow cases.
1956 See the x86-64 PS ABI for details.
1960 classify_argument (enum machine_mode mode
, tree type
,
1961 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
1963 HOST_WIDE_INT bytes
=
1964 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1965 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
1967 /* Variable sized entities are always passed/returned in memory. */
1971 if (mode
!= VOIDmode
1972 && MUST_PASS_IN_STACK (mode
, type
))
1975 if (type
&& AGGREGATE_TYPE_P (type
))
1979 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
1981 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1985 for (i
= 0; i
< words
; i
++)
1986 classes
[i
] = X86_64_NO_CLASS
;
1988 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1989 signalize memory class, so handle it as special case. */
1992 classes
[0] = X86_64_NO_CLASS
;
1996 /* Classify each field of record and merge classes. */
1997 if (TREE_CODE (type
) == RECORD_TYPE
)
1999 /* For classes first merge in the field of the subclasses. */
2000 if (TYPE_BINFO (type
) != NULL
&& TYPE_BINFO_BASETYPES (type
) != NULL
)
2002 tree bases
= TYPE_BINFO_BASETYPES (type
);
2003 int n_bases
= TREE_VEC_LENGTH (bases
);
2006 for (i
= 0; i
< n_bases
; ++i
)
2008 tree binfo
= TREE_VEC_ELT (bases
, i
);
2010 int offset
= tree_low_cst (BINFO_OFFSET (binfo
), 0) * 8;
2011 tree type
= BINFO_TYPE (binfo
);
2013 num
= classify_argument (TYPE_MODE (type
),
2015 (offset
+ bit_offset
) % 256);
2018 for (i
= 0; i
< num
; i
++)
2020 int pos
= (offset
+ (bit_offset
% 64)) / 8 / 8;
2022 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
2026 /* And now merge the fields of structure. */
2027 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
2029 if (TREE_CODE (field
) == FIELD_DECL
)
2033 /* Bitfields are always classified as integer. Handle them
2034 early, since later code would consider them to be
2035 misaligned integers. */
2036 if (DECL_BIT_FIELD (field
))
2038 for (i
= int_bit_position (field
) / 8 / 8;
2039 i
< (int_bit_position (field
)
2040 + tree_low_cst (DECL_SIZE (field
), 0)
2043 merge_classes (X86_64_INTEGER_CLASS
,
2048 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
2049 TREE_TYPE (field
), subclasses
,
2050 (int_bit_position (field
)
2051 + bit_offset
) % 256);
2054 for (i
= 0; i
< num
; i
++)
2057 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
2059 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
2065 /* Arrays are handled as small records. */
2066 else if (TREE_CODE (type
) == ARRAY_TYPE
)
2069 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
2070 TREE_TYPE (type
), subclasses
, bit_offset
);
2074 /* The partial classes are now full classes. */
2075 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
2076 subclasses
[0] = X86_64_SSE_CLASS
;
2077 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
2078 subclasses
[0] = X86_64_INTEGER_CLASS
;
2080 for (i
= 0; i
< words
; i
++)
2081 classes
[i
] = subclasses
[i
% num
];
2083 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2084 else if (TREE_CODE (type
) == UNION_TYPE
2085 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
2087 /* For classes first merge in the field of the subclasses. */
2088 if (TYPE_BINFO (type
) != NULL
&& TYPE_BINFO_BASETYPES (type
) != NULL
)
2090 tree bases
= TYPE_BINFO_BASETYPES (type
);
2091 int n_bases
= TREE_VEC_LENGTH (bases
);
2094 for (i
= 0; i
< n_bases
; ++i
)
2096 tree binfo
= TREE_VEC_ELT (bases
, i
);
2098 int offset
= tree_low_cst (BINFO_OFFSET (binfo
), 0) * 8;
2099 tree type
= BINFO_TYPE (binfo
);
2101 num
= classify_argument (TYPE_MODE (type
),
2103 (offset
+ (bit_offset
% 64)) % 256);
2106 for (i
= 0; i
< num
; i
++)
2108 int pos
= (offset
+ (bit_offset
% 64)) / 8 / 8;
2110 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
2114 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
2116 if (TREE_CODE (field
) == FIELD_DECL
)
2119 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
2120 TREE_TYPE (field
), subclasses
,
2124 for (i
= 0; i
< num
; i
++)
2125 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
2129 else if (TREE_CODE (type
) == SET_TYPE
)
2133 classes
[0] = X86_64_INTEGERSI_CLASS
;
2136 else if (bytes
<= 8)
2138 classes
[0] = X86_64_INTEGER_CLASS
;
2141 else if (bytes
<= 12)
2143 classes
[0] = X86_64_INTEGER_CLASS
;
2144 classes
[1] = X86_64_INTEGERSI_CLASS
;
2149 classes
[0] = X86_64_INTEGER_CLASS
;
2150 classes
[1] = X86_64_INTEGER_CLASS
;
2157 /* Final merger cleanup. */
2158 for (i
= 0; i
< words
; i
++)
2160 /* If one class is MEMORY, everything should be passed in
2162 if (classes
[i
] == X86_64_MEMORY_CLASS
)
2165 /* The X86_64_SSEUP_CLASS should be always preceded by
2166 X86_64_SSE_CLASS. */
2167 if (classes
[i
] == X86_64_SSEUP_CLASS
2168 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
2169 classes
[i
] = X86_64_SSE_CLASS
;
2171 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2172 if (classes
[i
] == X86_64_X87UP_CLASS
2173 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
2174 classes
[i
] = X86_64_SSE_CLASS
;
2179 /* Compute alignment needed. We align all types to natural boundaries with
2180 exception of XFmode that is aligned to 64bits. */
2181 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
2183 int mode_alignment
= GET_MODE_BITSIZE (mode
);
2186 mode_alignment
= 128;
2187 else if (mode
== XCmode
)
2188 mode_alignment
= 256;
2189 /* Misaligned fields are always returned in memory. */
2190 if (bit_offset
% mode_alignment
)
2194 /* Classification of atomic types. */
2204 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
2205 classes
[0] = X86_64_INTEGERSI_CLASS
;
2207 classes
[0] = X86_64_INTEGER_CLASS
;
2211 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
2214 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
2215 classes
[2] = classes
[3] = X86_64_INTEGER_CLASS
;
2218 if (!(bit_offset
% 64))
2219 classes
[0] = X86_64_SSESF_CLASS
;
2221 classes
[0] = X86_64_SSE_CLASS
;
2224 classes
[0] = X86_64_SSEDF_CLASS
;
2227 classes
[0] = X86_64_X87_CLASS
;
2228 classes
[1] = X86_64_X87UP_CLASS
;
2234 classes
[0] = X86_64_X87_CLASS
;
2235 classes
[1] = X86_64_X87UP_CLASS
;
2236 classes
[2] = X86_64_X87_CLASS
;
2237 classes
[3] = X86_64_X87UP_CLASS
;
2240 classes
[0] = X86_64_SSEDF_CLASS
;
2241 classes
[1] = X86_64_SSEDF_CLASS
;
2244 classes
[0] = X86_64_SSE_CLASS
;
2252 classes
[0] = X86_64_SSE_CLASS
;
2253 classes
[1] = X86_64_SSEUP_CLASS
;
2268 /* Examine the argument and return set number of register required in each
2269 class. Return 0 iff parameter should be passed in memory. */
2271 examine_argument (enum machine_mode mode
, tree type
, int in_return
,
2272 int *int_nregs
, int *sse_nregs
)
2274 enum x86_64_reg_class
class[MAX_CLASSES
];
2275 int n
= classify_argument (mode
, type
, class, 0);
2281 for (n
--; n
>= 0; n
--)
2284 case X86_64_INTEGER_CLASS
:
2285 case X86_64_INTEGERSI_CLASS
:
2288 case X86_64_SSE_CLASS
:
2289 case X86_64_SSESF_CLASS
:
2290 case X86_64_SSEDF_CLASS
:
2293 case X86_64_NO_CLASS
:
2294 case X86_64_SSEUP_CLASS
:
2296 case X86_64_X87_CLASS
:
2297 case X86_64_X87UP_CLASS
:
2301 case X86_64_MEMORY_CLASS
:
2306 /* Construct container for the argument used by GCC interface. See
2307 FUNCTION_ARG for the detailed description. */
2309 construct_container (enum machine_mode mode
, tree type
, int in_return
,
2310 int nintregs
, int nsseregs
, const int * intreg
,
2313 enum machine_mode tmpmode
;
2315 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2316 enum x86_64_reg_class
class[MAX_CLASSES
];
2320 int needed_sseregs
, needed_intregs
;
2321 rtx exp
[MAX_CLASSES
];
2324 n
= classify_argument (mode
, type
, class, 0);
2325 if (TARGET_DEBUG_ARG
)
2328 fprintf (stderr
, "Memory class\n");
2331 fprintf (stderr
, "Classes:");
2332 for (i
= 0; i
< n
; i
++)
2334 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
2336 fprintf (stderr
, "\n");
2341 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
, &needed_sseregs
))
2343 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
2346 /* First construct simple cases. Avoid SCmode, since we want to use
2347 single register to pass this type. */
2348 if (n
== 1 && mode
!= SCmode
)
2351 case X86_64_INTEGER_CLASS
:
2352 case X86_64_INTEGERSI_CLASS
:
2353 return gen_rtx_REG (mode
, intreg
[0]);
2354 case X86_64_SSE_CLASS
:
2355 case X86_64_SSESF_CLASS
:
2356 case X86_64_SSEDF_CLASS
:
2357 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
2358 case X86_64_X87_CLASS
:
2359 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
2360 case X86_64_NO_CLASS
:
2361 /* Zero sized array, struct or class. */
2366 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
)
2367 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
2369 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
2370 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
2371 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
2372 && class[1] == X86_64_INTEGER_CLASS
2373 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
2374 && intreg
[0] + 1 == intreg
[1])
2375 return gen_rtx_REG (mode
, intreg
[0]);
2377 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
2378 && class[2] == X86_64_X87_CLASS
&& class[3] == X86_64_X87UP_CLASS
)
2379 return gen_rtx_REG (XCmode
, FIRST_STACK_REG
);
2381 /* Otherwise figure out the entries of the PARALLEL. */
2382 for (i
= 0; i
< n
; i
++)
2386 case X86_64_NO_CLASS
:
2388 case X86_64_INTEGER_CLASS
:
2389 case X86_64_INTEGERSI_CLASS
:
2390 /* Merge TImodes on aligned occasions here too. */
2391 if (i
* 8 + 8 > bytes
)
2392 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
2393 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
2397 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2398 if (tmpmode
== BLKmode
)
2400 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2401 gen_rtx_REG (tmpmode
, *intreg
),
2405 case X86_64_SSESF_CLASS
:
2406 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2407 gen_rtx_REG (SFmode
,
2408 SSE_REGNO (sse_regno
)),
2412 case X86_64_SSEDF_CLASS
:
2413 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2414 gen_rtx_REG (DFmode
,
2415 SSE_REGNO (sse_regno
)),
2419 case X86_64_SSE_CLASS
:
2420 if (i
< n
- 1 && class[i
+ 1] == X86_64_SSEUP_CLASS
)
2424 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2425 gen_rtx_REG (tmpmode
,
2426 SSE_REGNO (sse_regno
)),
2428 if (tmpmode
== TImode
)
2436 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
2437 for (i
= 0; i
< nexps
; i
++)
2438 XVECEXP (ret
, 0, i
) = exp
[i
];
2442 /* Update the data in CUM to advance over an argument
2443 of mode MODE and data type TYPE.
2444 (TYPE is null for libcalls where that information may not be available.) */
2447 function_arg_advance (CUMULATIVE_ARGS
*cum
, /* current arg information */
2448 enum machine_mode mode
, /* current arg mode */
2449 tree type
, /* type of the argument or 0 if lib support */
2450 int named
) /* whether or not the argument was named */
2453 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2454 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2456 if (TARGET_DEBUG_ARG
)
2458 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2459 words
, cum
->words
, cum
->nregs
, cum
->sse_nregs
, GET_MODE_NAME (mode
), named
);
2462 int int_nregs
, sse_nregs
;
2463 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
2464 cum
->words
+= words
;
2465 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
2467 cum
->nregs
-= int_nregs
;
2468 cum
->sse_nregs
-= sse_nregs
;
2469 cum
->regno
+= int_nregs
;
2470 cum
->sse_regno
+= sse_nregs
;
2473 cum
->words
+= words
;
2477 if (TARGET_SSE
&& SSE_REG_MODE_P (mode
)
2478 && (!type
|| !AGGREGATE_TYPE_P (type
)))
2480 cum
->sse_words
+= words
;
2481 cum
->sse_nregs
-= 1;
2482 cum
->sse_regno
+= 1;
2483 if (cum
->sse_nregs
<= 0)
2489 else if (TARGET_MMX
&& MMX_REG_MODE_P (mode
)
2490 && (!type
|| !AGGREGATE_TYPE_P (type
)))
2492 cum
->mmx_words
+= words
;
2493 cum
->mmx_nregs
-= 1;
2494 cum
->mmx_regno
+= 1;
2495 if (cum
->mmx_nregs
<= 0)
2503 cum
->words
+= words
;
2504 cum
->nregs
-= words
;
2505 cum
->regno
+= words
;
2507 if (cum
->nregs
<= 0)
2517 /* Define where to put the arguments to a function.
2518 Value is zero to push the argument on the stack,
2519 or a hard register in which to store the argument.
2521 MODE is the argument's machine mode.
2522 TYPE is the data type of the argument (as a tree).
2523 This is null for libcalls where that information may
2525 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2526 the preceding args and about the function being called.
2527 NAMED is nonzero if this argument is a named parameter
2528 (otherwise it is an extra parameter matching an ellipsis). */
2531 function_arg (CUMULATIVE_ARGS
*cum
, /* current arg information */
2532 enum machine_mode mode
, /* current arg mode */
2533 tree type
, /* type of the argument or 0 if lib support */
2534 int named
) /* != 0 for normal args, == 0 for ... args */
2538 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2539 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2540 static bool warnedsse
, warnedmmx
;
2542 /* Handle a hidden AL argument containing number of registers for varargs
2543 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2545 if (mode
== VOIDmode
)
2548 return GEN_INT (cum
->maybe_vaarg
2549 ? (cum
->sse_nregs
< 0
2557 ret
= construct_container (mode
, type
, 0, cum
->nregs
, cum
->sse_nregs
,
2558 &x86_64_int_parameter_registers
[cum
->regno
],
2563 /* For now, pass fp/complex values on the stack. */
2575 if (words
<= cum
->nregs
)
2577 int regno
= cum
->regno
;
2579 /* Fastcall allocates the first two DWORD (SImode) or
2580 smaller arguments to ECX and EDX. */
2583 if (mode
== BLKmode
|| mode
== DImode
)
2586 /* ECX not EAX is the first allocated register. */
2590 ret
= gen_rtx_REG (mode
, regno
);
2600 if (!type
|| !AGGREGATE_TYPE_P (type
))
2602 if (!TARGET_SSE
&& !warnedmmx
&& cum
->warn_sse
)
2605 warning ("SSE vector argument without SSE enabled "
2609 ret
= gen_rtx_REG (mode
, cum
->sse_regno
+ FIRST_SSE_REG
);
2616 if (!type
|| !AGGREGATE_TYPE_P (type
))
2618 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
2621 warning ("MMX vector argument without MMX enabled "
2625 ret
= gen_rtx_REG (mode
, cum
->mmx_regno
+ FIRST_MMX_REG
);
2630 if (TARGET_DEBUG_ARG
)
2633 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2634 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
2637 print_simple_rtl (stderr
, ret
);
2639 fprintf (stderr
, ", stack");
2641 fprintf (stderr
, " )\n");
2647 /* A C expression that indicates when an argument must be passed by
2648 reference. If nonzero for an argument, a copy of that argument is
2649 made in memory and a pointer to the argument is passed instead of
2650 the argument itself. The pointer is passed in whatever way is
2651 appropriate for passing a pointer to that type. */
2654 function_arg_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
2655 enum machine_mode mode ATTRIBUTE_UNUSED
,
2656 tree type
, int named ATTRIBUTE_UNUSED
)
2661 if (type
&& int_size_in_bytes (type
) == -1)
2663 if (TARGET_DEBUG_ARG
)
2664 fprintf (stderr
, "function_arg_pass_by_reference\n");
2671 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2674 contains_128bit_aligned_vector_p (tree type
)
2676 enum machine_mode mode
= TYPE_MODE (type
);
2677 if (SSE_REG_MODE_P (mode
)
2678 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
2680 if (TYPE_ALIGN (type
) < 128)
2683 if (AGGREGATE_TYPE_P (type
))
2685 /* Walk the aggregates recursively. */
2686 if (TREE_CODE (type
) == RECORD_TYPE
2687 || TREE_CODE (type
) == UNION_TYPE
2688 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
2692 if (TYPE_BINFO (type
) != NULL
2693 && TYPE_BINFO_BASETYPES (type
) != NULL
)
2695 tree bases
= TYPE_BINFO_BASETYPES (type
);
2696 int n_bases
= TREE_VEC_LENGTH (bases
);
2699 for (i
= 0; i
< n_bases
; ++i
)
2701 tree binfo
= TREE_VEC_ELT (bases
, i
);
2702 tree type
= BINFO_TYPE (binfo
);
2704 if (contains_128bit_aligned_vector_p (type
))
2708 /* And now merge the fields of structure. */
2709 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
2711 if (TREE_CODE (field
) == FIELD_DECL
2712 && contains_128bit_aligned_vector_p (TREE_TYPE (field
)))
2716 /* Just for use if some languages passes arrays by value. */
2717 else if (TREE_CODE (type
) == ARRAY_TYPE
)
2719 if (contains_128bit_aligned_vector_p (TREE_TYPE (type
)))
2728 /* Gives the alignment boundary, in bits, of an argument with the
2729 specified mode and type. */
2732 ix86_function_arg_boundary (enum machine_mode mode
, tree type
)
2736 align
= TYPE_ALIGN (type
);
2738 align
= GET_MODE_ALIGNMENT (mode
);
2739 if (align
< PARM_BOUNDARY
)
2740 align
= PARM_BOUNDARY
;
2743 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2744 make an exception for SSE modes since these require 128bit
2747 The handling here differs from field_alignment. ICC aligns MMX
2748 arguments to 4 byte boundaries, while structure fields are aligned
2749 to 8 byte boundaries. */
2752 if (!SSE_REG_MODE_P (mode
))
2753 align
= PARM_BOUNDARY
;
2757 if (!contains_128bit_aligned_vector_p (type
))
2758 align
= PARM_BOUNDARY
;
2766 /* Return true if N is a possible register number of function value. */
2768 ix86_function_value_regno_p (int regno
)
2772 return ((regno
) == 0
2773 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
2774 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
));
2776 return ((regno
) == 0 || (regno
) == FIRST_FLOAT_REG
2777 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
)
2778 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
));
2781 /* Define how to find the value returned by a function.
2782 VALTYPE is the data type of the value (as a tree).
2783 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2784 otherwise, FUNC is 0. */
2786 ix86_function_value (tree valtype
)
2790 rtx ret
= construct_container (TYPE_MODE (valtype
), valtype
, 1,
2791 REGPARM_MAX
, SSE_REGPARM_MAX
,
2792 x86_64_int_return_registers
, 0);
2793 /* For zero sized structures, construct_container return NULL, but we need
2794 to keep rest of compiler happy by returning meaningful value. */
2796 ret
= gen_rtx_REG (TYPE_MODE (valtype
), 0);
2800 return gen_rtx_REG (TYPE_MODE (valtype
),
2801 ix86_value_regno (TYPE_MODE (valtype
)));
2804 /* Return false iff type is returned in memory. */
2806 ix86_return_in_memory (tree type
)
2808 int needed_intregs
, needed_sseregs
, size
;
2809 enum machine_mode mode
= TYPE_MODE (type
);
2812 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
2814 if (mode
== BLKmode
)
2817 size
= int_size_in_bytes (type
);
2819 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
2822 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
2824 /* User-created vectors small enough to fit in EAX. */
2828 /* MMX/3dNow values are returned on the stack, since we've
2829 got to EMMS/FEMMS before returning. */
2833 /* SSE values are returned in XMM0. */
2834 /* ??? Except when it doesn't exist? We have a choice of
2835 either (1) being abi incompatible with a -march switch,
2836 or (2) generating an error here. Given no good solution,
2837 I think the safest thing is one warning. The user won't
2838 be able to use -Werror, but.... */
2849 warning ("SSE vector return without SSE enabled "
2864 /* Define how to find the value returned by a library function
2865 assuming the value has mode MODE. */
2867 ix86_libcall_value (enum machine_mode mode
)
2877 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
2880 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
2885 return gen_rtx_REG (mode
, 0);
2889 return gen_rtx_REG (mode
, ix86_value_regno (mode
));
2892 /* Given a mode, return the register to use for a return value. */
2895 ix86_value_regno (enum machine_mode mode
)
2897 /* Floating point return values in %st(0). */
2898 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
&& TARGET_FLOAT_RETURNS_IN_80387
)
2899 return FIRST_FLOAT_REG
;
2900 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2901 we prevent this case when sse is not available. */
2902 if (mode
== TImode
|| (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
2903 return FIRST_SSE_REG
;
2904 /* Everything else in %eax. */
2908 /* Create the va_list data type. */
2911 ix86_build_builtin_va_list (void)
2913 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
2915 /* For i386 we use plain pointer to argument area. */
2917 return build_pointer_type (char_type_node
);
2919 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
2920 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
2922 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
2923 unsigned_type_node
);
2924 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
2925 unsigned_type_node
);
2926 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
2928 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
2931 DECL_FIELD_CONTEXT (f_gpr
) = record
;
2932 DECL_FIELD_CONTEXT (f_fpr
) = record
;
2933 DECL_FIELD_CONTEXT (f_ovf
) = record
;
2934 DECL_FIELD_CONTEXT (f_sav
) = record
;
2936 TREE_CHAIN (record
) = type_decl
;
2937 TYPE_NAME (record
) = type_decl
;
2938 TYPE_FIELDS (record
) = f_gpr
;
2939 TREE_CHAIN (f_gpr
) = f_fpr
;
2940 TREE_CHAIN (f_fpr
) = f_ovf
;
2941 TREE_CHAIN (f_ovf
) = f_sav
;
2943 layout_type (record
);
2945 /* The correct type is an array type of one element. */
2946 return build_array_type (record
, build_index_type (size_zero_node
));
2949 /* Perform any needed actions needed for a function that is receiving a
2950 variable number of arguments.
2954 MODE and TYPE are the mode and type of the current parameter.
2956 PRETEND_SIZE is a variable that should be set to the amount of stack
2957 that must be pushed by the prolog to pretend that our caller pushed
2960 Normally, this macro will push all remaining incoming registers on the
2961 stack and set PRETEND_SIZE to the length of the registers pushed. */
2964 ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
2965 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
2968 CUMULATIVE_ARGS next_cum
;
2969 rtx save_area
= NULL_RTX
, mem
;
2982 /* Indicate to allocate space on the stack for varargs save area. */
2983 ix86_save_varrargs_registers
= 1;
2985 cfun
->stack_alignment_needed
= 128;
2987 fntype
= TREE_TYPE (current_function_decl
);
2988 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
2989 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
2990 != void_type_node
));
2992 /* For varargs, we do not want to skip the dummy va_dcl argument.
2993 For stdargs, we do want to skip the last named argument. */
2996 function_arg_advance (&next_cum
, mode
, type
, 1);
2999 save_area
= frame_pointer_rtx
;
3001 set
= get_varargs_alias_set ();
3003 for (i
= next_cum
.regno
; i
< ix86_regparm
; i
++)
3005 mem
= gen_rtx_MEM (Pmode
,
3006 plus_constant (save_area
, i
* UNITS_PER_WORD
));
3007 set_mem_alias_set (mem
, set
);
3008 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
3009 x86_64_int_parameter_registers
[i
]));
3012 if (next_cum
.sse_nregs
)
3014 /* Now emit code to save SSE registers. The AX parameter contains number
3015 of SSE parameter registers used to call this function. We use
3016 sse_prologue_save insn template that produces computed jump across
3017 SSE saves. We need some preparation work to get this working. */
3019 label
= gen_label_rtx ();
3020 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
3022 /* Compute address to jump to :
3023 label - 5*eax + nnamed_sse_arguments*5 */
3024 tmp_reg
= gen_reg_rtx (Pmode
);
3025 nsse_reg
= gen_reg_rtx (Pmode
);
3026 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
3027 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
3028 gen_rtx_MULT (Pmode
, nsse_reg
,
3030 if (next_cum
.sse_regno
)
3033 gen_rtx_CONST (DImode
,
3034 gen_rtx_PLUS (DImode
,
3036 GEN_INT (next_cum
.sse_regno
* 4))));
3038 emit_move_insn (nsse_reg
, label_ref
);
3039 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
3041 /* Compute address of memory block we save into. We always use pointer
3042 pointing 127 bytes after first byte to store - this is needed to keep
3043 instruction size limited by 4 bytes. */
3044 tmp_reg
= gen_reg_rtx (Pmode
);
3045 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
3046 plus_constant (save_area
,
3047 8 * REGPARM_MAX
+ 127)));
3048 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
3049 set_mem_alias_set (mem
, set
);
3050 set_mem_align (mem
, BITS_PER_WORD
);
3052 /* And finally do the dirty job! */
3053 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
3054 GEN_INT (next_cum
.sse_regno
), label
));
3059 /* Implement va_start. */
3062 ix86_va_start (tree valist
, rtx nextarg
)
3064 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
3065 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
3066 tree gpr
, fpr
, ovf
, sav
, t
;
3068 /* Only 64bit target needs something special. */
3071 std_expand_builtin_va_start (valist
, nextarg
);
3075 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
3076 f_fpr
= TREE_CHAIN (f_gpr
);
3077 f_ovf
= TREE_CHAIN (f_fpr
);
3078 f_sav
= TREE_CHAIN (f_ovf
);
3080 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
3081 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
);
3082 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
);
3083 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
);
3084 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
);
3086 /* Count number of gp and fp argument registers used. */
3087 words
= current_function_args_info
.words
;
3088 n_gpr
= current_function_args_info
.regno
;
3089 n_fpr
= current_function_args_info
.sse_regno
;
3091 if (TARGET_DEBUG_ARG
)
3092 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3093 (int) words
, (int) n_gpr
, (int) n_fpr
);
3095 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
,
3096 build_int_2 (n_gpr
* 8, 0));
3097 TREE_SIDE_EFFECTS (t
) = 1;
3098 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3100 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
,
3101 build_int_2 (n_fpr
* 16 + 8*REGPARM_MAX
, 0));
3102 TREE_SIDE_EFFECTS (t
) = 1;
3103 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3105 /* Find the overflow area. */
3106 t
= make_tree (TREE_TYPE (ovf
), virtual_incoming_args_rtx
);
3108 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), t
,
3109 build_int_2 (words
* UNITS_PER_WORD
, 0));
3110 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
3111 TREE_SIDE_EFFECTS (t
) = 1;
3112 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3114 /* Find the register save area.
3115 Prologue of the function save it right above stack frame. */
3116 t
= make_tree (TREE_TYPE (sav
), frame_pointer_rtx
);
3117 t
= build (MODIFY_EXPR
, TREE_TYPE (sav
), sav
, t
);
3118 TREE_SIDE_EFFECTS (t
) = 1;
3119 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3122 /* Implement va_arg. */
3124 ix86_va_arg (tree valist
, tree type
)
3126 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
3127 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
3128 tree gpr
, fpr
, ovf
, sav
, t
;
3130 rtx lab_false
, lab_over
= NULL_RTX
;
3135 /* Only 64bit target needs something special. */
3138 return std_expand_builtin_va_arg (valist
, type
);
3141 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
3142 f_fpr
= TREE_CHAIN (f_gpr
);
3143 f_ovf
= TREE_CHAIN (f_fpr
);
3144 f_sav
= TREE_CHAIN (f_ovf
);
3146 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
3147 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
);
3148 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
);
3149 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
);
3150 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
);
3152 size
= int_size_in_bytes (type
);
3155 /* Passed by reference. */
3157 type
= build_pointer_type (type
);
3158 size
= int_size_in_bytes (type
);
3160 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3162 container
= construct_container (TYPE_MODE (type
), type
, 0,
3163 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
3165 * Pull the value out of the saved registers ...
3168 addr_rtx
= gen_reg_rtx (Pmode
);
3172 rtx int_addr_rtx
, sse_addr_rtx
;
3173 int needed_intregs
, needed_sseregs
;
3176 lab_over
= gen_label_rtx ();
3177 lab_false
= gen_label_rtx ();
3179 examine_argument (TYPE_MODE (type
), type
, 0,
3180 &needed_intregs
, &needed_sseregs
);
3183 need_temp
= ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
3184 || TYPE_ALIGN (type
) > 128);
3186 /* In case we are passing structure, verify that it is consecutive block
3187 on the register save area. If not we need to do moves. */
3188 if (!need_temp
&& !REG_P (container
))
3190 /* Verify that all registers are strictly consecutive */
3191 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
3195 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
3197 rtx slot
= XVECEXP (container
, 0, i
);
3198 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
3199 || INTVAL (XEXP (slot
, 1)) != i
* 16)
3207 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
3209 rtx slot
= XVECEXP (container
, 0, i
);
3210 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
3211 || INTVAL (XEXP (slot
, 1)) != i
* 8)
3218 int_addr_rtx
= addr_rtx
;
3219 sse_addr_rtx
= addr_rtx
;
3223 int_addr_rtx
= gen_reg_rtx (Pmode
);
3224 sse_addr_rtx
= gen_reg_rtx (Pmode
);
3226 /* First ensure that we fit completely in registers. */
3229 emit_cmp_and_jump_insns (expand_expr
3230 (gpr
, NULL_RTX
, SImode
, EXPAND_NORMAL
),
3231 GEN_INT ((REGPARM_MAX
- needed_intregs
+
3232 1) * 8), GE
, const1_rtx
, SImode
,
3237 emit_cmp_and_jump_insns (expand_expr
3238 (fpr
, NULL_RTX
, SImode
, EXPAND_NORMAL
),
3239 GEN_INT ((SSE_REGPARM_MAX
-
3240 needed_sseregs
+ 1) * 16 +
3241 REGPARM_MAX
* 8), GE
, const1_rtx
,
3242 SImode
, 1, lab_false
);
3245 /* Compute index to start of area used for integer regs. */
3248 t
= build (PLUS_EXPR
, ptr_type_node
, sav
, gpr
);
3249 r
= expand_expr (t
, int_addr_rtx
, Pmode
, EXPAND_NORMAL
);
3250 if (r
!= int_addr_rtx
)
3251 emit_move_insn (int_addr_rtx
, r
);
3255 t
= build (PLUS_EXPR
, ptr_type_node
, sav
, fpr
);
3256 r
= expand_expr (t
, sse_addr_rtx
, Pmode
, EXPAND_NORMAL
);
3257 if (r
!= sse_addr_rtx
)
3258 emit_move_insn (sse_addr_rtx
, r
);
3266 /* Never use the memory itself, as it has the alias set. */
3267 x
= XEXP (assign_temp (type
, 0, 1, 0), 0);
3268 mem
= gen_rtx_MEM (BLKmode
, x
);
3269 force_operand (x
, addr_rtx
);
3270 set_mem_alias_set (mem
, get_varargs_alias_set ());
3271 set_mem_align (mem
, BITS_PER_UNIT
);
3273 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
3275 rtx slot
= XVECEXP (container
, 0, i
);
3276 rtx reg
= XEXP (slot
, 0);
3277 enum machine_mode mode
= GET_MODE (reg
);
3283 if (SSE_REGNO_P (REGNO (reg
)))
3285 src_addr
= sse_addr_rtx
;
3286 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
3290 src_addr
= int_addr_rtx
;
3291 src_offset
= REGNO (reg
) * 8;
3293 src_mem
= gen_rtx_MEM (mode
, src_addr
);
3294 set_mem_alias_set (src_mem
, get_varargs_alias_set ());
3295 src_mem
= adjust_address (src_mem
, mode
, src_offset
);
3296 dest_mem
= adjust_address (mem
, mode
, INTVAL (XEXP (slot
, 1)));
3297 emit_move_insn (dest_mem
, src_mem
);
3304 build (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
3305 build_int_2 (needed_intregs
* 8, 0));
3306 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
, t
);
3307 TREE_SIDE_EFFECTS (t
) = 1;
3308 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3313 build (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
3314 build_int_2 (needed_sseregs
* 16, 0));
3315 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
, t
);
3316 TREE_SIDE_EFFECTS (t
) = 1;
3317 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3320 emit_jump_insn (gen_jump (lab_over
));
3322 emit_label (lab_false
);
3325 /* ... otherwise out of the overflow area. */
3327 /* Care for on-stack alignment if needed. */
3328 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64)
3332 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
3333 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
, build_int_2 (align
- 1, 0));
3334 t
= build (BIT_AND_EXPR
, TREE_TYPE (t
), t
, build_int_2 (-align
, -1));
3338 r
= expand_expr (t
, addr_rtx
, Pmode
, EXPAND_NORMAL
);
3340 emit_move_insn (addr_rtx
, r
);
3343 build (PLUS_EXPR
, TREE_TYPE (t
), t
,
3344 build_int_2 (rsize
* UNITS_PER_WORD
, 0));
3345 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
3346 TREE_SIDE_EFFECTS (t
) = 1;
3347 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3350 emit_label (lab_over
);
3354 r
= gen_rtx_MEM (Pmode
, addr_rtx
);
3355 set_mem_alias_set (r
, get_varargs_alias_set ());
3356 emit_move_insn (addr_rtx
, r
);
3362 /* Return nonzero if OP is either a i387 or SSE fp register. */
3364 any_fp_register_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3366 return ANY_FP_REG_P (op
);
3369 /* Return nonzero if OP is an i387 fp register. */
3371 fp_register_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3373 return FP_REG_P (op
);
3376 /* Return nonzero if OP is a non-fp register_operand. */
3378 register_and_not_any_fp_reg_operand (rtx op
, enum machine_mode mode
)
3380 return register_operand (op
, mode
) && !ANY_FP_REG_P (op
);
3383 /* Return nonzero if OP is a register operand other than an
3384 i387 fp register. */
3386 register_and_not_fp_reg_operand (rtx op
, enum machine_mode mode
)
3388 return register_operand (op
, mode
) && !FP_REG_P (op
);
3391 /* Return nonzero if OP is general operand representable on x86_64. */
3394 x86_64_general_operand (rtx op
, enum machine_mode mode
)
3397 return general_operand (op
, mode
);
3398 if (nonimmediate_operand (op
, mode
))
3400 return x86_64_sign_extended_value (op
);
3403 /* Return nonzero if OP is general operand representable on x86_64
3404 as either sign extended or zero extended constant. */
3407 x86_64_szext_general_operand (rtx op
, enum machine_mode mode
)
3410 return general_operand (op
, mode
);
3411 if (nonimmediate_operand (op
, mode
))
3413 return x86_64_sign_extended_value (op
) || x86_64_zero_extended_value (op
);
3416 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3419 x86_64_nonmemory_operand (rtx op
, enum machine_mode mode
)
3422 return nonmemory_operand (op
, mode
);
3423 if (register_operand (op
, mode
))
3425 return x86_64_sign_extended_value (op
);
3428 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3431 x86_64_movabs_operand (rtx op
, enum machine_mode mode
)
3433 if (!TARGET_64BIT
|| !flag_pic
)
3434 return nonmemory_operand (op
, mode
);
3435 if (register_operand (op
, mode
) || x86_64_sign_extended_value (op
))
3437 if (CONSTANT_P (op
) && !symbolic_reference_mentioned_p (op
))
3442 /* Return nonzero if OPNUM's MEM should be matched
3443 in movabs* patterns. */
3446 ix86_check_movabs (rtx insn
, int opnum
)
3450 set
= PATTERN (insn
);
3451 if (GET_CODE (set
) == PARALLEL
)
3452 set
= XVECEXP (set
, 0, 0);
3453 if (GET_CODE (set
) != SET
)
3455 mem
= XEXP (set
, opnum
);
3456 while (GET_CODE (mem
) == SUBREG
)
3457 mem
= SUBREG_REG (mem
);
3458 if (GET_CODE (mem
) != MEM
)
3460 return (volatile_ok
|| !MEM_VOLATILE_P (mem
));
3463 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3466 x86_64_szext_nonmemory_operand (rtx op
, enum machine_mode mode
)
3469 return nonmemory_operand (op
, mode
);
3470 if (register_operand (op
, mode
))
3472 return x86_64_sign_extended_value (op
) || x86_64_zero_extended_value (op
);
3475 /* Return nonzero if OP is immediate operand representable on x86_64. */
3478 x86_64_immediate_operand (rtx op
, enum machine_mode mode
)
3481 return immediate_operand (op
, mode
);
3482 return x86_64_sign_extended_value (op
);
3485 /* Return nonzero if OP is immediate operand representable on x86_64. */
3488 x86_64_zext_immediate_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3490 return x86_64_zero_extended_value (op
);
3493 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3494 for shift & compare patterns, as shifting by 0 does not change flags),
3495 else return zero. */
3498 const_int_1_31_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3500 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 1 && INTVAL (op
) <= 31);
3503 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3504 reference and a constant. */
3507 symbolic_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3509 switch (GET_CODE (op
))
3517 if (GET_CODE (op
) == SYMBOL_REF
3518 || GET_CODE (op
) == LABEL_REF
3519 || (GET_CODE (op
) == UNSPEC
3520 && (XINT (op
, 1) == UNSPEC_GOT
3521 || XINT (op
, 1) == UNSPEC_GOTOFF
3522 || XINT (op
, 1) == UNSPEC_GOTPCREL
)))
3524 if (GET_CODE (op
) != PLUS
3525 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
3529 if (GET_CODE (op
) == SYMBOL_REF
3530 || GET_CODE (op
) == LABEL_REF
)
3532 /* Only @GOTOFF gets offsets. */
3533 if (GET_CODE (op
) != UNSPEC
3534 || XINT (op
, 1) != UNSPEC_GOTOFF
)
3537 op
= XVECEXP (op
, 0, 0);
3538 if (GET_CODE (op
) == SYMBOL_REF
3539 || GET_CODE (op
) == LABEL_REF
)
3548 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3551 pic_symbolic_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3553 if (GET_CODE (op
) != CONST
)
3558 if (GET_CODE (op
) == UNSPEC
3559 && XINT (op
, 1) == UNSPEC_GOTPCREL
)
3561 if (GET_CODE (op
) == PLUS
3562 && GET_CODE (XEXP (op
, 0)) == UNSPEC
3563 && XINT (XEXP (op
, 0), 1) == UNSPEC_GOTPCREL
)
3568 if (GET_CODE (op
) == UNSPEC
)
3570 if (GET_CODE (op
) != PLUS
3571 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
3574 if (GET_CODE (op
) == UNSPEC
)
3580 /* Return true if OP is a symbolic operand that resolves locally. */
3583 local_symbolic_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3585 if (GET_CODE (op
) == CONST
3586 && GET_CODE (XEXP (op
, 0)) == PLUS
3587 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
3588 op
= XEXP (XEXP (op
, 0), 0);
3590 if (GET_CODE (op
) == LABEL_REF
)
3593 if (GET_CODE (op
) != SYMBOL_REF
)
3596 if (SYMBOL_REF_LOCAL_P (op
))
3599 /* There is, however, a not insubstantial body of code in the rest of
3600 the compiler that assumes it can just stick the results of
3601 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3602 /* ??? This is a hack. Should update the body of the compiler to
3603 always create a DECL an invoke targetm.encode_section_info. */
3604 if (strncmp (XSTR (op
, 0), internal_label_prefix
,
3605 internal_label_prefix_len
) == 0)
3611 /* Test for various thread-local symbols. */
3614 tls_symbolic_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3616 if (GET_CODE (op
) != SYMBOL_REF
)
3618 return SYMBOL_REF_TLS_MODEL (op
);
3622 tls_symbolic_operand_1 (rtx op
, enum tls_model kind
)
3624 if (GET_CODE (op
) != SYMBOL_REF
)
3626 return SYMBOL_REF_TLS_MODEL (op
) == kind
;
3630 global_dynamic_symbolic_operand (rtx op
,
3631 enum machine_mode mode ATTRIBUTE_UNUSED
)
3633 return tls_symbolic_operand_1 (op
, TLS_MODEL_GLOBAL_DYNAMIC
);
3637 local_dynamic_symbolic_operand (rtx op
,
3638 enum machine_mode mode ATTRIBUTE_UNUSED
)
3640 return tls_symbolic_operand_1 (op
, TLS_MODEL_LOCAL_DYNAMIC
);
3644 initial_exec_symbolic_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3646 return tls_symbolic_operand_1 (op
, TLS_MODEL_INITIAL_EXEC
);
3650 local_exec_symbolic_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3652 return tls_symbolic_operand_1 (op
, TLS_MODEL_LOCAL_EXEC
);
3655 /* Test for a valid operand for a call instruction. Don't allow the
3656 arg pointer register or virtual regs since they may decay into
3657 reg + const, which the patterns can't handle. */
3660 call_insn_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3662 /* Disallow indirect through a virtual register. This leads to
3663 compiler aborts when trying to eliminate them. */
3664 if (GET_CODE (op
) == REG
3665 && (op
== arg_pointer_rtx
3666 || op
== frame_pointer_rtx
3667 || (REGNO (op
) >= FIRST_PSEUDO_REGISTER
3668 && REGNO (op
) <= LAST_VIRTUAL_REGISTER
)))
3671 /* Disallow `call 1234'. Due to varying assembler lameness this
3672 gets either rejected or translated to `call .+1234'. */
3673 if (GET_CODE (op
) == CONST_INT
)
3676 /* Explicitly allow SYMBOL_REF even if pic. */
3677 if (GET_CODE (op
) == SYMBOL_REF
)
3680 /* Otherwise we can allow any general_operand in the address. */
3681 return general_operand (op
, Pmode
);
3684 /* Test for a valid operand for a call instruction. Don't allow the
3685 arg pointer register or virtual regs since they may decay into
3686 reg + const, which the patterns can't handle. */
3689 sibcall_insn_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3691 /* Disallow indirect through a virtual register. This leads to
3692 compiler aborts when trying to eliminate them. */
3693 if (GET_CODE (op
) == REG
3694 && (op
== arg_pointer_rtx
3695 || op
== frame_pointer_rtx
3696 || (REGNO (op
) >= FIRST_PSEUDO_REGISTER
3697 && REGNO (op
) <= LAST_VIRTUAL_REGISTER
)))
3700 /* Explicitly allow SYMBOL_REF even if pic. */
3701 if (GET_CODE (op
) == SYMBOL_REF
)
3704 /* Otherwise we can only allow register operands. */
3705 return register_operand (op
, Pmode
);
3709 constant_call_address_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3711 if (GET_CODE (op
) == CONST
3712 && GET_CODE (XEXP (op
, 0)) == PLUS
3713 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
3714 op
= XEXP (XEXP (op
, 0), 0);
3715 return GET_CODE (op
) == SYMBOL_REF
;
3718 /* Match exactly zero and one. */
3721 const0_operand (rtx op
, enum machine_mode mode
)
3723 return op
== CONST0_RTX (mode
);
3727 const1_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3729 return op
== const1_rtx
;
3732 /* Match 2, 4, or 8. Used for leal multiplicands. */
3735 const248_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3737 return (GET_CODE (op
) == CONST_INT
3738 && (INTVAL (op
) == 2 || INTVAL (op
) == 4 || INTVAL (op
) == 8));
3742 const_0_to_3_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3744 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 0 && INTVAL (op
) < 4);
3748 const_0_to_7_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3750 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 0 && INTVAL (op
) < 8);
3754 const_0_to_15_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3756 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 0 && INTVAL (op
) < 16);
3760 const_0_to_255_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3762 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 0 && INTVAL (op
) < 256);
3766 /* True if this is a constant appropriate for an increment or decrement. */
3769 incdec_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3771 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3772 registers, since carry flag is not set. */
3773 if (TARGET_PENTIUM4
&& !optimize_size
)
3775 return op
== const1_rtx
|| op
== constm1_rtx
;
3778 /* Return nonzero if OP is acceptable as operand of DImode shift
3782 shiftdi_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3785 return nonimmediate_operand (op
, mode
);
3787 return register_operand (op
, mode
);
3790 /* Return false if this is the stack pointer, or any other fake
3791 register eliminable to the stack pointer. Otherwise, this is
3794 This is used to prevent esp from being used as an index reg.
3795 Which would only happen in pathological cases. */
3798 reg_no_sp_operand (rtx op
, enum machine_mode mode
)
3801 if (GET_CODE (t
) == SUBREG
)
3803 if (t
== stack_pointer_rtx
|| t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
)
3806 return register_operand (op
, mode
);
3810 mmx_reg_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3812 return MMX_REG_P (op
);
3815 /* Return false if this is any eliminable register. Otherwise
3819 general_no_elim_operand (rtx op
, enum machine_mode mode
)
3822 if (GET_CODE (t
) == SUBREG
)
3824 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
3825 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
3826 || t
== virtual_stack_dynamic_rtx
)
3829 && REGNO (t
) >= FIRST_VIRTUAL_REGISTER
3830 && REGNO (t
) <= LAST_VIRTUAL_REGISTER
)
3833 return general_operand (op
, mode
);
3836 /* Return false if this is any eliminable register. Otherwise
3837 register_operand or const_int. */
3840 nonmemory_no_elim_operand (rtx op
, enum machine_mode mode
)
3843 if (GET_CODE (t
) == SUBREG
)
3845 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
3846 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
3847 || t
== virtual_stack_dynamic_rtx
)
3850 return GET_CODE (op
) == CONST_INT
|| register_operand (op
, mode
);
3853 /* Return false if this is any eliminable register or stack register,
3854 otherwise work like register_operand. */
3857 index_register_operand (rtx op
, enum machine_mode mode
)
3860 if (GET_CODE (t
) == SUBREG
)
3864 if (t
== arg_pointer_rtx
3865 || t
== frame_pointer_rtx
3866 || t
== virtual_incoming_args_rtx
3867 || t
== virtual_stack_vars_rtx
3868 || t
== virtual_stack_dynamic_rtx
3869 || REGNO (t
) == STACK_POINTER_REGNUM
)
3872 return general_operand (op
, mode
);
3875 /* Return true if op is a Q_REGS class register. */
3878 q_regs_operand (rtx op
, enum machine_mode mode
)
3880 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3882 if (GET_CODE (op
) == SUBREG
)
3883 op
= SUBREG_REG (op
);
3884 return ANY_QI_REG_P (op
);
3887 /* Return true if op is an flags register. */
3890 flags_reg_operand (rtx op
, enum machine_mode mode
)
3892 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3894 return REG_P (op
) && REGNO (op
) == FLAGS_REG
&& GET_MODE (op
) != VOIDmode
;
3897 /* Return true if op is a NON_Q_REGS class register. */
3900 non_q_regs_operand (rtx op
, enum machine_mode mode
)
3902 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3904 if (GET_CODE (op
) == SUBREG
)
3905 op
= SUBREG_REG (op
);
3906 return NON_QI_REG_P (op
);
3910 zero_extended_scalar_load_operand (rtx op
,
3911 enum machine_mode mode ATTRIBUTE_UNUSED
)
3914 if (GET_CODE (op
) != MEM
)
3916 op
= maybe_get_pool_constant (op
);
3919 if (GET_CODE (op
) != CONST_VECTOR
)
3922 (GET_MODE_SIZE (GET_MODE (op
)) /
3923 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op
))));
3924 for (n_elts
--; n_elts
> 0; n_elts
--)
3926 rtx elt
= CONST_VECTOR_ELT (op
, n_elts
);
3927 if (elt
!= CONST0_RTX (GET_MODE_INNER (GET_MODE (op
))))
3933 /* Return 1 when OP is operand acceptable for standard SSE move. */
3935 vector_move_operand (rtx op
, enum machine_mode mode
)
3937 if (nonimmediate_operand (op
, mode
))
3939 if (GET_MODE (op
) != mode
&& mode
!= VOIDmode
)
3941 return (op
== CONST0_RTX (GET_MODE (op
)));
3944 /* Return true if op if a valid address, and does not contain
3945 a segment override. */
3948 no_seg_address_operand (rtx op
, enum machine_mode mode
)
3950 struct ix86_address parts
;
3952 if (! address_operand (op
, mode
))
3955 if (! ix86_decompose_address (op
, &parts
))
3958 return parts
.seg
== SEG_DEFAULT
;
3961 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3964 sse_comparison_operator (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3966 enum rtx_code code
= GET_CODE (op
);
3969 /* Operations supported directly. */
3979 /* These are equivalent to ones above in non-IEEE comparisons. */
3986 return !TARGET_IEEE_FP
;
3991 /* Return 1 if OP is a valid comparison operator in valid mode. */
3993 ix86_comparison_operator (rtx op
, enum machine_mode mode
)
3995 enum machine_mode inmode
;
3996 enum rtx_code code
= GET_CODE (op
);
3997 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3999 if (GET_RTX_CLASS (code
) != '<')
4001 inmode
= GET_MODE (XEXP (op
, 0));
4003 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
4005 enum rtx_code second_code
, bypass_code
;
4006 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
4007 return (bypass_code
== NIL
&& second_code
== NIL
);
4014 if (inmode
== CCmode
|| inmode
== CCGCmode
4015 || inmode
== CCGOCmode
|| inmode
== CCNOmode
)
4018 case LTU
: case GTU
: case LEU
: case ORDERED
: case UNORDERED
: case GEU
:
4019 if (inmode
== CCmode
)
4023 if (inmode
== CCmode
|| inmode
== CCGCmode
|| inmode
== CCNOmode
)
4031 /* Return 1 if OP is a valid comparison operator testing carry flag
4034 ix86_carry_flag_operator (rtx op
, enum machine_mode mode
)
4036 enum machine_mode inmode
;
4037 enum rtx_code code
= GET_CODE (op
);
4039 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
4041 if (GET_RTX_CLASS (code
) != '<')
4043 inmode
= GET_MODE (XEXP (op
, 0));
4044 if (GET_CODE (XEXP (op
, 0)) != REG
4045 || REGNO (XEXP (op
, 0)) != 17
4046 || XEXP (op
, 1) != const0_rtx
)
4049 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
4051 enum rtx_code second_code
, bypass_code
;
4053 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
4054 if (bypass_code
!= NIL
|| second_code
!= NIL
)
4056 code
= ix86_fp_compare_code_to_integer (code
);
4058 else if (inmode
!= CCmode
)
4063 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
4066 fcmov_comparison_operator (rtx op
, enum machine_mode mode
)
4068 enum machine_mode inmode
;
4069 enum rtx_code code
= GET_CODE (op
);
4071 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
4073 if (GET_RTX_CLASS (code
) != '<')
4075 inmode
= GET_MODE (XEXP (op
, 0));
4076 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
4078 enum rtx_code second_code
, bypass_code
;
4080 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
4081 if (bypass_code
!= NIL
|| second_code
!= NIL
)
4083 code
= ix86_fp_compare_code_to_integer (code
);
4085 /* i387 supports just limited amount of conditional codes. */
4088 case LTU
: case GTU
: case LEU
: case GEU
:
4089 if (inmode
== CCmode
|| inmode
== CCFPmode
|| inmode
== CCFPUmode
)
4092 case ORDERED
: case UNORDERED
:
4100 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4103 promotable_binary_operator (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
4105 switch (GET_CODE (op
))
4108 /* Modern CPUs have same latency for HImode and SImode multiply,
4109 but 386 and 486 do HImode multiply faster. */
4110 return ix86_tune
> PROCESSOR_I486
;
4122 /* Nearly general operand, but accept any const_double, since we wish
4123 to be able to drop them into memory rather than have them get pulled
4127 cmp_fp_expander_operand (rtx op
, enum machine_mode mode
)
4129 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
4131 if (GET_CODE (op
) == CONST_DOUBLE
)
4133 return general_operand (op
, mode
);
4136 /* Match an SI or HImode register for a zero_extract. */
4139 ext_register_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
4142 if ((!TARGET_64BIT
|| GET_MODE (op
) != DImode
)
4143 && GET_MODE (op
) != SImode
&& GET_MODE (op
) != HImode
)
4146 if (!register_operand (op
, VOIDmode
))
4149 /* Be careful to accept only registers having upper parts. */
4150 regno
= REG_P (op
) ? REGNO (op
) : REGNO (SUBREG_REG (op
));
4151 return (regno
> LAST_VIRTUAL_REGISTER
|| regno
< 4);
4154 /* Return 1 if this is a valid binary floating-point operation.
4155 OP is the expression matched, and MODE is its mode. */
4158 binary_fp_operator (rtx op
, enum machine_mode mode
)
4160 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
4163 switch (GET_CODE (op
))
4169 return GET_MODE_CLASS (GET_MODE (op
)) == MODE_FLOAT
;
4177 mult_operator (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
4179 return GET_CODE (op
) == MULT
;
4183 div_operator (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
4185 return GET_CODE (op
) == DIV
;
4189 arith_or_logical_operator (rtx op
, enum machine_mode mode
)
4191 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
4192 && (GET_RTX_CLASS (GET_CODE (op
)) == 'c'
4193 || GET_RTX_CLASS (GET_CODE (op
)) == '2'));
4196 /* Returns 1 if OP is memory operand with a displacement. */
4199 memory_displacement_operand (rtx op
, enum machine_mode mode
)
4201 struct ix86_address parts
;
4203 if (! memory_operand (op
, mode
))
4206 if (! ix86_decompose_address (XEXP (op
, 0), &parts
))
4209 return parts
.disp
!= NULL_RTX
;
4212 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4213 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4215 ??? It seems likely that this will only work because cmpsi is an
4216 expander, and no actual insns use this. */
4219 cmpsi_operand (rtx op
, enum machine_mode mode
)
4221 if (nonimmediate_operand (op
, mode
))
4224 if (GET_CODE (op
) == AND
4225 && GET_MODE (op
) == SImode
4226 && GET_CODE (XEXP (op
, 0)) == ZERO_EXTRACT
4227 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
4228 && GET_CODE (XEXP (XEXP (op
, 0), 2)) == CONST_INT
4229 && INTVAL (XEXP (XEXP (op
, 0), 1)) == 8
4230 && INTVAL (XEXP (XEXP (op
, 0), 2)) == 8
4231 && GET_CODE (XEXP (op
, 1)) == CONST_INT
)
4237 /* Returns 1 if OP is memory operand that can not be represented by the
4241 long_memory_operand (rtx op
, enum machine_mode mode
)
4243 if (! memory_operand (op
, mode
))
4246 return memory_address_length (op
) != 0;
4249 /* Return nonzero if the rtx is known aligned. */
4252 aligned_operand (rtx op
, enum machine_mode mode
)
4254 struct ix86_address parts
;
4256 if (!general_operand (op
, mode
))
4259 /* Registers and immediate operands are always "aligned". */
4260 if (GET_CODE (op
) != MEM
)
4263 /* Don't even try to do any aligned optimizations with volatiles. */
4264 if (MEM_VOLATILE_P (op
))
4269 /* Pushes and pops are only valid on the stack pointer. */
4270 if (GET_CODE (op
) == PRE_DEC
4271 || GET_CODE (op
) == POST_INC
)
4274 /* Decode the address. */
4275 if (! ix86_decompose_address (op
, &parts
))
4278 /* Look for some component that isn't known to be aligned. */
4282 && REGNO_POINTER_ALIGN (REGNO (parts
.index
)) < 32)
4287 if (REGNO_POINTER_ALIGN (REGNO (parts
.base
)) < 32)
4292 if (GET_CODE (parts
.disp
) != CONST_INT
4293 || (INTVAL (parts
.disp
) & 3) != 0)
4297 /* Didn't find one -- this must be an aligned address. */
4301 /* Initialize the table of extra 80387 mathematical constants. */
4304 init_ext_80387_constants (void)
4306 static const char * cst
[5] =
4308 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4309 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4310 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4311 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4312 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4316 for (i
= 0; i
< 5; i
++)
4318 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
4319 /* Ensure each constant is rounded to XFmode precision. */
4320 real_convert (&ext_80387_constants_table
[i
],
4321 XFmode
, &ext_80387_constants_table
[i
]);
4324 ext_80387_constants_init
= 1;
4327 /* Return true if the constant is something that can be loaded with
4328 a special instruction. */
4331 standard_80387_constant_p (rtx x
)
4333 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
4336 if (x
== CONST0_RTX (GET_MODE (x
)))
4338 if (x
== CONST1_RTX (GET_MODE (x
)))
4341 /* For XFmode constants, try to find a special 80387 instruction on
4342 those CPUs that benefit from them. */
4343 if (GET_MODE (x
) == XFmode
4344 && x86_ext_80387_constants
& TUNEMASK
)
4349 if (! ext_80387_constants_init
)
4350 init_ext_80387_constants ();
4352 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
4353 for (i
= 0; i
< 5; i
++)
4354 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
4361 /* Return the opcode of the special instruction to be used to load
4365 standard_80387_constant_opcode (rtx x
)
4367 switch (standard_80387_constant_p (x
))
4387 /* Return the CONST_DOUBLE representing the 80387 constant that is
4388 loaded by the specified special instruction. The argument IDX
4389 matches the return value from standard_80387_constant_p. */
4392 standard_80387_constant_rtx (int idx
)
4396 if (! ext_80387_constants_init
)
4397 init_ext_80387_constants ();
4413 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
4417 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4420 standard_sse_constant_p (rtx x
)
4422 if (x
== const0_rtx
)
4424 return (x
== CONST0_RTX (GET_MODE (x
)));
4427 /* Returns 1 if OP contains a symbol reference */
4430 symbolic_reference_mentioned_p (rtx op
)
4435 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
4438 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
4439 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
4445 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
4446 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
4450 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
4457 /* Return 1 if it is appropriate to emit `ret' instructions in the
4458 body of a function. Do this only if the epilogue is simple, needing a
4459 couple of insns. Prior to reloading, we can't tell how many registers
4460 must be saved, so return 0 then. Return 0 if there is no frame
4461 marker to de-allocate.
4463 If NON_SAVING_SETJMP is defined and true, then it is not possible
4464 for the epilogue to be simple, so return 0. This is a special case
4465 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4466 until final, but jump_optimize may need to know sooner if a
4470 ix86_can_use_return_insn_p (void)
4472 struct ix86_frame frame
;
4474 #ifdef NON_SAVING_SETJMP
4475 if (NON_SAVING_SETJMP
&& current_function_calls_setjmp
)
4479 if (! reload_completed
|| frame_pointer_needed
)
4482 /* Don't allow more than 32 pop, since that's all we can do
4483 with one instruction. */
4484 if (current_function_pops_args
4485 && current_function_args_size
>= 32768)
4488 ix86_compute_frame_layout (&frame
);
4489 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
4492 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4494 x86_64_sign_extended_value (rtx value
)
4496 switch (GET_CODE (value
))
4498 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4499 to be at least 32 and this all acceptable constants are
4500 represented as CONST_INT. */
4502 if (HOST_BITS_PER_WIDE_INT
== 32)
4506 HOST_WIDE_INT val
= trunc_int_for_mode (INTVAL (value
), DImode
);
4507 return trunc_int_for_mode (val
, SImode
) == val
;
4511 /* For certain code models, the symbolic references are known to fit.
4512 in CM_SMALL_PIC model we know it fits if it is local to the shared
4513 library. Don't count TLS SYMBOL_REFs here, since they should fit
4514 only if inside of UNSPEC handled below. */
4516 /* TLS symbols are not constant. */
4517 if (tls_symbolic_operand (value
, Pmode
))
4519 return (ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_KERNEL
);
4521 /* For certain code models, the code is near as well. */
4523 return (ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
4524 || ix86_cmodel
== CM_KERNEL
);
4526 /* We also may accept the offsetted memory references in certain special
4529 if (GET_CODE (XEXP (value
, 0)) == UNSPEC
)
4530 switch (XINT (XEXP (value
, 0), 1))
4532 case UNSPEC_GOTPCREL
:
4534 case UNSPEC_GOTNTPOFF
:
4540 if (GET_CODE (XEXP (value
, 0)) == PLUS
)
4542 rtx op1
= XEXP (XEXP (value
, 0), 0);
4543 rtx op2
= XEXP (XEXP (value
, 0), 1);
4544 HOST_WIDE_INT offset
;
4546 if (ix86_cmodel
== CM_LARGE
)
4548 if (GET_CODE (op2
) != CONST_INT
)
4550 offset
= trunc_int_for_mode (INTVAL (op2
), DImode
);
4551 switch (GET_CODE (op1
))
4554 /* For CM_SMALL assume that latest object is 16MB before
4555 end of 31bits boundary. We may also accept pretty
4556 large negative constants knowing that all objects are
4557 in the positive half of address space. */
4558 if (ix86_cmodel
== CM_SMALL
4559 && offset
< 16*1024*1024
4560 && trunc_int_for_mode (offset
, SImode
) == offset
)
4562 /* For CM_KERNEL we know that all object resist in the
4563 negative half of 32bits address space. We may not
4564 accept negative offsets, since they may be just off
4565 and we may accept pretty large positive ones. */
4566 if (ix86_cmodel
== CM_KERNEL
4568 && trunc_int_for_mode (offset
, SImode
) == offset
)
4572 /* These conditions are similar to SYMBOL_REF ones, just the
4573 constraints for code models differ. */
4574 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
4575 && offset
< 16*1024*1024
4576 && trunc_int_for_mode (offset
, SImode
) == offset
)
4578 if (ix86_cmodel
== CM_KERNEL
4580 && trunc_int_for_mode (offset
, SImode
) == offset
)
4584 switch (XINT (op1
, 1))
4589 && trunc_int_for_mode (offset
, SImode
) == offset
)
4603 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4605 x86_64_zero_extended_value (rtx value
)
4607 switch (GET_CODE (value
))
4610 if (HOST_BITS_PER_WIDE_INT
== 32)
4611 return (GET_MODE (value
) == VOIDmode
4612 && !CONST_DOUBLE_HIGH (value
));
4616 if (HOST_BITS_PER_WIDE_INT
== 32)
4617 return INTVAL (value
) >= 0;
4619 return !(INTVAL (value
) & ~(HOST_WIDE_INT
) 0xffffffff);
4622 /* For certain code models, the symbolic references are known to fit. */
4624 /* TLS symbols are not constant. */
4625 if (tls_symbolic_operand (value
, Pmode
))
4627 return ix86_cmodel
== CM_SMALL
;
4629 /* For certain code models, the code is near as well. */
4631 return ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
;
4633 /* We also may accept the offsetted memory references in certain special
4636 if (GET_CODE (XEXP (value
, 0)) == PLUS
)
4638 rtx op1
= XEXP (XEXP (value
, 0), 0);
4639 rtx op2
= XEXP (XEXP (value
, 0), 1);
4641 if (ix86_cmodel
== CM_LARGE
)
4643 switch (GET_CODE (op1
))
4647 /* For small code model we may accept pretty large positive
4648 offsets, since one bit is available for free. Negative
4649 offsets are limited by the size of NULL pointer area
4650 specified by the ABI. */
4651 if (ix86_cmodel
== CM_SMALL
4652 && GET_CODE (op2
) == CONST_INT
4653 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
4654 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
4657 /* ??? For the kernel, we may accept adjustment of
4658 -0x10000000, since we know that it will just convert
4659 negative address space to positive, but perhaps this
4660 is not worthwhile. */
4663 /* These conditions are similar to SYMBOL_REF ones, just the
4664 constraints for code models differ. */
4665 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
4666 && GET_CODE (op2
) == CONST_INT
4667 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
4668 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
4682 /* Value should be nonzero if functions must have frame pointers.
4683 Zero means the frame pointer need not be set up (and parms may
4684 be accessed via the stack pointer) in functions that seem suitable. */
4687 ix86_frame_pointer_required (void)
4689 /* If we accessed previous frames, then the generated code expects
4690 to be able to access the saved ebp value in our frame. */
4691 if (cfun
->machine
->accesses_prev_frame
)
4694 /* Several x86 os'es need a frame pointer for other reasons,
4695 usually pertaining to setjmp. */
4696 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
4699 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4700 the frame pointer by default. Turn it back on now if we've not
4701 got a leaf function. */
4702 if (TARGET_OMIT_LEAF_FRAME_POINTER
4703 && (!current_function_is_leaf
))
4706 if (current_function_profile
)
4712 /* Record that the current function accesses previous call frames. */
4715 ix86_setup_frame_addresses (void)
4717 cfun
->machine
->accesses_prev_frame
= 1;
4720 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4721 # define USE_HIDDEN_LINKONCE 1
4723 # define USE_HIDDEN_LINKONCE 0
4726 static int pic_labels_used
;
4728 /* Fills in the label name that should be used for a pc thunk for
4729 the given register. */
4732 get_pc_thunk_name (char name
[32], unsigned int regno
)
4734 if (USE_HIDDEN_LINKONCE
)
4735 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
4737 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
4741 /* This function generates code for -fpic that loads %ebx with
4742 the return address of the caller and then returns. */
4745 ix86_file_end (void)
4750 for (regno
= 0; regno
< 8; ++regno
)
4754 if (! ((pic_labels_used
>> regno
) & 1))
4757 get_pc_thunk_name (name
, regno
);
4759 if (USE_HIDDEN_LINKONCE
)
4763 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
4765 TREE_PUBLIC (decl
) = 1;
4766 TREE_STATIC (decl
) = 1;
4767 DECL_ONE_ONLY (decl
) = 1;
4769 (*targetm
.asm_out
.unique_section
) (decl
, 0);
4770 named_section (decl
, NULL
, 0);
4772 (*targetm
.asm_out
.globalize_label
) (asm_out_file
, name
);
4773 fputs ("\t.hidden\t", asm_out_file
);
4774 assemble_name (asm_out_file
, name
);
4775 fputc ('\n', asm_out_file
);
4776 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
4781 ASM_OUTPUT_LABEL (asm_out_file
, name
);
4784 xops
[0] = gen_rtx_REG (SImode
, regno
);
4785 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
4786 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
4787 output_asm_insn ("ret", xops
);
4790 if (NEED_INDICATE_EXEC_STACK
)
4791 file_end_indicate_exec_stack ();
4794 /* Emit code for the SET_GOT patterns. */
4797 output_set_got (rtx dest
)
4802 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
4804 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
4806 xops
[2] = gen_rtx_LABEL_REF (Pmode
, gen_label_rtx ());
4809 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
4811 output_asm_insn ("call\t%a2", xops
);
4814 /* Output the "canonical" label name ("Lxx$pb") here too. This
4815 is what will be referred to by the Mach-O PIC subsystem. */
4816 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
4818 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
4819 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
4822 output_asm_insn ("pop{l}\t%0", xops
);
4827 get_pc_thunk_name (name
, REGNO (dest
));
4828 pic_labels_used
|= 1 << REGNO (dest
);
4830 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
4831 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
4832 output_asm_insn ("call\t%X2", xops
);
4835 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
4836 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
4837 else if (!TARGET_MACHO
)
4838 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops
);
4843 /* Generate an "push" pattern for input ARG. */
4848 return gen_rtx_SET (VOIDmode
,
4850 gen_rtx_PRE_DEC (Pmode
,
4851 stack_pointer_rtx
)),
4855 /* Return >= 0 if there is an unused call-clobbered register available
4856 for the entire function. */
4859 ix86_select_alt_pic_regnum (void)
4861 if (current_function_is_leaf
&& !current_function_profile
)
4864 for (i
= 2; i
>= 0; --i
)
4865 if (!regs_ever_live
[i
])
4869 return INVALID_REGNUM
;
4872 /* Return 1 if we need to save REGNO. */
4874 ix86_save_reg (unsigned int regno
, int maybe_eh_return
)
4876 if (pic_offset_table_rtx
4877 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
4878 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
4879 || current_function_profile
4880 || current_function_calls_eh_return
4881 || current_function_uses_const_pool
))
4883 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
4888 if (current_function_calls_eh_return
&& maybe_eh_return
)
4893 unsigned test
= EH_RETURN_DATA_REGNO (i
);
4894 if (test
== INVALID_REGNUM
)
4901 return (regs_ever_live
[regno
]
4902 && !call_used_regs
[regno
]
4903 && !fixed_regs
[regno
]
4904 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
4907 /* Return number of registers to be saved on the stack. */
4910 ix86_nsaved_regs (void)
4915 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
4916 if (ix86_save_reg (regno
, true))
4921 /* Return the offset between two registers, one to be eliminated, and the other
4922 its replacement, at the start of a routine. */
4925 ix86_initial_elimination_offset (int from
, int to
)
4927 struct ix86_frame frame
;
4928 ix86_compute_frame_layout (&frame
);
4930 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4931 return frame
.hard_frame_pointer_offset
;
4932 else if (from
== FRAME_POINTER_REGNUM
4933 && to
== HARD_FRAME_POINTER_REGNUM
)
4934 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
4937 if (to
!= STACK_POINTER_REGNUM
)
4939 else if (from
== ARG_POINTER_REGNUM
)
4940 return frame
.stack_pointer_offset
;
4941 else if (from
!= FRAME_POINTER_REGNUM
)
4944 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
4948 /* Fill structure ix86_frame about frame of currently computed function. */
4951 ix86_compute_frame_layout (struct ix86_frame
*frame
)
4953 HOST_WIDE_INT total_size
;
4954 int stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
4955 HOST_WIDE_INT offset
;
4956 int preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
4957 HOST_WIDE_INT size
= get_frame_size ();
4959 frame
->nregs
= ix86_nsaved_regs ();
4962 /* During reload iteration the amount of registers saved can change.
4963 Recompute the value as needed. Do not recompute when amount of registers
4964 didn't change as reload does mutiple calls to the function and does not
4965 expect the decision to change within single iteration. */
4967 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
4969 int count
= frame
->nregs
;
4971 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
4972 /* The fast prologue uses move instead of push to save registers. This
4973 is significantly longer, but also executes faster as modern hardware
4974 can execute the moves in parallel, but can't do that for push/pop.
4976 Be careful about choosing what prologue to emit: When function takes
4977 many instructions to execute we may use slow version as well as in
4978 case function is known to be outside hot spot (this is known with
4979 feedback only). Weight the size of function by number of registers
4980 to save as it is cheap to use one or two push instructions but very
4981 slow to use many of them. */
4983 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
4984 if (cfun
->function_frequency
< FUNCTION_FREQUENCY_NORMAL
4985 || (flag_branch_probabilities
4986 && cfun
->function_frequency
< FUNCTION_FREQUENCY_HOT
))
4987 cfun
->machine
->use_fast_prologue_epilogue
= false;
4989 cfun
->machine
->use_fast_prologue_epilogue
4990 = !expensive_function_p (count
);
4992 if (TARGET_PROLOGUE_USING_MOVE
4993 && cfun
->machine
->use_fast_prologue_epilogue
)
4994 frame
->save_regs_using_mov
= true;
4996 frame
->save_regs_using_mov
= false;
4999 /* Skip return address and saved base pointer. */
5000 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
5002 frame
->hard_frame_pointer_offset
= offset
;
5004 /* Do some sanity checking of stack_alignment_needed and
5005 preferred_alignment, since i386 port is the only using those features
5006 that may break easily. */
5008 if (size
&& !stack_alignment_needed
)
5010 if (preferred_alignment
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
5012 if (preferred_alignment
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
5014 if (stack_alignment_needed
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
5017 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
5018 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
5020 /* Register save area */
5021 offset
+= frame
->nregs
* UNITS_PER_WORD
;
5024 if (ix86_save_varrargs_registers
)
5026 offset
+= X86_64_VARARGS_SIZE
;
5027 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
5030 frame
->va_arg_size
= 0;
5032 /* Align start of frame for local function. */
5033 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
5034 & -stack_alignment_needed
) - offset
;
5036 offset
+= frame
->padding1
;
5038 /* Frame pointer points here. */
5039 frame
->frame_pointer_offset
= offset
;
5043 /* Add outgoing arguments area. Can be skipped if we eliminated
5044 all the function calls as dead code.
5045 Skipping is however impossible when function calls alloca. Alloca
5046 expander assumes that last current_function_outgoing_args_size
5047 of stack frame are unused. */
5048 if (ACCUMULATE_OUTGOING_ARGS
5049 && (!current_function_is_leaf
|| current_function_calls_alloca
))
5051 offset
+= current_function_outgoing_args_size
;
5052 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
5055 frame
->outgoing_arguments_size
= 0;
5057 /* Align stack boundary. Only needed if we're calling another function
5059 if (!current_function_is_leaf
|| current_function_calls_alloca
)
5060 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
5061 & -preferred_alignment
) - offset
;
5063 frame
->padding2
= 0;
5065 offset
+= frame
->padding2
;
5067 /* We've reached end of stack frame. */
5068 frame
->stack_pointer_offset
= offset
;
5070 /* Size prologue needs to allocate. */
5071 frame
->to_allocate
=
5072 (size
+ frame
->padding1
+ frame
->padding2
5073 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
5075 if ((!frame
->to_allocate
&& frame
->nregs
<= 1)
5076 || (TARGET_64BIT
&& frame
->to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
5077 frame
->save_regs_using_mov
= false;
5079 if (TARGET_RED_ZONE
&& current_function_sp_is_unchanging
5080 && current_function_is_leaf
)
5082 frame
->red_zone_size
= frame
->to_allocate
;
5083 if (frame
->save_regs_using_mov
)
5084 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
5085 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
5086 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
5089 frame
->red_zone_size
= 0;
5090 frame
->to_allocate
-= frame
->red_zone_size
;
5091 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
5093 fprintf (stderr
, "nregs: %i\n", frame
->nregs
);
5094 fprintf (stderr
, "size: %i\n", size
);
5095 fprintf (stderr
, "alignment1: %i\n", stack_alignment_needed
);
5096 fprintf (stderr
, "padding1: %i\n", frame
->padding1
);
5097 fprintf (stderr
, "va_arg: %i\n", frame
->va_arg_size
);
5098 fprintf (stderr
, "padding2: %i\n", frame
->padding2
);
5099 fprintf (stderr
, "to_allocate: %i\n", frame
->to_allocate
);
5100 fprintf (stderr
, "red_zone_size: %i\n", frame
->red_zone_size
);
5101 fprintf (stderr
, "frame_pointer_offset: %i\n", frame
->frame_pointer_offset
);
5102 fprintf (stderr
, "hard_frame_pointer_offset: %i\n",
5103 frame
->hard_frame_pointer_offset
);
5104 fprintf (stderr
, "stack_pointer_offset: %i\n", frame
->stack_pointer_offset
);
5108 /* Emit code to save registers in the prologue. */
5111 ix86_emit_save_regs (void)
5116 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
5117 if (ix86_save_reg (regno
, true))
5119 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
5120 RTX_FRAME_RELATED_P (insn
) = 1;
5124 /* Emit code to save registers using MOV insns. First register
5125 is restored from POINTER + OFFSET. */
5127 ix86_emit_save_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
)
5132 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5133 if (ix86_save_reg (regno
, true))
5135 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
5137 gen_rtx_REG (Pmode
, regno
));
5138 RTX_FRAME_RELATED_P (insn
) = 1;
5139 offset
+= UNITS_PER_WORD
;
5143 /* Expand prologue or epilogue stack adjustment.
5144 The pattern exist to put a dependency on all ebp-based memory accesses.
5145 STYLE should be negative if instructions should be marked as frame related,
5146 zero if %r11 register is live and cannot be freely used and positive
5150 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
, int style
)
5155 insn
= emit_insn (gen_pro_epilogue_adjust_stack_1 (dest
, src
, offset
));
5156 else if (x86_64_immediate_operand (offset
, DImode
))
5157 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest
, src
, offset
));
5161 /* r11 is used by indirect sibcall return as well, set before the
5162 epilogue and used after the epilogue. ATM indirect sibcall
5163 shouldn't be used together with huge frame sizes in one
5164 function because of the frame_size check in sibcall.c. */
5167 r11
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
5168 insn
= emit_insn (gen_rtx_SET (DImode
, r11
, offset
));
5170 RTX_FRAME_RELATED_P (insn
) = 1;
5171 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest
, src
, r11
,
5175 RTX_FRAME_RELATED_P (insn
) = 1;
5178 /* Expand the prologue into a bunch of separate insns. */
5181 ix86_expand_prologue (void)
5185 struct ix86_frame frame
;
5186 HOST_WIDE_INT allocate
;
5188 ix86_compute_frame_layout (&frame
);
5190 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5191 slower on all targets. Also sdb doesn't like it. */
5193 if (frame_pointer_needed
)
5195 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
5196 RTX_FRAME_RELATED_P (insn
) = 1;
5198 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
5199 RTX_FRAME_RELATED_P (insn
) = 1;
5202 allocate
= frame
.to_allocate
;
5204 if (!frame
.save_regs_using_mov
)
5205 ix86_emit_save_regs ();
5207 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
5209 /* When using red zone we may start register saving before allocating
5210 the stack frame saving one cycle of the prologue. */
5211 if (TARGET_RED_ZONE
&& frame
.save_regs_using_mov
)
5212 ix86_emit_save_regs_using_mov (frame_pointer_needed
? hard_frame_pointer_rtx
5213 : stack_pointer_rtx
,
5214 -frame
.nregs
* UNITS_PER_WORD
);
5218 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
5219 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5220 GEN_INT (-allocate
), -1);
5223 /* Only valid for Win32. */
5224 rtx eax
= gen_rtx_REG (SImode
, 0);
5225 bool eax_live
= ix86_eax_live_at_start_p ();
5232 emit_insn (gen_push (eax
));
5236 insn
= emit_move_insn (eax
, GEN_INT (allocate
));
5237 RTX_FRAME_RELATED_P (insn
) = 1;
5239 insn
= emit_insn (gen_allocate_stack_worker (eax
));
5240 RTX_FRAME_RELATED_P (insn
) = 1;
5244 rtx t
= plus_constant (stack_pointer_rtx
, allocate
);
5245 emit_move_insn (eax
, gen_rtx_MEM (SImode
, t
));
5249 if (frame
.save_regs_using_mov
&& !TARGET_RED_ZONE
)
5251 if (!frame_pointer_needed
|| !frame
.to_allocate
)
5252 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
5254 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
5255 -frame
.nregs
* UNITS_PER_WORD
);
5258 pic_reg_used
= false;
5259 if (pic_offset_table_rtx
5260 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5261 || current_function_profile
))
5263 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
5265 if (alt_pic_reg_used
!= INVALID_REGNUM
)
5266 REGNO (pic_offset_table_rtx
) = alt_pic_reg_used
;
5268 pic_reg_used
= true;
5273 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
5275 /* Even with accurate pre-reload life analysis, we can wind up
5276 deleting all references to the pic register after reload.
5277 Consider if cross-jumping unifies two sides of a branch
5278 controlled by a comparison vs the only read from a global.
5279 In which case, allow the set_got to be deleted, though we're
5280 too late to do anything about the ebx save in the prologue. */
5281 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
5284 /* Prevent function calls from be scheduled before the call to mcount.
5285 In the pic_reg_used case, make sure that the got load isn't deleted. */
5286 if (current_function_profile
)
5287 emit_insn (gen_blockage (pic_reg_used
? pic_offset_table_rtx
: const0_rtx
));
5290 /* Emit code to restore saved registers using MOV insns. First register
5291 is restored from POINTER + OFFSET. */
5293 ix86_emit_restore_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
,
5294 int maybe_eh_return
)
5297 rtx base_address
= gen_rtx_MEM (Pmode
, pointer
);
5299 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5300 if (ix86_save_reg (regno
, maybe_eh_return
))
5302 /* Ensure that adjust_address won't be forced to produce pointer
5303 out of range allowed by x86-64 instruction set. */
5304 if (TARGET_64BIT
&& offset
!= trunc_int_for_mode (offset
, SImode
))
5308 r11
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
5309 emit_move_insn (r11
, GEN_INT (offset
));
5310 emit_insn (gen_adddi3 (r11
, r11
, pointer
));
5311 base_address
= gen_rtx_MEM (Pmode
, r11
);
5314 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
5315 adjust_address (base_address
, Pmode
, offset
));
5316 offset
+= UNITS_PER_WORD
;
5320 /* Restore function stack, frame, and registers. */
5323 ix86_expand_epilogue (int style
)
5326 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
5327 struct ix86_frame frame
;
5328 HOST_WIDE_INT offset
;
5330 ix86_compute_frame_layout (&frame
);
5332 /* Calculate start of saved registers relative to ebp. Special care
5333 must be taken for the normal return case of a function using
5334 eh_return: the eax and edx registers are marked as saved, but not
5335 restored along this path. */
5336 offset
= frame
.nregs
;
5337 if (current_function_calls_eh_return
&& style
!= 2)
5339 offset
*= -UNITS_PER_WORD
;
5341 /* If we're only restoring one register and sp is not valid then
5342 using a move instruction to restore the register since it's
5343 less work than reloading sp and popping the register.
5345 The default code result in stack adjustment using add/lea instruction,
5346 while this code results in LEAVE instruction (or discrete equivalent),
5347 so it is profitable in some other cases as well. Especially when there
5348 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5349 and there is exactly one register to pop. This heuristic may need some
5350 tuning in future. */
5351 if ((!sp_valid
&& frame
.nregs
<= 1)
5352 || (TARGET_EPILOGUE_USING_MOVE
5353 && cfun
->machine
->use_fast_prologue_epilogue
5354 && (frame
.nregs
> 1 || frame
.to_allocate
))
5355 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
5356 || (frame_pointer_needed
&& TARGET_USE_LEAVE
5357 && cfun
->machine
->use_fast_prologue_epilogue
5358 && frame
.nregs
== 1)
5359 || current_function_calls_eh_return
)
5361 /* Restore registers. We can use ebp or esp to address the memory
5362 locations. If both are available, default to ebp, since offsets
5363 are known to be small. Only exception is esp pointing directly to the
5364 end of block of saved registers, where we may simplify addressing
5367 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
5368 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
5369 frame
.to_allocate
, style
== 2);
5371 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
5372 offset
, style
== 2);
5374 /* eh_return epilogues need %ecx added to the stack pointer. */
5377 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
5379 if (frame_pointer_needed
)
5381 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
5382 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
5383 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
5385 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
5386 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
5388 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
5393 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
5394 tmp
= plus_constant (tmp
, (frame
.to_allocate
5395 + frame
.nregs
* UNITS_PER_WORD
));
5396 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
5399 else if (!frame_pointer_needed
)
5400 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5401 GEN_INT (frame
.to_allocate
5402 + frame
.nregs
* UNITS_PER_WORD
),
5404 /* If not an i386, mov & pop is faster than "leave". */
5405 else if (TARGET_USE_LEAVE
|| optimize_size
5406 || !cfun
->machine
->use_fast_prologue_epilogue
)
5407 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
5410 pro_epilogue_adjust_stack (stack_pointer_rtx
,
5411 hard_frame_pointer_rtx
,
5414 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
5416 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
5421 /* First step is to deallocate the stack frame so that we can
5422 pop the registers. */
5425 if (!frame_pointer_needed
)
5427 pro_epilogue_adjust_stack (stack_pointer_rtx
,
5428 hard_frame_pointer_rtx
,
5429 GEN_INT (offset
), style
);
5431 else if (frame
.to_allocate
)
5432 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5433 GEN_INT (frame
.to_allocate
), style
);
5435 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5436 if (ix86_save_reg (regno
, false))
5439 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
5441 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
5443 if (frame_pointer_needed
)
5445 /* Leave results in shorter dependency chains on CPUs that are
5446 able to grok it fast. */
5447 if (TARGET_USE_LEAVE
)
5448 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
5449 else if (TARGET_64BIT
)
5450 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
5452 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
5456 /* Sibcall epilogues don't want a return instruction. */
5460 if (current_function_pops_args
&& current_function_args_size
)
5462 rtx popc
= GEN_INT (current_function_pops_args
);
5464 /* i386 can only pop 64K bytes. If asked to pop more, pop
5465 return address, do explicit add, and jump indirectly to the
5468 if (current_function_pops_args
>= 65536)
5470 rtx ecx
= gen_rtx_REG (SImode
, 2);
5472 /* There is no "pascal" calling convention in 64bit ABI. */
5476 emit_insn (gen_popsi1 (ecx
));
5477 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
5478 emit_jump_insn (gen_return_indirect_internal (ecx
));
5481 emit_jump_insn (gen_return_pop_internal (popc
));
5484 emit_jump_insn (gen_return_internal ());
5487 /* Reset from the function's potential modifications. */
5490 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
5491 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
5493 if (pic_offset_table_rtx
)
5494 REGNO (pic_offset_table_rtx
) = REAL_PIC_OFFSET_TABLE_REGNUM
;
5497 /* Extract the parts of an RTL expression that is a valid memory address
5498 for an instruction. Return 0 if the structure of the address is
5499 grossly off. Return -1 if the address contains ASHIFT, so it is not
5500 strictly valid, but still used for computing length of lea instruction. */
5503 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
5505 rtx base
= NULL_RTX
;
5506 rtx index
= NULL_RTX
;
5507 rtx disp
= NULL_RTX
;
5508 HOST_WIDE_INT scale
= 1;
5509 rtx scale_rtx
= NULL_RTX
;
5511 enum ix86_address_seg seg
= SEG_DEFAULT
;
5513 if (GET_CODE (addr
) == REG
|| GET_CODE (addr
) == SUBREG
)
5515 else if (GET_CODE (addr
) == PLUS
)
5525 addends
[n
++] = XEXP (op
, 1);
5528 while (GET_CODE (op
) == PLUS
);
5533 for (i
= n
; i
>= 0; --i
)
5536 switch (GET_CODE (op
))
5541 index
= XEXP (op
, 0);
5542 scale_rtx
= XEXP (op
, 1);
5546 if (XINT (op
, 1) == UNSPEC_TP
5547 && TARGET_TLS_DIRECT_SEG_REFS
5548 && seg
== SEG_DEFAULT
)
5549 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
5578 else if (GET_CODE (addr
) == MULT
)
5580 index
= XEXP (addr
, 0); /* index*scale */
5581 scale_rtx
= XEXP (addr
, 1);
5583 else if (GET_CODE (addr
) == ASHIFT
)
5587 /* We're called for lea too, which implements ashift on occasion. */
5588 index
= XEXP (addr
, 0);
5589 tmp
= XEXP (addr
, 1);
5590 if (GET_CODE (tmp
) != CONST_INT
)
5592 scale
= INTVAL (tmp
);
5593 if ((unsigned HOST_WIDE_INT
) scale
> 3)
5599 disp
= addr
; /* displacement */
5601 /* Extract the integral value of scale. */
5604 if (GET_CODE (scale_rtx
) != CONST_INT
)
5606 scale
= INTVAL (scale_rtx
);
5609 /* Allow arg pointer and stack pointer as index if there is not scaling. */
5610 if (base
&& index
&& scale
== 1
5611 && (index
== arg_pointer_rtx
5612 || index
== frame_pointer_rtx
5613 || (REG_P (index
) && REGNO (index
) == STACK_POINTER_REGNUM
)))
5620 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5621 if ((base
== hard_frame_pointer_rtx
5622 || base
== frame_pointer_rtx
5623 || base
== arg_pointer_rtx
) && !disp
)
5626 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5627 Avoid this by transforming to [%esi+0]. */
5628 if (ix86_tune
== PROCESSOR_K6
&& !optimize_size
5629 && base
&& !index
&& !disp
5631 && REGNO_REG_CLASS (REGNO (base
)) == SIREG
)
5634 /* Special case: encode reg+reg instead of reg*2. */
5635 if (!base
&& index
&& scale
&& scale
== 2)
5636 base
= index
, scale
= 1;
5638 /* Special case: scaling cannot be encoded without base or displacement. */
5639 if (!base
&& !disp
&& index
&& scale
!= 1)
5651 /* Return cost of the memory address x.
5652 For i386, it is better to use a complex address than let gcc copy
5653 the address into a reg and make a new pseudo. But not if the address
5654 requires to two regs - that would mean more pseudos with longer
5657 ix86_address_cost (rtx x
)
5659 struct ix86_address parts
;
5662 if (!ix86_decompose_address (x
, &parts
))
5665 /* More complex memory references are better. */
5666 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
5668 if (parts
.seg
!= SEG_DEFAULT
)
5671 /* Attempt to minimize number of registers in the address. */
5673 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
5675 && (!REG_P (parts
.index
)
5676 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
5680 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
5682 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
5683 && parts
.base
!= parts
.index
)
5686 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5687 since it's predecode logic can't detect the length of instructions
5688 and it degenerates to vector decoded. Increase cost of such
5689 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5690 to split such addresses or even refuse such addresses at all.
5692 Following addressing modes are affected:
5697 The first and last case may be avoidable by explicitly coding the zero in
5698 memory address, but I don't have AMD-K6 machine handy to check this
5702 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
5703 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
5704 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
5710 /* If X is a machine specific address (i.e. a symbol or label being
5711 referenced as a displacement from the GOT implemented using an
5712 UNSPEC), then return the base term. Otherwise return X. */
5715 ix86_find_base_term (rtx x
)
5721 if (GET_CODE (x
) != CONST
)
5724 if (GET_CODE (term
) == PLUS
5725 && (GET_CODE (XEXP (term
, 1)) == CONST_INT
5726 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
5727 term
= XEXP (term
, 0);
5728 if (GET_CODE (term
) != UNSPEC
5729 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
5732 term
= XVECEXP (term
, 0, 0);
5734 if (GET_CODE (term
) != SYMBOL_REF
5735 && GET_CODE (term
) != LABEL_REF
)
5741 term
= ix86_delegitimize_address (x
);
5743 if (GET_CODE (term
) != SYMBOL_REF
5744 && GET_CODE (term
) != LABEL_REF
)
5750 /* Determine if a given RTX is a valid constant. We already know this
5751 satisfies CONSTANT_P. */
5754 legitimate_constant_p (rtx x
)
5758 switch (GET_CODE (x
))
5761 /* TLS symbols are not constant. */
5762 if (tls_symbolic_operand (x
, Pmode
))
5767 inner
= XEXP (x
, 0);
5769 /* Offsets of TLS symbols are never valid.
5770 Discourage CSE from creating them. */
5771 if (GET_CODE (inner
) == PLUS
5772 && tls_symbolic_operand (XEXP (inner
, 0), Pmode
))
5775 if (GET_CODE (inner
) == PLUS
)
5777 if (GET_CODE (XEXP (inner
, 1)) != CONST_INT
)
5779 inner
= XEXP (inner
, 0);
5782 /* Only some unspecs are valid as "constants". */
5783 if (GET_CODE (inner
) == UNSPEC
)
5784 switch (XINT (inner
, 1))
5788 return local_exec_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
5790 return local_dynamic_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
5800 /* Otherwise we handle everything else in the move patterns. */
5804 /* Determine if it's legal to put X into the constant pool. This
5805 is not possible for the address of thread-local symbols, which
5806 is checked above. */
5809 ix86_cannot_force_const_mem (rtx x
)
5811 return !legitimate_constant_p (x
);
5814 /* Determine if a given RTX is a valid constant address. */
5817 constant_address_p (rtx x
)
5819 return CONSTANT_P (x
) && legitimate_address_p (Pmode
, x
, 1);
5822 /* Nonzero if the constant value X is a legitimate general operand
5823 when generating PIC code. It is given that flag_pic is on and
5824 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5827 legitimate_pic_operand_p (rtx x
)
5831 switch (GET_CODE (x
))
5834 inner
= XEXP (x
, 0);
5836 /* Only some unspecs are valid as "constants". */
5837 if (GET_CODE (inner
) == UNSPEC
)
5838 switch (XINT (inner
, 1))
5841 return local_exec_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
5849 return legitimate_pic_address_disp_p (x
);
5856 /* Determine if a given CONST RTX is a valid memory displacement
5860 legitimate_pic_address_disp_p (rtx disp
)
5864 /* In 64bit mode we can allow direct addresses of symbols and labels
5865 when they are not dynamic symbols. */
5868 /* TLS references should always be enclosed in UNSPEC. */
5869 if (tls_symbolic_operand (disp
, GET_MODE (disp
)))
5871 if (GET_CODE (disp
) == SYMBOL_REF
5872 && ix86_cmodel
== CM_SMALL_PIC
5873 && SYMBOL_REF_LOCAL_P (disp
))
5875 if (GET_CODE (disp
) == LABEL_REF
)
5877 if (GET_CODE (disp
) == CONST
5878 && GET_CODE (XEXP (disp
, 0)) == PLUS
)
5880 rtx op0
= XEXP (XEXP (disp
, 0), 0);
5881 rtx op1
= XEXP (XEXP (disp
, 0), 1);
5883 /* TLS references should always be enclosed in UNSPEC. */
5884 if (tls_symbolic_operand (op0
, GET_MODE (op0
)))
5886 if (((GET_CODE (op0
) == SYMBOL_REF
5887 && ix86_cmodel
== CM_SMALL_PIC
5888 && SYMBOL_REF_LOCAL_P (op0
))
5889 || GET_CODE (op0
) == LABEL_REF
)
5890 && GET_CODE (op1
) == CONST_INT
5891 && INTVAL (op1
) < 16*1024*1024
5892 && INTVAL (op1
) >= -16*1024*1024)
5896 if (GET_CODE (disp
) != CONST
)
5898 disp
= XEXP (disp
, 0);
5902 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5903 of GOT tables. We should not need these anyway. */
5904 if (GET_CODE (disp
) != UNSPEC
5905 || XINT (disp
, 1) != UNSPEC_GOTPCREL
)
5908 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
5909 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
5915 if (GET_CODE (disp
) == PLUS
)
5917 if (GET_CODE (XEXP (disp
, 1)) != CONST_INT
)
5919 disp
= XEXP (disp
, 0);
5923 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5924 if (TARGET_MACHO
&& GET_CODE (disp
) == MINUS
)
5926 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
5927 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
5928 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
5930 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
5931 if (! strcmp (sym_name
, "<pic base>"))
5936 if (GET_CODE (disp
) != UNSPEC
)
5939 switch (XINT (disp
, 1))
5944 return GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
;
5946 if (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
5947 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
5948 return local_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5950 case UNSPEC_GOTTPOFF
:
5951 case UNSPEC_GOTNTPOFF
:
5952 case UNSPEC_INDNTPOFF
:
5955 return initial_exec_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5957 return local_exec_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5959 return local_dynamic_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5965 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5966 memory address for an instruction. The MODE argument is the machine mode
5967 for the MEM expression that wants to use this address.
5969 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5970 convert common non-canonical forms to canonical form so that they will
5974 legitimate_address_p (enum machine_mode mode
, rtx addr
, int strict
)
5976 struct ix86_address parts
;
5977 rtx base
, index
, disp
;
5978 HOST_WIDE_INT scale
;
5979 const char *reason
= NULL
;
5980 rtx reason_rtx
= NULL_RTX
;
5982 if (TARGET_DEBUG_ADDR
)
5985 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5986 GET_MODE_NAME (mode
), strict
);
5990 if (ix86_decompose_address (addr
, &parts
) <= 0)
5992 reason
= "decomposition failed";
5997 index
= parts
.index
;
5999 scale
= parts
.scale
;
6001 /* Validate base register.
6003 Don't allow SUBREG's here, it can lead to spill failures when the base
6004 is one word out of a two word structure, which is represented internally
6011 if (GET_CODE (base
) != REG
)
6013 reason
= "base is not a register";
6017 if (GET_MODE (base
) != Pmode
)
6019 reason
= "base is not in Pmode";
6023 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (base
))
6024 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (base
)))
6026 reason
= "base is not valid";
6031 /* Validate index register.
6033 Don't allow SUBREG's here, it can lead to spill failures when the index
6034 is one word out of a two word structure, which is represented internally
6041 if (GET_CODE (index
) != REG
)
6043 reason
= "index is not a register";
6047 if (GET_MODE (index
) != Pmode
)
6049 reason
= "index is not in Pmode";
6053 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (index
))
6054 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (index
)))
6056 reason
= "index is not valid";
6061 /* Validate scale factor. */
6064 reason_rtx
= GEN_INT (scale
);
6067 reason
= "scale without index";
6071 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
6073 reason
= "scale is not a valid multiplier";
6078 /* Validate displacement. */
6083 if (GET_CODE (disp
) == CONST
6084 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
6085 switch (XINT (XEXP (disp
, 0), 1))
6089 case UNSPEC_GOTPCREL
:
6092 goto is_legitimate_pic
;
6094 case UNSPEC_GOTTPOFF
:
6095 case UNSPEC_GOTNTPOFF
:
6096 case UNSPEC_INDNTPOFF
:
6102 reason
= "invalid address unspec";
6106 else if (flag_pic
&& (SYMBOLIC_CONST (disp
)
6108 && !machopic_operand_p (disp
)
6113 if (TARGET_64BIT
&& (index
|| base
))
6115 /* foo@dtpoff(%rX) is ok. */
6116 if (GET_CODE (disp
) != CONST
6117 || GET_CODE (XEXP (disp
, 0)) != PLUS
6118 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
6119 || GET_CODE (XEXP (XEXP (disp
, 0), 1)) != CONST_INT
6120 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
6121 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
6123 reason
= "non-constant pic memory reference";
6127 else if (! legitimate_pic_address_disp_p (disp
))
6129 reason
= "displacement is an invalid pic construct";
6133 /* This code used to verify that a symbolic pic displacement
6134 includes the pic_offset_table_rtx register.
6136 While this is good idea, unfortunately these constructs may
6137 be created by "adds using lea" optimization for incorrect
6146 This code is nonsensical, but results in addressing
6147 GOT table with pic_offset_table_rtx base. We can't
6148 just refuse it easily, since it gets matched by
6149 "addsi3" pattern, that later gets split to lea in the
6150 case output register differs from input. While this
6151 can be handled by separate addsi pattern for this case
6152 that never results in lea, this seems to be easier and
6153 correct fix for crash to disable this test. */
6155 else if (GET_CODE (disp
) != LABEL_REF
6156 && GET_CODE (disp
) != CONST_INT
6157 && (GET_CODE (disp
) != CONST
6158 || !legitimate_constant_p (disp
))
6159 && (GET_CODE (disp
) != SYMBOL_REF
6160 || !legitimate_constant_p (disp
)))
6162 reason
= "displacement is not constant";
6165 else if (TARGET_64BIT
&& !x86_64_sign_extended_value (disp
))
6167 reason
= "displacement is out of range";
6172 /* Everything looks valid. */
6173 if (TARGET_DEBUG_ADDR
)
6174 fprintf (stderr
, "Success.\n");
6178 if (TARGET_DEBUG_ADDR
)
6180 fprintf (stderr
, "Error: %s\n", reason
);
6181 debug_rtx (reason_rtx
);
6186 /* Return an unique alias set for the GOT. */
6188 static HOST_WIDE_INT
6189 ix86_GOT_alias_set (void)
6191 static HOST_WIDE_INT set
= -1;
6193 set
= new_alias_set ();
6197 /* Return a legitimate reference for ORIG (an address) using the
6198 register REG. If REG is 0, a new pseudo is generated.
6200 There are two types of references that must be handled:
6202 1. Global data references must load the address from the GOT, via
6203 the PIC reg. An insn is emitted to do this load, and the reg is
6206 2. Static data references, constant pool addresses, and code labels
6207 compute the address as an offset from the GOT, whose base is in
6208 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6209 differentiate them from global data objects. The returned
6210 address is the PIC reg + an unspec constant.
6212 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6213 reg also appears in the address. */
6216 legitimize_pic_address (rtx orig
, rtx reg
)
6224 reg
= gen_reg_rtx (Pmode
);
6225 /* Use the generic Mach-O PIC machinery. */
6226 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
6229 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
6231 else if (!TARGET_64BIT
&& local_symbolic_operand (addr
, Pmode
))
6233 /* This symbol may be referenced via a displacement from the PIC
6234 base address (@GOTOFF). */
6236 if (reload_in_progress
)
6237 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6238 if (GET_CODE (addr
) == CONST
)
6239 addr
= XEXP (addr
, 0);
6240 if (GET_CODE (addr
) == PLUS
)
6242 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
6243 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
6246 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
6247 new = gen_rtx_CONST (Pmode
, new);
6248 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6252 emit_move_insn (reg
, new);
6256 else if (GET_CODE (addr
) == SYMBOL_REF
)
6260 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
6261 new = gen_rtx_CONST (Pmode
, new);
6262 new = gen_rtx_MEM (Pmode
, new);
6263 RTX_UNCHANGING_P (new) = 1;
6264 set_mem_alias_set (new, ix86_GOT_alias_set ());
6267 reg
= gen_reg_rtx (Pmode
);
6268 /* Use directly gen_movsi, otherwise the address is loaded
6269 into register for CSE. We don't want to CSE this addresses,
6270 instead we CSE addresses from the GOT table, so skip this. */
6271 emit_insn (gen_movsi (reg
, new));
6276 /* This symbol must be referenced via a load from the
6277 Global Offset Table (@GOT). */
6279 if (reload_in_progress
)
6280 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6281 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
6282 new = gen_rtx_CONST (Pmode
, new);
6283 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6284 new = gen_rtx_MEM (Pmode
, new);
6285 RTX_UNCHANGING_P (new) = 1;
6286 set_mem_alias_set (new, ix86_GOT_alias_set ());
6289 reg
= gen_reg_rtx (Pmode
);
6290 emit_move_insn (reg
, new);
6296 if (GET_CODE (addr
) == CONST
)
6298 addr
= XEXP (addr
, 0);
6300 /* We must match stuff we generate before. Assume the only
6301 unspecs that can get here are ours. Not that we could do
6302 anything with them anyway.... */
6303 if (GET_CODE (addr
) == UNSPEC
6304 || (GET_CODE (addr
) == PLUS
6305 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
6307 if (GET_CODE (addr
) != PLUS
)
6310 if (GET_CODE (addr
) == PLUS
)
6312 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
6314 /* Check first to see if this is a constant offset from a @GOTOFF
6315 symbol reference. */
6316 if (local_symbolic_operand (op0
, Pmode
)
6317 && GET_CODE (op1
) == CONST_INT
)
6321 if (reload_in_progress
)
6322 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6323 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
6325 new = gen_rtx_PLUS (Pmode
, new, op1
);
6326 new = gen_rtx_CONST (Pmode
, new);
6327 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6331 emit_move_insn (reg
, new);
6337 if (INTVAL (op1
) < -16*1024*1024
6338 || INTVAL (op1
) >= 16*1024*1024)
6339 new = gen_rtx_PLUS (Pmode
, op0
, force_reg (Pmode
, op1
));
6344 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
6345 new = legitimize_pic_address (XEXP (addr
, 1),
6346 base
== reg
? NULL_RTX
: reg
);
6348 if (GET_CODE (new) == CONST_INT
)
6349 new = plus_constant (base
, INTVAL (new));
6352 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
6354 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
6355 new = XEXP (new, 1);
6357 new = gen_rtx_PLUS (Pmode
, base
, new);
6365 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6368 get_thread_pointer (int to_reg
)
6372 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
6376 reg
= gen_reg_rtx (Pmode
);
6377 insn
= gen_rtx_SET (VOIDmode
, reg
, tp
);
6378 insn
= emit_insn (insn
);
6383 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6384 false if we expect this to be used for a memory address and true if
6385 we expect to load the address into a register. */
6388 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
6390 rtx dest
, base
, off
, pic
;
6395 case TLS_MODEL_GLOBAL_DYNAMIC
:
6396 dest
= gen_reg_rtx (Pmode
);
6399 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
6402 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
6403 insns
= get_insns ();
6406 emit_libcall_block (insns
, dest
, rax
, x
);
6409 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
6412 case TLS_MODEL_LOCAL_DYNAMIC
:
6413 base
= gen_reg_rtx (Pmode
);
6416 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
6419 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
6420 insns
= get_insns ();
6423 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
6424 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
6425 emit_libcall_block (insns
, base
, rax
, note
);
6428 emit_insn (gen_tls_local_dynamic_base_32 (base
));
6430 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
6431 off
= gen_rtx_CONST (Pmode
, off
);
6433 return gen_rtx_PLUS (Pmode
, base
, off
);
6435 case TLS_MODEL_INITIAL_EXEC
:
6439 type
= UNSPEC_GOTNTPOFF
;
6443 if (reload_in_progress
)
6444 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6445 pic
= pic_offset_table_rtx
;
6446 type
= TARGET_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
6448 else if (!TARGET_GNU_TLS
)
6450 pic
= gen_reg_rtx (Pmode
);
6451 emit_insn (gen_set_got (pic
));
6452 type
= UNSPEC_GOTTPOFF
;
6457 type
= UNSPEC_INDNTPOFF
;
6460 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
6461 off
= gen_rtx_CONST (Pmode
, off
);
6463 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
6464 off
= gen_rtx_MEM (Pmode
, off
);
6465 RTX_UNCHANGING_P (off
) = 1;
6466 set_mem_alias_set (off
, ix86_GOT_alias_set ());
6468 if (TARGET_64BIT
|| TARGET_GNU_TLS
)
6470 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
6471 off
= force_reg (Pmode
, off
);
6472 return gen_rtx_PLUS (Pmode
, base
, off
);
6476 base
= get_thread_pointer (true);
6477 dest
= gen_reg_rtx (Pmode
);
6478 emit_insn (gen_subsi3 (dest
, base
, off
));
6482 case TLS_MODEL_LOCAL_EXEC
:
6483 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
6484 (TARGET_64BIT
|| TARGET_GNU_TLS
)
6485 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
6486 off
= gen_rtx_CONST (Pmode
, off
);
6488 if (TARGET_64BIT
|| TARGET_GNU_TLS
)
6490 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
6491 return gen_rtx_PLUS (Pmode
, base
, off
);
6495 base
= get_thread_pointer (true);
6496 dest
= gen_reg_rtx (Pmode
);
6497 emit_insn (gen_subsi3 (dest
, base
, off
));
6508 /* Try machine-dependent ways of modifying an illegitimate address
6509 to be legitimate. If we find one, return the new, valid address.
6510 This macro is used in only one place: `memory_address' in explow.c.
6512 OLDX is the address as it was before break_out_memory_refs was called.
6513 In some cases it is useful to look at this to decide what needs to be done.
6515 MODE and WIN are passed so that this macro can use
6516 GO_IF_LEGITIMATE_ADDRESS.
6518 It is always safe for this macro to do nothing. It exists to recognize
6519 opportunities to optimize the output.
6521 For the 80386, we handle X+REG by loading X into a register R and
6522 using R+REG. R will go in a general reg and indexing will be used.
6523 However, if REG is a broken-out memory address or multiplication,
6524 nothing needs to be done because REG can certainly go in a general reg.
6526 When -fpic is used, special handling is needed for symbolic references.
6527 See comments by legitimize_pic_address in i386.c for details. */
6530 legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
, enum machine_mode mode
)
6535 if (TARGET_DEBUG_ADDR
)
6537 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6538 GET_MODE_NAME (mode
));
6542 log
= tls_symbolic_operand (x
, mode
);
6544 return legitimize_tls_address (x
, log
, false);
6546 if (flag_pic
&& SYMBOLIC_CONST (x
))
6547 return legitimize_pic_address (x
, 0);
6549 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6550 if (GET_CODE (x
) == ASHIFT
6551 && GET_CODE (XEXP (x
, 1)) == CONST_INT
6552 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (x
, 1)))) < 4)
6555 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
6556 GEN_INT (1 << log
));
6559 if (GET_CODE (x
) == PLUS
)
6561 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6563 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
6564 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
6565 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)))) < 4)
6568 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
6569 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
6570 GEN_INT (1 << log
));
6573 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
6574 && GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
6575 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x
, 1), 1)))) < 4)
6578 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
6579 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
6580 GEN_INT (1 << log
));
6583 /* Put multiply first if it isn't already. */
6584 if (GET_CODE (XEXP (x
, 1)) == MULT
)
6586 rtx tmp
= XEXP (x
, 0);
6587 XEXP (x
, 0) = XEXP (x
, 1);
6592 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6593 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6594 created by virtual register instantiation, register elimination, and
6595 similar optimizations. */
6596 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
6599 x
= gen_rtx_PLUS (Pmode
,
6600 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
6601 XEXP (XEXP (x
, 1), 0)),
6602 XEXP (XEXP (x
, 1), 1));
6606 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6607 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6608 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
6609 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
6610 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
6611 && CONSTANT_P (XEXP (x
, 1)))
6614 rtx other
= NULL_RTX
;
6616 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
6618 constant
= XEXP (x
, 1);
6619 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
6621 else if (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 1), 1)) == CONST_INT
)
6623 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
6624 other
= XEXP (x
, 1);
6632 x
= gen_rtx_PLUS (Pmode
,
6633 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
6634 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
6635 plus_constant (other
, INTVAL (constant
)));
6639 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
6642 if (GET_CODE (XEXP (x
, 0)) == MULT
)
6645 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
6648 if (GET_CODE (XEXP (x
, 1)) == MULT
)
6651 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
6655 && GET_CODE (XEXP (x
, 1)) == REG
6656 && GET_CODE (XEXP (x
, 0)) == REG
)
6659 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
6662 x
= legitimize_pic_address (x
, 0);
6665 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
6668 if (GET_CODE (XEXP (x
, 0)) == REG
)
6670 rtx temp
= gen_reg_rtx (Pmode
);
6671 rtx val
= force_operand (XEXP (x
, 1), temp
);
6673 emit_move_insn (temp
, val
);
6679 else if (GET_CODE (XEXP (x
, 1)) == REG
)
6681 rtx temp
= gen_reg_rtx (Pmode
);
6682 rtx val
= force_operand (XEXP (x
, 0), temp
);
6684 emit_move_insn (temp
, val
);
6694 /* Print an integer constant expression in assembler syntax. Addition
6695 and subtraction are the only arithmetic that may appear in these
6696 expressions. FILE is the stdio stream to write to, X is the rtx, and
6697 CODE is the operand print code from the output string. */
6700 output_pic_addr_const (FILE *file
, rtx x
, int code
)
6704 switch (GET_CODE (x
))
6714 assemble_name (file
, XSTR (x
, 0));
6715 if (!TARGET_MACHO
&& code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
6716 fputs ("@PLT", file
);
6723 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
6724 assemble_name (asm_out_file
, buf
);
6728 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
6732 /* This used to output parentheses around the expression,
6733 but that does not work on the 386 (either ATT or BSD assembler). */
6734 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6738 if (GET_MODE (x
) == VOIDmode
)
6740 /* We can use %d if the number is <32 bits and positive. */
6741 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
6742 fprintf (file
, "0x%lx%08lx",
6743 (unsigned long) CONST_DOUBLE_HIGH (x
),
6744 (unsigned long) CONST_DOUBLE_LOW (x
));
6746 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
6749 /* We can't handle floating point constants;
6750 PRINT_OPERAND must handle them. */
6751 output_operand_lossage ("floating constant misused");
6755 /* Some assemblers need integer constants to appear first. */
6756 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
)
6758 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6760 output_pic_addr_const (file
, XEXP (x
, 1), code
);
6762 else if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
6764 output_pic_addr_const (file
, XEXP (x
, 1), code
);
6766 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6774 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
6775 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6777 output_pic_addr_const (file
, XEXP (x
, 1), code
);
6779 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
6783 if (XVECLEN (x
, 0) != 1)
6785 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
6786 switch (XINT (x
, 1))
6789 fputs ("@GOT", file
);
6792 fputs ("@GOTOFF", file
);
6794 case UNSPEC_GOTPCREL
:
6795 fputs ("@GOTPCREL(%rip)", file
);
6797 case UNSPEC_GOTTPOFF
:
6798 /* FIXME: This might be @TPOFF in Sun ld too. */
6799 fputs ("@GOTTPOFF", file
);
6802 fputs ("@TPOFF", file
);
6806 fputs ("@TPOFF", file
);
6808 fputs ("@NTPOFF", file
);
6811 fputs ("@DTPOFF", file
);
6813 case UNSPEC_GOTNTPOFF
:
6815 fputs ("@GOTTPOFF(%rip)", file
);
6817 fputs ("@GOTNTPOFF", file
);
6819 case UNSPEC_INDNTPOFF
:
6820 fputs ("@INDNTPOFF", file
);
6823 output_operand_lossage ("invalid UNSPEC as operand");
6829 output_operand_lossage ("invalid expression as operand");
6833 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6834 We need to handle our special PIC relocations. */
6837 i386_dwarf_output_addr_const (FILE *file
, rtx x
)
6840 fprintf (file
, "%s", TARGET_64BIT
? ASM_QUAD
: ASM_LONG
);
6844 fprintf (file
, "%s", ASM_LONG
);
6847 output_pic_addr_const (file
, x
, '\0');
6849 output_addr_const (file
, x
);
6853 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6854 We need to emit DTP-relative relocations. */
6857 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
6859 fputs (ASM_LONG
, file
);
6860 output_addr_const (file
, x
);
6861 fputs ("@DTPOFF", file
);
6867 fputs (", 0", file
);
6874 /* In the name of slightly smaller debug output, and to cater to
6875 general assembler losage, recognize PIC+GOTOFF and turn it back
6876 into a direct symbol reference. */
6879 ix86_delegitimize_address (rtx orig_x
)
6883 if (GET_CODE (x
) == MEM
)
6888 if (GET_CODE (x
) != CONST
6889 || GET_CODE (XEXP (x
, 0)) != UNSPEC
6890 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
6891 || GET_CODE (orig_x
) != MEM
)
6893 return XVECEXP (XEXP (x
, 0), 0, 0);
6896 if (GET_CODE (x
) != PLUS
6897 || GET_CODE (XEXP (x
, 1)) != CONST
)
6900 if (GET_CODE (XEXP (x
, 0)) == REG
6901 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
6902 /* %ebx + GOT/GOTOFF */
6904 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
6906 /* %ebx + %reg * scale + GOT/GOTOFF */
6908 if (GET_CODE (XEXP (y
, 0)) == REG
6909 && REGNO (XEXP (y
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
6911 else if (GET_CODE (XEXP (y
, 1)) == REG
6912 && REGNO (XEXP (y
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
6916 if (GET_CODE (y
) != REG
6917 && GET_CODE (y
) != MULT
6918 && GET_CODE (y
) != ASHIFT
)
6924 x
= XEXP (XEXP (x
, 1), 0);
6925 if (GET_CODE (x
) == UNSPEC
6926 && ((XINT (x
, 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
6927 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& GET_CODE (orig_x
) != MEM
)))
6930 return gen_rtx_PLUS (Pmode
, y
, XVECEXP (x
, 0, 0));
6931 return XVECEXP (x
, 0, 0);
6934 if (GET_CODE (x
) == PLUS
6935 && GET_CODE (XEXP (x
, 0)) == UNSPEC
6936 && GET_CODE (XEXP (x
, 1)) == CONST_INT
6937 && ((XINT (XEXP (x
, 0), 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
6938 || (XINT (XEXP (x
, 0), 1) == UNSPEC_GOTOFF
6939 && GET_CODE (orig_x
) != MEM
)))
6941 x
= gen_rtx_PLUS (VOIDmode
, XVECEXP (XEXP (x
, 0), 0, 0), XEXP (x
, 1));
6943 return gen_rtx_PLUS (Pmode
, y
, x
);
6951 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
6956 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
6958 enum rtx_code second_code
, bypass_code
;
6959 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
6960 if (bypass_code
!= NIL
|| second_code
!= NIL
)
6962 code
= ix86_fp_compare_code_to_integer (code
);
6966 code
= reverse_condition (code
);
6977 if (mode
!= CCmode
&& mode
!= CCNOmode
&& mode
!= CCGCmode
)
6982 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6983 Those same assemblers have the same but opposite losage on cmov. */
6986 suffix
= fp
? "nbe" : "a";
6989 if (mode
== CCNOmode
|| mode
== CCGOCmode
)
6991 else if (mode
== CCmode
|| mode
== CCGCmode
)
7002 if (mode
== CCNOmode
|| mode
== CCGOCmode
)
7004 else if (mode
== CCmode
|| mode
== CCGCmode
)
7013 suffix
= fp
? "nb" : "ae";
7016 if (mode
!= CCmode
&& mode
!= CCGCmode
&& mode
!= CCNOmode
)
7026 suffix
= fp
? "u" : "p";
7029 suffix
= fp
? "nu" : "np";
7034 fputs (suffix
, file
);
7037 /* Print the name of register X to FILE based on its machine mode and number.
7038 If CODE is 'w', pretend the mode is HImode.
7039 If CODE is 'b', pretend the mode is QImode.
7040 If CODE is 'k', pretend the mode is SImode.
7041 If CODE is 'q', pretend the mode is DImode.
7042 If CODE is 'h', pretend the reg is the `high' byte register.
7043 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7046 print_reg (rtx x
, int code
, FILE *file
)
7048 if (REGNO (x
) == ARG_POINTER_REGNUM
7049 || REGNO (x
) == FRAME_POINTER_REGNUM
7050 || REGNO (x
) == FLAGS_REG
7051 || REGNO (x
) == FPSR_REG
)
7054 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
7057 if (code
== 'w' || MMX_REG_P (x
))
7059 else if (code
== 'b')
7061 else if (code
== 'k')
7063 else if (code
== 'q')
7065 else if (code
== 'y')
7067 else if (code
== 'h')
7070 code
= GET_MODE_SIZE (GET_MODE (x
));
7072 /* Irritatingly, AMD extended registers use different naming convention
7073 from the normal registers. */
7074 if (REX_INT_REG_P (x
))
7081 error ("extended registers have no high halves");
7084 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7087 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7090 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7093 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7096 error ("unsupported operand size for extended register");
7104 if (STACK_TOP_P (x
))
7106 fputs ("st(0)", file
);
7113 if (! ANY_FP_REG_P (x
))
7114 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
7119 fputs (hi_reg_name
[REGNO (x
)], file
);
7122 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
7124 fputs (qi_reg_name
[REGNO (x
)], file
);
7127 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
7129 fputs (qi_high_reg_name
[REGNO (x
)], file
);
7136 /* Locate some local-dynamic symbol still in use by this function
7137 so that we can print its name in some tls_local_dynamic_base
7141 get_some_local_dynamic_name (void)
7145 if (cfun
->machine
->some_ld_name
)
7146 return cfun
->machine
->some_ld_name
;
7148 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
7150 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
7151 return cfun
->machine
->some_ld_name
;
7157 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
7161 if (GET_CODE (x
) == SYMBOL_REF
7162 && local_dynamic_symbolic_operand (x
, Pmode
))
7164 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
7172 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7173 C -- print opcode suffix for set/cmov insn.
7174 c -- like C, but print reversed condition
7175 F,f -- likewise, but for floating-point.
7176 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7178 R -- print the prefix for register names.
7179 z -- print the opcode suffix for the size of the current operand.
7180 * -- print a star (in certain assembler syntax)
7181 A -- print an absolute memory reference.
7182 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7183 s -- print a shift double count, followed by the assemblers argument
7185 b -- print the QImode name of the register for the indicated operand.
7186 %b0 would print %al if operands[0] is reg 0.
7187 w -- likewise, print the HImode name of the register.
7188 k -- likewise, print the SImode name of the register.
7189 q -- likewise, print the DImode name of the register.
7190 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7191 y -- print "st(0)" instead of "st" as a register.
7192 D -- print condition for SSE cmp instruction.
7193 P -- if PIC, print an @PLT suffix.
/* NOTE(review): this extract is damaged -- interior source lines are
   missing (see the gaps in the embedded line numbers), so only
   statement fragments of each function are visible.  All comments
   added below are hedged accordingly and must be confirmed against an
   intact copy of the file.  */
7194 X -- don't print any sort of PIC '@' suffix for a symbol.
7195 & -- print some in-use local-dynamic symbol name.
/* print_operand: emit operand X to FILE, formatted per the
   single-letter modifier CODE (fragments of the code table appear
   above).  Dispatch body (the switch on CODE) is largely elided
   from this extract.  */
7199 print_operand (FILE *file
, rtx x
, int code
)
7206 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7211 assemble_name (file
, get_some_local_dynamic_name ());
7215 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7217 else if (ASSEMBLER_DIALECT
== ASM_INTEL
)
7219 /* Intel syntax. For absolute addresses, registers should not
7220 be surrounded by braces. */
7221 if (GET_CODE (x
) != REG
)
7224 PRINT_OPERAND (file
, x
, 0);
7232 PRINT_OPERAND (file
, x
, 0);
7237 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7242 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7247 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7252 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7257 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7262 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7267 /* 387 opcodes don't get size suffixes if the operands are
7269 if (STACK_REG_P (x
))
7272 /* Likewise if using Intel opcodes. */
7273 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
7276 /* This is the size of op from size of operand. */
7277 switch (GET_MODE_SIZE (GET_MODE (x
)))
7280 #ifdef HAVE_GAS_FILDS_FISTS
7286 if (GET_MODE (x
) == SFmode
)
7301 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
7303 #ifdef GAS_MNEMONICS
7329 if (GET_CODE (x
) == CONST_INT
|| ! SHIFT_DOUBLE_OMITS_COUNT
)
7331 PRINT_OPERAND (file
, x
, 0);
7337 /* Little bit of braindamage here. The SSE compare instructions
7338 does use completely different names for the comparisons that the
7339 fp conditional moves. */
7340 switch (GET_CODE (x
))
7355 fputs ("unord", file
);
7359 fputs ("neq", file
);
7363 fputs ("nlt", file
);
7367 fputs ("nle", file
);
7370 fputs ("ord", file
);
7378 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7379 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7381 switch (GET_MODE (x
))
7383 case HImode
: putc ('w', file
); break;
7385 case SFmode
: putc ('l', file
); break;
7387 case DFmode
: putc ('q', file
); break;
/* Condition-code printing: the four put_condition_code calls below
   differ in their (reverse, fp) flag arguments -- presumably the
   'D'/'C'/'c'/'f'-style modifiers; confirm against an intact copy.  */
7395 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
7398 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7399 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7402 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
7405 /* Like above, but reverse condition */
7407 /* Check to see if argument to %c is really a constant
7408 and not a condition code which needs to be reversed. */
7409 if (GET_RTX_CLASS (GET_CODE (x
)) != '<')
7411 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7414 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
7417 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7418 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7421 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
/* Branch-prediction hint prefixes, only emitted when optimizing and
   the static prediction disagrees with the CPU's default guess.  */
7427 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
7430 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
7433 int pred_val
= INTVAL (XEXP (x
, 0));
7435 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
7436 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
7438 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
7439 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
7441 /* Emit hints only in the case default branch prediction
7442 heuristics would fail. */
7443 if (taken
!= cputaken
)
7445 /* We use 3e (DS) prefix for taken branches and
7446 2e (CS) prefix for not taken branches. */
7448 fputs ("ds ; ", file
);
7450 fputs ("cs ; ", file
);
7457 output_operand_lossage ("invalid operand code `%c'", code
);
/* No modifier (or fall-through): print the operand itself by its
   RTX class -- register, memory, float constant, or immediate.  */
7461 if (GET_CODE (x
) == REG
)
7462 print_reg (x
, code
, file
);
7464 else if (GET_CODE (x
) == MEM
)
7466 /* No `byte ptr' prefix for call instructions. */
7467 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
7470 switch (GET_MODE_SIZE (GET_MODE (x
)))
7472 case 1: size
= "BYTE"; break;
7473 case 2: size
= "WORD"; break;
7474 case 4: size
= "DWORD"; break;
7475 case 8: size
= "QWORD"; break;
7476 case 12: size
= "XWORD"; break;
7477 case 16: size
= "XMMWORD"; break;
7482 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7485 else if (code
== 'w')
7487 else if (code
== 'k')
7491 fputs (" PTR ", file
);
7495 /* Avoid (%rip) for call operands. */
7496 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
7497 && GET_CODE (x
) != CONST_INT
)
7498 output_addr_const (file
, x
);
7499 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
7500 output_operand_lossage ("invalid constraints for operand");
7505 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
7510 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
7511 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
7513 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7515 fprintf (file
, "0x%08lx", l
);
7518 /* These float cases don't actually occur as immediate operands. */
7519 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
7523 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
7524 fprintf (file
, "%s", dstr
);
7527 else if (GET_CODE (x
) == CONST_DOUBLE
7528 && GET_MODE (x
) == XFmode
)
7532 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
7533 fprintf (file
, "%s", dstr
);
7540 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
7542 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7545 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
7546 || GET_CODE (x
) == LABEL_REF
)
7548 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7551 fputs ("OFFSET FLAT:", file
);
7554 if (GET_CODE (x
) == CONST_INT
)
7555 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
7557 output_pic_addr_const (file
, x
, code
);
7559 output_addr_const (file
, x
);
7563 /* Print a memory operand whose address is ADDR. */
/* NOTE(review): damaged extract; interior lines are missing.
   Decomposes ADDR via ix86_decompose_address into base/index/disp/
   scale/segment parts and prints them in AT&T or Intel syntax.  The
   error path after a failed decompose is elided here.  */
7566 print_operand_address (FILE *file
, rtx addr
)
7568 struct ix86_address parts
;
7569 rtx base
, index
, disp
;
7572 if (! ix86_decompose_address (addr
, &parts
))
7576 index
= parts
.index
;
7578 scale
= parts
.scale
;
/* Segment-override prefix (fs:/gs:) for non-default segments.  */
7586 if (USER_LABEL_PREFIX
[0] == 0)
7588 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
)
7594 if (!base
&& !index
)
7596 /* Displacement only requires special attention. */
7598 if (GET_CODE (disp
) == CONST_INT
)
7600 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
7602 if (USER_LABEL_PREFIX
[0] == 0)
7604 fputs ("ds:", file
);
7606 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
7609 output_pic_addr_const (file
, disp
, 0);
7611 output_addr_const (file
, disp
);
7613 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7615 && ((GET_CODE (disp
) == SYMBOL_REF
7616 && ! tls_symbolic_operand (disp
, GET_MODE (disp
)))
7617 || GET_CODE (disp
) == LABEL_REF
7618 || (GET_CODE (disp
) == CONST
7619 && GET_CODE (XEXP (disp
, 0)) == PLUS
7620 && (GET_CODE (XEXP (XEXP (disp
, 0), 0)) == SYMBOL_REF
7621 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) == LABEL_REF
)
7622 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)))
7623 fputs ("(%rip)", file
);
/* AT&T syntax: disp(base,index,scale).  */
7627 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7632 output_pic_addr_const (file
, disp
, 0);
7633 else if (GET_CODE (disp
) == LABEL_REF
)
7634 output_asm_label (disp
);
7636 output_addr_const (file
, disp
);
7641 print_reg (base
, 0, file
);
7645 print_reg (index
, 0, file
);
7647 fprintf (file
, ",%d", scale
);
/* Intel syntax: [base+index*scale+disp] -- symbol printed first,
   numeric offset appended with explicit sign.  */
7653 rtx offset
= NULL_RTX
;
7657 /* Pull out the offset of a symbol; print any symbol itself. */
7658 if (GET_CODE (disp
) == CONST
7659 && GET_CODE (XEXP (disp
, 0)) == PLUS
7660 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
7662 offset
= XEXP (XEXP (disp
, 0), 1);
7663 disp
= gen_rtx_CONST (VOIDmode
,
7664 XEXP (XEXP (disp
, 0), 0));
7668 output_pic_addr_const (file
, disp
, 0);
7669 else if (GET_CODE (disp
) == LABEL_REF
)
7670 output_asm_label (disp
);
7671 else if (GET_CODE (disp
) == CONST_INT
)
7674 output_addr_const (file
, disp
);
7680 print_reg (base
, 0, file
);
7683 if (INTVAL (offset
) >= 0)
7685 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
7689 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
7696 print_reg (index
, 0, file
);
7698 fprintf (file
, "*%d", scale
);
/* NOTE(review): damaged extract; interior lines are missing.
   Prints TLS relocation suffixes (@GOTTPOFF, @TPOFF, @NTPOFF,
   @DTPOFF, @GOTNTPOFF, @INDNTPOFF) for UNSPEC address constants.
   Non-UNSPEC input appears to be rejected (early exit elided).  */
7706 output_addr_const_extra (FILE *file
, rtx x
)
7710 if (GET_CODE (x
) != UNSPEC
)
7713 op
= XVECEXP (x
, 0, 0);
7714 switch (XINT (x
, 1))
7716 case UNSPEC_GOTTPOFF
:
7717 output_addr_const (file
, op
);
7718 /* FIXME: This might be @TPOFF in Sun ld. */
7719 fputs ("@GOTTPOFF", file
);
7722 output_addr_const (file
, op
);
7723 fputs ("@TPOFF", file
);
7726 output_addr_const (file
, op
);
/* Both @TPOFF and @NTPOFF printed here -- presumably chosen by a
   TARGET_64BIT test elided from this extract; confirm.  */
7728 fputs ("@TPOFF", file
);
7730 fputs ("@NTPOFF", file
);
7733 output_addr_const (file
, op
);
7734 fputs ("@DTPOFF", file
);
7736 case UNSPEC_GOTNTPOFF
:
7737 output_addr_const (file
, op
);
7739 fputs ("@GOTTPOFF(%rip)", file
);
7741 fputs ("@GOTNTPOFF", file
);
7743 case UNSPEC_INDNTPOFF
:
7744 output_addr_const (file
, op
);
7745 fputs ("@INDNTPOFF", file
);
7755 /* Split one or more DImode RTL references into pairs of SImode
7756 references. The RTL can be REG, offsettable MEM, integer constant, or
7757 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7758 split and "num" is its length. lo_half and hi_half are output arrays
7759 that parallel "operands". */
/* NOTE(review): damaged extract; the loop header iterating over
   "num" operands is elided -- only the per-operand body is visible.  */
7762 split_di (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
7766 rtx op
= operands
[num
];
7768 /* simplify_subreg refuse to split volatile memory addresses,
7769 but we still have to handle it. */
7770 if (GET_CODE (op
) == MEM
)
/* MEM: low word at offset 0, high word at offset 4.  */
7772 lo_half
[num
] = adjust_address (op
, SImode
, 0);
7773 hi_half
[num
] = adjust_address (op
, SImode
, 4);
/* Non-MEM: subreg extraction; VOIDmode constants treated as DImode.  */
7777 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
7778 GET_MODE (op
) == VOIDmode
7779 ? DImode
: GET_MODE (op
), 0);
7780 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
7781 GET_MODE (op
) == VOIDmode
7782 ? DImode
: GET_MODE (op
), 4);
/* NOTE(review): the banner below says "pairs of SImode" and "DImode
   RTL", but the visible code splits TImode operands into DImode
   halves (offsets 0 and 8) -- the comment looks stale; confirm and
   fix in an intact copy.  Loop header over "num" is elided.  */
7786 /* Split one or more TImode RTL references into pairs of SImode
7787 references. The RTL can be REG, offsettable MEM, integer constant, or
7788 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7789 split and "num" is its length. lo_half and hi_half are output arrays
7790 that parallel "operands". */
7793 split_ti (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
7797 rtx op
= operands
[num
];
7799 /* simplify_subreg refuse to split volatile memory addresses, but we
7800 still have to handle it. */
7801 if (GET_CODE (op
) == MEM
)
7803 lo_half
[num
] = adjust_address (op
, DImode
, 0);
7804 hi_half
[num
] = adjust_address (op
, DImode
, 8);
7808 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
7809 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
7814 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7815 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7816 is the expression of the binary operation. The output may either be
7817 emitted here, or returned to the caller, like all output_* functions.
7819 There is no guarantee that the operands are the same mode, as they
7820 might be within FLOAT or FLOAT_EXTEND expressions. */
7822 #ifndef SYSV386_COMPAT
7823 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7824 wants to fix the assemblers because that causes incompatibility
7825 with gcc. No-one wants to fix gcc because that causes
7826 incompatibility with assemblers... You can use the option of
7827 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7828 #define SYSV386_COMPAT 1
/* NOTE(review): damaged extract; the switch-case labels, buffer
   setup (base mnemonic strcpy) and final return are elided -- only
   the template-suffix selection fragments are visible.  */
7832 output_387_binary_op (rtx insn
, rtx
*operands
)
7834 static char buf
[30];
7837 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]) | SSE_REG_P (operands
[2]);
7839 #ifdef ENABLE_CHECKING
7840 /* Even if we do not want to check the inputs, this documents input
7841 constraints. Which helps in understanding the following code. */
7842 if (STACK_REG_P (operands
[0])
7843 && ((REG_P (operands
[1])
7844 && REGNO (operands
[0]) == REGNO (operands
[1])
7845 && (STACK_REG_P (operands
[2]) || GET_CODE (operands
[2]) == MEM
))
7846 || (REG_P (operands
[2])
7847 && REGNO (operands
[0]) == REGNO (operands
[2])
7848 && (STACK_REG_P (operands
[1]) || GET_CODE (operands
[1]) == MEM
)))
7849 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
/* Integer-mode operand forces the fi* (integer) 387 forms.  */
7855 switch (GET_CODE (operands
[3]))
7858 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7859 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7867 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7868 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7876 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7877 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7885 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7886 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
/* SSE path: scalar single/double suffix chosen by operand mode.  */
7900 if (GET_MODE (operands
[0]) == SFmode
)
7901 strcat (buf
, "ss\t{%2, %0|%0, %2}");
7903 strcat (buf
, "sd\t{%2, %0|%0, %2}");
7908 switch (GET_CODE (operands
[3]))
/* Commutative ops (PLUS/MULT presumably): canonicalize so that
   operands[0] == operands[1].  */
7912 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
7914 rtx temp
= operands
[2];
7915 operands
[2] = operands
[1];
7919 /* know operands[0] == operands[1]. */
7921 if (GET_CODE (operands
[2]) == MEM
)
7927 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
7929 if (STACK_TOP_P (operands
[0]))
7930 /* How is it that we are storing to a dead operand[2]?
7931 Well, presumably operands[1] is dead too. We can't
7932 store the result to st(0) as st(0) gets popped on this
7933 instruction. Instead store to operands[2] (which I
7934 think has to be st(1)). st(1) will be popped later.
7935 gcc <= 2.8.1 didn't have this check and generated
7936 assembly code that the Unixware assembler rejected. */
7937 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7939 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7943 if (STACK_TOP_P (operands
[0]))
7944 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7946 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Non-commutative ops (MINUS/DIV presumably): direction matters,
   plus the SYSV386_COMPAT reversed-mnemonic quirk below.  */
7951 if (GET_CODE (operands
[1]) == MEM
)
7957 if (GET_CODE (operands
[2]) == MEM
)
7963 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
7966 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7967 derived assemblers, confusingly reverse the direction of
7968 the operation for fsub{r} and fdiv{r} when the
7969 destination register is not st(0). The Intel assembler
7970 doesn't have this brain damage. Read !SYSV386_COMPAT to
7971 figure out what the hardware really does. */
7972 if (STACK_TOP_P (operands
[0]))
7973 p
= "{p\t%0, %2|rp\t%2, %0}";
7975 p
= "{rp\t%2, %0|p\t%0, %2}";
7977 if (STACK_TOP_P (operands
[0]))
7978 /* As above for fmul/fadd, we can't store to st(0). */
7979 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7981 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7986 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
7989 if (STACK_TOP_P (operands
[0]))
7990 p
= "{rp\t%0, %1|p\t%1, %0}";
7992 p
= "{p\t%1, %0|rp\t%0, %1}";
7994 if (STACK_TOP_P (operands
[0]))
7995 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7997 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8002 if (STACK_TOP_P (operands
[0]))
8004 if (STACK_TOP_P (operands
[1]))
8005 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8007 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8010 else if (STACK_TOP_P (operands
[1]))
8013 p
= "{\t%1, %0|r\t%0, %1}";
8015 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8021 p
= "{r\t%2, %0|\t%0, %2}";
8023 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8036 /* Output code to initialize control word copies used by
8037 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
8038 is set to control word rounding downwards. */
/* NOTE(review): damaged extract; interior lines are missing.
   Reads the current 387 control word via fnstcw, then ORs in 0x0c00
   (round-toward-zero bits) -- either with a SImode bit-insert or a
   full HImode OR, apparently chosen by the partial-reg-stall tuning
   test below.  */
8040 emit_i387_cw_initialization (rtx normal
, rtx round_down
)
8042 rtx reg
= gen_reg_rtx (HImode
);
8044 emit_insn (gen_x86_fnstcw_1 (normal
));
8045 emit_move_insn (reg
, normal
);
8046 if (!TARGET_PARTIAL_REG_STALL
&& !optimize_size
8048 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
8050 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0xc00)));
8051 emit_move_insn (round_down
, reg
);
8054 /* Output code for INSN to convert a float to a signed int. OPERANDS
8055 are the insn operands. The output may be [HSD]Imode and the input
8056 operand may be [SDX]Fmode. */
/* NOTE(review): damaged extract; abort/assert branches and the final
   return are elided.  Emits fldcw to switch rounding mode, fist(p)
   to store, and fldcw to restore; uses the popping form when the
   stack top dies or for DImode (no non-popping 64-bit fist).  */
8059 output_fix_trunc (rtx insn
, rtx
*operands
)
8061 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
8062 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
8064 /* Jump through a hoop or two for DImode, since the hardware has no
8065 non-popping instruction. We used to do this a different way, but
8066 that was somewhat fragile and broke with post-reload splitters. */
8067 if (dimode_p
&& !stack_top_dies
)
8068 output_asm_insn ("fld\t%y1", operands
);
8070 if (!STACK_TOP_P (operands
[1]))
8073 if (GET_CODE (operands
[0]) != MEM
)
8076 output_asm_insn ("fldcw\t%3", operands
);
8077 if (stack_top_dies
|| dimode_p
)
8078 output_asm_insn ("fistp%z0\t%0", operands
);
8080 output_asm_insn ("fist%z0\t%0", operands
);
8081 output_asm_insn ("fldcw\t%2", operands
);
8086 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8087 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8088 when fucom should be used. */
/* NOTE(review): damaged extract; several branches, the mask-indexed
   template lookup's surrounding code, and returns are elided.  */
8091 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
8094 rtx cmp_op0
= operands
[0];
8095 rtx cmp_op1
= operands
[1];
8096 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]);
8101 cmp_op1
= operands
[2];
/* SSE path: (u)comiss / (u)comisd by mode and orderedness.  */
8105 if (GET_MODE (operands
[0]) == SFmode
)
8107 return "ucomiss\t{%1, %0|%0, %1}";
8109 return "comiss\t{%1, %0|%0, %1}";
8112 return "ucomisd\t{%1, %0|%0, %1}";
8114 return "comisd\t{%1, %0|%0, %1}";
8117 if (! STACK_TOP_P (cmp_op0
))
8120 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
8122 if (STACK_REG_P (cmp_op1
)
8124 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
8125 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
8127 /* If both the top of the 387 stack dies, and the other operand
8128 is also a stack register that dies, then this must be a
8129 `fcompp' float compare */
8133 /* There is no double popping fcomi variant. Fortunately,
8134 eflags is immune from the fstp's cc clobbering. */
8136 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
8138 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
8146 return "fucompp\n\tfnstsw\t%0";
8148 return "fcompp\n\tfnstsw\t%0";
8161 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8163 static const char * const alt
[24] =
8175 "fcomi\t{%y1, %0|%0, %y1}",
8176 "fcomip\t{%y1, %0|%0, %y1}",
8177 "fucomi\t{%y1, %0|%0, %y1}",
8178 "fucomip\t{%y1, %0|%0, %y1}",
8185 "fcom%z2\t%y2\n\tfnstsw\t%0",
8186 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8187 "fucom%z2\t%y2\n\tfnstsw\t%0",
8188 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8190 "ficom%z2\t%y2\n\tfnstsw\t%0",
8191 "ficomp%z2\t%y2\n\tfnstsw\t%0",
/* Build the 4-bit index into alt[] per the encoding comment above.  */
8199 mask
= eflags_p
<< 3;
8200 mask
|= (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
) << 2;
8201 mask
|= unordered_p
<< 1;
8202 mask
|= stack_top_dies
;
/* NOTE(review): damaged extract.  Emits one element of a jump-table
   address vector: ".long L<value>" (or ".quad" on the elided branch,
   presumably TARGET_64BIT -- confirm).  */
8215 ix86_output_addr_vec_elt (FILE *file
, int value
)
8217 const char *directive
= ASM_LONG
;
8222 directive
= ASM_QUAD
;
8228 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
/* NOTE(review): damaged extract; the conditions selecting between
   the four emission forms (label difference, @GOTOFF, Mach-O
   pic-base difference, GOT-relative asm_fprintf) are elided.
   Emits one PIC jump-table element as a relative offset.  */
8232 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
8235 fprintf (file
, "%s%s%d-%s%d\n",
8236 ASM_LONG
, LPREFIX
, value
, LPREFIX
, rel
);
8237 else if (HAVE_AS_GOTOFF_IN_DATA
)
8238 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
8240 else if (TARGET_MACHO
)
8242 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
8243 machopic_output_function_base_name (file
);
8244 fprintf(file
, "\n");
8248 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
8249 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
8252 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* NOTE(review): damaged extract; the abort on !reload_completed and
   the final emit_insn are elided.  xor form clobbers the flags, so
   the SET is wrapped in a PARALLEL with a CC clobber (hard reg 17).  */
8256 ix86_expand_clear (rtx dest
)
8260 /* We play register width games, which are only valid after reload. */
8261 if (!reload_completed
)
8264 /* Avoid HImode and its attendant prefix byte. */
8265 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
8266 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
8268 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
8270 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8271 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
8273 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, 17));
8274 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
8280 /* X is an unchanging MEM. If it is a constant pool reference, return
8281 the constant pool rtx, else NULL. */
/* NOTE(review): damaged extract; the NULL-return fall-through is
   elided.  Delegitimizes the address first so PIC-wrapped pool
   references are recognized too.  */
8284 maybe_get_pool_constant (rtx x
)
8286 x
= ix86_delegitimize_address (XEXP (x
, 0));
8288 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8289 return get_pool_constant (x
);
/* NOTE(review): damaged extract; operand setup (op0/op1 from
   operands[]), several branch heads and the surrounding braces are
   elided.  Expands a scalar move, handling TLS symbols, PIC
   legitimization (incl. Mach-O), push operands, large 64-bit
   immediates and FP constants.  */
8295 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
8297 int strict
= (reload_in_progress
|| reload_completed
);
8299 enum tls_model model
;
/* TLS symbol: legitimize per its model before moving.  */
8304 model
= tls_symbolic_operand (op1
, Pmode
);
8307 op1
= legitimize_tls_address (op1
, model
, true);
8308 op1
= force_operand (op1
, op0
);
8313 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
/* Mach-O PIC path (TARGET_MACHO, per the #endif below).  */
8318 rtx temp
= ((reload_in_progress
8319 || ((op0
&& GET_CODE (op0
) == REG
)
8321 ? op0
: gen_reg_rtx (Pmode
));
8322 op1
= machopic_indirect_data_reference (op1
, temp
);
8323 op1
= machopic_legitimize_pic_address (op1
, mode
,
8324 temp
== op1
? 0 : temp
);
8326 else if (MACHOPIC_INDIRECT
)
8327 op1
= machopic_indirect_data_reference (op1
, 0);
8331 if (GET_CODE (op0
) == MEM
)
8332 op1
= force_reg (Pmode
, op1
);
8336 if (GET_CODE (temp
) != REG
)
8337 temp
= gen_reg_rtx (Pmode
);
8338 temp
= legitimize_pic_address (op1
, temp
);
8343 #endif /* TARGET_MACHO */
/* Avoid mem->mem moves (except simple pushes).  */
8347 if (GET_CODE (op0
) == MEM
8348 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
8349 || !push_operand (op0
, mode
))
8350 && GET_CODE (op1
) == MEM
)
8351 op1
= force_reg (mode
, op1
);
8353 if (push_operand (op0
, mode
)
8354 && ! general_no_elim_operand (op1
, mode
))
8355 op1
= copy_to_mode_reg (mode
, op1
);
8357 /* Force large constants in 64bit compilation into register
8358 to get them CSEed. */
8359 if (TARGET_64BIT
&& mode
== DImode
8360 && immediate_operand (op1
, mode
)
8361 && !x86_64_zero_extended_value (op1
)
8362 && !register_operand (op0
, mode
)
8363 && optimize
&& !reload_completed
&& !reload_in_progress
)
8364 op1
= copy_to_mode_reg (mode
, op1
);
8366 if (FLOAT_MODE_P (mode
))
8368 /* If we are loading a floating point constant to a register,
8369 force the value to memory now, since we'll get better code
8370 out the back end. */
8374 else if (GET_CODE (op1
) == CONST_DOUBLE
)
8376 op1
= validize_mem (force_const_mem (mode
, op1
));
8377 if (!register_operand (op0
, mode
))
8379 rtx temp
= gen_reg_rtx (mode
);
8380 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
8381 emit_move_insn (op0
, temp
);
8388 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
/* NOTE(review): damaged extract; the condition head at 8404 (the
   mem->mem / push test, presumably) is elided.  Expands a vector
   move: nonzero constants go to the constant pool, and mem->mem
   moves go through a register.  */
8392 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
8394 /* Force constants other than zero into memory. We do not know how
8395 the instructions used to build constants modify the upper 64 bits
8396 of the register, once we have that information we may be able
8397 to handle some of them more efficiently. */
8398 if ((reload_in_progress
| reload_completed
) == 0
8399 && register_operand (operands
[0], mode
)
8400 && CONSTANT_P (operands
[1]) && operands
[1] != CONST0_RTX (mode
))
8401 operands
[1] = validize_mem (force_const_mem (mode
, operands
[1]));
8403 /* Make operand1 a register if it isn't already. */
8405 && !register_operand (operands
[0], mode
)
8406 && !register_operand (operands
[1], mode
))
8408 rtx temp
= force_reg (GET_MODE (operands
[1]), operands
[1]);
8409 emit_move_insn (operands
[0], temp
);
8413 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], operands
[1]));
8416 /* Attempt to expand a binary operator. Make the expansion closer to the
8417 actual machine, then just general_operand, which will allow 3 separate
8418 memory references (one output, two input) in a single insn. */
/* NOTE(review): damaged extract; the operands[] parameter, dst/src
   initialization, the commutative-swap body and some emit paths are
   elided.  */
8421 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
8424 int matching_memory
;
8425 rtx src1
, src2
, dst
, op
, clob
;
8431 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8432 if (GET_RTX_CLASS (code
) == 'c'
8433 && (rtx_equal_p (dst
, src2
)
8434 || immediate_operand (src1
, mode
)))
8441 /* If the destination is memory, and we do not have matching source
8442 operands, do things in registers. */
8443 matching_memory
= 0;
8444 if (GET_CODE (dst
) == MEM
)
8446 if (rtx_equal_p (dst
, src1
))
8447 matching_memory
= 1;
8448 else if (GET_RTX_CLASS (code
) == 'c'
8449 && rtx_equal_p (dst
, src2
))
8450 matching_memory
= 2;
8452 dst
= gen_reg_rtx (mode
);
8455 /* Both source operands cannot be in memory. */
8456 if (GET_CODE (src1
) == MEM
&& GET_CODE (src2
) == MEM
)
8458 if (matching_memory
!= 2)
8459 src2
= force_reg (mode
, src2
);
8461 src1
= force_reg (mode
, src1
);
8464 /* If the operation is not commutable, source 1 cannot be a constant
8465 or non-matching memory. */
8466 if ((CONSTANT_P (src1
)
8467 || (!matching_memory
&& GET_CODE (src1
) == MEM
))
8468 && GET_RTX_CLASS (code
) != 'c')
8469 src1
= force_reg (mode
, src1
);
8471 /* If optimizing, copy to regs to improve CSE */
8472 if (optimize
&& ! no_new_pseudos
)
8474 if (GET_CODE (dst
) == MEM
)
8475 dst
= gen_reg_rtx (mode
);
8476 if (GET_CODE (src1
) == MEM
)
8477 src1
= force_reg (mode
, src1
);
8478 if (GET_CODE (src2
) == MEM
)
8479 src2
= force_reg (mode
, src2
);
8482 /* Emit the instruction. */
8484 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
8485 if (reload_in_progress
)
8487 /* Reload doesn't know about the flags register, and doesn't know that
8488 it doesn't want to clobber it. We can only do this with PLUS. */
8495 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
8496 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
8499 /* Fix up the destination if needed. */
8500 if (dst
!= operands
[0])
8501 emit_move_insn (operands
[0], dst
);
8504 /* Return TRUE or FALSE depending on whether the binary operator meets the
8505 appropriate constraints. */
/* NOTE(review): damaged extract; the return statements between the
   tests are elided -- each visible test presumably returns FALSE,
   with TRUE at the end.  */
8508 ix86_binary_operator_ok (enum rtx_code code
,
8509 enum machine_mode mode ATTRIBUTE_UNUSED
,
8512 /* Both source operands cannot be in memory. */
8513 if (GET_CODE (operands
[1]) == MEM
&& GET_CODE (operands
[2]) == MEM
)
8515 /* If the operation is not commutable, source 1 cannot be a constant. */
8516 if (CONSTANT_P (operands
[1]) && GET_RTX_CLASS (code
) != 'c')
8518 /* If the destination is memory, we must have a matching source operand. */
8519 if (GET_CODE (operands
[0]) == MEM
8520 && ! (rtx_equal_p (operands
[0], operands
[1])
8521 || (GET_RTX_CLASS (code
) == 'c'
8522 && rtx_equal_p (operands
[0], operands
[2]))))
8524 /* If the operation is not commutable and the source 1 is memory, we must
8525 have a matching destination. */
8526 if (GET_CODE (operands
[1]) == MEM
8527 && GET_RTX_CLASS (code
) != 'c'
8528 && ! rtx_equal_p (operands
[0], operands
[1]))
8533 /* Attempt to expand a unary operator. Make the expansion closer to the
8534 actual machine, then just general_operand, which will allow 2 separate
8535 memory references (one output, one input) in a single insn. */
/* NOTE(review): damaged extract; the operands[] parameter, dst/src
   initialization and some emit paths are elided.  Mirrors
   ix86_expand_binary_operator for single-source ops.  */
8538 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
8541 int matching_memory
;
8542 rtx src
, dst
, op
, clob
;
8547 /* If the destination is memory, and we do not have matching source
8548 operands, do things in registers. */
8549 matching_memory
= 0;
8550 if (GET_CODE (dst
) == MEM
)
8552 if (rtx_equal_p (dst
, src
))
8553 matching_memory
= 1;
8555 dst
= gen_reg_rtx (mode
);
8558 /* When source operand is memory, destination must match. */
8559 if (!matching_memory
&& GET_CODE (src
) == MEM
)
8560 src
= force_reg (mode
, src
);
8562 /* If optimizing, copy to regs to improve CSE */
8563 if (optimize
&& ! no_new_pseudos
)
8565 if (GET_CODE (dst
) == MEM
)
8566 dst
= gen_reg_rtx (mode
);
8567 if (GET_CODE (src
) == MEM
)
8568 src
= force_reg (mode
, src
);
8571 /* Emit the instruction. */
8573 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
8574 if (reload_in_progress
|| code
== NOT
)
8576 /* Reload doesn't know about the flags register, and doesn't know that
8577 it doesn't want to clobber it. */
8584 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
8585 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
8588 /* Fix up the destination if needed. */
8589 if (dst
!= operands
[0])
8590 emit_move_insn (operands
[0], dst
);
8593 /* Return TRUE or FALSE depending on whether the unary operator meets the
8594 appropriate constraints. */
8597 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
8598 enum machine_mode mode ATTRIBUTE_UNUSED
,
8599 rtx operands
[2] ATTRIBUTE_UNUSED
)
8601 /* If one of operands is memory, source and destination must match. */
8602 if ((GET_CODE (operands
[0]) == MEM
8603 || GET_CODE (operands
[1]) == MEM
)
8604 && ! rtx_equal_p (operands
[0], operands
[1]))
8609 /* Return TRUE or FALSE depending on whether the first SET in INSN
8610 has source and destination with matching CC modes, and that the
8611 CC mode is at least as constrained as REQ_MODE. */
/* NOTE(review): damaged extract; the switch on set_mode and its
   case labels/returns between 8637 and 8655 are elided.  */
8614 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
8617 enum machine_mode set_mode
;
8619 set
= PATTERN (insn
);
8620 if (GET_CODE (set
) == PARALLEL
)
8621 set
= XVECEXP (set
, 0, 0);
8622 if (GET_CODE (set
) != SET
)
8624 if (GET_CODE (SET_SRC (set
)) != COMPARE
)
8627 set_mode
= GET_MODE (SET_DEST (set
));
8631 if (req_mode
!= CCNOmode
8632 && (req_mode
!= CCmode
8633 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
8637 if (req_mode
== CCGCmode
)
8641 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
8645 if (req_mode
== CCZmode
)
8655 return (GET_MODE (SET_SRC (set
)) == set_mode
);
8658 /* Generate insn patterns to do an integer compare of OPERANDS. */
/* NOTE(review): damaged extract; tmp/flags declarations elided.
   Emits flags = COMPARE(op0, op1) in the selected CC mode and
   returns the comparison rtx for the flags user.  */
8661 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
8663 enum machine_mode cmpmode
;
8666 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
8667 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
8669 /* This is very simple, but making the interface the same as in the
8670 FP case makes the rest of the code easier. */
8671 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
8672 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
8674 /* Return the test that should be put into the flags user, i.e.
8675 the bcc, scc, or cmov instruction. */
8676 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
8679 /* Figure out whether to use ordered or unordered fp comparisons.
8680 Return the appropriate mode to use. */
8683 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
8685 /* ??? In order to make all comparisons reversible, we do all comparisons
8686 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8687 all forms trapping and nontrapping comparisons, we can make inequality
8688 comparisons trapping again, since it results in better code when using
8689 FCOM based compares. */
8690 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
/* NOTE(review): damaged extract; the return statements inside the
   switch (the CC mode chosen for each code group) are elided --
   only the case labels and flag-usage comments survive.  Selects the
   condition-code mode for comparing op0/op1 with CODE.  */
8694 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
8696 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
8697 return ix86_fp_compare_mode (code
);
8700 /* Only zero flag is needed. */
8702 case NE
: /* ZF!=0 */
8704 /* Codes needing carry flag. */
8705 case GEU
: /* CF=0 */
8706 case GTU
: /* CF=0 & ZF=0 */
8707 case LTU
: /* CF=1 */
8708 case LEU
: /* CF=1 | ZF=1 */
8710 /* Codes possibly doable only with sign flag when
8711 comparing against zero. */
8712 case GE
: /* SF=OF or SF=0 */
8713 case LT
: /* SF<>OF or SF=1 */
8714 if (op1
== const0_rtx
)
8717 /* For other cases Carry flag is not required. */
8719 /* Codes doable only with sign flag when comparing
8720 against zero, but we miss jump instruction for it
8721 so we need to use relational tests against overflow
8722 that thus needs to be zero. */
8723 case GT
: /* ZF=0 & SF=OF */
8724 case LE
: /* ZF=1 | SF<>OF */
8725 if (op1
== const0_rtx
)
8729 /* strcmp pattern do (use flags) and combine may ask us for proper
8738 /* Return the fixed registers used for condition codes. */
/* NOTE(review): body entirely elided in this extract (presumably
   sets *p1/*p2 to the flags register(s) and returns true).  */
8741 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
8748 /* If two condition code modes are compatible, return a condition code
8749 mode which is compatible with both. Otherwise, return
/* NOTE(review): damaged extract; the equal-modes shortcut, the
   non-CC return, and the combined-mode returns are elided.  */
8752 static enum machine_mode
8753 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
8758 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
8761 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
8762 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
8790 /* These are only compatible with themselves, which we already
8796 /* Return true if we should use an FCOMI instruction for this fp comparison. */
/* True when fcomi is no more expensive than the cheapest sequence
   for either the comparison or its swapped form.  */
8799 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED
)
8801 enum rtx_code swapped_code
= swap_condition (code
);
8802 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
8803 || (ix86_fp_comparison_cost (swapped_code
)
8804 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
8807 /* Swap, force into registers, or otherwise massage the two operands
8808 to a fp comparison. The operands are updated in place; the new
8809 comparison code is returned. */
/* NOTE(review): damaged extract; some branch heads (e.g. the SSE
   test at 8822-8823) and the final writeback of *pop0/*pop1 are
   elided.  */
8811 static enum rtx_code
8812 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
8814 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
8815 rtx op0
= *pop0
, op1
= *pop1
;
8816 enum machine_mode op_mode
= GET_MODE (op0
);
8817 int is_sse
= SSE_REG_P (op0
) | SSE_REG_P (op1
);
8819 /* All of the unordered compare instructions only work on registers.
8820 The same is true of the XFmode compare instructions. The same is
8821 true of the fcomi compare instructions. */
8824 && (fpcmp_mode
== CCFPUmode
8825 || op_mode
== XFmode
8826 || ix86_use_fcomi_compare (code
)))
8828 op0
= force_reg (op_mode
, op0
);
8829 op1
= force_reg (op_mode
, op1
);
8833 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8834 things around if they appear profitable, otherwise force op0
8837 if (standard_80387_constant_p (op0
) == 0
8838 || (GET_CODE (op0
) == MEM
8839 && ! (standard_80387_constant_p (op1
) == 0
8840 || GET_CODE (op1
) == MEM
)))
8843 tmp
= op0
, op0
= op1
, op1
= tmp
;
8844 code
= swap_condition (code
);
8847 if (GET_CODE (op0
) != REG
)
8848 op0
= force_reg (op_mode
, op0
);
8850 if (CONSTANT_P (op1
))
/* Loadable 387 constants (0.0/1.0/pi...) stay in a register;
   other constants are spilled to the constant pool.  */
8852 if (standard_80387_constant_p (op1
))
8853 op1
= force_reg (op_mode
, op1
);
8855 op1
= validize_mem (force_const_mem (op_mode
, op1
));
8859 /* Try to rearrange the comparison to make it cheaper. */
8860 if (ix86_fp_comparison_cost (code
)
8861 > ix86_fp_comparison_cost (swap_condition (code
))
8862 && (GET_CODE (op1
) == REG
|| !no_new_pseudos
))
8865 tmp
= op0
, op0
= op1
, op1
= tmp
;
8866 code
= swap_condition (code
);
8867 if (GET_CODE (op0
) != REG
)
8868 op0
= force_reg (op_mode
, op0
);
8876 /* Convert comparison codes we use to represent FP comparison to integer
8877 code that will result in proper branch. Return UNKNOWN if no such code
8879 static enum rtx_code
8880 ix86_fp_compare_code_to_integer (enum rtx_code code
)
8909 /* Split comparison code CODE into comparisons we can do using branch
8910 instructions. BYPASS_CODE is comparison code for branch that will
8911 branch around FIRST_CODE and SECOND_CODE. If some of branches
8912 is not required, set value to NIL.
8913 We never require more than two branches. */
8915 ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*bypass_code
,
8916 enum rtx_code
*first_code
,
8917 enum rtx_code
*second_code
)
8923 /* The fcomi comparison sets flags as follows:
8933 case GT
: /* GTU - CF=0 & ZF=0 */
8934 case GE
: /* GEU - CF=0 */
8935 case ORDERED
: /* PF=0 */
8936 case UNORDERED
: /* PF=1 */
8937 case UNEQ
: /* EQ - ZF=1 */
8938 case UNLT
: /* LTU - CF=1 */
8939 case UNLE
: /* LEU - CF=1 | ZF=1 */
8940 case LTGT
: /* EQ - ZF=0 */
8942 case LT
: /* LTU - CF=1 - fails on unordered */
8944 *bypass_code
= UNORDERED
;
8946 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
8948 *bypass_code
= UNORDERED
;
8950 case EQ
: /* EQ - ZF=1 - fails on unordered */
8952 *bypass_code
= UNORDERED
;
8954 case NE
: /* NE - ZF=0 - fails on unordered */
8956 *second_code
= UNORDERED
;
8958 case UNGE
: /* GEU - CF=0 - fails on unordered */
8960 *second_code
= UNORDERED
;
8962 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
8964 *second_code
= UNORDERED
;
8969 if (!TARGET_IEEE_FP
)
8976 /* Return cost of comparison done fcom + arithmetics operations on AX.
8977 All following functions do use number of instructions as a cost metrics.
8978 In future this should be tweaked to compute bytes for optimize_size and
8979 take into account performance of various instructions on various CPUs. */
8981 ix86_fp_comparison_arithmetics_cost (enum rtx_code code
)
8983 if (!TARGET_IEEE_FP
)
8985 /* The cost of code output by ix86_expand_fp_compare. */
9013 /* Return cost of comparison done using fcomi operation.
9014 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9016 ix86_fp_comparison_fcomi_cost (enum rtx_code code
)
9018 enum rtx_code bypass_code
, first_code
, second_code
;
9019 /* Return arbitrarily high cost when instruction is not supported - this
9020 prevents gcc from using it. */
9023 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
9024 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 2;
9027 /* Return cost of comparison done using sahf operation.
9028 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9030 ix86_fp_comparison_sahf_cost (enum rtx_code code
)
9032 enum rtx_code bypass_code
, first_code
, second_code
;
9033 /* Return arbitrarily high cost when instruction is not preferred - this
9034 avoids gcc from using it. */
9035 if (!TARGET_USE_SAHF
&& !optimize_size
)
9037 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
9038 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 3;
9041 /* Compute cost of the comparison done using any method.
9042 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9044 ix86_fp_comparison_cost (enum rtx_code code
)
9046 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
9049 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
9050 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
9052 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
9053 if (min
> sahf_cost
)
9055 if (min
> fcomi_cost
)
9060 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9063 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
,
9064 rtx
*second_test
, rtx
*bypass_test
)
9066 enum machine_mode fpcmp_mode
, intcmp_mode
;
9068 int cost
= ix86_fp_comparison_cost (code
);
9069 enum rtx_code bypass_code
, first_code
, second_code
;
9071 fpcmp_mode
= ix86_fp_compare_mode (code
);
9072 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
9075 *second_test
= NULL_RTX
;
9077 *bypass_test
= NULL_RTX
;
9079 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
9081 /* Do fcomi/sahf based test when profitable. */
9082 if ((bypass_code
== NIL
|| bypass_test
)
9083 && (second_code
== NIL
|| second_test
)
9084 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
9088 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
9089 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
9095 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
9096 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
9098 scratch
= gen_reg_rtx (HImode
);
9099 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
9100 emit_insn (gen_x86_sahf_1 (scratch
));
9103 /* The FP codes work out to act like unsigned. */
9104 intcmp_mode
= fpcmp_mode
;
9106 if (bypass_code
!= NIL
)
9107 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
9108 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
9110 if (second_code
!= NIL
)
9111 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
9112 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
9117 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9118 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
9119 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
9121 scratch
= gen_reg_rtx (HImode
);
9122 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
9124 /* In the unordered case, we have to check C2 for NaN's, which
9125 doesn't happen to work out to anything nice combination-wise.
9126 So do some bit twiddling on the value we've got in AH to come
9127 up with an appropriate set of condition codes. */
9129 intcmp_mode
= CCNOmode
;
9134 if (code
== GT
|| !TARGET_IEEE_FP
)
9136 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
9141 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
9142 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
9143 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
9144 intcmp_mode
= CCmode
;
9150 if (code
== LT
&& TARGET_IEEE_FP
)
9152 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
9153 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
9154 intcmp_mode
= CCmode
;
9159 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
9165 if (code
== GE
|| !TARGET_IEEE_FP
)
9167 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
9172 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
9173 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
9180 if (code
== LE
&& TARGET_IEEE_FP
)
9182 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
9183 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
9184 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
9185 intcmp_mode
= CCmode
;
9190 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
9196 if (code
== EQ
&& TARGET_IEEE_FP
)
9198 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
9199 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
9200 intcmp_mode
= CCmode
;
9205 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
9212 if (code
== NE
&& TARGET_IEEE_FP
)
9214 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
9215 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
9221 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
9227 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
9231 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
9240 /* Return the test that should be put into the flags user, i.e.
9241 the bcc, scc, or cmov instruction. */
9242 return gen_rtx_fmt_ee (code
, VOIDmode
,
9243 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
9248 ix86_expand_compare (enum rtx_code code
, rtx
*second_test
, rtx
*bypass_test
)
9251 op0
= ix86_compare_op0
;
9252 op1
= ix86_compare_op1
;
9255 *second_test
= NULL_RTX
;
9257 *bypass_test
= NULL_RTX
;
9259 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
9260 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
9261 second_test
, bypass_test
);
9263 ret
= ix86_expand_int_compare (code
, op0
, op1
);
9268 /* Return true if the CODE will result in nontrivial jump sequence. */
9270 ix86_fp_jump_nontrivial_p (enum rtx_code code
)
9272 enum rtx_code bypass_code
, first_code
, second_code
;
9275 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
9276 return bypass_code
!= NIL
|| second_code
!= NIL
;
9280 ix86_expand_branch (enum rtx_code code
, rtx label
)
9284 switch (GET_MODE (ix86_compare_op0
))
9290 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
9291 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
9292 gen_rtx_LABEL_REF (VOIDmode
, label
),
9294 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
9303 enum rtx_code bypass_code
, first_code
, second_code
;
9305 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
9308 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
9310 /* Check whether we will use the natural sequence with one jump. If
9311 so, we can expand jump early. Otherwise delay expansion by
9312 creating compound insn to not confuse optimizers. */
9313 if (bypass_code
== NIL
&& second_code
== NIL
9316 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
9317 gen_rtx_LABEL_REF (VOIDmode
, label
),
9322 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
9323 ix86_compare_op0
, ix86_compare_op1
);
9324 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
9325 gen_rtx_LABEL_REF (VOIDmode
, label
),
9327 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
9329 use_fcomi
= ix86_use_fcomi_compare (code
);
9330 vec
= rtvec_alloc (3 + !use_fcomi
);
9331 RTVEC_ELT (vec
, 0) = tmp
;
9333 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
9335 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
9338 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
9340 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
9348 /* Expand DImode branch into multiple compare+branch. */
9350 rtx lo
[2], hi
[2], label2
;
9351 enum rtx_code code1
, code2
, code3
;
9353 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
9355 tmp
= ix86_compare_op0
;
9356 ix86_compare_op0
= ix86_compare_op1
;
9357 ix86_compare_op1
= tmp
;
9358 code
= swap_condition (code
);
9360 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
9361 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
9363 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9364 avoid two branches. This costs one extra insn, so disable when
9365 optimizing for size. */
9367 if ((code
== EQ
|| code
== NE
)
9369 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
9374 if (hi
[1] != const0_rtx
)
9375 xor1
= expand_binop (SImode
, xor_optab
, xor1
, hi
[1],
9376 NULL_RTX
, 0, OPTAB_WIDEN
);
9379 if (lo
[1] != const0_rtx
)
9380 xor0
= expand_binop (SImode
, xor_optab
, xor0
, lo
[1],
9381 NULL_RTX
, 0, OPTAB_WIDEN
);
9383 tmp
= expand_binop (SImode
, ior_optab
, xor1
, xor0
,
9384 NULL_RTX
, 0, OPTAB_WIDEN
);
9386 ix86_compare_op0
= tmp
;
9387 ix86_compare_op1
= const0_rtx
;
9388 ix86_expand_branch (code
, label
);
9392 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9393 op1 is a constant and the low word is zero, then we can just
9394 examine the high word. */
9396 if (GET_CODE (hi
[1]) == CONST_INT
&& lo
[1] == const0_rtx
)
9399 case LT
: case LTU
: case GE
: case GEU
:
9400 ix86_compare_op0
= hi
[0];
9401 ix86_compare_op1
= hi
[1];
9402 ix86_expand_branch (code
, label
);
9408 /* Otherwise, we need two or three jumps. */
9410 label2
= gen_label_rtx ();
9413 code2
= swap_condition (code
);
9414 code3
= unsigned_condition (code
);
9418 case LT
: case GT
: case LTU
: case GTU
:
9421 case LE
: code1
= LT
; code2
= GT
; break;
9422 case GE
: code1
= GT
; code2
= LT
; break;
9423 case LEU
: code1
= LTU
; code2
= GTU
; break;
9424 case GEU
: code1
= GTU
; code2
= LTU
; break;
9426 case EQ
: code1
= NIL
; code2
= NE
; break;
9427 case NE
: code2
= NIL
; break;
9435 * if (hi(a) < hi(b)) goto true;
9436 * if (hi(a) > hi(b)) goto false;
9437 * if (lo(a) < lo(b)) goto true;
9441 ix86_compare_op0
= hi
[0];
9442 ix86_compare_op1
= hi
[1];
9445 ix86_expand_branch (code1
, label
);
9447 ix86_expand_branch (code2
, label2
);
9449 ix86_compare_op0
= lo
[0];
9450 ix86_compare_op1
= lo
[1];
9451 ix86_expand_branch (code3
, label
);
9454 emit_label (label2
);
9463 /* Split branch based on floating point condition. */
9465 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
9466 rtx target1
, rtx target2
, rtx tmp
)
9469 rtx label
= NULL_RTX
;
9471 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
9474 if (target2
!= pc_rtx
)
9477 code
= reverse_condition_maybe_unordered (code
);
9482 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
9483 tmp
, &second
, &bypass
);
9485 if (split_branch_probability
>= 0)
9487 /* Distribute the probabilities across the jumps.
9488 Assume the BYPASS and SECOND to be always test
9490 probability
= split_branch_probability
;
9492 /* Value of 1 is low enough to make no need for probability
9493 to be updated. Later we may run some experiments and see
9494 if unordered values are more frequent in practice. */
9496 bypass_probability
= 1;
9498 second_probability
= 1;
9500 if (bypass
!= NULL_RTX
)
9502 label
= gen_label_rtx ();
9503 i
= emit_jump_insn (gen_rtx_SET
9505 gen_rtx_IF_THEN_ELSE (VOIDmode
,
9507 gen_rtx_LABEL_REF (VOIDmode
,
9510 if (bypass_probability
>= 0)
9512 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
9513 GEN_INT (bypass_probability
),
9516 i
= emit_jump_insn (gen_rtx_SET
9518 gen_rtx_IF_THEN_ELSE (VOIDmode
,
9519 condition
, target1
, target2
)));
9520 if (probability
>= 0)
9522 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
9523 GEN_INT (probability
),
9525 if (second
!= NULL_RTX
)
9527 i
= emit_jump_insn (gen_rtx_SET
9529 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
9531 if (second_probability
>= 0)
9533 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
9534 GEN_INT (second_probability
),
9537 if (label
!= NULL_RTX
)
9542 ix86_expand_setcc (enum rtx_code code
, rtx dest
)
9544 rtx ret
, tmp
, tmpreg
, equiv
;
9545 rtx second_test
, bypass_test
;
9547 if (GET_MODE (ix86_compare_op0
) == DImode
9549 return 0; /* FAIL */
9551 if (GET_MODE (dest
) != QImode
)
9554 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9555 PUT_MODE (ret
, QImode
);
9560 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
9561 if (bypass_test
|| second_test
)
9563 rtx test
= second_test
;
9565 rtx tmp2
= gen_reg_rtx (QImode
);
9572 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
9574 PUT_MODE (test
, QImode
);
9575 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
9578 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
9580 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
9583 /* Attach a REG_EQUAL note describing the comparison result. */
9584 equiv
= simplify_gen_relational (code
, QImode
,
9585 GET_MODE (ix86_compare_op0
),
9586 ix86_compare_op0
, ix86_compare_op1
);
9587 set_unique_reg_note (get_last_insn (), REG_EQUAL
, equiv
);
9589 return 1; /* DONE */
9592 /* Expand comparison setting or clearing carry flag. Return true when
9593 successful and set pop for the operation. */
9595 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
9597 enum machine_mode mode
=
9598 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
9600 /* Do not handle DImode compares that go trought special path. Also we can't
9601 deal with FP compares yet. This is possible to add. */
9602 if ((mode
== DImode
&& !TARGET_64BIT
))
9604 if (FLOAT_MODE_P (mode
))
9606 rtx second_test
= NULL
, bypass_test
= NULL
;
9607 rtx compare_op
, compare_seq
;
9609 /* Shortcut: following common codes never translate into carry flag compares. */
9610 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
9611 || code
== ORDERED
|| code
== UNORDERED
)
9614 /* These comparisons require zero flag; swap operands so they won't. */
9615 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
9621 code
= swap_condition (code
);
9624 /* Try to expand the comparison and verify that we end up with carry flag
9625 based comparison. This is fails to be true only when we decide to expand
9626 comparison using arithmetic that is not too common scenario. */
9628 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
9629 &second_test
, &bypass_test
);
9630 compare_seq
= get_insns ();
9633 if (second_test
|| bypass_test
)
9635 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
9636 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
9637 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
9639 code
= GET_CODE (compare_op
);
9640 if (code
!= LTU
&& code
!= GEU
)
9642 emit_insn (compare_seq
);
9646 if (!INTEGRAL_MODE_P (mode
))
9654 /* Convert a==0 into (unsigned)a<1. */
9657 if (op1
!= const0_rtx
)
9660 code
= (code
== EQ
? LTU
: GEU
);
9663 /* Convert a>b into b<a or a>=b-1. */
9666 if (GET_CODE (op1
) == CONST_INT
)
9668 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
9669 /* Bail out on overflow. We still can swap operands but that
9670 would force loading of the constant into register. */
9671 if (op1
== const0_rtx
9672 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
9674 code
= (code
== GTU
? GEU
: LTU
);
9681 code
= (code
== GTU
? LTU
: GEU
);
9685 /* Convert a>=0 into (unsigned)a<0x80000000. */
9688 if (mode
== DImode
|| op1
!= const0_rtx
)
9690 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
9691 code
= (code
== LT
? GEU
: LTU
);
9695 if (mode
== DImode
|| op1
!= constm1_rtx
)
9697 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
9698 code
= (code
== LE
? GEU
: LTU
);
9704 /* Swapping operands may cause constant to appear as first operand. */
9705 if (!nonimmediate_operand (op0
, VOIDmode
))
9709 op0
= force_reg (mode
, op0
);
9711 ix86_compare_op0
= op0
;
9712 ix86_compare_op1
= op1
;
9713 *pop
= ix86_expand_compare (code
, NULL
, NULL
);
9714 if (GET_CODE (*pop
) != LTU
&& GET_CODE (*pop
) != GEU
)
9720 ix86_expand_int_movcc (rtx operands
[])
9722 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
9723 rtx compare_seq
, compare_op
;
9724 rtx second_test
, bypass_test
;
9725 enum machine_mode mode
= GET_MODE (operands
[0]);
9726 bool sign_bit_compare_p
= false;;
9729 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9730 compare_seq
= get_insns ();
9733 compare_code
= GET_CODE (compare_op
);
9735 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
9736 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
9737 sign_bit_compare_p
= true;
9739 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9740 HImode insns, we'd be swallowed in word prefix ops. */
9742 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
9743 && (mode
!= DImode
|| TARGET_64BIT
)
9744 && GET_CODE (operands
[2]) == CONST_INT
9745 && GET_CODE (operands
[3]) == CONST_INT
)
9747 rtx out
= operands
[0];
9748 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
9749 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
9753 /* Sign bit compares are better done using shifts than we do by using
9755 if (sign_bit_compare_p
9756 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
9757 ix86_compare_op1
, &compare_op
))
9759 /* Detect overlap between destination and compare sources. */
9762 if (!sign_bit_compare_p
)
9766 compare_code
= GET_CODE (compare_op
);
9768 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
9769 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
9772 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
9775 /* To simplify rest of code, restrict to the GEU case. */
9776 if (compare_code
== LTU
)
9778 HOST_WIDE_INT tmp
= ct
;
9781 compare_code
= reverse_condition (compare_code
);
9782 code
= reverse_condition (code
);
9787 PUT_CODE (compare_op
,
9788 reverse_condition_maybe_unordered
9789 (GET_CODE (compare_op
)));
9791 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
9795 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
9796 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
9797 tmp
= gen_reg_rtx (mode
);
9800 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
9802 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
9806 if (code
== GT
|| code
== GE
)
9807 code
= reverse_condition (code
);
9810 HOST_WIDE_INT tmp
= ct
;
9815 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
9816 ix86_compare_op1
, VOIDmode
, 0, -1);
9829 tmp
= expand_simple_binop (mode
, PLUS
,
9831 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
9842 tmp
= expand_simple_binop (mode
, IOR
,
9844 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
9846 else if (diff
== -1 && ct
)
9856 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
9858 tmp
= expand_simple_binop (mode
, PLUS
,
9859 copy_rtx (tmp
), GEN_INT (cf
),
9860 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
9868 * andl cf - ct, dest
9878 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
9881 tmp
= expand_simple_binop (mode
, AND
,
9883 gen_int_mode (cf
- ct
, mode
),
9884 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
9886 tmp
= expand_simple_binop (mode
, PLUS
,
9887 copy_rtx (tmp
), GEN_INT (ct
),
9888 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
9891 if (!rtx_equal_p (tmp
, out
))
9892 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
9894 return 1; /* DONE */
9900 tmp
= ct
, ct
= cf
, cf
= tmp
;
9902 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
9904 /* We may be reversing unordered compare to normal compare, that
9905 is not valid in general (we may convert non-trapping condition
9906 to trapping one), however on i386 we currently emit all
9907 comparisons unordered. */
9908 compare_code
= reverse_condition_maybe_unordered (compare_code
);
9909 code
= reverse_condition_maybe_unordered (code
);
9913 compare_code
= reverse_condition (compare_code
);
9914 code
= reverse_condition (code
);
9919 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
9920 && GET_CODE (ix86_compare_op1
) == CONST_INT
)
9922 if (ix86_compare_op1
== const0_rtx
9923 && (code
== LT
|| code
== GE
))
9924 compare_code
= code
;
9925 else if (ix86_compare_op1
== constm1_rtx
)
9929 else if (code
== GT
)
9934 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9935 if (compare_code
!= NIL
9936 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
9937 && (cf
== -1 || ct
== -1))
9939 /* If lea code below could be used, only optimize
9940 if it results in a 2 insn sequence. */
9942 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
9943 || diff
== 3 || diff
== 5 || diff
== 9)
9944 || (compare_code
== LT
&& ct
== -1)
9945 || (compare_code
== GE
&& cf
== -1))
9948 * notl op1 (if necessary)
9956 code
= reverse_condition (code
);
9959 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9960 ix86_compare_op1
, VOIDmode
, 0, -1);
9962 out
= expand_simple_binop (mode
, IOR
,
9964 out
, 1, OPTAB_DIRECT
);
9965 if (out
!= operands
[0])
9966 emit_move_insn (operands
[0], out
);
9968 return 1; /* DONE */
9973 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
9974 || diff
== 3 || diff
== 5 || diff
== 9)
9975 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
9976 && (mode
!= DImode
|| x86_64_sign_extended_value (GEN_INT (cf
))))
9982 * lea cf(dest*(ct-cf)),dest
9986 * This also catches the degenerate setcc-only case.
9992 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9993 ix86_compare_op1
, VOIDmode
, 0, 1);
9996 /* On x86_64 the lea instruction operates on Pmode, so we need
9997 to get arithmetics done in proper mode to match. */
9999 tmp
= copy_rtx (out
);
10003 out1
= copy_rtx (out
);
10004 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
10008 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
10014 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
10017 if (!rtx_equal_p (tmp
, out
))
10020 out
= force_operand (tmp
, copy_rtx (out
));
10022 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
10024 if (!rtx_equal_p (out
, operands
[0]))
10025 emit_move_insn (operands
[0], copy_rtx (out
));
10027 return 1; /* DONE */
10031 * General case: Jumpful:
10032 * xorl dest,dest cmpl op1, op2
10033 * cmpl op1, op2 movl ct, dest
10034 * setcc dest jcc 1f
10035 * decl dest movl cf, dest
10036 * andl (cf-ct),dest 1:
10039 * Size 20. Size 14.
10041 * This is reasonably steep, but branch mispredict costs are
10042 * high on modern cpus, so consider failing only if optimizing
10046 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
10047 && BRANCH_COST
>= 2)
10053 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
10054 /* We may be reversing unordered compare to normal compare,
10055 that is not valid in general (we may convert non-trapping
10056 condition to trapping one), however on i386 we currently
10057 emit all comparisons unordered. */
10058 code
= reverse_condition_maybe_unordered (code
);
10061 code
= reverse_condition (code
);
10062 if (compare_code
!= NIL
)
10063 compare_code
= reverse_condition (compare_code
);
10067 if (compare_code
!= NIL
)
10069 /* notl op1 (if needed)
10074 For x < 0 (resp. x <= -1) there will be no notl,
10075 so if possible swap the constants to get rid of the
10077 True/false will be -1/0 while code below (store flag
10078 followed by decrement) is 0/-1, so the constants need
10079 to be exchanged once more. */
10081 if (compare_code
== GE
|| !cf
)
10083 code
= reverse_condition (code
);
10088 HOST_WIDE_INT tmp
= cf
;
10093 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
10094 ix86_compare_op1
, VOIDmode
, 0, -1);
10098 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
10099 ix86_compare_op1
, VOIDmode
, 0, 1);
10101 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
10102 copy_rtx (out
), 1, OPTAB_DIRECT
);
10105 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
10106 gen_int_mode (cf
- ct
, mode
),
10107 copy_rtx (out
), 1, OPTAB_DIRECT
);
10109 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
10110 copy_rtx (out
), 1, OPTAB_DIRECT
);
10111 if (!rtx_equal_p (out
, operands
[0]))
10112 emit_move_insn (operands
[0], copy_rtx (out
));
10114 return 1; /* DONE */
10118 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
10120 /* Try a few things more with specific constants and a variable. */
10123 rtx var
, orig_out
, out
, tmp
;
10125 if (BRANCH_COST
<= 2)
10126 return 0; /* FAIL */
10128 /* If one of the two operands is an interesting constant, load a
10129 constant with the above and mask it in with a logical operation. */
10131 if (GET_CODE (operands
[2]) == CONST_INT
)
10134 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
10135 operands
[3] = constm1_rtx
, op
= and_optab
;
10136 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
10137 operands
[3] = const0_rtx
, op
= ior_optab
;
10139 return 0; /* FAIL */
10141 else if (GET_CODE (operands
[3]) == CONST_INT
)
10144 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
10145 operands
[2] = constm1_rtx
, op
= and_optab
;
10146 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
10147 operands
[2] = const0_rtx
, op
= ior_optab
;
10149 return 0; /* FAIL */
10152 return 0; /* FAIL */
10154 orig_out
= operands
[0];
10155 tmp
= gen_reg_rtx (mode
);
10158 /* Recurse to get the constant loaded. */
10159 if (ix86_expand_int_movcc (operands
) == 0)
10160 return 0; /* FAIL */
10162 /* Mask in the interesting variable. */
10163 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
10165 if (!rtx_equal_p (out
, orig_out
))
10166 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
10168 return 1; /* DONE */
10172 * For comparison with above,
10182 if (! nonimmediate_operand (operands
[2], mode
))
10183 operands
[2] = force_reg (mode
, operands
[2]);
10184 if (! nonimmediate_operand (operands
[3], mode
))
10185 operands
[3] = force_reg (mode
, operands
[3]);
10187 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
10189 rtx tmp
= gen_reg_rtx (mode
);
10190 emit_move_insn (tmp
, operands
[3]);
10193 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
10195 rtx tmp
= gen_reg_rtx (mode
);
10196 emit_move_insn (tmp
, operands
[2]);
10200 if (! register_operand (operands
[2], VOIDmode
)
10202 || ! register_operand (operands
[3], VOIDmode
)))
10203 operands
[2] = force_reg (mode
, operands
[2]);
10206 && ! register_operand (operands
[3], VOIDmode
))
10207 operands
[3] = force_reg (mode
, operands
[3]);
10209 emit_insn (compare_seq
);
10210 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
10211 gen_rtx_IF_THEN_ELSE (mode
,
10212 compare_op
, operands
[2],
10215 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
10216 gen_rtx_IF_THEN_ELSE (mode
,
10218 copy_rtx (operands
[3]),
10219 copy_rtx (operands
[0]))));
10221 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
10222 gen_rtx_IF_THEN_ELSE (mode
,
10224 copy_rtx (operands
[2]),
10225 copy_rtx (operands
[0]))));
10227 return 1; /* DONE */
10231 ix86_expand_fp_movcc (rtx operands
[])
10233 enum rtx_code code
;
10235 rtx compare_op
, second_test
, bypass_test
;
10237 /* For SF/DFmode conditional moves based on comparisons
10238 in same mode, we may want to use SSE min/max instructions. */
10239 if (((TARGET_SSE_MATH
&& GET_MODE (operands
[0]) == SFmode
)
10240 || (TARGET_SSE2
&& TARGET_SSE_MATH
&& GET_MODE (operands
[0]) == DFmode
))
10241 && GET_MODE (ix86_compare_op0
) == GET_MODE (operands
[0])
10242 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
10243 && (!TARGET_IEEE_FP
10244 || (GET_CODE (operands
[1]) != LTGT
&& GET_CODE (operands
[1]) != UNEQ
))
10245 /* We may be called from the post-reload splitter. */
10246 && (!REG_P (operands
[0])
10247 || SSE_REG_P (operands
[0])
10248 || REGNO (operands
[0]) >= FIRST_PSEUDO_REGISTER
))
10250 rtx op0
= ix86_compare_op0
, op1
= ix86_compare_op1
;
10251 code
= GET_CODE (operands
[1]);
10253 /* See if we have (cross) match between comparison operands and
10254 conditional move operands. */
10255 if (rtx_equal_p (operands
[2], op1
))
10260 code
= reverse_condition_maybe_unordered (code
);
10262 if (rtx_equal_p (operands
[2], op0
) && rtx_equal_p (operands
[3], op1
))
10264 /* Check for min operation. */
10265 if (code
== LT
|| code
== UNLE
)
10273 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
10274 if (memory_operand (op0
, VOIDmode
))
10275 op0
= force_reg (GET_MODE (operands
[0]), op0
);
10276 if (GET_MODE (operands
[0]) == SFmode
)
10277 emit_insn (gen_minsf3 (operands
[0], op0
, op1
));
10279 emit_insn (gen_mindf3 (operands
[0], op0
, op1
));
10282 /* Check for max operation. */
10283 if (code
== GT
|| code
== UNGE
)
10291 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
10292 if (memory_operand (op0
, VOIDmode
))
10293 op0
= force_reg (GET_MODE (operands
[0]), op0
);
10294 if (GET_MODE (operands
[0]) == SFmode
)
10295 emit_insn (gen_maxsf3 (operands
[0], op0
, op1
));
10297 emit_insn (gen_maxdf3 (operands
[0], op0
, op1
));
10301 /* Manage condition to be sse_comparison_operator. In case we are
10302 in non-ieee mode, try to canonicalize the destination operand
10303 to be first in the comparison - this helps reload to avoid extra
10305 if (!sse_comparison_operator (operands
[1], VOIDmode
)
10306 || (rtx_equal_p (operands
[0], ix86_compare_op1
) && !TARGET_IEEE_FP
))
10308 rtx tmp
= ix86_compare_op0
;
10309 ix86_compare_op0
= ix86_compare_op1
;
10310 ix86_compare_op1
= tmp
;
10311 operands
[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands
[1])),
10312 VOIDmode
, ix86_compare_op0
,
10315 /* Similarly try to manage result to be first operand of conditional
10316 move. We also don't support the NE comparison on SSE, so try to
10318 if ((rtx_equal_p (operands
[0], operands
[3])
10319 && (!TARGET_IEEE_FP
|| GET_CODE (operands
[1]) != EQ
))
10320 || (GET_CODE (operands
[1]) == NE
&& TARGET_IEEE_FP
))
10322 rtx tmp
= operands
[2];
10323 operands
[2] = operands
[3];
10325 operands
[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10326 (GET_CODE (operands
[1])),
10327 VOIDmode
, ix86_compare_op0
,
10330 if (GET_MODE (operands
[0]) == SFmode
)
10331 emit_insn (gen_sse_movsfcc (operands
[0], operands
[1],
10332 operands
[2], operands
[3],
10333 ix86_compare_op0
, ix86_compare_op1
));
10335 emit_insn (gen_sse_movdfcc (operands
[0], operands
[1],
10336 operands
[2], operands
[3],
10337 ix86_compare_op0
, ix86_compare_op1
));
10341 /* The floating point conditional move instructions don't directly
10342 support conditions resulting from a signed integer comparison. */
10344 code
= GET_CODE (operands
[1]);
10345 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
10347 /* The floating point conditional move instructions don't directly
10348 support signed integer comparisons. */
10350 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
10352 if (second_test
!= NULL
|| bypass_test
!= NULL
)
10354 tmp
= gen_reg_rtx (QImode
);
10355 ix86_expand_setcc (code
, tmp
);
10357 ix86_compare_op0
= tmp
;
10358 ix86_compare_op1
= const0_rtx
;
10359 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
10361 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
10363 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
10364 emit_move_insn (tmp
, operands
[3]);
10367 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
10369 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
10370 emit_move_insn (tmp
, operands
[2]);
10374 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
10375 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
10380 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
10381 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
10386 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
10387 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
10395 /* Expand conditional increment or decrement using adb/sbb instructions.
10396 The default case using setcc followed by the conditional move can be
10397 done by generic code. */
10399 ix86_expand_int_addcc (rtx operands
[])
10401 enum rtx_code code
= GET_CODE (operands
[1]);
10403 rtx val
= const0_rtx
;
10404 bool fpcmp
= false;
10405 enum machine_mode mode
= GET_MODE (operands
[0]);
10407 if (operands
[3] != const1_rtx
10408 && operands
[3] != constm1_rtx
)
10410 if (!ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
10411 ix86_compare_op1
, &compare_op
))
10413 code
= GET_CODE (compare_op
);
10415 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
10416 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
10419 code
= ix86_fp_compare_code_to_integer (code
);
10426 PUT_CODE (compare_op
,
10427 reverse_condition_maybe_unordered
10428 (GET_CODE (compare_op
)));
10430 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
10432 PUT_MODE (compare_op
, mode
);
10434 /* Construct either adc or sbb insn. */
10435 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
10437 switch (GET_MODE (operands
[0]))
10440 emit_insn (gen_subqi3_carry (operands
[0], operands
[2], val
, compare_op
));
10443 emit_insn (gen_subhi3_carry (operands
[0], operands
[2], val
, compare_op
));
10446 emit_insn (gen_subsi3_carry (operands
[0], operands
[2], val
, compare_op
));
10449 emit_insn (gen_subdi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
10457 switch (GET_MODE (operands
[0]))
10460 emit_insn (gen_addqi3_carry (operands
[0], operands
[2], val
, compare_op
));
10463 emit_insn (gen_addhi3_carry (operands
[0], operands
[2], val
, compare_op
));
10466 emit_insn (gen_addsi3_carry (operands
[0], operands
[2], val
, compare_op
));
10469 emit_insn (gen_adddi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
10475 return 1; /* DONE */
10479 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10480 works for floating pointer parameters and nonoffsetable memories.
10481 For pushes, it returns just stack offsets; the values will be saved
10482 in the right order. Maximally three parts are generated. */
10485 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
10490 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
10492 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
10494 if (GET_CODE (operand
) == REG
&& MMX_REGNO_P (REGNO (operand
)))
10496 if (size
< 2 || size
> 3)
10499 /* Optimize constant pool reference to immediates. This is used by fp
10500 moves, that force all constants to memory to allow combining. */
10501 if (GET_CODE (operand
) == MEM
&& RTX_UNCHANGING_P (operand
))
10503 rtx tmp
= maybe_get_pool_constant (operand
);
10508 if (GET_CODE (operand
) == MEM
&& !offsettable_memref_p (operand
))
10510 /* The only non-offsetable memories we handle are pushes. */
10511 if (! push_operand (operand
, VOIDmode
))
10514 operand
= copy_rtx (operand
);
10515 PUT_MODE (operand
, Pmode
);
10516 parts
[0] = parts
[1] = parts
[2] = operand
;
10518 else if (!TARGET_64BIT
)
10520 if (mode
== DImode
)
10521 split_di (&operand
, 1, &parts
[0], &parts
[1]);
10524 if (REG_P (operand
))
10526 if (!reload_completed
)
10528 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
10529 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
10531 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
10533 else if (offsettable_memref_p (operand
))
10535 operand
= adjust_address (operand
, SImode
, 0);
10536 parts
[0] = operand
;
10537 parts
[1] = adjust_address (operand
, SImode
, 4);
10539 parts
[2] = adjust_address (operand
, SImode
, 8);
10541 else if (GET_CODE (operand
) == CONST_DOUBLE
)
10546 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
10550 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
10551 parts
[2] = gen_int_mode (l
[2], SImode
);
10554 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
10559 parts
[1] = gen_int_mode (l
[1], SImode
);
10560 parts
[0] = gen_int_mode (l
[0], SImode
);
10568 if (mode
== TImode
)
10569 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
10570 if (mode
== XFmode
|| mode
== TFmode
)
10572 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
10573 if (REG_P (operand
))
10575 if (!reload_completed
)
10577 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
10578 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
10580 else if (offsettable_memref_p (operand
))
10582 operand
= adjust_address (operand
, DImode
, 0);
10583 parts
[0] = operand
;
10584 parts
[1] = adjust_address (operand
, upper_mode
, 8);
10586 else if (GET_CODE (operand
) == CONST_DOUBLE
)
10591 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
10592 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
10593 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10594 if (HOST_BITS_PER_WIDE_INT
>= 64)
10597 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
10598 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
10601 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
10602 if (upper_mode
== SImode
)
10603 parts
[1] = gen_int_mode (l
[2], SImode
);
10604 else if (HOST_BITS_PER_WIDE_INT
>= 64)
10607 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
10608 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
10611 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
10621 /* Emit insns to perform a move or push of DI, DF, and XF values.
10622 Return false when normal moves are needed; true when all required
10623 insns have been emitted. Operands 2-4 contain the input values
10624 int the correct order; operands 5-7 contain the output values. */
10627 ix86_split_long_move (rtx operands
[])
10632 int collisions
= 0;
10633 enum machine_mode mode
= GET_MODE (operands
[0]);
10635 /* The DFmode expanders may ask us to move double.
10636 For 64bit target this is single move. By hiding the fact
10637 here we simplify i386.md splitters. */
10638 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
10640 /* Optimize constant pool reference to immediates. This is used by
10641 fp moves, that force all constants to memory to allow combining. */
10643 if (GET_CODE (operands
[1]) == MEM
10644 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
10645 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
10646 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
10647 if (push_operand (operands
[0], VOIDmode
))
10649 operands
[0] = copy_rtx (operands
[0]);
10650 PUT_MODE (operands
[0], Pmode
);
10653 operands
[0] = gen_lowpart (DImode
, operands
[0]);
10654 operands
[1] = gen_lowpart (DImode
, operands
[1]);
10655 emit_move_insn (operands
[0], operands
[1]);
10659 /* The only non-offsettable memory we handle is push. */
10660 if (push_operand (operands
[0], VOIDmode
))
10662 else if (GET_CODE (operands
[0]) == MEM
10663 && ! offsettable_memref_p (operands
[0]))
10666 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
10667 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
10669 /* When emitting push, take care for source operands on the stack. */
10670 if (push
&& GET_CODE (operands
[1]) == MEM
10671 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
10674 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
10675 XEXP (part
[1][2], 0));
10676 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
10677 XEXP (part
[1][1], 0));
10680 /* We need to do copy in the right order in case an address register
10681 of the source overlaps the destination. */
10682 if (REG_P (part
[0][0]) && GET_CODE (part
[1][0]) == MEM
)
10684 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
10686 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
10689 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
10692 /* Collision in the middle part can be handled by reordering. */
10693 if (collisions
== 1 && nparts
== 3
10694 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
10697 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
10698 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
10701 /* If there are more collisions, we can't handle it by reordering.
10702 Do an lea to the last part and use only one colliding move. */
10703 else if (collisions
> 1)
10709 base
= part
[0][nparts
- 1];
10711 /* Handle the case when the last part isn't valid for lea.
10712 Happens in 64-bit mode storing the 12-byte XFmode. */
10713 if (GET_MODE (base
) != Pmode
)
10714 base
= gen_rtx_REG (Pmode
, REGNO (base
));
10716 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
10717 part
[1][0] = replace_equiv_address (part
[1][0], base
);
10718 part
[1][1] = replace_equiv_address (part
[1][1],
10719 plus_constant (base
, UNITS_PER_WORD
));
10721 part
[1][2] = replace_equiv_address (part
[1][2],
10722 plus_constant (base
, 8));
10732 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
10733 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, GEN_INT (-4)));
10734 emit_move_insn (part
[0][2], part
[1][2]);
10739 /* In 64bit mode we don't have 32bit push available. In case this is
10740 register, it is OK - we will just use larger counterpart. We also
10741 retype memory - these comes from attempt to avoid REX prefix on
10742 moving of second half of TFmode value. */
10743 if (GET_MODE (part
[1][1]) == SImode
)
10745 if (GET_CODE (part
[1][1]) == MEM
)
10746 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
10747 else if (REG_P (part
[1][1]))
10748 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
10751 if (GET_MODE (part
[1][0]) == SImode
)
10752 part
[1][0] = part
[1][1];
10755 emit_move_insn (part
[0][1], part
[1][1]);
10756 emit_move_insn (part
[0][0], part
[1][0]);
10760 /* Choose correct order to not overwrite the source before it is copied. */
10761 if ((REG_P (part
[0][0])
10762 && REG_P (part
[1][1])
10763 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
10765 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
10767 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
10771 operands
[2] = part
[0][2];
10772 operands
[3] = part
[0][1];
10773 operands
[4] = part
[0][0];
10774 operands
[5] = part
[1][2];
10775 operands
[6] = part
[1][1];
10776 operands
[7] = part
[1][0];
10780 operands
[2] = part
[0][1];
10781 operands
[3] = part
[0][0];
10782 operands
[5] = part
[1][1];
10783 operands
[6] = part
[1][0];
10790 operands
[2] = part
[0][0];
10791 operands
[3] = part
[0][1];
10792 operands
[4] = part
[0][2];
10793 operands
[5] = part
[1][0];
10794 operands
[6] = part
[1][1];
10795 operands
[7] = part
[1][2];
10799 operands
[2] = part
[0][0];
10800 operands
[3] = part
[0][1];
10801 operands
[5] = part
[1][0];
10802 operands
[6] = part
[1][1];
10805 emit_move_insn (operands
[2], operands
[5]);
10806 emit_move_insn (operands
[3], operands
[6]);
10808 emit_move_insn (operands
[4], operands
[7]);
10814 ix86_split_ashldi (rtx
*operands
, rtx scratch
)
10816 rtx low
[2], high
[2];
10819 if (GET_CODE (operands
[2]) == CONST_INT
)
10821 split_di (operands
, 2, low
, high
);
10822 count
= INTVAL (operands
[2]) & 63;
10826 emit_move_insn (high
[0], low
[1]);
10827 emit_move_insn (low
[0], const0_rtx
);
10830 emit_insn (gen_ashlsi3 (high
[0], high
[0], GEN_INT (count
- 32)));
10834 if (!rtx_equal_p (operands
[0], operands
[1]))
10835 emit_move_insn (operands
[0], operands
[1]);
10836 emit_insn (gen_x86_shld_1 (high
[0], low
[0], GEN_INT (count
)));
10837 emit_insn (gen_ashlsi3 (low
[0], low
[0], GEN_INT (count
)));
10842 if (!rtx_equal_p (operands
[0], operands
[1]))
10843 emit_move_insn (operands
[0], operands
[1]);
10845 split_di (operands
, 1, low
, high
);
10847 emit_insn (gen_x86_shld_1 (high
[0], low
[0], operands
[2]));
10848 emit_insn (gen_ashlsi3 (low
[0], low
[0], operands
[2]));
10850 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
10852 if (! no_new_pseudos
)
10853 scratch
= force_reg (SImode
, const0_rtx
);
10855 emit_move_insn (scratch
, const0_rtx
);
10857 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2],
10861 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
10866 ix86_split_ashrdi (rtx
*operands
, rtx scratch
)
10868 rtx low
[2], high
[2];
10871 if (GET_CODE (operands
[2]) == CONST_INT
)
10873 split_di (operands
, 2, low
, high
);
10874 count
= INTVAL (operands
[2]) & 63;
10878 emit_move_insn (low
[0], high
[1]);
10880 if (! reload_completed
)
10881 emit_insn (gen_ashrsi3 (high
[0], low
[0], GEN_INT (31)));
10884 emit_move_insn (high
[0], low
[0]);
10885 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (31)));
10889 emit_insn (gen_ashrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
10893 if (!rtx_equal_p (operands
[0], operands
[1]))
10894 emit_move_insn (operands
[0], operands
[1]);
10895 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
10896 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (count
)));
10901 if (!rtx_equal_p (operands
[0], operands
[1]))
10902 emit_move_insn (operands
[0], operands
[1]);
10904 split_di (operands
, 1, low
, high
);
10906 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
10907 emit_insn (gen_ashrsi3 (high
[0], high
[0], operands
[2]));
10909 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
10911 if (! no_new_pseudos
)
10912 scratch
= gen_reg_rtx (SImode
);
10913 emit_move_insn (scratch
, high
[0]);
10914 emit_insn (gen_ashrsi3 (scratch
, scratch
, GEN_INT (31)));
10915 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
10919 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
10924 ix86_split_lshrdi (rtx
*operands
, rtx scratch
)
10926 rtx low
[2], high
[2];
10929 if (GET_CODE (operands
[2]) == CONST_INT
)
10931 split_di (operands
, 2, low
, high
);
10932 count
= INTVAL (operands
[2]) & 63;
10936 emit_move_insn (low
[0], high
[1]);
10937 emit_move_insn (high
[0], const0_rtx
);
10940 emit_insn (gen_lshrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
10944 if (!rtx_equal_p (operands
[0], operands
[1]))
10945 emit_move_insn (operands
[0], operands
[1]);
10946 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
10947 emit_insn (gen_lshrsi3 (high
[0], high
[0], GEN_INT (count
)));
10952 if (!rtx_equal_p (operands
[0], operands
[1]))
10953 emit_move_insn (operands
[0], operands
[1]);
10955 split_di (operands
, 1, low
, high
);
10957 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
10958 emit_insn (gen_lshrsi3 (high
[0], high
[0], operands
[2]));
10960 /* Heh. By reversing the arguments, we can reuse this pattern. */
10961 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
10963 if (! no_new_pseudos
)
10964 scratch
= force_reg (SImode
, const0_rtx
);
10966 emit_move_insn (scratch
, const0_rtx
);
10968 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
10972 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
10976 /* Helper function for the string operations below. Dest VARIABLE whether
10977 it is aligned to VALUE bytes. If true, jump to the label. */
10979 ix86_expand_aligntest (rtx variable
, int value
)
10981 rtx label
= gen_label_rtx ();
10982 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
10983 if (GET_MODE (variable
) == DImode
)
10984 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
10986 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
10987 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
10992 /* Adjust COUNTER by the VALUE. */
10994 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
10996 if (GET_MODE (countreg
) == DImode
)
10997 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
10999 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
11002 /* Zero extend possibly SImode EXP to Pmode register. */
11004 ix86_zero_extend_to_Pmode (rtx exp
)
11007 if (GET_MODE (exp
) == VOIDmode
)
11008 return force_reg (Pmode
, exp
);
11009 if (GET_MODE (exp
) == Pmode
)
11010 return copy_to_mode_reg (Pmode
, exp
);
11011 r
= gen_reg_rtx (Pmode
);
11012 emit_insn (gen_zero_extendsidi2 (r
, exp
));
11016 /* Expand string move (memcpy) operation. Use i386 string operations when
11017 profitable. expand_clrstr contains similar code. */
11019 ix86_expand_movstr (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
)
11021 rtx srcreg
, destreg
, countreg
, srcexp
, destexp
;
11022 enum machine_mode counter_mode
;
11023 HOST_WIDE_INT align
= 0;
11024 unsigned HOST_WIDE_INT count
= 0;
11026 if (GET_CODE (align_exp
) == CONST_INT
)
11027 align
= INTVAL (align_exp
);
11029 /* Can't use any of this if the user has appropriated esi or edi. */
11030 if (global_regs
[4] || global_regs
[5])
11033 /* This simple hack avoids all inlining code and simplifies code below. */
11034 if (!TARGET_ALIGN_STRINGOPS
)
11037 if (GET_CODE (count_exp
) == CONST_INT
)
11039 count
= INTVAL (count_exp
);
11040 if (!TARGET_INLINE_ALL_STRINGOPS
&& count
> 64)
11044 /* Figure out proper mode for counter. For 32bits it is always SImode,
11045 for 64bits use SImode when possible, otherwise DImode.
11046 Set count to number of bytes copied when known at compile time. */
11047 if (!TARGET_64BIT
|| GET_MODE (count_exp
) == SImode
11048 || x86_64_zero_extended_value (count_exp
))
11049 counter_mode
= SImode
;
11051 counter_mode
= DImode
;
11053 if (counter_mode
!= SImode
&& counter_mode
!= DImode
)
11056 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
11057 if (destreg
!= XEXP (dst
, 0))
11058 dst
= replace_equiv_address_nv (dst
, destreg
);
11059 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
11060 if (srcreg
!= XEXP (src
, 0))
11061 src
= replace_equiv_address_nv (src
, srcreg
);
11063 /* When optimizing for size emit simple rep ; movsb instruction for
11064 counts not divisible by 4. */
11066 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
11068 emit_insn (gen_cld ());
11069 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
11070 destexp
= gen_rtx_PLUS (Pmode
, destreg
, countreg
);
11071 srcexp
= gen_rtx_PLUS (Pmode
, srcreg
, countreg
);
11072 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
, countreg
,
11076 /* For constant aligned (or small unaligned) copies use rep movsl
11077 followed by code copying the rest. For PentiumPro ensure 8 byte
11078 alignment to allow rep movsl acceleration. */
11080 else if (count
!= 0
11082 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
11083 || optimize_size
|| count
< (unsigned int) 64))
11085 unsigned HOST_WIDE_INT offset
= 0;
11086 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
11087 rtx srcmem
, dstmem
;
11089 emit_insn (gen_cld ());
11090 if (count
& ~(size
- 1))
11092 countreg
= copy_to_mode_reg (counter_mode
,
11093 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
11094 & (TARGET_64BIT
? -1 : 0x3fffffff)));
11095 countreg
= ix86_zero_extend_to_Pmode (countreg
);
11097 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
11098 GEN_INT (size
== 4 ? 2 : 3));
11099 srcexp
= gen_rtx_PLUS (Pmode
, destexp
, srcreg
);
11100 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
11102 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
,
11103 countreg
, destexp
, srcexp
));
11104 offset
= count
& ~(size
- 1);
11106 if (size
== 8 && (count
& 0x04))
11108 srcmem
= adjust_automodify_address_nv (src
, SImode
, srcreg
,
11110 dstmem
= adjust_automodify_address_nv (dst
, SImode
, destreg
,
11112 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11117 srcmem
= adjust_automodify_address_nv (src
, HImode
, srcreg
,
11119 dstmem
= adjust_automodify_address_nv (dst
, HImode
, destreg
,
11121 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11126 srcmem
= adjust_automodify_address_nv (src
, QImode
, srcreg
,
11128 dstmem
= adjust_automodify_address_nv (dst
, QImode
, destreg
,
11130 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11133 /* The generic code based on the glibc implementation:
11134 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11135 allowing accelerated copying there)
11136 - copy the data using rep movsl
11137 - copy the rest. */
11142 rtx srcmem
, dstmem
;
11143 int desired_alignment
= (TARGET_PENTIUMPRO
11144 && (count
== 0 || count
>= (unsigned int) 260)
11145 ? 8 : UNITS_PER_WORD
);
11146 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11147 dst
= change_address (dst
, BLKmode
, destreg
);
11148 src
= change_address (src
, BLKmode
, srcreg
);
11150 /* In case we don't know anything about the alignment, default to
11151 library version, since it is usually equally fast and result in
11154 Also emit call when we know that the count is large and call overhead
11155 will not be important. */
11156 if (!TARGET_INLINE_ALL_STRINGOPS
11157 && (align
< UNITS_PER_WORD
|| !TARGET_REP_MOVL_OPTIMAL
))
11160 if (TARGET_SINGLE_STRINGOP
)
11161 emit_insn (gen_cld ());
11163 countreg2
= gen_reg_rtx (Pmode
);
11164 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
11166 /* We don't use loops to align destination and to copy parts smaller
11167 than 4 bytes, because gcc is able to optimize such code better (in
11168 the case the destination or the count really is aligned, gcc is often
11169 able to predict the branches) and also it is friendlier to the
11170 hardware branch prediction.
11172 Using loops is beneficial for generic case, because we can
11173 handle small counts using the loops. Many CPUs (such as Athlon)
11174 have large REP prefix setup costs.
11176 This is quite costly. Maybe we can revisit this decision later or
11177 add some customizability to this code. */
11179 if (count
== 0 && align
< desired_alignment
)
11181 label
= gen_label_rtx ();
11182 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
11183 LEU
, 0, counter_mode
, 1, label
);
11187 rtx label
= ix86_expand_aligntest (destreg
, 1);
11188 srcmem
= change_address (src
, QImode
, srcreg
);
11189 dstmem
= change_address (dst
, QImode
, destreg
);
11190 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11191 ix86_adjust_counter (countreg
, 1);
11192 emit_label (label
);
11193 LABEL_NUSES (label
) = 1;
11197 rtx label
= ix86_expand_aligntest (destreg
, 2);
11198 srcmem
= change_address (src
, HImode
, srcreg
);
11199 dstmem
= change_address (dst
, HImode
, destreg
);
11200 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11201 ix86_adjust_counter (countreg
, 2);
11202 emit_label (label
);
11203 LABEL_NUSES (label
) = 1;
11205 if (align
<= 4 && desired_alignment
> 4)
11207 rtx label
= ix86_expand_aligntest (destreg
, 4);
11208 srcmem
= change_address (src
, SImode
, srcreg
);
11209 dstmem
= change_address (dst
, SImode
, destreg
);
11210 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11211 ix86_adjust_counter (countreg
, 4);
11212 emit_label (label
);
11213 LABEL_NUSES (label
) = 1;
11216 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
11218 emit_label (label
);
11219 LABEL_NUSES (label
) = 1;
11222 if (!TARGET_SINGLE_STRINGOP
)
11223 emit_insn (gen_cld ());
11226 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
11228 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, GEN_INT (3));
11232 emit_insn (gen_lshrsi3 (countreg2
, countreg
, const2_rtx
));
11233 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, const2_rtx
);
11235 srcexp
= gen_rtx_PLUS (Pmode
, destexp
, srcreg
);
11236 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
11237 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
,
11238 countreg2
, destexp
, srcexp
));
11242 emit_label (label
);
11243 LABEL_NUSES (label
) = 1;
11245 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
11247 srcmem
= change_address (src
, SImode
, srcreg
);
11248 dstmem
= change_address (dst
, SImode
, destreg
);
11249 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11251 if ((align
<= 4 || count
== 0) && TARGET_64BIT
)
11253 rtx label
= ix86_expand_aligntest (countreg
, 4);
11254 srcmem
= change_address (src
, SImode
, srcreg
);
11255 dstmem
= change_address (dst
, SImode
, destreg
);
11256 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11257 emit_label (label
);
11258 LABEL_NUSES (label
) = 1;
11260 if (align
> 2 && count
!= 0 && (count
& 2))
11262 srcmem
= change_address (src
, HImode
, srcreg
);
11263 dstmem
= change_address (dst
, HImode
, destreg
);
11264 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11266 if (align
<= 2 || count
== 0)
11268 rtx label
= ix86_expand_aligntest (countreg
, 2);
11269 srcmem
= change_address (src
, HImode
, srcreg
);
11270 dstmem
= change_address (dst
, HImode
, destreg
);
11271 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11272 emit_label (label
);
11273 LABEL_NUSES (label
) = 1;
11275 if (align
> 1 && count
!= 0 && (count
& 1))
11277 srcmem
= change_address (src
, QImode
, srcreg
);
11278 dstmem
= change_address (dst
, QImode
, destreg
);
11279 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11281 if (align
<= 1 || count
== 0)
11283 rtx label
= ix86_expand_aligntest (countreg
, 1);
11284 srcmem
= change_address (src
, QImode
, srcreg
);
11285 dstmem
= change_address (dst
, QImode
, destreg
);
11286 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11287 emit_label (label
);
11288 LABEL_NUSES (label
) = 1;
11295 /* Expand string clear operation (bzero). Use i386 string operations when
11296 profitable. expand_movstr contains similar code. */
11298 ix86_expand_clrstr (rtx dst
, rtx count_exp
, rtx align_exp
)
11300 rtx destreg
, zeroreg
, countreg
, destexp
;
11301 enum machine_mode counter_mode
;
11302 HOST_WIDE_INT align
= 0;
11303 unsigned HOST_WIDE_INT count
= 0;
11305 if (GET_CODE (align_exp
) == CONST_INT
)
11306 align
= INTVAL (align_exp
);
11308 /* Can't use any of this if the user has appropriated esi. */
11309 if (global_regs
[4])
11312 /* This simple hack avoids all inlining code and simplifies code below. */
11313 if (!TARGET_ALIGN_STRINGOPS
)
11316 if (GET_CODE (count_exp
) == CONST_INT
)
11318 count
= INTVAL (count_exp
);
11319 if (!TARGET_INLINE_ALL_STRINGOPS
&& count
> 64)
11322 /* Figure out proper mode for counter. For 32bits it is always SImode,
11323 for 64bits use SImode when possible, otherwise DImode.
11324 Set count to number of bytes copied when known at compile time. */
11325 if (!TARGET_64BIT
|| GET_MODE (count_exp
) == SImode
11326 || x86_64_zero_extended_value (count_exp
))
11327 counter_mode
= SImode
;
11329 counter_mode
= DImode
;
11331 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
11332 if (destreg
!= XEXP (dst
, 0))
11333 dst
= replace_equiv_address_nv (dst
, destreg
);
11335 emit_insn (gen_cld ());
11337 /* When optimizing for size emit simple rep ; movsb instruction for
11338 counts not divisible by 4. */
11340 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
11342 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
11343 zeroreg
= copy_to_mode_reg (QImode
, const0_rtx
);
11344 destexp
= gen_rtx_PLUS (Pmode
, destreg
, countreg
);
11345 emit_insn (gen_rep_stos (destreg
, countreg
, dst
, zeroreg
, destexp
));
11347 else if (count
!= 0
11349 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
11350 || optimize_size
|| count
< (unsigned int) 64))
11352 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
11353 unsigned HOST_WIDE_INT offset
= 0;
11355 zeroreg
= copy_to_mode_reg (size
== 4 ? SImode
: DImode
, const0_rtx
);
11356 if (count
& ~(size
- 1))
11358 countreg
= copy_to_mode_reg (counter_mode
,
11359 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
11360 & (TARGET_64BIT
? -1 : 0x3fffffff)));
11361 countreg
= ix86_zero_extend_to_Pmode (countreg
);
11362 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
, GEN_INT (size
== 4 ? 2 : 3));
11363 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
11364 emit_insn (gen_rep_stos (destreg
, countreg
, dst
, zeroreg
, destexp
));
11365 offset
= count
& ~(size
- 1);
11367 if (size
== 8 && (count
& 0x04))
11369 rtx mem
= adjust_automodify_address_nv (dst
, SImode
, destreg
,
11371 emit_insn (gen_strset (destreg
, mem
,
11372 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
11377 rtx mem
= adjust_automodify_address_nv (dst
, HImode
, destreg
,
11379 emit_insn (gen_strset (destreg
, mem
,
11380 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
11385 rtx mem
= adjust_automodify_address_nv (dst
, QImode
, destreg
,
11387 emit_insn (gen_strset (destreg
, mem
,
11388 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
11395 /* Compute desired alignment of the string operation. */
11396 int desired_alignment
= (TARGET_PENTIUMPRO
11397 && (count
== 0 || count
>= (unsigned int) 260)
11398 ? 8 : UNITS_PER_WORD
);
11400 /* In case we don't know anything about the alignment, default to
11401 library version, since it is usually equally fast and result in
11404 Also emit call when we know that the count is large and call overhead
11405 will not be important. */
11406 if (!TARGET_INLINE_ALL_STRINGOPS
11407 && (align
< UNITS_PER_WORD
|| !TARGET_REP_MOVL_OPTIMAL
))
11410 if (TARGET_SINGLE_STRINGOP
)
11411 emit_insn (gen_cld ());
11413 countreg2
= gen_reg_rtx (Pmode
);
11414 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
11415 zeroreg
= copy_to_mode_reg (Pmode
, const0_rtx
);
11416 /* Get rid of MEM_OFFSET, it won't be accurate. */
11417 dst
= change_address (dst
, BLKmode
, destreg
);
11419 if (count
== 0 && align
< desired_alignment
)
11421 label
= gen_label_rtx ();
11422 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
11423 LEU
, 0, counter_mode
, 1, label
);
11427 rtx label
= ix86_expand_aligntest (destreg
, 1);
11428 emit_insn (gen_strset (destreg
, dst
,
11429 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
11430 ix86_adjust_counter (countreg
, 1);
11431 emit_label (label
);
11432 LABEL_NUSES (label
) = 1;
11436 rtx label
= ix86_expand_aligntest (destreg
, 2);
11437 emit_insn (gen_strset (destreg
, dst
,
11438 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
11439 ix86_adjust_counter (countreg
, 2);
11440 emit_label (label
);
11441 LABEL_NUSES (label
) = 1;
11443 if (align
<= 4 && desired_alignment
> 4)
11445 rtx label
= ix86_expand_aligntest (destreg
, 4);
11446 emit_insn (gen_strset (destreg
, dst
,
11448 ? gen_rtx_SUBREG (SImode
, zeroreg
, 0)
11450 ix86_adjust_counter (countreg
, 4);
11451 emit_label (label
);
11452 LABEL_NUSES (label
) = 1;
11455 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
11457 emit_label (label
);
11458 LABEL_NUSES (label
) = 1;
11462 if (!TARGET_SINGLE_STRINGOP
)
11463 emit_insn (gen_cld ());
11466 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
11468 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, GEN_INT (3));
11472 emit_insn (gen_lshrsi3 (countreg2
, countreg
, const2_rtx
));
11473 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, const2_rtx
);
11475 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
11476 emit_insn (gen_rep_stos (destreg
, countreg2
, dst
, zeroreg
, destexp
));
11480 emit_label (label
);
11481 LABEL_NUSES (label
) = 1;
11484 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
11485 emit_insn (gen_strset (destreg
, dst
,
11486 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
11487 if (TARGET_64BIT
&& (align
<= 4 || count
== 0))
11489 rtx label
= ix86_expand_aligntest (countreg
, 4);
11490 emit_insn (gen_strset (destreg
, dst
,
11491 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
11492 emit_label (label
);
11493 LABEL_NUSES (label
) = 1;
11495 if (align
> 2 && count
!= 0 && (count
& 2))
11496 emit_insn (gen_strset (destreg
, dst
,
11497 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
11498 if (align
<= 2 || count
== 0)
11500 rtx label
= ix86_expand_aligntest (countreg
, 2);
11501 emit_insn (gen_strset (destreg
, dst
,
11502 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
11503 emit_label (label
);
11504 LABEL_NUSES (label
) = 1;
11506 if (align
> 1 && count
!= 0 && (count
& 1))
11507 emit_insn (gen_strset (destreg
, dst
,
11508 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
11509 if (align
<= 1 || count
== 0)
11511 rtx label
= ix86_expand_aligntest (countreg
, 1);
11512 emit_insn (gen_strset (destreg
, dst
,
11513 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
11514 emit_label (label
);
11515 LABEL_NUSES (label
) = 1;
11521 /* Expand strlen. */
11523 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
11525 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
11527 /* The generic case of strlen expander is long. Avoid it's
11528 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11530 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
11531 && !TARGET_INLINE_ALL_STRINGOPS
11533 && (GET_CODE (align
) != CONST_INT
|| INTVAL (align
) < 4))
11536 addr
= force_reg (Pmode
, XEXP (src
, 0));
11537 scratch1
= gen_reg_rtx (Pmode
);
11539 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
11542 /* Well it seems that some optimizer does not combine a call like
11543 foo(strlen(bar), strlen(bar));
11544 when the move and the subtraction is done here. It does calculate
11545 the length just once when these instructions are done inside of
11546 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11547 often used and I use one fewer register for the lifetime of
11548 output_strlen_unroll() this is better. */
11550 emit_move_insn (out
, addr
);
11552 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
11554 /* strlensi_unroll_1 returns the address of the zero at the end of
11555 the string, like memchr(), so compute the length by subtracting
11556 the start address. */
11558 emit_insn (gen_subdi3 (out
, out
, addr
));
11560 emit_insn (gen_subsi3 (out
, out
, addr
));
11565 scratch2
= gen_reg_rtx (Pmode
);
11566 scratch3
= gen_reg_rtx (Pmode
);
11567 scratch4
= force_reg (Pmode
, constm1_rtx
);
11569 emit_move_insn (scratch3
, addr
);
11570 eoschar
= force_reg (QImode
, eoschar
);
11572 emit_insn (gen_cld ());
11573 src
= replace_equiv_address_nv (src
, scratch3
);
11575 /* If .md starts supporting :P, this can be done in .md. */
11576 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
11577 scratch4
), UNSPEC_SCAS
);
11578 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
11581 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
11582 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
11586 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
11587 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
11593 /* Expand the appropriate insns for doing strlen if not just doing
11596 out = result, initialized with the start address
11597 align_rtx = alignment of the address.
11598 scratch = scratch register, initialized with the startaddress when
11599 not aligned, otherwise undefined
11601 This is just the body. It needs the initializations mentioned above and
11602 some address computing at the end. These things are done in i386.md. */
11605 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
11609 rtx align_2_label
= NULL_RTX
;
11610 rtx align_3_label
= NULL_RTX
;
11611 rtx align_4_label
= gen_label_rtx ();
11612 rtx end_0_label
= gen_label_rtx ();
11614 rtx tmpreg
= gen_reg_rtx (SImode
);
11615 rtx scratch
= gen_reg_rtx (SImode
);
11619 if (GET_CODE (align_rtx
) == CONST_INT
)
11620 align
= INTVAL (align_rtx
);
11622 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11624 /* Is there a known alignment and is it less than 4? */
11627 rtx scratch1
= gen_reg_rtx (Pmode
);
11628 emit_move_insn (scratch1
, out
);
11629 /* Is there a known alignment and is it not 2? */
11632 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
11633 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
11635 /* Leave just the 3 lower bits. */
11636 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
11637 NULL_RTX
, 0, OPTAB_WIDEN
);
11639 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
11640 Pmode
, 1, align_4_label
);
11641 emit_cmp_and_jump_insns (align_rtx
, GEN_INT (2), EQ
, NULL
,
11642 Pmode
, 1, align_2_label
);
11643 emit_cmp_and_jump_insns (align_rtx
, GEN_INT (2), GTU
, NULL
,
11644 Pmode
, 1, align_3_label
);
11648 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11649 check if is aligned to 4 - byte. */
11651 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (2),
11652 NULL_RTX
, 0, OPTAB_WIDEN
);
11654 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
11655 Pmode
, 1, align_4_label
);
11658 mem
= change_address (src
, QImode
, out
);
11660 /* Now compare the bytes. */
11662 /* Compare the first n unaligned byte on a byte per byte basis. */
11663 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
11664 QImode
, 1, end_0_label
);
11666 /* Increment the address. */
11668 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
11670 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
11672 /* Not needed with an alignment of 2 */
11675 emit_label (align_2_label
);
11677 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
11681 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
11683 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
11685 emit_label (align_3_label
);
11688 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
11692 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
11694 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
11697 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11698 align this loop. It gives only huge programs, but does not help to
11700 emit_label (align_4_label
);
11702 mem
= change_address (src
, SImode
, out
);
11703 emit_move_insn (scratch
, mem
);
11705 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
11707 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
11709 /* This formula yields a nonzero result iff one of the bytes is zero.
11710 This saves three branches inside loop and many cycles. */
11712 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
11713 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
11714 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
11715 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
11716 gen_int_mode (0x80808080, SImode
)));
11717 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
11722 rtx reg
= gen_reg_rtx (SImode
);
11723 rtx reg2
= gen_reg_rtx (Pmode
);
11724 emit_move_insn (reg
, tmpreg
);
11725 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
11727 /* If zero is not in the first two bytes, move two bytes forward. */
11728 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
11729 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
11730 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
11731 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
11732 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
11735 /* Emit lea manually to avoid clobbering of flags. */
11736 emit_insn (gen_rtx_SET (SImode
, reg2
,
11737 gen_rtx_PLUS (Pmode
, out
, GEN_INT (2))));
11739 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
11740 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
11741 emit_insn (gen_rtx_SET (VOIDmode
, out
,
11742 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
11749 rtx end_2_label
= gen_label_rtx ();
11750 /* Is zero in the first two bytes? */
11752 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
11753 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
11754 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
11755 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11756 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
11758 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
11759 JUMP_LABEL (tmp
) = end_2_label
;
11761 /* Not in the first two. Move two bytes forward. */
11762 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
11764 emit_insn (gen_adddi3 (out
, out
, GEN_INT (2)));
11766 emit_insn (gen_addsi3 (out
, out
, GEN_INT (2)));
11768 emit_label (end_2_label
);
11772 /* Avoid branch in fixing the byte. */
11773 tmpreg
= gen_lowpart (QImode
, tmpreg
);
11774 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
11775 cmp
= gen_rtx_LTU (Pmode
, gen_rtx_REG (CCmode
, 17), const0_rtx
);
11777 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3), cmp
));
11779 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3), cmp
));
11781 emit_label (end_0_label
);
11785 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
11786 rtx callarg2 ATTRIBUTE_UNUSED
,
11787 rtx pop
, int sibcall
)
11789 rtx use
= NULL
, call
;
11791 if (pop
== const0_rtx
)
11793 if (TARGET_64BIT
&& pop
)
11797 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
11798 fnaddr
= machopic_indirect_call_target (fnaddr
);
11800 /* Static functions and indirect calls don't need the pic register. */
11801 if (! TARGET_64BIT
&& flag_pic
11802 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
11803 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
11804 use_reg (&use
, pic_offset_table_rtx
);
11806 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
11808 rtx al
= gen_rtx_REG (QImode
, 0);
11809 emit_move_insn (al
, callarg2
);
11810 use_reg (&use
, al
);
11812 #endif /* TARGET_MACHO */
11814 if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
11816 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
11817 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
11819 if (sibcall
&& TARGET_64BIT
11820 && !constant_call_address_operand (XEXP (fnaddr
, 0), Pmode
))
11823 addr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
11824 fnaddr
= gen_rtx_REG (Pmode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
11825 emit_move_insn (fnaddr
, addr
);
11826 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
11829 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
11831 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
11834 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
11835 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
11836 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
11839 call
= emit_call_insn (call
);
11841 CALL_INSN_FUNCTION_USAGE (call
) = use
;
11845 /* Clear stack slot assignments remembered from previous functions.
11846 This is called from INIT_EXPANDERS once before RTL is emitted for each
11849 static struct machine_function
*
11850 ix86_init_machine_status (void)
11852 struct machine_function
*f
;
11854 f
= ggc_alloc_cleared (sizeof (struct machine_function
));
11855 f
->use_fast_prologue_epilogue_nregs
= -1;
11860 /* Return a MEM corresponding to a stack slot with mode MODE.
11861 Allocate a new slot if necessary.
11863 The RTL for a function can have several slots available: N is
11864 which slot to use. */
11867 assign_386_stack_local (enum machine_mode mode
, int n
)
11869 struct stack_local_entry
*s
;
11871 if (n
< 0 || n
>= MAX_386_STACK_LOCALS
)
11874 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
11875 if (s
->mode
== mode
&& s
->n
== n
)
11878 s
= (struct stack_local_entry
*)
11879 ggc_alloc (sizeof (struct stack_local_entry
));
11882 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
11884 s
->next
= ix86_stack_locals
;
11885 ix86_stack_locals
= s
;
11889 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11891 static GTY(()) rtx ix86_tls_symbol
;
11893 ix86_tls_get_addr (void)
11896 if (!ix86_tls_symbol
)
11898 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
11899 (TARGET_GNU_TLS
&& !TARGET_64BIT
)
11900 ? "___tls_get_addr"
11901 : "__tls_get_addr");
11904 return ix86_tls_symbol
;
11907 /* Calculate the length of the memory address in the instruction
11908 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11911 memory_address_length (rtx addr
)
11913 struct ix86_address parts
;
11914 rtx base
, index
, disp
;
11917 if (GET_CODE (addr
) == PRE_DEC
11918 || GET_CODE (addr
) == POST_INC
11919 || GET_CODE (addr
) == PRE_MODIFY
11920 || GET_CODE (addr
) == POST_MODIFY
)
11923 if (! ix86_decompose_address (addr
, &parts
))
11927 index
= parts
.index
;
11932 - esp as the base always wants an index,
11933 - ebp as the base always wants a displacement. */
11935 /* Register Indirect. */
11936 if (base
&& !index
&& !disp
)
11938 /* esp (for its index) and ebp (for its displacement) need
11939 the two-byte modrm form. */
11940 if (addr
== stack_pointer_rtx
11941 || addr
== arg_pointer_rtx
11942 || addr
== frame_pointer_rtx
11943 || addr
== hard_frame_pointer_rtx
)
11947 /* Direct Addressing. */
11948 else if (disp
&& !base
&& !index
)
11953 /* Find the length of the displacement constant. */
11956 if (GET_CODE (disp
) == CONST_INT
11957 && CONST_OK_FOR_LETTER_P (INTVAL (disp
), 'K')
11963 /* ebp always wants a displacement. */
11964 else if (base
== hard_frame_pointer_rtx
)
11967 /* An index requires the two-byte modrm form.... */
11969 /* ...like esp, which always wants an index. */
11970 || base
== stack_pointer_rtx
11971 || base
== arg_pointer_rtx
11972 || base
== frame_pointer_rtx
)
11979 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11980 is set, expect that insn have 8bit immediate alternative. */
11982 ix86_attr_length_immediate_default (rtx insn
, int shortform
)
11986 extract_insn_cached (insn
);
11987 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
11988 if (CONSTANT_P (recog_data
.operand
[i
]))
11993 && GET_CODE (recog_data
.operand
[i
]) == CONST_INT
11994 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data
.operand
[i
]), 'K'))
11998 switch (get_attr_mode (insn
))
12009 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
12014 fatal_insn ("unknown insn mode", insn
);
12020 /* Compute default value for "length_address" attribute. */
12022 ix86_attr_length_address_default (rtx insn
)
12026 if (get_attr_type (insn
) == TYPE_LEA
)
12028 rtx set
= PATTERN (insn
);
12029 if (GET_CODE (set
) == SET
)
12031 else if (GET_CODE (set
) == PARALLEL
12032 && GET_CODE (XVECEXP (set
, 0, 0)) == SET
)
12033 set
= XVECEXP (set
, 0, 0);
12036 #ifdef ENABLE_CHECKING
12042 return memory_address_length (SET_SRC (set
));
12045 extract_insn_cached (insn
);
12046 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
12047 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
12049 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
12055 /* Return the maximum number of instructions a cpu can issue. */
12058 ix86_issue_rate (void)
12062 case PROCESSOR_PENTIUM
:
12066 case PROCESSOR_PENTIUMPRO
:
12067 case PROCESSOR_PENTIUM4
:
12068 case PROCESSOR_ATHLON
:
12077 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
12078 by DEP_INSN and nothing set by DEP_INSN. */
12081 ix86_flags_dependant (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
12085 /* Simplify the test for uninteresting insns. */
12086 if (insn_type
!= TYPE_SETCC
12087 && insn_type
!= TYPE_ICMOV
12088 && insn_type
!= TYPE_FCMOV
12089 && insn_type
!= TYPE_IBR
)
12092 if ((set
= single_set (dep_insn
)) != 0)
12094 set
= SET_DEST (set
);
12097 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
12098 && XVECLEN (PATTERN (dep_insn
), 0) == 2
12099 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
12100 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
12102 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
12103 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
12108 if (GET_CODE (set
) != REG
|| REGNO (set
) != FLAGS_REG
)
12111 /* This test is true if the dependent insn reads the flags but
12112 not any other potentially set register. */
12113 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
12116 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
12122 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12123 address with operands set by DEP_INSN. */
12126 ix86_agi_dependant (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
12130 if (insn_type
== TYPE_LEA
12133 addr
= PATTERN (insn
);
12134 if (GET_CODE (addr
) == SET
)
12136 else if (GET_CODE (addr
) == PARALLEL
12137 && GET_CODE (XVECEXP (addr
, 0, 0)) == SET
)
12138 addr
= XVECEXP (addr
, 0, 0);
12141 addr
= SET_SRC (addr
);
12146 extract_insn_cached (insn
);
12147 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
12148 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
12150 addr
= XEXP (recog_data
.operand
[i
], 0);
12157 return modified_in_p (addr
, dep_insn
);
12161 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
12163 enum attr_type insn_type
, dep_insn_type
;
12164 enum attr_memory memory
, dep_memory
;
12166 int dep_insn_code_number
;
12168 /* Anti and output dependencies have zero cost on all CPUs. */
12169 if (REG_NOTE_KIND (link
) != 0)
12172 dep_insn_code_number
= recog_memoized (dep_insn
);
12174 /* If we can't recognize the insns, we can't really do anything. */
12175 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
12178 insn_type
= get_attr_type (insn
);
12179 dep_insn_type
= get_attr_type (dep_insn
);
12183 case PROCESSOR_PENTIUM
:
12184 /* Address Generation Interlock adds a cycle of latency. */
12185 if (ix86_agi_dependant (insn
, dep_insn
, insn_type
))
12188 /* ??? Compares pair with jump/setcc. */
12189 if (ix86_flags_dependant (insn
, dep_insn
, insn_type
))
12192 /* Floating point stores require value to be ready one cycle earlier. */
12193 if (insn_type
== TYPE_FMOV
12194 && get_attr_memory (insn
) == MEMORY_STORE
12195 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
12199 case PROCESSOR_PENTIUMPRO
:
12200 memory
= get_attr_memory (insn
);
12201 dep_memory
= get_attr_memory (dep_insn
);
12203 /* Since we can't represent delayed latencies of load+operation,
12204 increase the cost here for non-imov insns. */
12205 if (dep_insn_type
!= TYPE_IMOV
12206 && dep_insn_type
!= TYPE_FMOV
12207 && (dep_memory
== MEMORY_LOAD
|| dep_memory
== MEMORY_BOTH
))
12210 /* INT->FP conversion is expensive. */
12211 if (get_attr_fp_int_src (dep_insn
))
12214 /* There is one cycle extra latency between an FP op and a store. */
12215 if (insn_type
== TYPE_FMOV
12216 && (set
= single_set (dep_insn
)) != NULL_RTX
12217 && (set2
= single_set (insn
)) != NULL_RTX
12218 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
12219 && GET_CODE (SET_DEST (set2
)) == MEM
)
12222 /* Show ability of reorder buffer to hide latency of load by executing
12223 in parallel with previous instruction in case
12224 previous instruction is not needed to compute the address. */
12225 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
12226 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
12228 /* Claim moves to take one cycle, as core can issue one load
12229 at time and the next load can start cycle later. */
12230 if (dep_insn_type
== TYPE_IMOV
12231 || dep_insn_type
== TYPE_FMOV
)
12239 memory
= get_attr_memory (insn
);
12240 dep_memory
= get_attr_memory (dep_insn
);
12241 /* The esp dependency is resolved before the instruction is really
12243 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
12244 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
12247 /* Since we can't represent delayed latencies of load+operation,
12248 increase the cost here for non-imov insns. */
12249 if (dep_memory
== MEMORY_LOAD
|| dep_memory
== MEMORY_BOTH
)
12250 cost
+= (dep_insn_type
!= TYPE_IMOV
) ? 2 : 1;
12252 /* INT->FP conversion is expensive. */
12253 if (get_attr_fp_int_src (dep_insn
))
12256 /* Show ability of reorder buffer to hide latency of load by executing
12257 in parallel with previous instruction in case
12258 previous instruction is not needed to compute the address. */
12259 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
12260 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
12262 /* Claim moves to take one cycle, as core can issue one load
12263 at time and the next load can start cycle later. */
12264 if (dep_insn_type
== TYPE_IMOV
12265 || dep_insn_type
== TYPE_FMOV
)
12274 case PROCESSOR_ATHLON
:
12276 memory
= get_attr_memory (insn
);
12277 dep_memory
= get_attr_memory (dep_insn
);
12279 /* Show ability of reorder buffer to hide latency of load by executing
12280 in parallel with previous instruction in case
12281 previous instruction is not needed to compute the address. */
12282 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
12283 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
12285 enum attr_unit unit
= get_attr_unit (insn
);
12288 /* Because of the difference between the length of integer and
12289 floating unit pipeline preparation stages, the memory operands
12290 for floating point are cheaper.
12292 ??? For Athlon it the difference is most probably 2. */
12293 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
12296 loadcost
= TARGET_ATHLON
? 2 : 0;
12298 if (cost
>= loadcost
)
12313 struct ppro_sched_data
12316 int issued_this_cycle
;
12320 static enum attr_ppro_uops
12321 ix86_safe_ppro_uops (rtx insn
)
12323 if (recog_memoized (insn
) >= 0)
12324 return get_attr_ppro_uops (insn
);
12326 return PPRO_UOPS_MANY
;
12330 ix86_dump_ppro_packet (FILE *dump
)
12332 if (ix86_sched_data
.ppro
.decode
[0])
12334 fprintf (dump
, "PPRO packet: %d",
12335 INSN_UID (ix86_sched_data
.ppro
.decode
[0]));
12336 if (ix86_sched_data
.ppro
.decode
[1])
12337 fprintf (dump
, " %d", INSN_UID (ix86_sched_data
.ppro
.decode
[1]));
12338 if (ix86_sched_data
.ppro
.decode
[2])
12339 fprintf (dump
, " %d", INSN_UID (ix86_sched_data
.ppro
.decode
[2]));
12340 fputc ('\n', dump
);
12344 /* We're beginning a new block. Initialize data structures as necessary. */
12347 ix86_sched_init (FILE *dump ATTRIBUTE_UNUSED
,
12348 int sched_verbose ATTRIBUTE_UNUSED
,
12349 int veclen ATTRIBUTE_UNUSED
)
12351 memset (&ix86_sched_data
, 0, sizeof (ix86_sched_data
));
12354 /* Shift INSN to SLOT, and shift everything else down. */
12357 ix86_reorder_insn (rtx
*insnp
, rtx
*slot
)
12363 insnp
[0] = insnp
[1];
12364 while (++insnp
!= slot
);
12370 ix86_sched_reorder_ppro (rtx
*ready
, rtx
*e_ready
)
12373 enum attr_ppro_uops cur_uops
;
12374 int issued_this_cycle
;
12378 /* At this point .ppro.decode contains the state of the three
12379 decoders from last "cycle". That is, those insns that were
12380 actually independent. But here we're scheduling for the
12381 decoder, and we may find things that are decodable in the
12384 memcpy (decode
, ix86_sched_data
.ppro
.decode
, sizeof (decode
));
12385 issued_this_cycle
= 0;
12388 cur_uops
= ix86_safe_ppro_uops (*insnp
);
12390 /* If the decoders are empty, and we've a complex insn at the
12391 head of the priority queue, let it issue without complaint. */
12392 if (decode
[0] == NULL
)
12394 if (cur_uops
== PPRO_UOPS_MANY
)
12396 decode
[0] = *insnp
;
12400 /* Otherwise, search for a 2-4 uop unsn to issue. */
12401 while (cur_uops
!= PPRO_UOPS_FEW
)
12403 if (insnp
== ready
)
12405 cur_uops
= ix86_safe_ppro_uops (*--insnp
);
12408 /* If so, move it to the head of the line. */
12409 if (cur_uops
== PPRO_UOPS_FEW
)
12410 ix86_reorder_insn (insnp
, e_ready
);
12412 /* Issue the head of the queue. */
12413 issued_this_cycle
= 1;
12414 decode
[0] = *e_ready
--;
12417 /* Look for simple insns to fill in the other two slots. */
12418 for (i
= 1; i
< 3; ++i
)
12419 if (decode
[i
] == NULL
)
12421 if (ready
> e_ready
)
12425 cur_uops
= ix86_safe_ppro_uops (*insnp
);
12426 while (cur_uops
!= PPRO_UOPS_ONE
)
12428 if (insnp
== ready
)
12430 cur_uops
= ix86_safe_ppro_uops (*--insnp
);
12433 /* Found one. Move it to the head of the queue and issue it. */
12434 if (cur_uops
== PPRO_UOPS_ONE
)
12436 ix86_reorder_insn (insnp
, e_ready
);
12437 decode
[i
] = *e_ready
--;
12438 issued_this_cycle
++;
12442 /* ??? Didn't find one. Ideally, here we would do a lazy split
12443 of 2-uop insns, issue one and queue the other. */
12447 if (issued_this_cycle
== 0)
12448 issued_this_cycle
= 1;
12449 ix86_sched_data
.ppro
.issued_this_cycle
= issued_this_cycle
;
12452 /* We are about to being issuing insns for this clock cycle.
12453 Override the default sort algorithm to better slot instructions. */
12455 ix86_sched_reorder (FILE *dump ATTRIBUTE_UNUSED
,
12456 int sched_verbose ATTRIBUTE_UNUSED
, rtx
*ready
,
12457 int *n_readyp
, int clock_var ATTRIBUTE_UNUSED
)
12459 int n_ready
= *n_readyp
;
12460 rtx
*e_ready
= ready
+ n_ready
- 1;
12462 /* Make sure to go ahead and initialize key items in
12463 ix86_sched_data if we are not going to bother trying to
12464 reorder the ready queue. */
12467 ix86_sched_data
.ppro
.issued_this_cycle
= 1;
12476 case PROCESSOR_PENTIUMPRO
:
12477 ix86_sched_reorder_ppro (ready
, e_ready
);
12482 return ix86_issue_rate ();
12485 /* We are about to issue INSN. Return the number of insns left on the
12486 ready queue that can be issued this cycle. */
12489 ix86_variable_issue (FILE *dump
, int sched_verbose
, rtx insn
,
12490 int can_issue_more
)
12496 return can_issue_more
- 1;
12498 case PROCESSOR_PENTIUMPRO
:
12500 enum attr_ppro_uops uops
= ix86_safe_ppro_uops (insn
);
12502 if (uops
== PPRO_UOPS_MANY
)
12505 ix86_dump_ppro_packet (dump
);
12506 ix86_sched_data
.ppro
.decode
[0] = insn
;
12507 ix86_sched_data
.ppro
.decode
[1] = NULL
;
12508 ix86_sched_data
.ppro
.decode
[2] = NULL
;
12510 ix86_dump_ppro_packet (dump
);
12511 ix86_sched_data
.ppro
.decode
[0] = NULL
;
12513 else if (uops
== PPRO_UOPS_FEW
)
12516 ix86_dump_ppro_packet (dump
);
12517 ix86_sched_data
.ppro
.decode
[0] = insn
;
12518 ix86_sched_data
.ppro
.decode
[1] = NULL
;
12519 ix86_sched_data
.ppro
.decode
[2] = NULL
;
12523 for (i
= 0; i
< 3; ++i
)
12524 if (ix86_sched_data
.ppro
.decode
[i
] == NULL
)
12526 ix86_sched_data
.ppro
.decode
[i
] = insn
;
12534 ix86_dump_ppro_packet (dump
);
12535 ix86_sched_data
.ppro
.decode
[0] = NULL
;
12536 ix86_sched_data
.ppro
.decode
[1] = NULL
;
12537 ix86_sched_data
.ppro
.decode
[2] = NULL
;
12541 return --ix86_sched_data
.ppro
.issued_this_cycle
;
12546 ia32_use_dfa_pipeline_interface (void)
12548 if (TARGET_PENTIUM
|| TARGET_ATHLON_K8
)
12553 /* How many alternative schedules to try. This should be as wide as the
12554 scheduling freedom in the DFA, but no wider. Making this value too
12555 large results extra work for the scheduler. */
12558 ia32_multipass_dfa_lookahead (void)
12560 if (ix86_tune
== PROCESSOR_PENTIUM
)
12567 /* Compute the alignment given to a constant that is being placed in memory.
12568 EXP is the constant and ALIGN is the alignment that the object would
12570 The value of this function is used instead of that alignment to align
12574 ix86_constant_alignment (tree exp
, int align
)
12576 if (TREE_CODE (exp
) == REAL_CST
)
12578 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
12580 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
12583 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
12584 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
12585 return BITS_PER_WORD
;
12590 /* Compute the alignment for a static variable.
12591 TYPE is the data type, and ALIGN is the alignment that
12592 the object would ordinarily have. The value of this function is used
12593 instead of that alignment to align the object. */
12596 ix86_data_alignment (tree type
, int align
)
12598 if (AGGREGATE_TYPE_P (type
)
12599 && TYPE_SIZE (type
)
12600 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
12601 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 256
12602 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 256)
12605 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12606 to 16byte boundary. */
12609 if (AGGREGATE_TYPE_P (type
)
12610 && TYPE_SIZE (type
)
12611 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
12612 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
12613 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
12617 if (TREE_CODE (type
) == ARRAY_TYPE
)
12619 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
12621 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
12624 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
12627 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
12629 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
12632 else if ((TREE_CODE (type
) == RECORD_TYPE
12633 || TREE_CODE (type
) == UNION_TYPE
12634 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
12635 && TYPE_FIELDS (type
))
12637 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
12639 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
12642 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
12643 || TREE_CODE (type
) == INTEGER_TYPE
)
12645 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
12647 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
12654 /* Compute the alignment for a local variable.
12655 TYPE is the data type, and ALIGN is the alignment that
12656 the object would ordinarily have. The value of this macro is used
12657 instead of that alignment to align the object. */
12660 ix86_local_alignment (tree type
, int align
)
12662 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12663 to 16byte boundary. */
12666 if (AGGREGATE_TYPE_P (type
)
12667 && TYPE_SIZE (type
)
12668 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
12669 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
12670 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
12673 if (TREE_CODE (type
) == ARRAY_TYPE
)
12675 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
12677 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
12680 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
12682 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
12684 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
12687 else if ((TREE_CODE (type
) == RECORD_TYPE
12688 || TREE_CODE (type
) == UNION_TYPE
12689 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
12690 && TYPE_FIELDS (type
))
12692 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
12694 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
12697 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
12698 || TREE_CODE (type
) == INTEGER_TYPE
)
12701 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
12703 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
12709 /* Emit RTL insns to initialize the variable parts of a trampoline.
12710 FNADDR is an RTX for the address of the function's pure code.
12711 CXT is an RTX for the static chain value for the function. */
12713 x86_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
12717 /* Compute offset from the end of the jmp to the target function. */
12718 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
12719 plus_constant (tramp
, 10),
12720 NULL_RTX
, 1, OPTAB_DIRECT
);
12721 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
12722 gen_int_mode (0xb9, QImode
));
12723 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
12724 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
12725 gen_int_mode (0xe9, QImode
));
12726 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
12731 /* Try to load address using shorter movl instead of movabs.
12732 We may want to support movq for kernel mode, but kernel does not use
12733 trampolines at the moment. */
12734 if (x86_64_zero_extended_value (fnaddr
))
12736 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
12737 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
12738 gen_int_mode (0xbb41, HImode
));
12739 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
12740 gen_lowpart (SImode
, fnaddr
));
12745 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
12746 gen_int_mode (0xbb49, HImode
));
12747 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
12751 /* Load static chain using movabs to r10. */
12752 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
12753 gen_int_mode (0xba49, HImode
));
12754 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
12757 /* Jump to the r11 */
12758 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
12759 gen_int_mode (0xff49, HImode
));
12760 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
12761 gen_int_mode (0xe3, QImode
));
12763 if (offset
> TRAMPOLINE_SIZE
)
12767 #ifdef TRANSFER_FROM_TRAMPOLINE
12768 emit_library_call (gen_rtx (SYMBOL_REF
, Pmode
, "__enable_execute_stack"),
12769 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
/* Register a machine-specific builtin NAME with function type TYPE and
   builtin code CODE, but only when the target flags in MASK are enabled
   (and, for MASK_64BIT builtins, only when compiling for 64-bit).  */
#define def_builtin(MASK, NAME, TYPE, CODE)			\
do {								\
  if ((MASK) & target_flags					\
      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))		\
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
		      NULL, NULL_TREE);				\
} while (0)
12781 struct builtin_description
12783 const unsigned int mask
;
12784 const enum insn_code icode
;
12785 const char *const name
;
12786 const enum ix86_builtins code
;
12787 const enum rtx_code comparison
;
12788 const unsigned int flag
;
12791 static const struct builtin_description bdesc_comi
[] =
12793 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
12794 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
12795 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
12796 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
12797 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
12798 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
12799 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
12800 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
12801 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
12802 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
12803 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
12804 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
12805 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
12806 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
12807 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
12808 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
12809 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
12810 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
12811 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
12812 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
12813 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
12814 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
12815 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
12816 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
12819 static const struct builtin_description bdesc_2arg
[] =
12822 { MASK_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
12823 { MASK_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
12824 { MASK_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
12825 { MASK_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
12826 { MASK_SSE
, CODE_FOR_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
12827 { MASK_SSE
, CODE_FOR_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
12828 { MASK_SSE
, CODE_FOR_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
12829 { MASK_SSE
, CODE_FOR_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
12831 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
12832 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
12833 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
12834 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, 1 },
12835 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, 1 },
12836 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
12837 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, EQ
, 0 },
12838 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, LT
, 0 },
12839 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, LE
, 0 },
12840 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, LT
, 1 },
12841 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, LE
, 1 },
12842 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, UNORDERED
, 0 },
12843 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
12844 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
12845 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
12846 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
12847 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, EQ
, 0 },
12848 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, LT
, 0 },
12849 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, LE
, 0 },
12850 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
12852 { MASK_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
12853 { MASK_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
12854 { MASK_SSE
, CODE_FOR_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
12855 { MASK_SSE
, CODE_FOR_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
12857 { MASK_SSE
, CODE_FOR_sse_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
12858 { MASK_SSE
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
12859 { MASK_SSE
, CODE_FOR_sse_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
12860 { MASK_SSE
, CODE_FOR_sse_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
12862 { MASK_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
12863 { MASK_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
12864 { MASK_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
12865 { MASK_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
12866 { MASK_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
12869 { MASK_MMX
, CODE_FOR_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
12870 { MASK_MMX
, CODE_FOR_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
12871 { MASK_MMX
, CODE_FOR_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
12872 { MASK_MMX
, CODE_FOR_mmx_adddi3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, 0, 0 },
12873 { MASK_MMX
, CODE_FOR_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
12874 { MASK_MMX
, CODE_FOR_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
12875 { MASK_MMX
, CODE_FOR_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
12876 { MASK_MMX
, CODE_FOR_mmx_subdi3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, 0, 0 },
12878 { MASK_MMX
, CODE_FOR_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
12879 { MASK_MMX
, CODE_FOR_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
12880 { MASK_MMX
, CODE_FOR_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
12881 { MASK_MMX
, CODE_FOR_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
12882 { MASK_MMX
, CODE_FOR_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
12883 { MASK_MMX
, CODE_FOR_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
12884 { MASK_MMX
, CODE_FOR_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
12885 { MASK_MMX
, CODE_FOR_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
12887 { MASK_MMX
, CODE_FOR_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
12888 { MASK_MMX
, CODE_FOR_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
12889 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
12891 { MASK_MMX
, CODE_FOR_mmx_anddi3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
12892 { MASK_MMX
, CODE_FOR_mmx_nanddi3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
12893 { MASK_MMX
, CODE_FOR_mmx_iordi3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
12894 { MASK_MMX
, CODE_FOR_mmx_xordi3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
12896 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
12897 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
12899 { MASK_MMX
, CODE_FOR_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
12900 { MASK_MMX
, CODE_FOR_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
12901 { MASK_MMX
, CODE_FOR_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
12902 { MASK_MMX
, CODE_FOR_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
12903 { MASK_MMX
, CODE_FOR_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
12904 { MASK_MMX
, CODE_FOR_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
12906 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
12907 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
12908 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
12909 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
12911 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
12912 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
12913 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
12914 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
12915 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
12916 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
12919 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
12920 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
12921 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
12923 { MASK_SSE
, CODE_FOR_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
12924 { MASK_SSE
, CODE_FOR_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
12925 { MASK_SSE
| MASK_64BIT
, CODE_FOR_cvtsi2ssq
, 0, IX86_BUILTIN_CVTSI642SS
, 0, 0 },
12927 { MASK_MMX
, CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
12928 { MASK_MMX
, CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
12929 { MASK_MMX
, CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
12930 { MASK_MMX
, CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
12931 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
12932 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
12934 { MASK_MMX
, CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
12935 { MASK_MMX
, CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
12936 { MASK_MMX
, CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
12937 { MASK_MMX
, CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
12938 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
12939 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
12941 { MASK_MMX
, CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
12942 { MASK_MMX
, CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
12943 { MASK_MMX
, CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
12944 { MASK_MMX
, CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
12946 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
12947 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
12950 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
12951 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
12952 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
12953 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
12954 { MASK_SSE2
, CODE_FOR_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
12955 { MASK_SSE2
, CODE_FOR_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
12956 { MASK_SSE2
, CODE_FOR_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
12957 { MASK_SSE2
, CODE_FOR_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
12959 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
12960 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
12961 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
12962 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, 1 },
12963 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, 1 },
12964 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
12965 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, EQ
, 0 },
12966 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, LT
, 0 },
12967 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, LE
, 0 },
12968 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, LT
, 1 },
12969 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, LE
, 1 },
12970 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, UNORDERED
, 0 },
12971 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
12972 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
12973 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
12974 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
12975 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, EQ
, 0 },
12976 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, LT
, 0 },
12977 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, LE
, 0 },
12978 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, UNORDERED
, 0 },
12980 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
12981 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
12982 { MASK_SSE2
, CODE_FOR_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
12983 { MASK_SSE2
, CODE_FOR_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
12985 { MASK_SSE2
, CODE_FOR_sse2_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
12986 { MASK_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
12987 { MASK_SSE2
, CODE_FOR_sse2_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
12988 { MASK_SSE2
, CODE_FOR_sse2_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
12990 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
12991 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
12992 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
12995 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
12996 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
12997 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
12998 { MASK_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
12999 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
13000 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
13001 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
13002 { MASK_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
13004 { MASK_MMX
, CODE_FOR_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
13005 { MASK_MMX
, CODE_FOR_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
13006 { MASK_MMX
, CODE_FOR_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
13007 { MASK_MMX
, CODE_FOR_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
13008 { MASK_MMX
, CODE_FOR_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
13009 { MASK_MMX
, CODE_FOR_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
13010 { MASK_MMX
, CODE_FOR_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
13011 { MASK_MMX
, CODE_FOR_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
13013 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
13014 { MASK_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
13015 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ
, 0, 0 },
13016 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128
, 0, 0 },
13018 { MASK_SSE2
, CODE_FOR_sse2_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
13019 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
13020 { MASK_SSE2
, CODE_FOR_sse2_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
13021 { MASK_SSE2
, CODE_FOR_sse2_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
13023 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
13024 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
13026 { MASK_SSE2
, CODE_FOR_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
13027 { MASK_SSE2
, CODE_FOR_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
13028 { MASK_SSE2
, CODE_FOR_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
13029 { MASK_SSE2
, CODE_FOR_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
13030 { MASK_SSE2
, CODE_FOR_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
13031 { MASK_SSE2
, CODE_FOR_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
13033 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
13034 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
13035 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
13036 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
13038 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
13039 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
13040 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
13041 { MASK_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, 0, 0 },
13042 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
13043 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
13044 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
13045 { MASK_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, 0, 0 },
13047 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
13048 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
13049 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
13051 { MASK_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
13052 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
13054 { MASK_SSE2
, CODE_FOR_ashlv8hi3_ti
, 0, IX86_BUILTIN_PSLLW128
, 0, 0 },
13055 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
13056 { MASK_SSE2
, CODE_FOR_ashlv4si3_ti
, 0, IX86_BUILTIN_PSLLD128
, 0, 0 },
13057 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
13058 { MASK_SSE2
, CODE_FOR_ashlv2di3_ti
, 0, IX86_BUILTIN_PSLLQ128
, 0, 0 },
13059 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
13061 { MASK_SSE2
, CODE_FOR_lshrv8hi3_ti
, 0, IX86_BUILTIN_PSRLW128
, 0, 0 },
13062 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
13063 { MASK_SSE2
, CODE_FOR_lshrv4si3_ti
, 0, IX86_BUILTIN_PSRLD128
, 0, 0 },
13064 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
13065 { MASK_SSE2
, CODE_FOR_lshrv2di3_ti
, 0, IX86_BUILTIN_PSRLQ128
, 0, 0 },
13066 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
13068 { MASK_SSE2
, CODE_FOR_ashrv8hi3_ti
, 0, IX86_BUILTIN_PSRAW128
, 0, 0 },
13069 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
13070 { MASK_SSE2
, CODE_FOR_ashrv4si3_ti
, 0, IX86_BUILTIN_PSRAD128
, 0, 0 },
13071 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
13073 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
13075 { MASK_SSE2
, CODE_FOR_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
13076 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_cvtsi2sdq
, 0, IX86_BUILTIN_CVTSI642SD
, 0, 0 },
13077 { MASK_SSE2
, CODE_FOR_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
13078 { MASK_SSE2
, CODE_FOR_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 },
13081 { MASK_SSE3
, CODE_FOR_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, 0, 0 },
13082 { MASK_SSE3
, CODE_FOR_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, 0, 0 },
13083 { MASK_SSE3
, CODE_FOR_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, 0, 0 },
13084 { MASK_SSE3
, CODE_FOR_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, 0, 0 },
13085 { MASK_SSE3
, CODE_FOR_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, 0, 0 },
13086 { MASK_SSE3
, CODE_FOR_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, 0, 0 }
13089 static const struct builtin_description bdesc_1arg
[] =
13091 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
13092 { MASK_SSE
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
13094 { MASK_SSE
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
13095 { MASK_SSE
, CODE_FOR_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
13096 { MASK_SSE
, CODE_FOR_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
13098 { MASK_SSE
, CODE_FOR_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
13099 { MASK_SSE
, CODE_FOR_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
13100 { MASK_SSE
| MASK_64BIT
, CODE_FOR_cvtss2siq
, 0, IX86_BUILTIN_CVTSS2SI64
, 0, 0 },
13101 { MASK_SSE
, CODE_FOR_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
13102 { MASK_SSE
, CODE_FOR_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 },
13103 { MASK_SSE
| MASK_64BIT
, CODE_FOR_cvttss2siq
, 0, IX86_BUILTIN_CVTTSS2SI64
, 0, 0 },
13105 { MASK_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, 0, 0 },
13106 { MASK_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, 0, 0 },
13107 { MASK_SSE2
, CODE_FOR_sse2_movq2dq
, 0, IX86_BUILTIN_MOVQ2DQ
, 0, 0 },
13108 { MASK_SSE2
, CODE_FOR_sse2_movdq2q
, 0, IX86_BUILTIN_MOVDQ2Q
, 0, 0 },
13110 { MASK_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, 0, 0 },
13112 { MASK_SSE2
, CODE_FOR_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, 0, 0 },
13113 { MASK_SSE2
, CODE_FOR_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, 0, 0 },
13115 { MASK_SSE2
, CODE_FOR_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, 0, 0 },
13116 { MASK_SSE2
, CODE_FOR_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, 0, 0 },
13117 { MASK_SSE2
, CODE_FOR_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, 0, 0 },
13118 { MASK_SSE2
, CODE_FOR_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, 0, 0 },
13119 { MASK_SSE2
, CODE_FOR_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, 0, 0 },
13121 { MASK_SSE2
, CODE_FOR_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, 0, 0 },
13123 { MASK_SSE2
, CODE_FOR_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, 0, 0 },
13124 { MASK_SSE2
, CODE_FOR_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, 0, 0 },
13125 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_cvtsd2siq
, 0, IX86_BUILTIN_CVTSD2SI64
, 0, 0 },
13126 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_cvttsd2siq
, 0, IX86_BUILTIN_CVTTSD2SI64
, 0, 0 },
13128 { MASK_SSE2
, CODE_FOR_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, 0, 0 },
13129 { MASK_SSE2
, CODE_FOR_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, 0, 0 },
13130 { MASK_SSE2
, CODE_FOR_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, 0, 0 },
13132 { MASK_SSE2
, CODE_FOR_sse2_movq
, 0, IX86_BUILTIN_MOVQ
, 0, 0 },
13135 { MASK_SSE3
, CODE_FOR_movshdup
, 0, IX86_BUILTIN_MOVSHDUP
, 0, 0 },
13136 { MASK_SSE3
, CODE_FOR_movsldup
, 0, IX86_BUILTIN_MOVSLDUP
, 0, 0 },
13137 { MASK_SSE3
, CODE_FOR_movddup
, 0, IX86_BUILTIN_MOVDDUP
, 0, 0 }
13141 ix86_init_builtins (void)
13144 ix86_init_mmx_sse_builtins ();
13147 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13148 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
13151 ix86_init_mmx_sse_builtins (void)
13153 const struct builtin_description
* d
;
13156 tree pchar_type_node
= build_pointer_type (char_type_node
);
13157 tree pcchar_type_node
= build_pointer_type (
13158 build_type_variant (char_type_node
, 1, 0));
13159 tree pfloat_type_node
= build_pointer_type (float_type_node
);
13160 tree pcfloat_type_node
= build_pointer_type (
13161 build_type_variant (float_type_node
, 1, 0));
13162 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
13163 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
13164 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
13167 tree int_ftype_v4sf_v4sf
13168 = build_function_type_list (integer_type_node
,
13169 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
13170 tree v4si_ftype_v4sf_v4sf
13171 = build_function_type_list (V4SI_type_node
,
13172 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
13173 /* MMX/SSE/integer conversions. */
13174 tree int_ftype_v4sf
13175 = build_function_type_list (integer_type_node
,
13176 V4SF_type_node
, NULL_TREE
);
13177 tree int64_ftype_v4sf
13178 = build_function_type_list (long_long_integer_type_node
,
13179 V4SF_type_node
, NULL_TREE
);
13180 tree int_ftype_v8qi
13181 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
13182 tree v4sf_ftype_v4sf_int
13183 = build_function_type_list (V4SF_type_node
,
13184 V4SF_type_node
, integer_type_node
, NULL_TREE
);
13185 tree v4sf_ftype_v4sf_int64
13186 = build_function_type_list (V4SF_type_node
,
13187 V4SF_type_node
, long_long_integer_type_node
,
13189 tree v4sf_ftype_v4sf_v2si
13190 = build_function_type_list (V4SF_type_node
,
13191 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
13192 tree int_ftype_v4hi_int
13193 = build_function_type_list (integer_type_node
,
13194 V4HI_type_node
, integer_type_node
, NULL_TREE
);
13195 tree v4hi_ftype_v4hi_int_int
13196 = build_function_type_list (V4HI_type_node
, V4HI_type_node
,
13197 integer_type_node
, integer_type_node
,
13199 /* Miscellaneous. */
13200 tree v8qi_ftype_v4hi_v4hi
13201 = build_function_type_list (V8QI_type_node
,
13202 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
13203 tree v4hi_ftype_v2si_v2si
13204 = build_function_type_list (V4HI_type_node
,
13205 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
13206 tree v4sf_ftype_v4sf_v4sf_int
13207 = build_function_type_list (V4SF_type_node
,
13208 V4SF_type_node
, V4SF_type_node
,
13209 integer_type_node
, NULL_TREE
);
13210 tree v2si_ftype_v4hi_v4hi
13211 = build_function_type_list (V2SI_type_node
,
13212 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
13213 tree v4hi_ftype_v4hi_int
13214 = build_function_type_list (V4HI_type_node
,
13215 V4HI_type_node
, integer_type_node
, NULL_TREE
);
13216 tree v4hi_ftype_v4hi_di
13217 = build_function_type_list (V4HI_type_node
,
13218 V4HI_type_node
, long_long_unsigned_type_node
,
13220 tree v2si_ftype_v2si_di
13221 = build_function_type_list (V2SI_type_node
,
13222 V2SI_type_node
, long_long_unsigned_type_node
,
13224 tree void_ftype_void
13225 = build_function_type (void_type_node
, void_list_node
);
13226 tree void_ftype_unsigned
13227 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
13228 tree void_ftype_unsigned_unsigned
13229 = build_function_type_list (void_type_node
, unsigned_type_node
,
13230 unsigned_type_node
, NULL_TREE
);
13231 tree void_ftype_pcvoid_unsigned_unsigned
13232 = build_function_type_list (void_type_node
, const_ptr_type_node
,
13233 unsigned_type_node
, unsigned_type_node
,
13235 tree unsigned_ftype_void
13236 = build_function_type (unsigned_type_node
, void_list_node
);
13238 = build_function_type (long_long_unsigned_type_node
, void_list_node
);
13239 tree v4sf_ftype_void
13240 = build_function_type (V4SF_type_node
, void_list_node
);
13241 tree v2si_ftype_v4sf
13242 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
13243 /* Loads/stores. */
13244 tree void_ftype_v8qi_v8qi_pchar
13245 = build_function_type_list (void_type_node
,
13246 V8QI_type_node
, V8QI_type_node
,
13247 pchar_type_node
, NULL_TREE
);
13248 tree v4sf_ftype_pcfloat
13249 = build_function_type_list (V4SF_type_node
, pcfloat_type_node
, NULL_TREE
);
13250 /* @@@ the type is bogus */
13251 tree v4sf_ftype_v4sf_pv2si
13252 = build_function_type_list (V4SF_type_node
,
13253 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
13254 tree void_ftype_pv2si_v4sf
13255 = build_function_type_list (void_type_node
,
13256 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
13257 tree void_ftype_pfloat_v4sf
13258 = build_function_type_list (void_type_node
,
13259 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
13260 tree void_ftype_pdi_di
13261 = build_function_type_list (void_type_node
,
13262 pdi_type_node
, long_long_unsigned_type_node
,
13264 tree void_ftype_pv2di_v2di
13265 = build_function_type_list (void_type_node
,
13266 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
13267 /* Normal vector unops. */
13268 tree v4sf_ftype_v4sf
13269 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
13271 /* Normal vector binops. */
13272 tree v4sf_ftype_v4sf_v4sf
13273 = build_function_type_list (V4SF_type_node
,
13274 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
13275 tree v8qi_ftype_v8qi_v8qi
13276 = build_function_type_list (V8QI_type_node
,
13277 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
13278 tree v4hi_ftype_v4hi_v4hi
13279 = build_function_type_list (V4HI_type_node
,
13280 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
13281 tree v2si_ftype_v2si_v2si
13282 = build_function_type_list (V2SI_type_node
,
13283 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
13284 tree di_ftype_di_di
13285 = build_function_type_list (long_long_unsigned_type_node
,
13286 long_long_unsigned_type_node
,
13287 long_long_unsigned_type_node
, NULL_TREE
);
13289 tree v2si_ftype_v2sf
13290 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
13291 tree v2sf_ftype_v2si
13292 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
13293 tree v2si_ftype_v2si
13294 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
13295 tree v2sf_ftype_v2sf
13296 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
13297 tree v2sf_ftype_v2sf_v2sf
13298 = build_function_type_list (V2SF_type_node
,
13299 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
13300 tree v2si_ftype_v2sf_v2sf
13301 = build_function_type_list (V2SI_type_node
,
13302 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
13303 tree pint_type_node
= build_pointer_type (integer_type_node
);
13304 tree pcint_type_node
= build_pointer_type (
13305 build_type_variant (integer_type_node
, 1, 0));
13306 tree pdouble_type_node
= build_pointer_type (double_type_node
);
13307 tree pcdouble_type_node
= build_pointer_type (
13308 build_type_variant (double_type_node
, 1, 0));
13309 tree int_ftype_v2df_v2df
13310 = build_function_type_list (integer_type_node
,
13311 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
13314 = build_function_type (intTI_type_node
, void_list_node
);
13315 tree v2di_ftype_void
13316 = build_function_type (V2DI_type_node
, void_list_node
);
13317 tree ti_ftype_ti_ti
13318 = build_function_type_list (intTI_type_node
,
13319 intTI_type_node
, intTI_type_node
, NULL_TREE
);
13320 tree void_ftype_pcvoid
13321 = build_function_type_list (void_type_node
, const_ptr_type_node
, NULL_TREE
);
13323 = build_function_type_list (V2DI_type_node
,
13324 long_long_unsigned_type_node
, NULL_TREE
);
13326 = build_function_type_list (long_long_unsigned_type_node
,
13327 V2DI_type_node
, NULL_TREE
);
13328 tree v4sf_ftype_v4si
13329 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
13330 tree v4si_ftype_v4sf
13331 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
13332 tree v2df_ftype_v4si
13333 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
13334 tree v4si_ftype_v2df
13335 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
13336 tree v2si_ftype_v2df
13337 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
13338 tree v4sf_ftype_v2df
13339 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
13340 tree v2df_ftype_v2si
13341 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
13342 tree v2df_ftype_v4sf
13343 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
13344 tree int_ftype_v2df
13345 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
13346 tree int64_ftype_v2df
13347 = build_function_type_list (long_long_integer_type_node
,
13348 V2DF_type_node
, NULL_TREE
);
13349 tree v2df_ftype_v2df_int
13350 = build_function_type_list (V2DF_type_node
,
13351 V2DF_type_node
, integer_type_node
, NULL_TREE
);
13352 tree v2df_ftype_v2df_int64
13353 = build_function_type_list (V2DF_type_node
,
13354 V2DF_type_node
, long_long_integer_type_node
,
13356 tree v4sf_ftype_v4sf_v2df
13357 = build_function_type_list (V4SF_type_node
,
13358 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
13359 tree v2df_ftype_v2df_v4sf
13360 = build_function_type_list (V2DF_type_node
,
13361 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
13362 tree v2df_ftype_v2df_v2df_int
13363 = build_function_type_list (V2DF_type_node
,
13364 V2DF_type_node
, V2DF_type_node
,
13367 tree v2df_ftype_v2df_pv2si
13368 = build_function_type_list (V2DF_type_node
,
13369 V2DF_type_node
, pv2si_type_node
, NULL_TREE
);
13370 tree void_ftype_pv2si_v2df
13371 = build_function_type_list (void_type_node
,
13372 pv2si_type_node
, V2DF_type_node
, NULL_TREE
);
13373 tree void_ftype_pdouble_v2df
13374 = build_function_type_list (void_type_node
,
13375 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
13376 tree void_ftype_pint_int
13377 = build_function_type_list (void_type_node
,
13378 pint_type_node
, integer_type_node
, NULL_TREE
);
13379 tree void_ftype_v16qi_v16qi_pchar
13380 = build_function_type_list (void_type_node
,
13381 V16QI_type_node
, V16QI_type_node
,
13382 pchar_type_node
, NULL_TREE
);
13383 tree v2df_ftype_pcdouble
13384 = build_function_type_list (V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
13385 tree v2df_ftype_v2df_v2df
13386 = build_function_type_list (V2DF_type_node
,
13387 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
13388 tree v16qi_ftype_v16qi_v16qi
13389 = build_function_type_list (V16QI_type_node
,
13390 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
13391 tree v8hi_ftype_v8hi_v8hi
13392 = build_function_type_list (V8HI_type_node
,
13393 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
13394 tree v4si_ftype_v4si_v4si
13395 = build_function_type_list (V4SI_type_node
,
13396 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
13397 tree v2di_ftype_v2di_v2di
13398 = build_function_type_list (V2DI_type_node
,
13399 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
13400 tree v2di_ftype_v2df_v2df
13401 = build_function_type_list (V2DI_type_node
,
13402 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
13403 tree v2df_ftype_v2df
13404 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
13405 tree v2df_ftype_double
13406 = build_function_type_list (V2DF_type_node
, double_type_node
, NULL_TREE
);
13407 tree v2df_ftype_double_double
13408 = build_function_type_list (V2DF_type_node
,
13409 double_type_node
, double_type_node
, NULL_TREE
);
13410 tree int_ftype_v8hi_int
13411 = build_function_type_list (integer_type_node
,
13412 V8HI_type_node
, integer_type_node
, NULL_TREE
);
13413 tree v8hi_ftype_v8hi_int_int
13414 = build_function_type_list (V8HI_type_node
,
13415 V8HI_type_node
, integer_type_node
,
13416 integer_type_node
, NULL_TREE
);
13417 tree v2di_ftype_v2di_int
13418 = build_function_type_list (V2DI_type_node
,
13419 V2DI_type_node
, integer_type_node
, NULL_TREE
);
13420 tree v4si_ftype_v4si_int
13421 = build_function_type_list (V4SI_type_node
,
13422 V4SI_type_node
, integer_type_node
, NULL_TREE
);
13423 tree v8hi_ftype_v8hi_int
13424 = build_function_type_list (V8HI_type_node
,
13425 V8HI_type_node
, integer_type_node
, NULL_TREE
);
13426 tree v8hi_ftype_v8hi_v2di
13427 = build_function_type_list (V8HI_type_node
,
13428 V8HI_type_node
, V2DI_type_node
, NULL_TREE
);
13429 tree v4si_ftype_v4si_v2di
13430 = build_function_type_list (V4SI_type_node
,
13431 V4SI_type_node
, V2DI_type_node
, NULL_TREE
);
13432 tree v4si_ftype_v8hi_v8hi
13433 = build_function_type_list (V4SI_type_node
,
13434 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
13435 tree di_ftype_v8qi_v8qi
13436 = build_function_type_list (long_long_unsigned_type_node
,
13437 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
13438 tree v2di_ftype_v16qi_v16qi
13439 = build_function_type_list (V2DI_type_node
,
13440 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
13441 tree int_ftype_v16qi
13442 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
13443 tree v16qi_ftype_pcchar
13444 = build_function_type_list (V16QI_type_node
, pcchar_type_node
, NULL_TREE
);
13445 tree void_ftype_pchar_v16qi
13446 = build_function_type_list (void_type_node
,
13447 pchar_type_node
, V16QI_type_node
, NULL_TREE
);
13448 tree v4si_ftype_pcint
13449 = build_function_type_list (V4SI_type_node
, pcint_type_node
, NULL_TREE
);
13450 tree void_ftype_pcint_v4si
13451 = build_function_type_list (void_type_node
,
13452 pcint_type_node
, V4SI_type_node
, NULL_TREE
);
13453 tree v2di_ftype_v2di
13454 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
13457 tree float128_type
;
13459 /* The __float80 type. */
13460 if (TYPE_MODE (long_double_type_node
) == XFmode
)
13461 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
13465 /* The __float80 type. */
13466 float80_type
= make_node (REAL_TYPE
);
13467 TYPE_PRECISION (float80_type
) = 96;
13468 layout_type (float80_type
);
13469 (*lang_hooks
.types
.register_builtin_type
) (float80_type
, "__float80");
13472 float128_type
= make_node (REAL_TYPE
);
13473 TYPE_PRECISION (float128_type
) = 128;
13474 layout_type (float128_type
);
13475 (*lang_hooks
.types
.register_builtin_type
) (float128_type
, "__float128");
13477 /* Add all builtins that are more or less simple operations on two
13479 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
13481 /* Use one of the operands; the target can have a different mode for
13482 mask-generating compares. */
13483 enum machine_mode mode
;
13488 mode
= insn_data
[d
->icode
].operand
[1].mode
;
13493 type
= v16qi_ftype_v16qi_v16qi
;
13496 type
= v8hi_ftype_v8hi_v8hi
;
13499 type
= v4si_ftype_v4si_v4si
;
13502 type
= v2di_ftype_v2di_v2di
;
13505 type
= v2df_ftype_v2df_v2df
;
13508 type
= ti_ftype_ti_ti
;
13511 type
= v4sf_ftype_v4sf_v4sf
;
13514 type
= v8qi_ftype_v8qi_v8qi
;
13517 type
= v4hi_ftype_v4hi_v4hi
;
13520 type
= v2si_ftype_v2si_v2si
;
13523 type
= di_ftype_di_di
;
13530 /* Override for comparisons. */
13531 if (d
->icode
== CODE_FOR_maskcmpv4sf3
13532 || d
->icode
== CODE_FOR_maskncmpv4sf3
13533 || d
->icode
== CODE_FOR_vmmaskcmpv4sf3
13534 || d
->icode
== CODE_FOR_vmmaskncmpv4sf3
)
13535 type
= v4si_ftype_v4sf_v4sf
;
13537 if (d
->icode
== CODE_FOR_maskcmpv2df3
13538 || d
->icode
== CODE_FOR_maskncmpv2df3
13539 || d
->icode
== CODE_FOR_vmmaskcmpv2df3
13540 || d
->icode
== CODE_FOR_vmmaskncmpv2df3
)
13541 type
= v2di_ftype_v2df_v2df
;
13543 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
13546 /* Add the remaining MMX insns with somewhat more complicated types. */
13547 def_builtin (MASK_MMX
, "__builtin_ia32_mmx_zero", di_ftype_void
, IX86_BUILTIN_MMX_ZERO
);
13548 def_builtin (MASK_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
13549 def_builtin (MASK_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
13550 def_builtin (MASK_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
13551 def_builtin (MASK_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
13553 def_builtin (MASK_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
13554 def_builtin (MASK_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
13555 def_builtin (MASK_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
13557 def_builtin (MASK_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
13558 def_builtin (MASK_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
13560 def_builtin (MASK_MMX
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
13561 def_builtin (MASK_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
13563 /* comi/ucomi insns. */
13564 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
13565 if (d
->mask
== MASK_SSE2
)
13566 def_builtin (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
13568 def_builtin (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
13570 def_builtin (MASK_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
13571 def_builtin (MASK_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
13572 def_builtin (MASK_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
13574 def_builtin (MASK_SSE
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
13575 def_builtin (MASK_SSE
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
13576 def_builtin (MASK_SSE
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
13577 def_builtin (MASK_SSE
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
13578 def_builtin (MASK_SSE
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
13579 def_builtin (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64
, IX86_BUILTIN_CVTSI642SS
);
13580 def_builtin (MASK_SSE
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
13581 def_builtin (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI64
);
13582 def_builtin (MASK_SSE
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
13583 def_builtin (MASK_SSE
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
13584 def_builtin (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI64
);
13586 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pextrw", int_ftype_v4hi_int
, IX86_BUILTIN_PEXTRW
);
13587 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int
, IX86_BUILTIN_PINSRW
);
13589 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
13591 def_builtin (MASK_SSE
, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADAPS
);
13592 def_builtin (MASK_SSE
, "__builtin_ia32_loadups", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADUPS
);
13593 def_builtin (MASK_SSE
, "__builtin_ia32_loadss", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADSS
);
13594 def_builtin (MASK_SSE
, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREAPS
);
13595 def_builtin (MASK_SSE
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
13596 def_builtin (MASK_SSE
, "__builtin_ia32_storess", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STORESS
);
13598 def_builtin (MASK_SSE
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
13599 def_builtin (MASK_SSE
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
13600 def_builtin (MASK_SSE
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
13601 def_builtin (MASK_SSE
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
13603 def_builtin (MASK_SSE
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
13604 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
13605 def_builtin (MASK_SSE
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
13606 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
13608 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
13610 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
13612 def_builtin (MASK_SSE
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
13613 def_builtin (MASK_SSE
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
13614 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
13615 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
13616 def_builtin (MASK_SSE
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
13617 def_builtin (MASK_SSE
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
13619 def_builtin (MASK_SSE
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
13621 /* Original 3DNow! */
13622 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
13623 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
13624 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
13625 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
13626 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
13627 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
13628 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
13629 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
13630 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
13631 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
13632 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
13633 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
13634 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
13635 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
13636 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
13637 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
13638 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
13639 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
13640 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
13641 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
13643 /* 3DNow! extension as used in the Athlon CPU. */
13644 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
13645 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
13646 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
13647 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
13648 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
13649 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
13651 def_builtin (MASK_SSE
, "__builtin_ia32_setzerops", v4sf_ftype_void
, IX86_BUILTIN_SSE_ZERO
);
13654 def_builtin (MASK_SSE2
, "__builtin_ia32_pextrw128", int_ftype_v8hi_int
, IX86_BUILTIN_PEXTRW128
);
13655 def_builtin (MASK_SSE2
, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int
, IX86_BUILTIN_PINSRW128
);
13657 def_builtin (MASK_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
13658 def_builtin (MASK_SSE2
, "__builtin_ia32_movq2dq", v2di_ftype_di
, IX86_BUILTIN_MOVQ2DQ
);
13659 def_builtin (MASK_SSE2
, "__builtin_ia32_movdq2q", di_ftype_v2di
, IX86_BUILTIN_MOVDQ2Q
);
13661 def_builtin (MASK_SSE2
, "__builtin_ia32_loadapd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADAPD
);
13662 def_builtin (MASK_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADUPD
);
13663 def_builtin (MASK_SSE2
, "__builtin_ia32_loadsd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADSD
);
13664 def_builtin (MASK_SSE2
, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREAPD
);
13665 def_builtin (MASK_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
13666 def_builtin (MASK_SSE2
, "__builtin_ia32_storesd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STORESD
);
13668 def_builtin (MASK_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si
, IX86_BUILTIN_LOADHPD
);
13669 def_builtin (MASK_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si
, IX86_BUILTIN_LOADLPD
);
13670 def_builtin (MASK_SSE2
, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df
, IX86_BUILTIN_STOREHPD
);
13671 def_builtin (MASK_SSE2
, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df
, IX86_BUILTIN_STORELPD
);
13673 def_builtin (MASK_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
13674 def_builtin (MASK_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
13675 def_builtin (MASK_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
13676 def_builtin (MASK_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
13677 def_builtin (MASK_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
13679 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
13680 def_builtin (MASK_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
13681 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
13682 def_builtin (MASK_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
13684 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
13685 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
13687 def_builtin (MASK_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
13689 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
13690 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
13692 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
13693 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
13694 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
13695 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
13696 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
13698 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
13700 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
13701 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
13702 def_builtin (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTSD2SI64
);
13703 def_builtin (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI64
);
13705 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
13706 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
13707 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
13709 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
13710 def_builtin (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64
, IX86_BUILTIN_CVTSI642SD
);
13711 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
13712 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
13714 def_builtin (MASK_SSE2
, "__builtin_ia32_setpd1", v2df_ftype_double
, IX86_BUILTIN_SETPD1
);
13715 def_builtin (MASK_SSE2
, "__builtin_ia32_setpd", v2df_ftype_double_double
, IX86_BUILTIN_SETPD
);
13716 def_builtin (MASK_SSE2
, "__builtin_ia32_setzeropd", ti_ftype_void
, IX86_BUILTIN_CLRPD
);
13717 def_builtin (MASK_SSE2
, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADPD1
);
13718 def_builtin (MASK_SSE2
, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADRPD
);
13719 def_builtin (MASK_SSE2
, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREPD1
);
13720 def_builtin (MASK_SSE2
, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STORERPD
);
13722 def_builtin (MASK_SSE2
, "__builtin_ia32_clflush", void_ftype_pcvoid
, IX86_BUILTIN_CLFLUSH
);
13723 def_builtin (MASK_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
13724 def_builtin (MASK_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
13726 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQA
);
13727 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQU
);
13728 def_builtin (MASK_SSE2
, "__builtin_ia32_loadd", v4si_ftype_pcint
, IX86_BUILTIN_LOADD
);
13729 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQA
);
13730 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQU
);
13731 def_builtin (MASK_SSE2
, "__builtin_ia32_stored", void_ftype_pcint_v4si
, IX86_BUILTIN_STORED
);
13732 def_builtin (MASK_SSE2
, "__builtin_ia32_movq", v2di_ftype_v2di
, IX86_BUILTIN_MOVQ
);
13734 def_builtin (MASK_SSE
, "__builtin_ia32_setzero128", v2di_ftype_void
, IX86_BUILTIN_CLRTI
);
13736 def_builtin (MASK_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSLLW128
);
13737 def_builtin (MASK_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSLLD128
);
13738 def_builtin (MASK_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
13740 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRLW128
);
13741 def_builtin (MASK_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRLD128
);
13742 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
13744 def_builtin (MASK_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRAW128
);
13745 def_builtin (MASK_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRAD128
);
13747 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLDQI128
);
13748 def_builtin (MASK_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
13749 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
13750 def_builtin (MASK_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
13752 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
13753 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
13754 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
13755 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
13757 def_builtin (MASK_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
13758 def_builtin (MASK_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
13760 def_builtin (MASK_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
13762 /* Prescott New Instructions. */
13763 def_builtin (MASK_SSE3
, "__builtin_ia32_monitor",
13764 void_ftype_pcvoid_unsigned_unsigned
,
13765 IX86_BUILTIN_MONITOR
);
13766 def_builtin (MASK_SSE3
, "__builtin_ia32_mwait",
13767 void_ftype_unsigned_unsigned
,
13768 IX86_BUILTIN_MWAIT
);
13769 def_builtin (MASK_SSE3
, "__builtin_ia32_movshdup",
13771 IX86_BUILTIN_MOVSHDUP
);
13772 def_builtin (MASK_SSE3
, "__builtin_ia32_movsldup",
13774 IX86_BUILTIN_MOVSLDUP
);
13775 def_builtin (MASK_SSE3
, "__builtin_ia32_lddqu",
13776 v16qi_ftype_pcchar
, IX86_BUILTIN_LDDQU
);
13777 def_builtin (MASK_SSE3
, "__builtin_ia32_loadddup",
13778 v2df_ftype_pcdouble
, IX86_BUILTIN_LOADDDUP
);
13779 def_builtin (MASK_SSE3
, "__builtin_ia32_movddup",
13780 v2df_ftype_v2df
, IX86_BUILTIN_MOVDDUP
);
13783 /* Errors in the source file can cause expand_expr to return const0_rtx
13784 where we expect a vector. To avoid crashing, use one of the vector
13785 clear instructions. */
13787 safe_vector_operand (rtx x
, enum machine_mode mode
)
13789 if (x
!= const0_rtx
)
13791 x
= gen_reg_rtx (mode
);
13793 if (VALID_MMX_REG_MODE (mode
) || VALID_MMX_REG_MODE_3DNOW (mode
))
13794 emit_insn (gen_mmx_clrdi (mode
== DImode
? x
13795 : gen_rtx_SUBREG (DImode
, x
, 0)));
13797 emit_insn (gen_sse_clrv4sf (mode
== V4SFmode
? x
13798 : gen_rtx_SUBREG (V4SFmode
, x
, 0),
13799 CONST0_RTX (V4SFmode
)));
13803 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13806 ix86_expand_binop_builtin (enum insn_code icode
, tree arglist
, rtx target
)
13809 tree arg0
= TREE_VALUE (arglist
);
13810 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13811 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13812 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13813 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
13814 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
13815 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
13817 if (VECTOR_MODE_P (mode0
))
13818 op0
= safe_vector_operand (op0
, mode0
);
13819 if (VECTOR_MODE_P (mode1
))
13820 op1
= safe_vector_operand (op1
, mode1
);
13823 || GET_MODE (target
) != tmode
13824 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13825 target
= gen_reg_rtx (tmode
);
13827 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
13829 rtx x
= gen_reg_rtx (V4SImode
);
13830 emit_insn (gen_sse2_loadd (x
, op1
));
13831 op1
= gen_lowpart (TImode
, x
);
13834 /* In case the insn wants input operands in modes different from
13835 the result, abort. */
13836 if ((GET_MODE (op0
) != mode0
&& GET_MODE (op0
) != VOIDmode
)
13837 || (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
))
13840 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13841 op0
= copy_to_mode_reg (mode0
, op0
);
13842 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13843 op1
= copy_to_mode_reg (mode1
, op1
);
13845 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13846 yet one of the two must not be a memory. This is normally enforced
13847 by expanders, but we didn't bother to create one here. */
13848 if (GET_CODE (op0
) == MEM
&& GET_CODE (op1
) == MEM
)
13849 op0
= copy_to_mode_reg (mode0
, op0
);
13851 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13858 /* Subroutine of ix86_expand_builtin to take care of stores. */
13861 ix86_expand_store_builtin (enum insn_code icode
, tree arglist
)
13864 tree arg0
= TREE_VALUE (arglist
);
13865 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13866 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13867 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13868 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
13869 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
13871 if (VECTOR_MODE_P (mode1
))
13872 op1
= safe_vector_operand (op1
, mode1
);
13874 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
13875 op1
= copy_to_mode_reg (mode1
, op1
);
13877 pat
= GEN_FCN (icode
) (op0
, op1
);
13883 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13886 ix86_expand_unop_builtin (enum insn_code icode
, tree arglist
,
13887 rtx target
, int do_load
)
13890 tree arg0
= TREE_VALUE (arglist
);
13891 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13892 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
13893 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
13896 || GET_MODE (target
) != tmode
13897 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13898 target
= gen_reg_rtx (tmode
);
13900 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
13903 if (VECTOR_MODE_P (mode0
))
13904 op0
= safe_vector_operand (op0
, mode0
);
13906 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13907 op0
= copy_to_mode_reg (mode0
, op0
);
13910 pat
= GEN_FCN (icode
) (target
, op0
);
13917 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13918 sqrtss, rsqrtss, rcpss. */
13921 ix86_expand_unop1_builtin (enum insn_code icode
, tree arglist
, rtx target
)
13924 tree arg0
= TREE_VALUE (arglist
);
13925 rtx op1
, op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13926 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
13927 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
13930 || GET_MODE (target
) != tmode
13931 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13932 target
= gen_reg_rtx (tmode
);
13934 if (VECTOR_MODE_P (mode0
))
13935 op0
= safe_vector_operand (op0
, mode0
);
13937 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13938 op0
= copy_to_mode_reg (mode0
, op0
);
13941 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
13942 op1
= copy_to_mode_reg (mode0
, op1
);
13944 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13951 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13954 ix86_expand_sse_compare (const struct builtin_description
*d
, tree arglist
,
13958 tree arg0
= TREE_VALUE (arglist
);
13959 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13960 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13961 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13963 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
13964 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
13965 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
13966 enum rtx_code comparison
= d
->comparison
;
13968 if (VECTOR_MODE_P (mode0
))
13969 op0
= safe_vector_operand (op0
, mode0
);
13970 if (VECTOR_MODE_P (mode1
))
13971 op1
= safe_vector_operand (op1
, mode1
);
13973 /* Swap operands if we have a comparison that isn't available in
13977 rtx tmp
= gen_reg_rtx (mode1
);
13978 emit_move_insn (tmp
, op1
);
13984 || GET_MODE (target
) != tmode
13985 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
13986 target
= gen_reg_rtx (tmode
);
13988 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
13989 op0
= copy_to_mode_reg (mode0
, op0
);
13990 if (! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
13991 op1
= copy_to_mode_reg (mode1
, op1
);
13993 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
13994 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
14001 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
14004 ix86_expand_sse_comi (const struct builtin_description
*d
, tree arglist
,
14008 tree arg0
= TREE_VALUE (arglist
);
14009 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14010 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14011 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14013 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
14014 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
14015 enum rtx_code comparison
= d
->comparison
;
14017 if (VECTOR_MODE_P (mode0
))
14018 op0
= safe_vector_operand (op0
, mode0
);
14019 if (VECTOR_MODE_P (mode1
))
14020 op1
= safe_vector_operand (op1
, mode1
);
14022 /* Swap operands if we have a comparison that isn't available in
14031 target
= gen_reg_rtx (SImode
);
14032 emit_move_insn (target
, const0_rtx
);
14033 target
= gen_rtx_SUBREG (QImode
, target
, 0);
14035 if (! (*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
14036 op0
= copy_to_mode_reg (mode0
, op0
);
14037 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
14038 op1
= copy_to_mode_reg (mode1
, op1
);
14040 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
14041 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
14045 emit_insn (gen_rtx_SET (VOIDmode
,
14046 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
14047 gen_rtx_fmt_ee (comparison
, QImode
,
14051 return SUBREG_REG (target
);
14054 /* Expand an expression EXP that calls a built-in function,
14055 with result going to TARGET if that's convenient
14056 (and in mode MODE if that's convenient).
14057 SUBTARGET may be used as the target for computing one of EXP's operands.
14058 IGNORE is nonzero if the value is to be ignored. */
14061 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
14062 enum machine_mode mode ATTRIBUTE_UNUSED
,
14063 int ignore ATTRIBUTE_UNUSED
)
14065 const struct builtin_description
*d
;
14067 enum insn_code icode
;
14068 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
14069 tree arglist
= TREE_OPERAND (exp
, 1);
14070 tree arg0
, arg1
, arg2
;
14071 rtx op0
, op1
, op2
, pat
;
14072 enum machine_mode tmode
, mode0
, mode1
, mode2
;
14073 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
14077 case IX86_BUILTIN_EMMS
:
14078 emit_insn (gen_emms ());
14081 case IX86_BUILTIN_SFENCE
:
14082 emit_insn (gen_sfence ());
14085 case IX86_BUILTIN_PEXTRW
:
14086 case IX86_BUILTIN_PEXTRW128
:
14087 icode
= (fcode
== IX86_BUILTIN_PEXTRW
14088 ? CODE_FOR_mmx_pextrw
14089 : CODE_FOR_sse2_pextrw
);
14090 arg0
= TREE_VALUE (arglist
);
14091 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14092 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14093 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14094 tmode
= insn_data
[icode
].operand
[0].mode
;
14095 mode0
= insn_data
[icode
].operand
[1].mode
;
14096 mode1
= insn_data
[icode
].operand
[2].mode
;
14098 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
14099 op0
= copy_to_mode_reg (mode0
, op0
);
14100 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
14102 error ("selector must be an integer constant in the range 0..%i",
14103 fcode
== IX86_BUILTIN_PEXTRW
? 3:7);
14104 return gen_reg_rtx (tmode
);
14107 || GET_MODE (target
) != tmode
14108 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14109 target
= gen_reg_rtx (tmode
);
14110 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
14116 case IX86_BUILTIN_PINSRW
:
14117 case IX86_BUILTIN_PINSRW128
:
14118 icode
= (fcode
== IX86_BUILTIN_PINSRW
14119 ? CODE_FOR_mmx_pinsrw
14120 : CODE_FOR_sse2_pinsrw
);
14121 arg0
= TREE_VALUE (arglist
);
14122 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14123 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
14124 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14125 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14126 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
14127 tmode
= insn_data
[icode
].operand
[0].mode
;
14128 mode0
= insn_data
[icode
].operand
[1].mode
;
14129 mode1
= insn_data
[icode
].operand
[2].mode
;
14130 mode2
= insn_data
[icode
].operand
[3].mode
;
14132 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
14133 op0
= copy_to_mode_reg (mode0
, op0
);
14134 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
14135 op1
= copy_to_mode_reg (mode1
, op1
);
14136 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
14138 error ("selector must be an integer constant in the range 0..%i",
14139 fcode
== IX86_BUILTIN_PINSRW
? 15:255);
14143 || GET_MODE (target
) != tmode
14144 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14145 target
= gen_reg_rtx (tmode
);
14146 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
14152 case IX86_BUILTIN_MASKMOVQ
:
14153 case IX86_BUILTIN_MASKMOVDQU
:
14154 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
14155 ? (TARGET_64BIT
? CODE_FOR_mmx_maskmovq_rex
: CODE_FOR_mmx_maskmovq
)
14156 : (TARGET_64BIT
? CODE_FOR_sse2_maskmovdqu_rex64
14157 : CODE_FOR_sse2_maskmovdqu
));
14158 /* Note the arg order is different from the operand order. */
14159 arg1
= TREE_VALUE (arglist
);
14160 arg2
= TREE_VALUE (TREE_CHAIN (arglist
));
14161 arg0
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
14162 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14163 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14164 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
14165 mode0
= insn_data
[icode
].operand
[0].mode
;
14166 mode1
= insn_data
[icode
].operand
[1].mode
;
14167 mode2
= insn_data
[icode
].operand
[2].mode
;
14169 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
14170 op0
= copy_to_mode_reg (mode0
, op0
);
14171 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
14172 op1
= copy_to_mode_reg (mode1
, op1
);
14173 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
14174 op2
= copy_to_mode_reg (mode2
, op2
);
14175 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
14181 case IX86_BUILTIN_SQRTSS
:
14182 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2
, arglist
, target
);
14183 case IX86_BUILTIN_RSQRTSS
:
14184 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2
, arglist
, target
);
14185 case IX86_BUILTIN_RCPSS
:
14186 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2
, arglist
, target
);
14188 case IX86_BUILTIN_LOADAPS
:
14189 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps
, arglist
, target
, 1);
14191 case IX86_BUILTIN_LOADUPS
:
14192 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, arglist
, target
, 1);
14194 case IX86_BUILTIN_STOREAPS
:
14195 return ix86_expand_store_builtin (CODE_FOR_sse_movaps
, arglist
);
14197 case IX86_BUILTIN_STOREUPS
:
14198 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, arglist
);
14200 case IX86_BUILTIN_LOADSS
:
14201 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss
, arglist
, target
, 1);
14203 case IX86_BUILTIN_STORESS
:
14204 return ix86_expand_store_builtin (CODE_FOR_sse_storess
, arglist
);
14206 case IX86_BUILTIN_LOADHPS
:
14207 case IX86_BUILTIN_LOADLPS
:
14208 case IX86_BUILTIN_LOADHPD
:
14209 case IX86_BUILTIN_LOADLPD
:
14210 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_movhps
14211 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_movlps
14212 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_movhpd
14213 : CODE_FOR_sse2_movsd
);
14214 arg0
= TREE_VALUE (arglist
);
14215 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14216 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14217 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14218 tmode
= insn_data
[icode
].operand
[0].mode
;
14219 mode0
= insn_data
[icode
].operand
[1].mode
;
14220 mode1
= insn_data
[icode
].operand
[2].mode
;
14222 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
14223 op0
= copy_to_mode_reg (mode0
, op0
);
14224 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
14226 || GET_MODE (target
) != tmode
14227 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14228 target
= gen_reg_rtx (tmode
);
14229 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
14235 case IX86_BUILTIN_STOREHPS
:
14236 case IX86_BUILTIN_STORELPS
:
14237 case IX86_BUILTIN_STOREHPD
:
14238 case IX86_BUILTIN_STORELPD
:
14239 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_movhps
14240 : fcode
== IX86_BUILTIN_STORELPS
? CODE_FOR_sse_movlps
14241 : fcode
== IX86_BUILTIN_STOREHPD
? CODE_FOR_sse2_movhpd
14242 : CODE_FOR_sse2_movsd
);
14243 arg0
= TREE_VALUE (arglist
);
14244 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14245 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14246 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14247 mode0
= insn_data
[icode
].operand
[1].mode
;
14248 mode1
= insn_data
[icode
].operand
[2].mode
;
14250 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
14251 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
14252 op1
= copy_to_mode_reg (mode1
, op1
);
14254 pat
= GEN_FCN (icode
) (op0
, op0
, op1
);
14260 case IX86_BUILTIN_MOVNTPS
:
14261 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, arglist
);
14262 case IX86_BUILTIN_MOVNTQ
:
14263 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, arglist
);
14265 case IX86_BUILTIN_LDMXCSR
:
14266 op0
= expand_expr (TREE_VALUE (arglist
), NULL_RTX
, VOIDmode
, 0);
14267 target
= assign_386_stack_local (SImode
, 0);
14268 emit_move_insn (target
, op0
);
14269 emit_insn (gen_ldmxcsr (target
));
14272 case IX86_BUILTIN_STMXCSR
:
14273 target
= assign_386_stack_local (SImode
, 0);
14274 emit_insn (gen_stmxcsr (target
));
14275 return copy_to_mode_reg (SImode
, target
);
14277 case IX86_BUILTIN_SHUFPS
:
14278 case IX86_BUILTIN_SHUFPD
:
14279 icode
= (fcode
== IX86_BUILTIN_SHUFPS
14280 ? CODE_FOR_sse_shufps
14281 : CODE_FOR_sse2_shufpd
);
14282 arg0
= TREE_VALUE (arglist
);
14283 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14284 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
14285 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14286 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14287 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
14288 tmode
= insn_data
[icode
].operand
[0].mode
;
14289 mode0
= insn_data
[icode
].operand
[1].mode
;
14290 mode1
= insn_data
[icode
].operand
[2].mode
;
14291 mode2
= insn_data
[icode
].operand
[3].mode
;
14293 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
14294 op0
= copy_to_mode_reg (mode0
, op0
);
14295 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
14296 op1
= copy_to_mode_reg (mode1
, op1
);
14297 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
14299 /* @@@ better error message */
14300 error ("mask must be an immediate");
14301 return gen_reg_rtx (tmode
);
14304 || GET_MODE (target
) != tmode
14305 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14306 target
= gen_reg_rtx (tmode
);
14307 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
14313 case IX86_BUILTIN_PSHUFW
:
14314 case IX86_BUILTIN_PSHUFD
:
14315 case IX86_BUILTIN_PSHUFHW
:
14316 case IX86_BUILTIN_PSHUFLW
:
14317 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
14318 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
14319 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
14320 : CODE_FOR_mmx_pshufw
);
14321 arg0
= TREE_VALUE (arglist
);
14322 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14323 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14324 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14325 tmode
= insn_data
[icode
].operand
[0].mode
;
14326 mode1
= insn_data
[icode
].operand
[1].mode
;
14327 mode2
= insn_data
[icode
].operand
[2].mode
;
14329 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
14330 op0
= copy_to_mode_reg (mode1
, op0
);
14331 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
14333 /* @@@ better error message */
14334 error ("mask must be an immediate");
14338 || GET_MODE (target
) != tmode
14339 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14340 target
= gen_reg_rtx (tmode
);
14341 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
14347 case IX86_BUILTIN_PSLLDQI128
:
14348 case IX86_BUILTIN_PSRLDQI128
:
14349 icode
= ( fcode
== IX86_BUILTIN_PSLLDQI128
? CODE_FOR_sse2_ashlti3
14350 : CODE_FOR_sse2_lshrti3
);
14351 arg0
= TREE_VALUE (arglist
);
14352 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14353 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14354 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14355 tmode
= insn_data
[icode
].operand
[0].mode
;
14356 mode1
= insn_data
[icode
].operand
[1].mode
;
14357 mode2
= insn_data
[icode
].operand
[2].mode
;
14359 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
14361 op0
= copy_to_reg (op0
);
14362 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
14364 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
14366 error ("shift must be an immediate");
14369 target
= gen_reg_rtx (V2DImode
);
14370 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, V2DImode
, 0), op0
, op1
);
14376 case IX86_BUILTIN_FEMMS
:
14377 emit_insn (gen_femms ());
14380 case IX86_BUILTIN_PAVGUSB
:
14381 return ix86_expand_binop_builtin (CODE_FOR_pavgusb
, arglist
, target
);
14383 case IX86_BUILTIN_PF2ID
:
14384 return ix86_expand_unop_builtin (CODE_FOR_pf2id
, arglist
, target
, 0);
14386 case IX86_BUILTIN_PFACC
:
14387 return ix86_expand_binop_builtin (CODE_FOR_pfacc
, arglist
, target
);
14389 case IX86_BUILTIN_PFADD
:
14390 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3
, arglist
, target
);
14392 case IX86_BUILTIN_PFCMPEQ
:
14393 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3
, arglist
, target
);
14395 case IX86_BUILTIN_PFCMPGE
:
14396 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3
, arglist
, target
);
14398 case IX86_BUILTIN_PFCMPGT
:
14399 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3
, arglist
, target
);
14401 case IX86_BUILTIN_PFMAX
:
14402 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3
, arglist
, target
);
14404 case IX86_BUILTIN_PFMIN
:
14405 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3
, arglist
, target
);
14407 case IX86_BUILTIN_PFMUL
:
14408 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3
, arglist
, target
);
14410 case IX86_BUILTIN_PFRCP
:
14411 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2
, arglist
, target
, 0);
14413 case IX86_BUILTIN_PFRCPIT1
:
14414 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3
, arglist
, target
);
14416 case IX86_BUILTIN_PFRCPIT2
:
14417 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3
, arglist
, target
);
14419 case IX86_BUILTIN_PFRSQIT1
:
14420 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3
, arglist
, target
);
14422 case IX86_BUILTIN_PFRSQRT
:
14423 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2
, arglist
, target
, 0);
14425 case IX86_BUILTIN_PFSUB
:
14426 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3
, arglist
, target
);
14428 case IX86_BUILTIN_PFSUBR
:
14429 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3
, arglist
, target
);
14431 case IX86_BUILTIN_PI2FD
:
14432 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2
, arglist
, target
, 0);
14434 case IX86_BUILTIN_PMULHRW
:
14435 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3
, arglist
, target
);
14437 case IX86_BUILTIN_PF2IW
:
14438 return ix86_expand_unop_builtin (CODE_FOR_pf2iw
, arglist
, target
, 0);
14440 case IX86_BUILTIN_PFNACC
:
14441 return ix86_expand_binop_builtin (CODE_FOR_pfnacc
, arglist
, target
);
14443 case IX86_BUILTIN_PFPNACC
:
14444 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc
, arglist
, target
);
14446 case IX86_BUILTIN_PI2FW
:
14447 return ix86_expand_unop_builtin (CODE_FOR_pi2fw
, arglist
, target
, 0);
14449 case IX86_BUILTIN_PSWAPDSI
:
14450 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2
, arglist
, target
, 0);
14452 case IX86_BUILTIN_PSWAPDSF
:
14453 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2
, arglist
, target
, 0);
14455 case IX86_BUILTIN_SSE_ZERO
:
14456 target
= gen_reg_rtx (V4SFmode
);
14457 emit_insn (gen_sse_clrv4sf (target
, CONST0_RTX (V4SFmode
)));
14460 case IX86_BUILTIN_MMX_ZERO
:
14461 target
= gen_reg_rtx (DImode
);
14462 emit_insn (gen_mmx_clrdi (target
));
14465 case IX86_BUILTIN_CLRTI
:
14466 target
= gen_reg_rtx (V2DImode
);
14467 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode
, target
, V2DImode
, 0)));
14471 case IX86_BUILTIN_SQRTSD
:
14472 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2
, arglist
, target
);
14473 case IX86_BUILTIN_LOADAPD
:
14474 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd
, arglist
, target
, 1);
14475 case IX86_BUILTIN_LOADUPD
:
14476 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, arglist
, target
, 1);
14478 case IX86_BUILTIN_STOREAPD
:
14479 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
14480 case IX86_BUILTIN_STOREUPD
:
14481 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, arglist
);
14483 case IX86_BUILTIN_LOADSD
:
14484 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd
, arglist
, target
, 1);
14486 case IX86_BUILTIN_STORESD
:
14487 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd
, arglist
);
14489 case IX86_BUILTIN_SETPD1
:
14490 target
= assign_386_stack_local (DFmode
, 0);
14491 arg0
= TREE_VALUE (arglist
);
14492 emit_move_insn (adjust_address (target
, DFmode
, 0),
14493 expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0));
14494 op0
= gen_reg_rtx (V2DFmode
);
14495 emit_insn (gen_sse2_loadsd (op0
, adjust_address (target
, V2DFmode
, 0)));
14496 emit_insn (gen_sse2_shufpd (op0
, op0
, op0
, GEN_INT (0)));
14499 case IX86_BUILTIN_SETPD
:
14500 target
= assign_386_stack_local (V2DFmode
, 0);
14501 arg0
= TREE_VALUE (arglist
);
14502 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14503 emit_move_insn (adjust_address (target
, DFmode
, 0),
14504 expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0));
14505 emit_move_insn (adjust_address (target
, DFmode
, 8),
14506 expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0));
14507 op0
= gen_reg_rtx (V2DFmode
);
14508 emit_insn (gen_sse2_movapd (op0
, target
));
14511 case IX86_BUILTIN_LOADRPD
:
14512 target
= ix86_expand_unop_builtin (CODE_FOR_sse2_movapd
, arglist
,
14513 gen_reg_rtx (V2DFmode
), 1);
14514 emit_insn (gen_sse2_shufpd (target
, target
, target
, GEN_INT (1)));
14517 case IX86_BUILTIN_LOADPD1
:
14518 target
= ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd
, arglist
,
14519 gen_reg_rtx (V2DFmode
), 1);
14520 emit_insn (gen_sse2_shufpd (target
, target
, target
, const0_rtx
));
14523 case IX86_BUILTIN_STOREPD1
:
14524 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
14525 case IX86_BUILTIN_STORERPD
:
14526 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
14528 case IX86_BUILTIN_CLRPD
:
14529 target
= gen_reg_rtx (V2DFmode
);
14530 emit_insn (gen_sse_clrv2df (target
));
14533 case IX86_BUILTIN_MFENCE
:
14534 emit_insn (gen_sse2_mfence ());
14536 case IX86_BUILTIN_LFENCE
:
14537 emit_insn (gen_sse2_lfence ());
14540 case IX86_BUILTIN_CLFLUSH
:
14541 arg0
= TREE_VALUE (arglist
);
14542 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14543 icode
= CODE_FOR_sse2_clflush
;
14544 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, Pmode
))
14545 op0
= copy_to_mode_reg (Pmode
, op0
);
14547 emit_insn (gen_sse2_clflush (op0
));
14550 case IX86_BUILTIN_MOVNTPD
:
14551 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, arglist
);
14552 case IX86_BUILTIN_MOVNTDQ
:
14553 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, arglist
);
14554 case IX86_BUILTIN_MOVNTI
:
14555 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, arglist
);
14557 case IX86_BUILTIN_LOADDQA
:
14558 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa
, arglist
, target
, 1);
14559 case IX86_BUILTIN_LOADDQU
:
14560 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu
, arglist
, target
, 1);
14561 case IX86_BUILTIN_LOADD
:
14562 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd
, arglist
, target
, 1);
14564 case IX86_BUILTIN_STOREDQA
:
14565 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa
, arglist
);
14566 case IX86_BUILTIN_STOREDQU
:
14567 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu
, arglist
);
14568 case IX86_BUILTIN_STORED
:
14569 return ix86_expand_store_builtin (CODE_FOR_sse2_stored
, arglist
);
14571 case IX86_BUILTIN_MONITOR
:
14572 arg0
= TREE_VALUE (arglist
);
14573 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14574 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
14575 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14576 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14577 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
14579 op0
= copy_to_mode_reg (SImode
, op0
);
14581 op1
= copy_to_mode_reg (SImode
, op1
);
14583 op2
= copy_to_mode_reg (SImode
, op2
);
14584 emit_insn (gen_monitor (op0
, op1
, op2
));
14587 case IX86_BUILTIN_MWAIT
:
14588 arg0
= TREE_VALUE (arglist
);
14589 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14590 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14591 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14593 op0
= copy_to_mode_reg (SImode
, op0
);
14595 op1
= copy_to_mode_reg (SImode
, op1
);
14596 emit_insn (gen_mwait (op0
, op1
));
14599 case IX86_BUILTIN_LOADDDUP
:
14600 return ix86_expand_unop_builtin (CODE_FOR_loadddup
, arglist
, target
, 1);
14602 case IX86_BUILTIN_LDDQU
:
14603 return ix86_expand_unop_builtin (CODE_FOR_lddqu
, arglist
, target
,
14610 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
14611 if (d
->code
== fcode
)
14613 /* Compares are treated specially. */
14614 if (d
->icode
== CODE_FOR_maskcmpv4sf3
14615 || d
->icode
== CODE_FOR_vmmaskcmpv4sf3
14616 || d
->icode
== CODE_FOR_maskncmpv4sf3
14617 || d
->icode
== CODE_FOR_vmmaskncmpv4sf3
14618 || d
->icode
== CODE_FOR_maskcmpv2df3
14619 || d
->icode
== CODE_FOR_vmmaskcmpv2df3
14620 || d
->icode
== CODE_FOR_maskncmpv2df3
14621 || d
->icode
== CODE_FOR_vmmaskncmpv2df3
)
14622 return ix86_expand_sse_compare (d
, arglist
, target
);
14624 return ix86_expand_binop_builtin (d
->icode
, arglist
, target
);
14627 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
14628 if (d
->code
== fcode
)
14629 return ix86_expand_unop_builtin (d
->icode
, arglist
, target
, 0);
14631 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
14632 if (d
->code
== fcode
)
14633 return ix86_expand_sse_comi (d
, arglist
, target
);
14635 /* @@@ Should really do something sensible here. */
14639 /* Store OPERAND to the memory after reload is completed. This means
14640 that we can't easily use assign_stack_local. */
14642 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
14645 if (!reload_completed
)
14647 if (TARGET_RED_ZONE
)
14649 result
= gen_rtx_MEM (mode
,
14650 gen_rtx_PLUS (Pmode
,
14652 GEN_INT (-RED_ZONE_SIZE
)));
14653 emit_move_insn (result
, operand
);
14655 else if (!TARGET_RED_ZONE
&& TARGET_64BIT
)
14661 operand
= gen_lowpart (DImode
, operand
);
14665 gen_rtx_SET (VOIDmode
,
14666 gen_rtx_MEM (DImode
,
14667 gen_rtx_PRE_DEC (DImode
,
14668 stack_pointer_rtx
)),
14674 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
14683 split_di (&operand
, 1, operands
, operands
+ 1);
14685 gen_rtx_SET (VOIDmode
,
14686 gen_rtx_MEM (SImode
,
14687 gen_rtx_PRE_DEC (Pmode
,
14688 stack_pointer_rtx
)),
14691 gen_rtx_SET (VOIDmode
,
14692 gen_rtx_MEM (SImode
,
14693 gen_rtx_PRE_DEC (Pmode
,
14694 stack_pointer_rtx
)),
14699 /* It is better to store HImodes as SImodes. */
14700 if (!TARGET_PARTIAL_REG_STALL
)
14701 operand
= gen_lowpart (SImode
, operand
);
14705 gen_rtx_SET (VOIDmode
,
14706 gen_rtx_MEM (GET_MODE (operand
),
14707 gen_rtx_PRE_DEC (SImode
,
14708 stack_pointer_rtx
)),
14714 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
14719 /* Free operand from the memory. */
14721 ix86_free_from_memory (enum machine_mode mode
)
14723 if (!TARGET_RED_ZONE
)
14727 if (mode
== DImode
|| TARGET_64BIT
)
14729 else if (mode
== HImode
&& TARGET_PARTIAL_REG_STALL
)
14733 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14734 to pop or add instruction if registers are available. */
14735 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
14736 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
14741 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14742 QImode must go into class Q_REGS.
14743 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14744 movdf to do mem-to-mem moves through integer regs. */
14746 ix86_preferred_reload_class (rtx x
, enum reg_class
class)
14748 if (GET_CODE (x
) == CONST_VECTOR
&& x
!= CONST0_RTX (GET_MODE (x
)))
14750 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
14752 /* SSE can't load any constant directly yet. */
14753 if (SSE_CLASS_P (class))
14755 /* Floats can load 0 and 1. */
14756 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x
))
14758 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14759 if (MAYBE_SSE_CLASS_P (class))
14760 return (reg_class_subset_p (class, GENERAL_REGS
)
14761 ? GENERAL_REGS
: FLOAT_REGS
);
14765 /* General regs can load everything. */
14766 if (reg_class_subset_p (class, GENERAL_REGS
))
14767 return GENERAL_REGS
;
14768 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14769 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14772 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x
))
14774 if (GET_MODE (x
) == QImode
&& ! reg_class_subset_p (class, Q_REGS
))
14779 /* If we are copying between general and FP registers, we need a memory
14780 location. The same is true for SSE and MMX registers.
14782 The macro can't work reliably when one of the CLASSES is class containing
14783 registers from multiple units (SSE, MMX, integer). We avoid this by never
14784 combining those units in single alternative in the machine description.
14785 Ensure that this constraint holds to avoid unexpected surprises.
14787 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14788 enforce these sanity checks. */
14790 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
14791 enum machine_mode mode
, int strict
)
14793 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
14794 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
14795 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
14796 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
14797 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
14798 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
14805 return (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
)
14806 || ((SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
)
14807 || MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
14808 && ((mode
!= SImode
&& (mode
!= DImode
|| !TARGET_64BIT
))
14809 || (!TARGET_INTER_UNIT_MOVES
&& !optimize_size
))));
14811 /* Return the cost of moving data from a register in class CLASS1 to
14812 one in class CLASS2.
14814 It is not required that the cost always equal 2 when FROM is the same as TO;
14815 on some machines it is expensive to move between registers if they are not
14816 general registers. */
14818 ix86_register_move_cost (enum machine_mode mode
, enum reg_class class1
,
14819 enum reg_class class2
)
14821 /* In case we require secondary memory, compute cost of the store followed
14822 by load. In order to avoid bad register allocation choices, we need
14823 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14825 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
14829 cost
+= MAX (MEMORY_MOVE_COST (mode
, class1
, 0),
14830 MEMORY_MOVE_COST (mode
, class1
, 1));
14831 cost
+= MAX (MEMORY_MOVE_COST (mode
, class2
, 0),
14832 MEMORY_MOVE_COST (mode
, class2
, 1));
14834 /* In case of copying from general_purpose_register we may emit multiple
14835 stores followed by single load causing memory size mismatch stall.
14836 Count this as arbitrarily high cost of 20. */
14837 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
14840 /* In the case of FP/MMX moves, the registers actually overlap, and we
14841 have to switch modes in order to treat them differently. */
14842 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
14843 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
14849 /* Moves between SSE/MMX and integer unit are expensive. */
14850 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
14851 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
14852 return ix86_cost
->mmxsse_to_integer
;
14853 if (MAYBE_FLOAT_CLASS_P (class1
))
14854 return ix86_cost
->fp_move
;
14855 if (MAYBE_SSE_CLASS_P (class1
))
14856 return ix86_cost
->sse_move
;
14857 if (MAYBE_MMX_CLASS_P (class1
))
14858 return ix86_cost
->mmx_move
;
14862 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14864 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
14866 /* Flags and only flags can only hold CCmode values. */
14867 if (CC_REGNO_P (regno
))
14868 return GET_MODE_CLASS (mode
) == MODE_CC
;
14869 if (GET_MODE_CLASS (mode
) == MODE_CC
14870 || GET_MODE_CLASS (mode
) == MODE_RANDOM
14871 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
14873 if (FP_REGNO_P (regno
))
14874 return VALID_FP_MODE_P (mode
);
14875 if (SSE_REGNO_P (regno
))
14876 return (TARGET_SSE
? VALID_SSE_REG_MODE (mode
) : 0);
14877 if (MMX_REGNO_P (regno
))
14879 ? VALID_MMX_REG_MODE (mode
) || VALID_MMX_REG_MODE_3DNOW (mode
) : 0);
14880 /* We handle both integer and floats in the general purpose registers.
14881 In future we should be able to handle vector modes as well. */
14882 if (!VALID_INT_MODE_P (mode
) && !VALID_FP_MODE_P (mode
))
14884 /* Take care for QImode values - they can be in non-QI regs, but then
14885 they do cause partial register stalls. */
14886 if (regno
< 4 || mode
!= QImode
|| TARGET_64BIT
)
14888 return reload_in_progress
|| reload_completed
|| !TARGET_PARTIAL_REG_STALL
;
14891 /* Return the cost of moving data of mode M between a
14892 register and memory. A value of 2 is the default; this cost is
14893 relative to those in `REGISTER_MOVE_COST'.
14895 If moving between registers and memory is more expensive than
14896 between two registers, you should define this macro to express the
14899 Model also increased moving costs of QImode registers in non
14903 ix86_memory_move_cost (enum machine_mode mode
, enum reg_class
class, int in
)
14905 if (FLOAT_CLASS_P (class))
14922 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
14924 if (SSE_CLASS_P (class))
14927 switch (GET_MODE_SIZE (mode
))
14941 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
14943 if (MMX_CLASS_P (class))
14946 switch (GET_MODE_SIZE (mode
))
14957 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
14959 switch (GET_MODE_SIZE (mode
))
14963 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
14964 : ix86_cost
->movzbl_load
);
14966 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
14967 : ix86_cost
->int_store
[0] + 4);
14970 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
14972 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14973 if (mode
== TFmode
)
14975 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
14976 * (((int) GET_MODE_SIZE (mode
)
14977 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
14981 /* Compute a (partial) cost for rtx X. Return true if the complete
14982 cost has been computed, and false if subexpressions should be
14983 scanned. In either case, *TOTAL contains the cost result. */
14986 ix86_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
14988 enum machine_mode mode
= GET_MODE (x
);
14996 if (TARGET_64BIT
&& !x86_64_sign_extended_value (x
))
14998 else if (TARGET_64BIT
&& !x86_64_zero_extended_value (x
))
15000 else if (flag_pic
&& SYMBOLIC_CONST (x
)
15002 || (!GET_CODE (x
) != LABEL_REF
15003 && (GET_CODE (x
) != SYMBOL_REF
15004 || !SYMBOL_REF_LOCAL_P (x
)))))
15011 if (mode
== VOIDmode
)
15014 switch (standard_80387_constant_p (x
))
15019 default: /* Other constants */
15024 /* Start with (MEM (SYMBOL_REF)), since that's where
15025 it'll probably end up. Add a penalty for size. */
15026 *total
= (COSTS_N_INSNS (1)
15027 + (flag_pic
!= 0 && !TARGET_64BIT
)
15028 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
15034 /* The zero extensions is often completely free on x86_64, so make
15035 it as cheap as possible. */
15036 if (TARGET_64BIT
&& mode
== DImode
15037 && GET_MODE (XEXP (x
, 0)) == SImode
)
15039 else if (TARGET_ZERO_EXTEND_WITH_AND
)
15040 *total
= COSTS_N_INSNS (ix86_cost
->add
);
15042 *total
= COSTS_N_INSNS (ix86_cost
->movzx
);
15046 *total
= COSTS_N_INSNS (ix86_cost
->movsx
);
15050 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
15051 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
15053 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
15056 *total
= COSTS_N_INSNS (ix86_cost
->add
);
15059 if ((value
== 2 || value
== 3)
15060 && !TARGET_DECOMPOSE_LEA
15061 && ix86_cost
->lea
<= ix86_cost
->shift_const
)
15063 *total
= COSTS_N_INSNS (ix86_cost
->lea
);
15073 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
15075 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
15077 if (INTVAL (XEXP (x
, 1)) > 32)
15078 *total
= COSTS_N_INSNS(ix86_cost
->shift_const
+ 2);
15080 *total
= COSTS_N_INSNS(ix86_cost
->shift_const
* 2);
15084 if (GET_CODE (XEXP (x
, 1)) == AND
)
15085 *total
= COSTS_N_INSNS(ix86_cost
->shift_var
* 2);
15087 *total
= COSTS_N_INSNS(ix86_cost
->shift_var
* 6 + 2);
15092 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
15093 *total
= COSTS_N_INSNS (ix86_cost
->shift_const
);
15095 *total
= COSTS_N_INSNS (ix86_cost
->shift_var
);
15100 if (FLOAT_MODE_P (mode
))
15101 *total
= COSTS_N_INSNS (ix86_cost
->fmul
);
15102 else if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
15104 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
15107 for (nbits
= 0; value
!= 0; value
>>= 1)
15110 *total
= COSTS_N_INSNS (ix86_cost
->mult_init
[MODE_INDEX (mode
)]
15111 + nbits
* ix86_cost
->mult_bit
);
15115 /* This is arbitrary */
15116 *total
= COSTS_N_INSNS (ix86_cost
->mult_init
[MODE_INDEX (mode
)]
15117 + 7 * ix86_cost
->mult_bit
);
15125 if (FLOAT_MODE_P (mode
))
15126 *total
= COSTS_N_INSNS (ix86_cost
->fdiv
);
15128 *total
= COSTS_N_INSNS (ix86_cost
->divide
[MODE_INDEX (mode
)]);
15132 if (FLOAT_MODE_P (mode
))
15133 *total
= COSTS_N_INSNS (ix86_cost
->fadd
);
15134 else if (!TARGET_DECOMPOSE_LEA
15135 && GET_MODE_CLASS (mode
) == MODE_INT
15136 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
15138 if (GET_CODE (XEXP (x
, 0)) == PLUS
15139 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
15140 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == CONST_INT
15141 && CONSTANT_P (XEXP (x
, 1)))
15143 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
15144 if (val
== 2 || val
== 4 || val
== 8)
15146 *total
= COSTS_N_INSNS (ix86_cost
->lea
);
15147 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
15148 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
15150 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
15154 else if (GET_CODE (XEXP (x
, 0)) == MULT
15155 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)
15157 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
15158 if (val
== 2 || val
== 4 || val
== 8)
15160 *total
= COSTS_N_INSNS (ix86_cost
->lea
);
15161 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
15162 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
15166 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
15168 *total
= COSTS_N_INSNS (ix86_cost
->lea
);
15169 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
15170 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
15171 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
15178 if (FLOAT_MODE_P (mode
))
15180 *total
= COSTS_N_INSNS (ix86_cost
->fadd
);
15188 if (!TARGET_64BIT
&& mode
== DImode
)
15190 *total
= (COSTS_N_INSNS (ix86_cost
->add
) * 2
15191 + (rtx_cost (XEXP (x
, 0), outer_code
)
15192 << (GET_MODE (XEXP (x
, 0)) != DImode
))
15193 + (rtx_cost (XEXP (x
, 1), outer_code
)
15194 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
15200 if (FLOAT_MODE_P (mode
))
15202 *total
= COSTS_N_INSNS (ix86_cost
->fchs
);
15208 if (!TARGET_64BIT
&& mode
== DImode
)
15209 *total
= COSTS_N_INSNS (ix86_cost
->add
* 2);
15211 *total
= COSTS_N_INSNS (ix86_cost
->add
);
15215 if (!TARGET_SSE_MATH
|| !VALID_SSE_REG_MODE (mode
))
15220 if (FLOAT_MODE_P (mode
))
15221 *total
= COSTS_N_INSNS (ix86_cost
->fabs
);
15225 if (FLOAT_MODE_P (mode
))
15226 *total
= COSTS_N_INSNS (ix86_cost
->fsqrt
);
15230 if (XINT (x
, 1) == UNSPEC_TP
)
15239 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
15241 ix86_svr3_asm_out_constructor (rtx symbol
, int priority ATTRIBUTE_UNUSED
)
15244 fputs ("\tpushl $", asm_out_file
);
15245 assemble_name (asm_out_file
, XSTR (symbol
, 0));
15246 fputc ('\n', asm_out_file
);
15252 static int current_machopic_label_num
;
15254 /* Given a symbol name and its associated stub, write out the
15255 definition of the stub. */
15258 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
15260 unsigned int length
;
15261 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
15262 int label
= ++current_machopic_label_num
;
15264 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15265 symb
= (*targetm
.strip_name_encoding
) (symb
);
15267 length
= strlen (stub
);
15268 binder_name
= alloca (length
+ 32);
15269 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
15271 length
= strlen (symb
);
15272 symbol_name
= alloca (length
+ 32);
15273 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
15275 sprintf (lazy_ptr_name
, "L%d$lz", label
);
15278 machopic_picsymbol_stub_section ();
15280 machopic_symbol_stub_section ();
15282 fprintf (file
, "%s:\n", stub
);
15283 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
15287 fprintf (file
, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label
, label
);
15288 fprintf (file
, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name
, label
);
15289 fprintf (file
, "\tjmp %%edx\n");
15292 fprintf (file
, "\tjmp *%s\n", lazy_ptr_name
);
15294 fprintf (file
, "%s:\n", binder_name
);
15298 fprintf (file
, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name
, label
);
15299 fprintf (file
, "\tpushl %%eax\n");
15302 fprintf (file
, "\t pushl $%s\n", lazy_ptr_name
);
15304 fprintf (file
, "\tjmp dyld_stub_binding_helper\n");
15306 machopic_lazy_symbol_ptr_section ();
15307 fprintf (file
, "%s:\n", lazy_ptr_name
);
15308 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
15309 fprintf (file
, "\t.long %s\n", binder_name
);
15311 #endif /* TARGET_MACHO */
15313 /* Order the registers for register allocator. */
15316 x86_order_regs_for_local_alloc (void)
15321 /* First allocate the local general purpose registers. */
15322 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
15323 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
15324 reg_alloc_order
[pos
++] = i
;
15326 /* Global general purpose registers. */
15327 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
15328 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
15329 reg_alloc_order
[pos
++] = i
;
15331 /* x87 registers come first in case we are doing FP math
15333 if (!TARGET_SSE_MATH
)
15334 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
15335 reg_alloc_order
[pos
++] = i
;
15337 /* SSE registers. */
15338 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
15339 reg_alloc_order
[pos
++] = i
;
15340 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
15341 reg_alloc_order
[pos
++] = i
;
15343 /* x87 registers. */
15344 if (TARGET_SSE_MATH
)
15345 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
15346 reg_alloc_order
[pos
++] = i
;
15348 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
15349 reg_alloc_order
[pos
++] = i
;
15351 /* Initialize the rest of array as we do not allocate some registers
15353 while (pos
< FIRST_PSEUDO_REGISTER
)
15354 reg_alloc_order
[pos
++] = 0;
15357 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15358 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15361 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15362 struct attribute_spec.handler. */
15364 ix86_handle_struct_attribute (tree
*node
, tree name
,
15365 tree args ATTRIBUTE_UNUSED
,
15366 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
15369 if (DECL_P (*node
))
15371 if (TREE_CODE (*node
) == TYPE_DECL
)
15372 type
= &TREE_TYPE (*node
);
15377 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
15378 || TREE_CODE (*type
) == UNION_TYPE
)))
15380 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name
));
15381 *no_add_attrs
= true;
15384 else if ((is_attribute_p ("ms_struct", name
)
15385 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
15386 || ((is_attribute_p ("gcc_struct", name
)
15387 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
15389 warning ("`%s' incompatible attribute ignored",
15390 IDENTIFIER_POINTER (name
));
15391 *no_add_attrs
= true;
15398 ix86_ms_bitfield_layout_p (tree record_type
)
15400 return (TARGET_USE_MS_BITFIELD_LAYOUT
&&
15401 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
15402 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
15405 /* Returns an expression indicating where the this parameter is
15406 located on entry to the FUNCTION. */
15409 x86_this_parameter (tree function
)
15411 tree type
= TREE_TYPE (function
);
15415 int n
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
15416 return gen_rtx_REG (DImode
, x86_64_int_parameter_registers
[n
]);
15419 if (ix86_function_regparm (type
, function
) > 0)
15423 parm
= TYPE_ARG_TYPES (type
);
15424 /* Figure out whether or not the function has a variable number of
15426 for (; parm
; parm
= TREE_CHAIN (parm
))
15427 if (TREE_VALUE (parm
) == void_type_node
)
15429 /* If not, the this parameter is in the first argument. */
15433 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
15435 return gen_rtx_REG (SImode
, regno
);
15439 if (aggregate_value_p (TREE_TYPE (type
), type
))
15440 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 8));
15442 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 4));
15445 /* Determine whether x86_output_mi_thunk can succeed. */
15448 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED
,
15449 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
15450 HOST_WIDE_INT vcall_offset
, tree function
)
15452 /* 64-bit can handle anything. */
15456 /* For 32-bit, everything's fine if we have one free register. */
15457 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
15460 /* Need a free register for vcall_offset. */
15464 /* Need a free register for GOT references. */
15465 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
15468 /* Otherwise ok. */
15472 /* Output the assembler code for a thunk function. THUNK_DECL is the
15473 declaration for the thunk function itself, FUNCTION is the decl for
15474 the target function. DELTA is an immediate constant offset to be
15475 added to THIS. If VCALL_OFFSET is nonzero, the word at
15476 *(*this + vcall_offset) should be added to THIS. */
15479 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED
,
15480 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
15481 HOST_WIDE_INT vcall_offset
, tree function
)
15484 rtx
this = x86_this_parameter (function
);
15487 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15488 pull it in now and let DELTA benefit. */
15491 else if (vcall_offset
)
15493 /* Put the this parameter into %eax. */
15495 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, 0);
15496 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
15499 this_reg
= NULL_RTX
;
15501 /* Adjust the this parameter by a fixed constant. */
15504 xops
[0] = GEN_INT (delta
);
15505 xops
[1] = this_reg
? this_reg
: this;
15508 if (!x86_64_general_operand (xops
[0], DImode
))
15510 tmp
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 2 /* R10 */);
15512 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
15516 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
15519 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
15522 /* Adjust the this parameter by a value stored in the vtable. */
15526 tmp
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 2 /* R10 */);
15529 int tmp_regno
= 2 /* ECX */;
15530 if (lookup_attribute ("fastcall",
15531 TYPE_ATTRIBUTES (TREE_TYPE (function
))))
15532 tmp_regno
= 0 /* EAX */;
15533 tmp
= gen_rtx_REG (SImode
, tmp_regno
);
15536 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
15539 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
15541 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
15543 /* Adjust the this parameter. */
15544 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
15545 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
15547 rtx tmp2
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
15548 xops
[0] = GEN_INT (vcall_offset
);
15550 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
15551 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
15553 xops
[1] = this_reg
;
15555 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
15557 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
15560 /* If necessary, drop THIS back to its stack slot. */
15561 if (this_reg
&& this_reg
!= this)
15563 xops
[0] = this_reg
;
15565 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
15568 xops
[0] = XEXP (DECL_RTL (function
), 0);
15571 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
15572 output_asm_insn ("jmp\t%P0", xops
);
15575 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, xops
[0]), UNSPEC_GOTPCREL
);
15576 tmp
= gen_rtx_CONST (Pmode
, tmp
);
15577 tmp
= gen_rtx_MEM (QImode
, tmp
);
15579 output_asm_insn ("jmp\t%A0", xops
);
15584 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
15585 output_asm_insn ("jmp\t%P0", xops
);
15590 const char *ip
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function
));
15591 tmp
= gen_rtx_SYMBOL_REF (Pmode
, machopic_stub_name (ip
));
15592 tmp
= gen_rtx_MEM (QImode
, tmp
);
15594 output_asm_insn ("jmp\t%0", xops
);
15597 #endif /* TARGET_MACHO */
15599 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
15600 output_set_got (tmp
);
15603 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
15604 output_asm_insn ("jmp\t{*}%1", xops
);
15610 x86_file_start (void)
15612 default_file_start ();
15613 if (X86_FILE_START_VERSION_DIRECTIVE
)
15614 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
15615 if (X86_FILE_START_FLTUSED
)
15616 fputs ("\t.global\t__fltused\n", asm_out_file
);
15617 if (ix86_asm_dialect
== ASM_INTEL
)
15618 fputs ("\t.intel_syntax\n", asm_out_file
);
15622 x86_field_alignment (tree field
, int computed
)
15624 enum machine_mode mode
;
15625 tree type
= TREE_TYPE (field
);
15627 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
15629 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
15630 ? get_inner_array_type (type
) : type
);
15631 if (mode
== DFmode
|| mode
== DCmode
15632 || GET_MODE_CLASS (mode
) == MODE_INT
15633 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
15634 return MIN (32, computed
);
15638 /* Output assembler code to FILE to increment profiler label # LABELNO
15639 for profiling a function entry. */
15641 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
15646 #ifndef NO_PROFILE_COUNTERS
15647 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
15649 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
15653 #ifndef NO_PROFILE_COUNTERS
15654 fprintf (file
, "\tmovq\t$%sP%d,%%r11\n", LPREFIX
, labelno
);
15656 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
15660 #ifndef NO_PROFILE_COUNTERS
15661 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15662 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
15664 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
15668 #ifndef NO_PROFILE_COUNTERS
15669 fprintf (file
, "\tmovl\t$%sP%d,%%%s\n", LPREFIX
, labelno
,
15670 PROFILE_COUNT_REGISTER
);
15672 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
15676 /* We don't have exact information about the insn sizes, but we may assume
15677 quite safely that we are informed about all 1 byte insns and memory
15678 address sizes. This is enough to eliminate unnecessary padding in
15682 min_insn_size (rtx insn
)
15686 if (!INSN_P (insn
) || !active_insn_p (insn
))
15689 /* Discard alignments we've emit and jump instructions. */
15690 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
15691 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
15693 if (GET_CODE (insn
) == JUMP_INSN
15694 && (GET_CODE (PATTERN (insn
)) == ADDR_VEC
15695 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
))
15698 /* Important case - calls are always 5 bytes.
15699 It is common to have many calls in the row. */
15700 if (GET_CODE (insn
) == CALL_INSN
15701 && symbolic_reference_mentioned_p (PATTERN (insn
))
15702 && !SIBLING_CALL_P (insn
))
15704 if (get_attr_length (insn
) <= 1)
15707 /* For normal instructions we may rely on the sizes of addresses
15708 and the presence of symbol to require 4 bytes of encoding.
15709 This is not the case for jumps where references are PC relative. */
15710 if (GET_CODE (insn
) != JUMP_INSN
)
15712 l
= get_attr_length_address (insn
);
15713 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
15722 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
15726 k8_avoid_jump_misspredicts (void)
15728 rtx insn
, start
= get_insns ();
15729 int nbytes
= 0, njumps
= 0;
15732 /* Look for all minimal intervals of instructions containing 4 jumps.
15733 The intervals are bounded by START and INSN. NBYTES is the total
15734 size of instructions in the interval including INSN and not including
15735 START. When the NBYTES is smaller than 16 bytes, it is possible
15736 that the end of START and INSN ends up in the same 16byte page.
15738 The smallest offset in the page INSN can start is the case where START
15739 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
15740 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
15742 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
15745 nbytes
+= min_insn_size (insn
);
15747 fprintf(rtl_dump_file
, "Insn %i estimated to %i bytes\n",
15748 INSN_UID (insn
), min_insn_size (insn
));
15749 if ((GET_CODE (insn
) == JUMP_INSN
15750 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
15751 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
15752 || GET_CODE (insn
) == CALL_INSN
)
15759 start
= NEXT_INSN (start
);
15760 if ((GET_CODE (start
) == JUMP_INSN
15761 && GET_CODE (PATTERN (start
)) != ADDR_VEC
15762 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
15763 || GET_CODE (start
) == CALL_INSN
)
15764 njumps
--, isjump
= 1;
15767 nbytes
-= min_insn_size (start
);
15772 fprintf(rtl_dump_file
, "Interval %i to %i has %i bytes\n",
15773 INSN_UID (start
), INSN_UID (insn
), nbytes
);
15775 if (njumps
== 3 && isjump
&& nbytes
< 16)
15777 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
15780 fprintf (rtl_dump_file
, "Padding insn %i by %i bytes!\n", INSN_UID (insn
), padsize
);
15781 emit_insn_before (gen_align (GEN_INT (padsize
)), insn
);
15786 /* Implement machine specific optimizations.
15787 At the moment we implement single transformation: AMD Athlon works faster
15788 when RET is not destination of conditional jump or directly preceded
15789 by other jump instruction. We avoid the penalty by inserting NOP just
15790 before the RET instructions in such cases. */
15796 if (!TARGET_ATHLON_K8
|| !optimize
|| optimize_size
)
15798 for (e
= EXIT_BLOCK_PTR
->pred
; e
; e
= e
->pred_next
)
15800 basic_block bb
= e
->src
;
15801 rtx ret
= BB_END (bb
);
15803 bool replace
= false;
15805 if (GET_CODE (ret
) != JUMP_INSN
|| GET_CODE (PATTERN (ret
)) != RETURN
15806 || !maybe_hot_bb_p (bb
))
15808 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
15809 if (active_insn_p (prev
) || GET_CODE (prev
) == CODE_LABEL
)
15811 if (prev
&& GET_CODE (prev
) == CODE_LABEL
)
15814 for (e
= bb
->pred
; e
; e
= e
->pred_next
)
15815 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
15816 && !(e
->flags
& EDGE_FALLTHRU
))
15821 prev
= prev_active_insn (ret
);
15823 && ((GET_CODE (prev
) == JUMP_INSN
&& any_condjump_p (prev
))
15824 || GET_CODE (prev
) == CALL_INSN
))
15826 /* Empty functions get branch mispredict even when the jump destination
15827 is not visible to us. */
15828 if (!prev
&& cfun
->function_frequency
> FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
15833 emit_insn_before (gen_return_internal_long (), ret
);
15837 k8_avoid_jump_misspredicts ();
15840 /* Return nonzero when QImode register that must be represented via REX prefix
15843 x86_extended_QIreg_mentioned_p (rtx insn
)
15846 extract_insn_cached (insn
);
15847 for (i
= 0; i
< recog_data
.n_operands
; i
++)
15848 if (REG_P (recog_data
.operand
[i
])
15849 && REGNO (recog_data
.operand
[i
]) >= 4)
15854 /* Return nonzero when P points to register encoded via REX prefix.
15855 Called via for_each_rtx. */
15857 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
15859 unsigned int regno
;
15862 regno
= REGNO (*p
);
15863 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
15866 /* Return true when INSN mentions register that must be encoded using REX
15869 x86_extended_reg_mentioned_p (rtx insn
)
15871 return for_each_rtx (&PATTERN (insn
), extended_reg_mentioned_1
, NULL
);
15874 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15875 optabs would emit if we didn't have TFmode patterns. */
15878 x86_emit_floatuns (rtx operands
[2])
15880 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
15881 enum machine_mode mode
, inmode
;
15883 inmode
= GET_MODE (operands
[1]);
15884 if (inmode
!= SImode
15885 && inmode
!= DImode
)
15889 in
= force_reg (inmode
, operands
[1]);
15890 mode
= GET_MODE (out
);
15891 neglab
= gen_label_rtx ();
15892 donelab
= gen_label_rtx ();
15893 i1
= gen_reg_rtx (Pmode
);
15894 f0
= gen_reg_rtx (mode
);
15896 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, Pmode
, 0, neglab
);
15898 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_FLOAT (mode
, in
)));
15899 emit_jump_insn (gen_jump (donelab
));
15902 emit_label (neglab
);
15904 i0
= expand_simple_binop (Pmode
, LSHIFTRT
, in
, const1_rtx
, NULL
, 1, OPTAB_DIRECT
);
15905 i1
= expand_simple_binop (Pmode
, AND
, in
, const1_rtx
, NULL
, 1, OPTAB_DIRECT
);
15906 i0
= expand_simple_binop (Pmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
15907 expand_float (f0
, i0
, 0);
15908 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
15910 emit_label (donelab
);
15913 /* Return if we do not know how to pass TYPE solely in registers. */
15915 ix86_must_pass_in_stack (enum machine_mode mode
, tree type
)
15917 if (default_must_pass_in_stack (mode
, type
))
15919 return (!TARGET_64BIT
&& type
&& mode
== TImode
);
15922 /* Initialize vector TARGET via VALS. */
15924 ix86_expand_vector_init (rtx target
, rtx vals
)
15926 enum machine_mode mode
= GET_MODE (target
);
15927 int elt_size
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
15928 int n_elts
= (GET_MODE_SIZE (mode
) / elt_size
);
15931 for (i
= n_elts
- 1; i
>= 0; i
--)
15932 if (GET_CODE (XVECEXP (vals
, 0, i
)) != CONST_INT
15933 && GET_CODE (XVECEXP (vals
, 0, i
)) != CONST_DOUBLE
)
15936 /* Few special cases first...
15937 ... constants are best loaded from constant pool. */
15940 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
15944 /* ... values where only first field is non-constant are best loaded
15945 from the pool and overwriten via move later. */
15948 rtx op
= simplify_gen_subreg (mode
, XVECEXP (vals
, 0, 0),
15949 GET_MODE_INNER (mode
), 0);
15951 op
= force_reg (mode
, op
);
15952 XVECEXP (vals
, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode
));
15953 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
15954 switch (GET_MODE (target
))
15957 emit_insn (gen_sse2_movsd (target
, target
, op
));
15960 emit_insn (gen_sse_movss (target
, target
, op
));
15968 /* And the busy sequence doing rotations. */
15969 switch (GET_MODE (target
))
15974 simplify_gen_subreg (V2DFmode
, XVECEXP (vals
, 0, 0), DFmode
, 0);
15976 simplify_gen_subreg (V2DFmode
, XVECEXP (vals
, 0, 1), DFmode
, 0);
15978 vecop0
= force_reg (V2DFmode
, vecop0
);
15979 vecop1
= force_reg (V2DFmode
, vecop1
);
15980 emit_insn (gen_sse2_unpcklpd (target
, vecop0
, vecop1
));
15986 simplify_gen_subreg (V4SFmode
, XVECEXP (vals
, 0, 0), SFmode
, 0);
15988 simplify_gen_subreg (V4SFmode
, XVECEXP (vals
, 0, 1), SFmode
, 0);
15990 simplify_gen_subreg (V4SFmode
, XVECEXP (vals
, 0, 2), SFmode
, 0);
15992 simplify_gen_subreg (V4SFmode
, XVECEXP (vals
, 0, 3), SFmode
, 0);
15993 rtx tmp1
= gen_reg_rtx (V4SFmode
);
15994 rtx tmp2
= gen_reg_rtx (V4SFmode
);
15996 vecop0
= force_reg (V4SFmode
, vecop0
);
15997 vecop1
= force_reg (V4SFmode
, vecop1
);
15998 vecop2
= force_reg (V4SFmode
, vecop2
);
15999 vecop3
= force_reg (V4SFmode
, vecop3
);
16000 emit_insn (gen_sse_unpcklps (tmp1
, vecop1
, vecop3
));
16001 emit_insn (gen_sse_unpcklps (tmp2
, vecop0
, vecop2
));
16002 emit_insn (gen_sse_unpcklps (target
, tmp2
, tmp1
));
16010 #include "gt-i386.h"