1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
50 #ifndef CHECK_STACK_LIMIT
51 #define CHECK_STACK_LIMIT (-1)
54 /* Return index of given mode in mult and division cost tables. */
55 #define MODE_INDEX(mode) \
56 ((mode) == QImode ? 0 \
57 : (mode) == HImode ? 1 \
58 : (mode) == SImode ? 2 \
59 : (mode) == DImode ? 3 \
62 /* Processor costs (relative to an add) */
64 struct processor_costs size_cost
= { /* costs for tunning for size */
65 2, /* cost of an add instruction */
66 3, /* cost of a lea instruction */
67 2, /* variable shift costs */
68 3, /* constant shift costs */
69 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
70 0, /* cost of multiply per each bit set */
71 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
72 3, /* cost of movsx */
73 3, /* cost of movzx */
76 2, /* cost for loading QImode using movzbl */
77 {2, 2, 2}, /* cost of loading integer registers
78 in QImode, HImode and SImode.
79 Relative to reg-reg move (2). */
80 {2, 2, 2}, /* cost of storing integer registers */
81 2, /* cost of reg,reg fld/fst */
82 {2, 2, 2}, /* cost of loading fp registers
83 in SFmode, DFmode and XFmode */
84 {2, 2, 2}, /* cost of loading integer registers */
85 3, /* cost of moving MMX register */
86 {3, 3}, /* cost of loading MMX registers
87 in SImode and DImode */
88 {3, 3}, /* cost of storing MMX registers
89 in SImode and DImode */
90 3, /* cost of moving SSE register */
91 {3, 3, 3}, /* cost of loading SSE registers
92 in SImode, DImode and TImode */
93 {3, 3, 3}, /* cost of storing SSE registers
94 in SImode, DImode and TImode */
95 3, /* MMX or SSE register to integer */
96 0, /* size of prefetch block */
97 0, /* number of parallel prefetches */
99 2, /* cost of FADD and FSUB insns. */
100 2, /* cost of FMUL instruction. */
101 2, /* cost of FDIV instruction. */
102 2, /* cost of FABS instruction. */
103 2, /* cost of FCHS instruction. */
104 2, /* cost of FSQRT instruction. */
107 /* Processor costs (relative to an add) */
109 struct processor_costs i386_cost
= { /* 386 specific costs */
110 1, /* cost of an add instruction */
111 1, /* cost of a lea instruction */
112 3, /* variable shift costs */
113 2, /* constant shift costs */
114 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
115 1, /* cost of multiply per each bit set */
116 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
117 3, /* cost of movsx */
118 2, /* cost of movzx */
119 15, /* "large" insn */
121 4, /* cost for loading QImode using movzbl */
122 {2, 4, 2}, /* cost of loading integer registers
123 in QImode, HImode and SImode.
124 Relative to reg-reg move (2). */
125 {2, 4, 2}, /* cost of storing integer registers */
126 2, /* cost of reg,reg fld/fst */
127 {8, 8, 8}, /* cost of loading fp registers
128 in SFmode, DFmode and XFmode */
129 {8, 8, 8}, /* cost of loading integer registers */
130 2, /* cost of moving MMX register */
131 {4, 8}, /* cost of loading MMX registers
132 in SImode and DImode */
133 {4, 8}, /* cost of storing MMX registers
134 in SImode and DImode */
135 2, /* cost of moving SSE register */
136 {4, 8, 16}, /* cost of loading SSE registers
137 in SImode, DImode and TImode */
138 {4, 8, 16}, /* cost of storing SSE registers
139 in SImode, DImode and TImode */
140 3, /* MMX or SSE register to integer */
141 0, /* size of prefetch block */
142 0, /* number of parallel prefetches */
144 23, /* cost of FADD and FSUB insns. */
145 27, /* cost of FMUL instruction. */
146 88, /* cost of FDIV instruction. */
147 22, /* cost of FABS instruction. */
148 24, /* cost of FCHS instruction. */
149 122, /* cost of FSQRT instruction. */
153 struct processor_costs i486_cost
= { /* 486 specific costs */
154 1, /* cost of an add instruction */
155 1, /* cost of a lea instruction */
156 3, /* variable shift costs */
157 2, /* constant shift costs */
158 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
159 1, /* cost of multiply per each bit set */
160 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
161 3, /* cost of movsx */
162 2, /* cost of movzx */
163 15, /* "large" insn */
165 4, /* cost for loading QImode using movzbl */
166 {2, 4, 2}, /* cost of loading integer registers
167 in QImode, HImode and SImode.
168 Relative to reg-reg move (2). */
169 {2, 4, 2}, /* cost of storing integer registers */
170 2, /* cost of reg,reg fld/fst */
171 {8, 8, 8}, /* cost of loading fp registers
172 in SFmode, DFmode and XFmode */
173 {8, 8, 8}, /* cost of loading integer registers */
174 2, /* cost of moving MMX register */
175 {4, 8}, /* cost of loading MMX registers
176 in SImode and DImode */
177 {4, 8}, /* cost of storing MMX registers
178 in SImode and DImode */
179 2, /* cost of moving SSE register */
180 {4, 8, 16}, /* cost of loading SSE registers
181 in SImode, DImode and TImode */
182 {4, 8, 16}, /* cost of storing SSE registers
183 in SImode, DImode and TImode */
184 3, /* MMX or SSE register to integer */
185 0, /* size of prefetch block */
186 0, /* number of parallel prefetches */
188 8, /* cost of FADD and FSUB insns. */
189 16, /* cost of FMUL instruction. */
190 73, /* cost of FDIV instruction. */
191 3, /* cost of FABS instruction. */
192 3, /* cost of FCHS instruction. */
193 83, /* cost of FSQRT instruction. */
197 struct processor_costs pentium_cost
= {
198 1, /* cost of an add instruction */
199 1, /* cost of a lea instruction */
200 4, /* variable shift costs */
201 1, /* constant shift costs */
202 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
203 0, /* cost of multiply per each bit set */
204 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
205 3, /* cost of movsx */
206 2, /* cost of movzx */
207 8, /* "large" insn */
209 6, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {2, 2, 6}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {4, 4, 6}, /* cost of loading integer registers */
218 8, /* cost of moving MMX register */
219 {8, 8}, /* cost of loading MMX registers
220 in SImode and DImode */
221 {8, 8}, /* cost of storing MMX registers
222 in SImode and DImode */
223 2, /* cost of moving SSE register */
224 {4, 8, 16}, /* cost of loading SSE registers
225 in SImode, DImode and TImode */
226 {4, 8, 16}, /* cost of storing SSE registers
227 in SImode, DImode and TImode */
228 3, /* MMX or SSE register to integer */
229 0, /* size of prefetch block */
230 0, /* number of parallel prefetches */
232 3, /* cost of FADD and FSUB insns. */
233 3, /* cost of FMUL instruction. */
234 39, /* cost of FDIV instruction. */
235 1, /* cost of FABS instruction. */
236 1, /* cost of FCHS instruction. */
237 70, /* cost of FSQRT instruction. */
241 struct processor_costs pentiumpro_cost
= {
242 1, /* cost of an add instruction */
243 1, /* cost of a lea instruction */
244 1, /* variable shift costs */
245 1, /* constant shift costs */
246 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
247 0, /* cost of multiply per each bit set */
248 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
249 1, /* cost of movsx */
250 1, /* cost of movzx */
251 8, /* "large" insn */
253 2, /* cost for loading QImode using movzbl */
254 {4, 4, 4}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 2, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261 {4, 4, 6}, /* cost of loading integer registers */
262 2, /* cost of moving MMX register */
263 {2, 2}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {2, 2}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {2, 2, 8}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {2, 2, 8}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 3, /* MMX or SSE register to integer */
273 32, /* size of prefetch block */
274 6, /* number of parallel prefetches */
276 3, /* cost of FADD and FSUB insns. */
277 5, /* cost of FMUL instruction. */
278 56, /* cost of FDIV instruction. */
279 2, /* cost of FABS instruction. */
280 2, /* cost of FCHS instruction. */
281 56, /* cost of FSQRT instruction. */
285 struct processor_costs k6_cost
= {
286 1, /* cost of an add instruction */
287 2, /* cost of a lea instruction */
288 1, /* variable shift costs */
289 1, /* constant shift costs */
290 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
291 0, /* cost of multiply per each bit set */
292 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
293 2, /* cost of movsx */
294 2, /* cost of movzx */
295 8, /* "large" insn */
297 3, /* cost for loading QImode using movzbl */
298 {4, 5, 4}, /* cost of loading integer registers
299 in QImode, HImode and SImode.
300 Relative to reg-reg move (2). */
301 {2, 3, 2}, /* cost of storing integer registers */
302 4, /* cost of reg,reg fld/fst */
303 {6, 6, 6}, /* cost of loading fp registers
304 in SFmode, DFmode and XFmode */
305 {4, 4, 4}, /* cost of loading integer registers */
306 2, /* cost of moving MMX register */
307 {2, 2}, /* cost of loading MMX registers
308 in SImode and DImode */
309 {2, 2}, /* cost of storing MMX registers
310 in SImode and DImode */
311 2, /* cost of moving SSE register */
312 {2, 2, 8}, /* cost of loading SSE registers
313 in SImode, DImode and TImode */
314 {2, 2, 8}, /* cost of storing SSE registers
315 in SImode, DImode and TImode */
316 6, /* MMX or SSE register to integer */
317 32, /* size of prefetch block */
318 1, /* number of parallel prefetches */
320 2, /* cost of FADD and FSUB insns. */
321 2, /* cost of FMUL instruction. */
322 56, /* cost of FDIV instruction. */
323 2, /* cost of FABS instruction. */
324 2, /* cost of FCHS instruction. */
325 56, /* cost of FSQRT instruction. */
329 struct processor_costs athlon_cost
= {
330 1, /* cost of an add instruction */
331 2, /* cost of a lea instruction */
332 1, /* variable shift costs */
333 1, /* constant shift costs */
334 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
335 0, /* cost of multiply per each bit set */
336 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
337 1, /* cost of movsx */
338 1, /* cost of movzx */
339 8, /* "large" insn */
341 4, /* cost for loading QImode using movzbl */
342 {3, 4, 3}, /* cost of loading integer registers
343 in QImode, HImode and SImode.
344 Relative to reg-reg move (2). */
345 {3, 4, 3}, /* cost of storing integer registers */
346 4, /* cost of reg,reg fld/fst */
347 {4, 4, 12}, /* cost of loading fp registers
348 in SFmode, DFmode and XFmode */
349 {6, 6, 8}, /* cost of loading integer registers */
350 2, /* cost of moving MMX register */
351 {4, 4}, /* cost of loading MMX registers
352 in SImode and DImode */
353 {4, 4}, /* cost of storing MMX registers
354 in SImode and DImode */
355 2, /* cost of moving SSE register */
356 {4, 4, 6}, /* cost of loading SSE registers
357 in SImode, DImode and TImode */
358 {4, 4, 5}, /* cost of storing SSE registers
359 in SImode, DImode and TImode */
360 5, /* MMX or SSE register to integer */
361 64, /* size of prefetch block */
362 6, /* number of parallel prefetches */
364 4, /* cost of FADD and FSUB insns. */
365 4, /* cost of FMUL instruction. */
366 24, /* cost of FDIV instruction. */
367 2, /* cost of FABS instruction. */
368 2, /* cost of FCHS instruction. */
369 35, /* cost of FSQRT instruction. */
373 struct processor_costs k8_cost
= {
374 1, /* cost of an add instruction */
375 2, /* cost of a lea instruction */
376 1, /* variable shift costs */
377 1, /* constant shift costs */
378 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
379 0, /* cost of multiply per each bit set */
380 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
381 1, /* cost of movsx */
382 1, /* cost of movzx */
383 8, /* "large" insn */
385 4, /* cost for loading QImode using movzbl */
386 {3, 4, 3}, /* cost of loading integer registers
387 in QImode, HImode and SImode.
388 Relative to reg-reg move (2). */
389 {3, 4, 3}, /* cost of storing integer registers */
390 4, /* cost of reg,reg fld/fst */
391 {4, 4, 12}, /* cost of loading fp registers
392 in SFmode, DFmode and XFmode */
393 {6, 6, 8}, /* cost of loading integer registers */
394 2, /* cost of moving MMX register */
395 {3, 3}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {4, 4}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 3, 6}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 4, 5}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 5, /* MMX or SSE register to integer */
405 64, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 4, /* cost of FADD and FSUB insns. */
409 4, /* cost of FMUL instruction. */
410 19, /* cost of FDIV instruction. */
411 2, /* cost of FABS instruction. */
412 2, /* cost of FCHS instruction. */
413 35, /* cost of FSQRT instruction. */
417 struct processor_costs pentium4_cost
= {
418 1, /* cost of an add instruction */
419 1, /* cost of a lea instruction */
420 4, /* variable shift costs */
421 4, /* constant shift costs */
422 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
423 0, /* cost of multiply per each bit set */
424 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
425 1, /* cost of movsx */
426 1, /* cost of movzx */
427 16, /* "large" insn */
429 2, /* cost for loading QImode using movzbl */
430 {4, 5, 4}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 3, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
437 {4, 4, 6}, /* cost of loading integer registers */
438 2, /* cost of moving MMX register */
439 {2, 2}, /* cost of loading MMX registers
440 in SImode and DImode */
441 {2, 2}, /* cost of storing MMX registers
442 in SImode and DImode */
443 12, /* cost of moving SSE register */
444 {12, 12, 12}, /* cost of loading SSE registers
445 in SImode, DImode and TImode */
446 {2, 2, 8}, /* cost of storing SSE registers
447 in SImode, DImode and TImode */
448 10, /* MMX or SSE register to integer */
449 64, /* size of prefetch block */
450 6, /* number of parallel prefetches */
452 5, /* cost of FADD and FSUB insns. */
453 7, /* cost of FMUL instruction. */
454 43, /* cost of FDIV instruction. */
455 2, /* cost of FABS instruction. */
456 2, /* cost of FCHS instruction. */
457 43, /* cost of FSQRT instruction. */
460 const struct processor_costs
*ix86_cost
= &pentium_cost
;
462 /* Processor feature/optimization bitmasks. */
463 #define m_386 (1<<PROCESSOR_I386)
464 #define m_486 (1<<PROCESSOR_I486)
465 #define m_PENT (1<<PROCESSOR_PENTIUM)
466 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
467 #define m_K6 (1<<PROCESSOR_K6)
468 #define m_ATHLON (1<<PROCESSOR_ATHLON)
469 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
470 #define m_K8 (1<<PROCESSOR_K8)
471 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
473 const int x86_use_leave
= m_386
| m_K6
| m_ATHLON_K8
;
474 const int x86_push_memory
= m_386
| m_K6
| m_ATHLON_K8
| m_PENT4
;
475 const int x86_zero_extend_with_and
= m_486
| m_PENT
;
476 const int x86_movx
= m_ATHLON_K8
| m_PPRO
| m_PENT4
/* m_386 | m_K6 */;
477 const int x86_double_with_add
= ~m_386
;
478 const int x86_use_bit_test
= m_386
;
479 const int x86_unroll_strlen
= m_486
| m_PENT
| m_PPRO
| m_ATHLON_K8
| m_K6
;
480 const int x86_cmove
= m_PPRO
| m_ATHLON_K8
| m_PENT4
;
481 const int x86_3dnow_a
= m_ATHLON_K8
;
482 const int x86_deep_branch
= m_PPRO
| m_K6
| m_ATHLON_K8
| m_PENT4
;
483 const int x86_branch_hints
= m_PENT4
;
484 const int x86_use_sahf
= m_PPRO
| m_K6
| m_PENT4
;
485 const int x86_partial_reg_stall
= m_PPRO
;
486 const int x86_use_loop
= m_K6
;
487 const int x86_use_fiop
= ~(m_PPRO
| m_ATHLON_K8
| m_PENT
);
488 const int x86_use_mov0
= m_K6
;
489 const int x86_use_cltd
= ~(m_PENT
| m_K6
);
490 const int x86_read_modify_write
= ~m_PENT
;
491 const int x86_read_modify
= ~(m_PENT
| m_PPRO
);
492 const int x86_split_long_moves
= m_PPRO
;
493 const int x86_promote_QImode
= m_K6
| m_PENT
| m_386
| m_486
| m_ATHLON_K8
;
494 const int x86_fast_prefix
= ~(m_PENT
| m_486
| m_386
);
495 const int x86_single_stringop
= m_386
| m_PENT4
;
496 const int x86_qimode_math
= ~(0);
497 const int x86_promote_qi_regs
= 0;
498 const int x86_himode_math
= ~(m_PPRO
);
499 const int x86_promote_hi_regs
= m_PPRO
;
500 const int x86_sub_esp_4
= m_ATHLON_K8
| m_PPRO
| m_PENT4
;
501 const int x86_sub_esp_8
= m_ATHLON_K8
| m_PPRO
| m_386
| m_486
| m_PENT4
;
502 const int x86_add_esp_4
= m_ATHLON_K8
| m_K6
| m_PENT4
;
503 const int x86_add_esp_8
= m_ATHLON_K8
| m_PPRO
| m_K6
| m_386
| m_486
| m_PENT4
;
504 const int x86_integer_DFmode_moves
= ~(m_ATHLON_K8
| m_PENT4
| m_PPRO
);
505 const int x86_partial_reg_dependency
= m_ATHLON_K8
| m_PENT4
;
506 const int x86_memory_mismatch_stall
= m_ATHLON_K8
| m_PENT4
;
507 const int x86_accumulate_outgoing_args
= m_ATHLON_K8
| m_PENT4
| m_PPRO
;
508 const int x86_prologue_using_move
= m_ATHLON_K8
| m_PPRO
;
509 const int x86_epilogue_using_move
= m_ATHLON_K8
| m_PPRO
;
510 const int x86_decompose_lea
= m_PENT4
;
511 const int x86_shift1
= ~m_486
;
512 const int x86_arch_always_fancy_math_387
= m_PENT
| m_PPRO
| m_ATHLON_K8
| m_PENT4
;
513 const int x86_sse_partial_reg_dependency
= m_PENT4
| m_PPRO
;
514 /* Set for machines where the type and dependencies are resolved on SSE register
515 parts instead of whole registers, so we may maintain just lower part of
516 scalar values in proper format leaving the upper part undefined. */
517 const int x86_sse_partial_regs
= m_ATHLON_K8
;
518 /* Athlon optimizes partial-register FPS special case, thus avoiding the
519 need for extra instructions beforehand */
520 const int x86_sse_partial_regs_for_cvtsd2ss
= 0;
521 const int x86_sse_typeless_stores
= m_ATHLON_K8
;
522 const int x86_sse_load0_by_pxor
= m_PPRO
| m_PENT4
;
523 const int x86_use_ffreep
= m_ATHLON_K8
;
524 const int x86_rep_movl_optimal
= m_386
| m_PENT
| m_PPRO
| m_K6
;
525 const int x86_inter_unit_moves
= ~(m_ATHLON_K8
);
526 const int x86_ext_80387_constants
= m_K6
| m_ATHLON
| m_PENT4
| m_PPRO
;
527 /* Some CPU cores are not able to predict more than 4 branch instructions in
528 the 16 byte window. */
529 const int x86_four_jump_limit
= m_PPRO
| m_ATHLON_K8
| m_PENT4
;
531 /* In case the average insn count for single function invocation is
532 lower than this constant, emit fast (but longer) prologue and
534 #define FAST_PROLOGUE_INSN_COUNT 20
536 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
537 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
538 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
539 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
541 /* Array of the smallest class containing reg number REGNO, indexed by
542 REGNO. Used by REGNO_REG_CLASS in i386.h. */
544 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
547 AREG
, DREG
, CREG
, BREG
,
549 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
551 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
552 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
555 /* flags, fpsr, dirflag, frame */
556 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
557 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
559 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
561 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
562 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
563 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
567 /* The "default" register map used in 32bit mode. */
569 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
571 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
572 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
573 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
574 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
575 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
576 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
577 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
580 static int const x86_64_int_parameter_registers
[6] =
582 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
583 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
586 static int const x86_64_int_return_registers
[4] =
588 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
591 /* The "default" register map used in 64bit mode. */
592 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
594 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
595 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
596 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
597 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
598 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
599 8,9,10,11,12,13,14,15, /* extended integer registers */
600 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
603 /* Define the register numbers to be used in Dwarf debugging information.
604 The SVR4 reference port C compiler uses the following register numbers
605 in its Dwarf output code:
606 0 for %eax (gcc regno = 0)
607 1 for %ecx (gcc regno = 2)
608 2 for %edx (gcc regno = 1)
609 3 for %ebx (gcc regno = 3)
610 4 for %esp (gcc regno = 7)
611 5 for %ebp (gcc regno = 6)
612 6 for %esi (gcc regno = 4)
613 7 for %edi (gcc regno = 5)
614 The following three DWARF register numbers are never generated by
615 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
616 believes these numbers have these meanings.
617 8 for %eip (no gcc equivalent)
618 9 for %eflags (gcc regno = 17)
619 10 for %trapno (no gcc equivalent)
620 It is not at all clear how we should number the FP stack registers
621 for the x86 architecture. If the version of SDB on x86/svr4 were
622 a bit less brain dead with respect to floating-point then we would
623 have a precedent to follow with respect to DWARF register numbers
624 for x86 FP registers, but the SDB on x86/svr4 is so completely
625 broken with respect to FP registers that it is hardly worth thinking
626 of it as something to strive for compatibility with.
627 The version of x86/svr4 SDB I have at the moment does (partially)
628 seem to believe that DWARF register number 11 is associated with
629 the x86 register %st(0), but that's about all. Higher DWARF
630 register numbers don't seem to be associated with anything in
631 particular, and even for DWARF regno 11, SDB only seems to under-
632 stand that it should say that a variable lives in %st(0) (when
633 asked via an `=' command) if we said it was in DWARF regno 11,
634 but SDB still prints garbage when asked for the value of the
635 variable in question (via a `/' command).
636 (Also note that the labels SDB prints for various FP stack regs
637 when doing an `x' command are all wrong.)
638 Note that these problems generally don't affect the native SVR4
639 C compiler because it doesn't allow the use of -O with -g and
640 because when it is *not* optimizing, it allocates a memory
641 location for each floating-point variable, and the memory
642 location is what gets described in the DWARF AT_location
643 attribute for the variable in question.
644 Regardless of the severe mental illness of the x86/svr4 SDB, we
645 do something sensible here and we use the following DWARF
646 register numbers. Note that these are all stack-top-relative
648 11 for %st(0) (gcc regno = 8)
649 12 for %st(1) (gcc regno = 9)
650 13 for %st(2) (gcc regno = 10)
651 14 for %st(3) (gcc regno = 11)
652 15 for %st(4) (gcc regno = 12)
653 16 for %st(5) (gcc regno = 13)
654 17 for %st(6) (gcc regno = 14)
655 18 for %st(7) (gcc regno = 15)
657 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
659 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
660 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
661 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
662 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
663 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
664 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
665 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
668 /* Test and compare insns in i386.md store the information needed to
669 generate branch and scc insns here. */
671 rtx ix86_compare_op0
= NULL_RTX
;
672 rtx ix86_compare_op1
= NULL_RTX
;
674 #define MAX_386_STACK_LOCALS 3
675 /* Size of the register save area. */
676 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
678 /* Define the structure for the machine field in struct function. */
680 struct stack_local_entry
GTY(())
685 struct stack_local_entry
*next
;
688 /* Structure describing stack frame layout.
689 Stack grows downward:
695 saved frame pointer if frame_pointer_needed
696 <- HARD_FRAME_POINTER
702 > to_allocate <- FRAME_POINTER
714 int outgoing_arguments_size
;
717 HOST_WIDE_INT to_allocate
;
718 /* The offsets relative to ARG_POINTER. */
719 HOST_WIDE_INT frame_pointer_offset
;
720 HOST_WIDE_INT hard_frame_pointer_offset
;
721 HOST_WIDE_INT stack_pointer_offset
;
723 /* When save_regs_using_mov is set, emit prologue using
724 move instead of push instructions. */
725 bool save_regs_using_mov
;
728 /* Used to enable/disable debugging features. */
729 const char *ix86_debug_arg_string
, *ix86_debug_addr_string
;
730 /* Code model option as passed by user. */
731 const char *ix86_cmodel_string
;
733 enum cmodel ix86_cmodel
;
735 const char *ix86_asm_string
;
736 enum asm_dialect ix86_asm_dialect
= ASM_ATT
;
738 const char *ix86_tls_dialect_string
;
739 enum tls_dialect ix86_tls_dialect
= TLS_DIALECT_GNU
;
741 /* Which unit we are generating floating point math for. */
742 enum fpmath_unit ix86_fpmath
;
744 /* Which cpu are we scheduling for. */
745 enum processor_type ix86_tune
;
746 /* Which instruction set architecture to use. */
747 enum processor_type ix86_arch
;
749 /* Strings to hold which cpu and instruction set architecture to use. */
750 const char *ix86_tune_string
; /* for -mtune=<xxx> */
751 const char *ix86_arch_string
; /* for -march=<xxx> */
752 const char *ix86_fpmath_string
; /* for -mfpmath=<xxx> */
754 /* # of registers to use to pass arguments. */
755 const char *ix86_regparm_string
;
757 /* true if sse prefetch instruction is not NOOP. */
758 int x86_prefetch_sse
;
760 /* ix86_regparm_string as a number */
763 /* Alignment to use for loops and jumps: */
765 /* Power of two alignment for loops. */
766 const char *ix86_align_loops_string
;
768 /* Power of two alignment for non-loop jumps. */
769 const char *ix86_align_jumps_string
;
771 /* Power of two alignment for stack boundary in bytes. */
772 const char *ix86_preferred_stack_boundary_string
;
774 /* Preferred alignment for stack boundary in bits. */
775 int ix86_preferred_stack_boundary
;
777 /* Values 1-5: see jump.c */
778 int ix86_branch_cost
;
779 const char *ix86_branch_cost_string
;
781 /* Power of two alignment for functions. */
782 const char *ix86_align_funcs_string
;
784 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
785 static char internal_label_prefix
[16];
786 static int internal_label_prefix_len
;
788 static int local_symbolic_operand (rtx
, enum machine_mode
);
789 static int tls_symbolic_operand_1 (rtx
, enum tls_model
);
790 static void output_pic_addr_const (FILE *, rtx
, int);
791 static void put_condition_code (enum rtx_code
, enum machine_mode
,
793 static const char *get_some_local_dynamic_name (void);
794 static int get_some_local_dynamic_name_1 (rtx
*, void *);
795 static rtx
maybe_get_pool_constant (rtx
);
796 static rtx
ix86_expand_int_compare (enum rtx_code
, rtx
, rtx
);
797 static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code
, rtx
*,
799 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
800 static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode
,
802 static rtx
get_thread_pointer (int);
803 static rtx
legitimize_tls_address (rtx
, enum tls_model
, int);
804 static void get_pc_thunk_name (char [32], unsigned int);
805 static rtx
gen_push (rtx
);
806 static int memory_address_length (rtx addr
);
807 static int ix86_flags_dependant (rtx
, rtx
, enum attr_type
);
808 static int ix86_agi_dependant (rtx
, rtx
, enum attr_type
);
809 static struct machine_function
* ix86_init_machine_status (void);
810 static int ix86_split_to_parts (rtx
, rtx
*, enum machine_mode
);
811 static int ix86_nsaved_regs (void);
812 static void ix86_emit_save_regs (void);
813 static void ix86_emit_save_regs_using_mov (rtx
, HOST_WIDE_INT
);
814 static void ix86_emit_restore_regs_using_mov (rtx
, HOST_WIDE_INT
, int);
815 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT
);
816 static HOST_WIDE_INT
ix86_GOT_alias_set (void);
817 static void ix86_adjust_counter (rtx
, HOST_WIDE_INT
);
818 static rtx
ix86_expand_aligntest (rtx
, int);
819 static void ix86_expand_strlensi_unroll_1 (rtx
, rtx
, rtx
);
820 static int ix86_issue_rate (void);
821 static int ix86_adjust_cost (rtx
, rtx
, rtx
, int);
822 static int ia32_use_dfa_pipeline_interface (void);
823 static int ia32_multipass_dfa_lookahead (void);
824 static void ix86_init_mmx_sse_builtins (void);
825 static rtx
x86_this_parameter (tree
);
826 static void x86_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
,
827 HOST_WIDE_INT
, tree
);
828 static bool x86_can_output_mi_thunk (tree
, HOST_WIDE_INT
, HOST_WIDE_INT
, tree
);
829 static void x86_file_start (void);
830 static void ix86_reorg (void);
831 static bool ix86_expand_carry_flag_compare (enum rtx_code
, rtx
, rtx
, rtx
*);
832 static tree
ix86_build_builtin_va_list (void);
833 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*, enum machine_mode
,
838 rtx base
, index
, disp
;
840 enum ix86_address_seg
{ SEG_DEFAULT
, SEG_FS
, SEG_GS
} seg
;
843 static int ix86_decompose_address (rtx
, struct ix86_address
*);
844 static int ix86_address_cost (rtx
);
845 static bool ix86_cannot_force_const_mem (rtx
);
846 static rtx
ix86_delegitimize_address (rtx
);
848 struct builtin_description
;
849 static rtx
ix86_expand_sse_comi (const struct builtin_description
*,
851 static rtx
ix86_expand_sse_compare (const struct builtin_description
*,
853 static rtx
ix86_expand_unop1_builtin (enum insn_code
, tree
, rtx
);
854 static rtx
ix86_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
855 static rtx
ix86_expand_binop_builtin (enum insn_code
, tree
, rtx
);
856 static rtx
ix86_expand_store_builtin (enum insn_code
, tree
);
857 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
858 static enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code
);
859 static void ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*,
860 enum rtx_code
*, enum rtx_code
*);
861 static rtx
ix86_expand_fp_compare (enum rtx_code
, rtx
, rtx
, rtx
, rtx
*, rtx
*);
862 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code
);
863 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code
);
864 static int ix86_fp_comparison_sahf_cost (enum rtx_code code
);
865 static int ix86_fp_comparison_cost (enum rtx_code code
);
866 static unsigned int ix86_select_alt_pic_regnum (void);
867 static int ix86_save_reg (unsigned int, int);
868 static void ix86_compute_frame_layout (struct ix86_frame
*);
869 static int ix86_comp_type_attributes (tree
, tree
);
870 static int ix86_function_regparm (tree
, tree
);
871 const struct attribute_spec ix86_attribute_table
[];
872 static bool ix86_function_ok_for_sibcall (tree
, tree
);
873 static tree
ix86_handle_cdecl_attribute (tree
*, tree
, tree
, int, bool *);
874 static tree
ix86_handle_regparm_attribute (tree
*, tree
, tree
, int, bool *);
875 static int ix86_value_regno (enum machine_mode
);
876 static bool contains_128bit_aligned_vector_p (tree
);
877 static bool ix86_ms_bitfield_layout_p (tree
);
878 static tree
ix86_handle_struct_attribute (tree
*, tree
, tree
, int, bool *);
879 static int extended_reg_mentioned_1 (rtx
*, void *);
880 static bool ix86_rtx_costs (rtx
, int, int, int *);
881 static int min_insn_size (rtx
);
882 static tree
ix86_md_asm_clobbers (tree clobbers
);
884 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
885 static void ix86_svr3_asm_out_constructor (rtx
, int);
888 /* Register class used for passing given 64bit part of the argument.
889 These represent classes as documented by the PS ABI, with the exception
890 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
891 use SF or DFmode move instead of DImode to avoid reformatting penalties.
893 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
894 whenever possible (upper half does contain padding).
896 enum x86_64_reg_class
899 X86_64_INTEGER_CLASS
,
900 X86_64_INTEGERSI_CLASS
,
909 static const char * const x86_64_reg_class_name
[] =
910 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
912 #define MAX_CLASSES 4
913 static int classify_argument (enum machine_mode
, tree
,
914 enum x86_64_reg_class
[MAX_CLASSES
], int);
915 static int examine_argument (enum machine_mode
, tree
, int, int *, int *);
916 static rtx
construct_container (enum machine_mode
, tree
, int, int, int,
918 static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class
,
919 enum x86_64_reg_class
);
921 /* Table of constants used by fldpi, fldln2, etc.... */
922 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
923 static bool ext_80387_constants_init
= 0;
924 static void init_ext_80387_constants (void);
926 /* Initialize the GCC target structure. */
927 #undef TARGET_ATTRIBUTE_TABLE
928 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
929 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
930 # undef TARGET_MERGE_DECL_ATTRIBUTES
931 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
934 #undef TARGET_COMP_TYPE_ATTRIBUTES
935 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
937 #undef TARGET_INIT_BUILTINS
938 #define TARGET_INIT_BUILTINS ix86_init_builtins
940 #undef TARGET_EXPAND_BUILTIN
941 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
943 #undef TARGET_ASM_FUNCTION_EPILOGUE
944 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
946 #undef TARGET_ASM_OPEN_PAREN
947 #define TARGET_ASM_OPEN_PAREN ""
948 #undef TARGET_ASM_CLOSE_PAREN
949 #define TARGET_ASM_CLOSE_PAREN ""
951 #undef TARGET_ASM_ALIGNED_HI_OP
952 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
953 #undef TARGET_ASM_ALIGNED_SI_OP
954 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
956 #undef TARGET_ASM_ALIGNED_DI_OP
957 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
960 #undef TARGET_ASM_UNALIGNED_HI_OP
961 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
962 #undef TARGET_ASM_UNALIGNED_SI_OP
963 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
964 #undef TARGET_ASM_UNALIGNED_DI_OP
965 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
967 #undef TARGET_SCHED_ADJUST_COST
968 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
969 #undef TARGET_SCHED_ISSUE_RATE
970 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
971 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
972 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
973 ia32_use_dfa_pipeline_interface
974 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
975 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
976 ia32_multipass_dfa_lookahead
978 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
979 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
982 #undef TARGET_HAVE_TLS
983 #define TARGET_HAVE_TLS true
985 #undef TARGET_CANNOT_FORCE_CONST_MEM
986 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
988 #undef TARGET_DELEGITIMIZE_ADDRESS
989 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
991 #undef TARGET_MS_BITFIELD_LAYOUT_P
992 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
994 #undef TARGET_ASM_OUTPUT_MI_THUNK
995 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
996 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
997 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
999 #undef TARGET_ASM_FILE_START
1000 #define TARGET_ASM_FILE_START x86_file_start
1002 #undef TARGET_RTX_COSTS
1003 #define TARGET_RTX_COSTS ix86_rtx_costs
1004 #undef TARGET_ADDRESS_COST
1005 #define TARGET_ADDRESS_COST ix86_address_cost
1007 #undef TARGET_FIXED_CONDITION_CODE_REGS
1008 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1009 #undef TARGET_CC_MODES_COMPATIBLE
1010 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1012 #undef TARGET_MACHINE_DEPENDENT_REORG
1013 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1015 #undef TARGET_BUILD_BUILTIN_VA_LIST
1016 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1018 #undef TARGET_MD_ASM_CLOBBERS
1019 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1021 #undef TARGET_PROMOTE_PROTOTYPES
1022 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1024 #undef TARGET_SETUP_INCOMING_VARARGS
1025 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1027 struct gcc_target targetm
= TARGET_INITIALIZER
;
1029 /* The svr4 ABI for the i386 says that records and unions are returned
1031 #ifndef DEFAULT_PCC_STRUCT_RETURN
1032 #define DEFAULT_PCC_STRUCT_RETURN 1
1035 /* Sometimes certain combinations of command options do not make
1036 sense on a particular target machine. You can define a macro
1037 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1038 defined, is executed once just after all the command options have
1041 Don't use this macro to turn on various extra optimizations for
1042 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1045 override_options (void)
1048 /* Comes from final.c -- no real reason to change it. */
1049 #define MAX_CODE_ALIGN 16
1053 const struct processor_costs
*cost
; /* Processor costs */
1054 const int target_enable
; /* Target flags to enable. */
1055 const int target_disable
; /* Target flags to disable. */
1056 const int align_loop
; /* Default alignments. */
1057 const int align_loop_max_skip
;
1058 const int align_jump
;
1059 const int align_jump_max_skip
;
1060 const int align_func
;
1062 const processor_target_table
[PROCESSOR_max
] =
1064 {&i386_cost
, 0, 0, 4, 3, 4, 3, 4},
1065 {&i486_cost
, 0, 0, 16, 15, 16, 15, 16},
1066 {&pentium_cost
, 0, 0, 16, 7, 16, 7, 16},
1067 {&pentiumpro_cost
, 0, 0, 16, 15, 16, 7, 16},
1068 {&k6_cost
, 0, 0, 32, 7, 32, 7, 32},
1069 {&athlon_cost
, 0, 0, 16, 7, 16, 7, 16},
1070 {&pentium4_cost
, 0, 0, 0, 0, 0, 0, 0},
1071 {&k8_cost
, 0, 0, 16, 7, 16, 7, 16}
1074 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
1077 const char *const name
; /* processor name or nickname. */
1078 const enum processor_type processor
;
1079 const enum pta_flags
1085 PTA_PREFETCH_SSE
= 16,
1091 const processor_alias_table
[] =
1093 {"i386", PROCESSOR_I386
, 0},
1094 {"i486", PROCESSOR_I486
, 0},
1095 {"i586", PROCESSOR_PENTIUM
, 0},
1096 {"pentium", PROCESSOR_PENTIUM
, 0},
1097 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
1098 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
1099 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1100 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1101 {"c3-2", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_SSE
},
1102 {"i686", PROCESSOR_PENTIUMPRO
, 0},
1103 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
1104 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
1105 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1106 {"pentium3m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1107 {"pentium-m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
| PTA_SSE2
},
1108 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1109 | PTA_MMX
| PTA_PREFETCH_SSE
},
1110 {"pentium4m", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1111 | PTA_MMX
| PTA_PREFETCH_SSE
},
1112 {"prescott", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
1113 | PTA_MMX
| PTA_PREFETCH_SSE
},
1114 {"nocona", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_64BIT
1115 | PTA_MMX
| PTA_PREFETCH_SSE
},
1116 {"k6", PROCESSOR_K6
, PTA_MMX
},
1117 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1118 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1119 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1121 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
1122 | PTA_3DNOW
| PTA_3DNOW_A
},
1123 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1124 | PTA_3DNOW_A
| PTA_SSE
},
1125 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1126 | PTA_3DNOW_A
| PTA_SSE
},
1127 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1128 | PTA_3DNOW_A
| PTA_SSE
},
1129 {"x86-64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_64BIT
1130 | PTA_SSE
| PTA_SSE2
},
1131 {"k8", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1132 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1133 {"opteron", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1134 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1135 {"athlon64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1136 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1137 {"athlon-fx", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1138 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1141 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
1143 /* Set the default values for switches whose default depends on TARGET_64BIT
1144 in case they weren't overwritten by command line options. */
1147 if (flag_omit_frame_pointer
== 2)
1148 flag_omit_frame_pointer
= 1;
1149 if (flag_asynchronous_unwind_tables
== 2)
1150 flag_asynchronous_unwind_tables
= 1;
1151 if (flag_pcc_struct_return
== 2)
1152 flag_pcc_struct_return
= 0;
1156 if (flag_omit_frame_pointer
== 2)
1157 flag_omit_frame_pointer
= 0;
1158 if (flag_asynchronous_unwind_tables
== 2)
1159 flag_asynchronous_unwind_tables
= 0;
1160 if (flag_pcc_struct_return
== 2)
1161 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
1164 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1165 SUBTARGET_OVERRIDE_OPTIONS
;
1168 if (!ix86_tune_string
&& ix86_arch_string
)
1169 ix86_tune_string
= ix86_arch_string
;
1170 if (!ix86_tune_string
)
1171 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
1172 if (!ix86_arch_string
)
1173 ix86_arch_string
= TARGET_64BIT
? "x86-64" : "i386";
1175 if (ix86_cmodel_string
!= 0)
1177 if (!strcmp (ix86_cmodel_string
, "small"))
1178 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1180 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string
);
1181 else if (!strcmp (ix86_cmodel_string
, "32"))
1182 ix86_cmodel
= CM_32
;
1183 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
1184 ix86_cmodel
= CM_KERNEL
;
1185 else if (!strcmp (ix86_cmodel_string
, "medium") && !flag_pic
)
1186 ix86_cmodel
= CM_MEDIUM
;
1187 else if (!strcmp (ix86_cmodel_string
, "large") && !flag_pic
)
1188 ix86_cmodel
= CM_LARGE
;
1190 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
1194 ix86_cmodel
= CM_32
;
1196 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1198 if (ix86_asm_string
!= 0)
1200 if (!strcmp (ix86_asm_string
, "intel"))
1201 ix86_asm_dialect
= ASM_INTEL
;
1202 else if (!strcmp (ix86_asm_string
, "att"))
1203 ix86_asm_dialect
= ASM_ATT
;
1205 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
1207 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
1208 error ("code model `%s' not supported in the %s bit mode",
1209 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
1210 if (ix86_cmodel
== CM_LARGE
)
1211 sorry ("code model `large' not supported yet");
1212 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
1213 sorry ("%i-bit mode not compiled in",
1214 (target_flags
& MASK_64BIT
) ? 64 : 32);
1216 for (i
= 0; i
< pta_size
; i
++)
1217 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
1219 ix86_arch
= processor_alias_table
[i
].processor
;
1220 /* Default cpu tuning to the architecture. */
1221 ix86_tune
= ix86_arch
;
1222 if (processor_alias_table
[i
].flags
& PTA_MMX
1223 && !(target_flags_explicit
& MASK_MMX
))
1224 target_flags
|= MASK_MMX
;
1225 if (processor_alias_table
[i
].flags
& PTA_3DNOW
1226 && !(target_flags_explicit
& MASK_3DNOW
))
1227 target_flags
|= MASK_3DNOW
;
1228 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
1229 && !(target_flags_explicit
& MASK_3DNOW_A
))
1230 target_flags
|= MASK_3DNOW_A
;
1231 if (processor_alias_table
[i
].flags
& PTA_SSE
1232 && !(target_flags_explicit
& MASK_SSE
))
1233 target_flags
|= MASK_SSE
;
1234 if (processor_alias_table
[i
].flags
& PTA_SSE2
1235 && !(target_flags_explicit
& MASK_SSE2
))
1236 target_flags
|= MASK_SSE2
;
1237 if (processor_alias_table
[i
].flags
& PTA_SSE3
1238 && !(target_flags_explicit
& MASK_SSE3
))
1239 target_flags
|= MASK_SSE3
;
1240 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1241 x86_prefetch_sse
= true;
1242 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1243 error ("CPU you selected does not support x86-64 instruction set");
1248 error ("bad value (%s) for -march= switch", ix86_arch_string
);
1250 for (i
= 0; i
< pta_size
; i
++)
1251 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
1253 ix86_tune
= processor_alias_table
[i
].processor
;
1254 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1255 error ("CPU you selected does not support x86-64 instruction set");
1258 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1259 x86_prefetch_sse
= true;
1261 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
1264 ix86_cost
= &size_cost
;
1266 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
1267 target_flags
|= processor_target_table
[ix86_tune
].target_enable
;
1268 target_flags
&= ~processor_target_table
[ix86_tune
].target_disable
;
1270 /* Arrange to set up i386_stack_locals for all functions. */
1271 init_machine_status
= ix86_init_machine_status
;
1273 /* Validate -mregparm= value. */
1274 if (ix86_regparm_string
)
1276 i
= atoi (ix86_regparm_string
);
1277 if (i
< 0 || i
> REGPARM_MAX
)
1278 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
1284 ix86_regparm
= REGPARM_MAX
;
1286 /* If the user has provided any of the -malign-* options,
1287 warn and use that value only if -falign-* is not set.
1288 Remove this code in GCC 3.2 or later. */
1289 if (ix86_align_loops_string
)
1291 warning ("-malign-loops is obsolete, use -falign-loops");
1292 if (align_loops
== 0)
1294 i
= atoi (ix86_align_loops_string
);
1295 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1296 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1298 align_loops
= 1 << i
;
1302 if (ix86_align_jumps_string
)
1304 warning ("-malign-jumps is obsolete, use -falign-jumps");
1305 if (align_jumps
== 0)
1307 i
= atoi (ix86_align_jumps_string
);
1308 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1309 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1311 align_jumps
= 1 << i
;
1315 if (ix86_align_funcs_string
)
1317 warning ("-malign-functions is obsolete, use -falign-functions");
1318 if (align_functions
== 0)
1320 i
= atoi (ix86_align_funcs_string
);
1321 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1322 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1324 align_functions
= 1 << i
;
1328 /* Default align_* from the processor table. */
1329 if (align_loops
== 0)
1331 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
1332 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
1334 if (align_jumps
== 0)
1336 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
1337 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
1339 if (align_functions
== 0)
1341 align_functions
= processor_target_table
[ix86_tune
].align_func
;
1344 /* Validate -mpreferred-stack-boundary= value, or provide default.
1345 The default of 128 bits is for Pentium III's SSE __m128, but we
1346 don't want additional code to keep the stack aligned when
1347 optimizing for code size. */
1348 ix86_preferred_stack_boundary
= (optimize_size
1349 ? TARGET_64BIT
? 128 : 32
1351 if (ix86_preferred_stack_boundary_string
)
1353 i
= atoi (ix86_preferred_stack_boundary_string
);
1354 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
1355 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
1356 TARGET_64BIT
? 4 : 2);
1358 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
1361 /* Validate -mbranch-cost= value, or provide default. */
1362 ix86_branch_cost
= processor_target_table
[ix86_tune
].cost
->branch_cost
;
1363 if (ix86_branch_cost_string
)
1365 i
= atoi (ix86_branch_cost_string
);
1367 error ("-mbranch-cost=%d is not between 0 and 5", i
);
1369 ix86_branch_cost
= i
;
1372 if (ix86_tls_dialect_string
)
1374 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
1375 ix86_tls_dialect
= TLS_DIALECT_GNU
;
1376 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
1377 ix86_tls_dialect
= TLS_DIALECT_SUN
;
1379 error ("bad value (%s) for -mtls-dialect= switch",
1380 ix86_tls_dialect_string
);
1383 /* Keep nonleaf frame pointers. */
1384 if (TARGET_OMIT_LEAF_FRAME_POINTER
)
1385 flag_omit_frame_pointer
= 1;
1387 /* If we're doing fast math, we don't care about comparison order
1388 wrt NaNs. This lets us use a shorter comparison sequence. */
1389 if (flag_unsafe_math_optimizations
)
1390 target_flags
&= ~MASK_IEEE_FP
;
1392 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1393 since the insns won't need emulation. */
1394 if (x86_arch_always_fancy_math_387
& (1 << ix86_arch
))
1395 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
1397 /* Turn on SSE2 builtins for -msse3. */
1399 target_flags
|= MASK_SSE2
;
1401 /* Turn on SSE builtins for -msse2. */
1403 target_flags
|= MASK_SSE
;
1407 if (TARGET_ALIGN_DOUBLE
)
1408 error ("-malign-double makes no sense in the 64bit mode");
1410 error ("-mrtd calling convention not supported in the 64bit mode");
1411 /* Enable by default the SSE and MMX builtins. */
1412 target_flags
|= (MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
);
1413 ix86_fpmath
= FPMATH_SSE
;
1417 ix86_fpmath
= FPMATH_387
;
1418 /* i386 ABI does not specify red zone. It still makes sense to use it
1419 when programmer takes care to stack from being destroyed. */
1420 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
1421 target_flags
|= MASK_NO_RED_ZONE
;
1424 if (ix86_fpmath_string
!= 0)
1426 if (! strcmp (ix86_fpmath_string
, "387"))
1427 ix86_fpmath
= FPMATH_387
;
1428 else if (! strcmp (ix86_fpmath_string
, "sse"))
1432 warning ("SSE instruction set disabled, using 387 arithmetics");
1433 ix86_fpmath
= FPMATH_387
;
1436 ix86_fpmath
= FPMATH_SSE
;
1438 else if (! strcmp (ix86_fpmath_string
, "387,sse")
1439 || ! strcmp (ix86_fpmath_string
, "sse,387"))
1443 warning ("SSE instruction set disabled, using 387 arithmetics");
1444 ix86_fpmath
= FPMATH_387
;
1446 else if (!TARGET_80387
)
1448 warning ("387 instruction set disabled, using SSE arithmetics");
1449 ix86_fpmath
= FPMATH_SSE
;
1452 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
1455 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
1458 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1462 target_flags
|= MASK_MMX
;
1463 x86_prefetch_sse
= true;
1466 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1469 target_flags
|= MASK_MMX
;
1470 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1471 extensions it adds. */
1472 if (x86_3dnow_a
& (1 << ix86_arch
))
1473 target_flags
|= MASK_3DNOW_A
;
1475 if ((x86_accumulate_outgoing_args
& TUNEMASK
)
1476 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
1478 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
1480 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1483 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
1484 p
= strchr (internal_label_prefix
, 'X');
1485 internal_label_prefix_len
= p
- internal_label_prefix
;
1491 optimization_options (int level
, int size ATTRIBUTE_UNUSED
)
1493 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1494 make the problem with not enough registers even worse. */
1495 #ifdef INSN_SCHEDULING
1497 flag_schedule_insns
= 0;
1500 /* The default values of these switches depend on the TARGET_64BIT
1501 that is not known at this moment. Mark these values with 2 and
1502 let user the to override these. In case there is no command line option
1503 specifying them, we will set the defaults in override_options. */
1505 flag_omit_frame_pointer
= 2;
1506 flag_pcc_struct_return
= 2;
1507 flag_asynchronous_unwind_tables
= 2;
1510 /* Table of valid machine attributes. */
1511 const struct attribute_spec ix86_attribute_table
[] =
1513 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1514 /* Stdcall attribute says callee is responsible for popping arguments
1515 if they are not variable. */
1516 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1517 /* Fastcall attribute says callee is responsible for popping arguments
1518 if they are not variable. */
1519 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1520 /* Cdecl attribute says the callee is a normal C declaration */
1521 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1522 /* Regparm attribute specifies how many integer arguments are to be
1523 passed in registers. */
1524 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute
},
1525 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1526 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute
},
1527 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute
},
1528 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
1530 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
1531 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
1532 { NULL
, 0, 0, false, false, false, NULL
}
1535 /* Decide whether we can make a sibling call to a function. DECL is the
1536 declaration of the function being targeted by the call and EXP is the
1537 CALL_EXPR representing the call. */
1540 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
1542 /* If we are generating position-independent code, we cannot sibcall
1543 optimize any indirect call, or a direct call to a global function,
1544 as the PLT requires %ebx be live. */
1545 if (!TARGET_64BIT
&& flag_pic
&& (!decl
|| TREE_PUBLIC (decl
)))
1548 /* If we are returning floats on the 80387 register stack, we cannot
1549 make a sibcall from a function that doesn't return a float to a
1550 function that does or, conversely, from a function that does return
1551 a float to a function that doesn't; the necessary stack adjustment
1552 would not be executed. */
1553 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp
)))
1554 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)))))
1557 /* If this call is indirect, we'll need to be able to use a call-clobbered
1558 register for the address of the target function. Make sure that all
1559 such registers are not used for passing parameters. */
1560 if (!decl
&& !TARGET_64BIT
)
1564 /* We're looking at the CALL_EXPR, we need the type of the function. */
1565 type
= TREE_OPERAND (exp
, 0); /* pointer expression */
1566 type
= TREE_TYPE (type
); /* pointer type */
1567 type
= TREE_TYPE (type
); /* function type */
1569 if (ix86_function_regparm (type
, NULL
) >= 3)
1571 /* ??? Need to count the actual number of registers to be used,
1572 not the possible number of registers. Fix later. */
1577 /* Otherwise okay. That also includes certain types of indirect calls. */
1581 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1582 arguments as in struct attribute_spec.handler. */
1584 ix86_handle_cdecl_attribute (tree
*node
, tree name
,
1585 tree args ATTRIBUTE_UNUSED
,
1586 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
1588 if (TREE_CODE (*node
) != FUNCTION_TYPE
1589 && TREE_CODE (*node
) != METHOD_TYPE
1590 && TREE_CODE (*node
) != FIELD_DECL
1591 && TREE_CODE (*node
) != TYPE_DECL
)
1593 warning ("`%s' attribute only applies to functions",
1594 IDENTIFIER_POINTER (name
));
1595 *no_add_attrs
= true;
1599 if (is_attribute_p ("fastcall", name
))
1601 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
1603 error ("fastcall and stdcall attributes are not compatible");
1605 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
1607 error ("fastcall and regparm attributes are not compatible");
1610 else if (is_attribute_p ("stdcall", name
))
1612 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
1614 error ("fastcall and stdcall attributes are not compatible");
1621 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name
));
1622 *no_add_attrs
= true;
1628 /* Handle a "regparm" attribute;
1629 arguments as in struct attribute_spec.handler. */
1631 ix86_handle_regparm_attribute (tree
*node
, tree name
, tree args
,
1632 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
1634 if (TREE_CODE (*node
) != FUNCTION_TYPE
1635 && TREE_CODE (*node
) != METHOD_TYPE
1636 && TREE_CODE (*node
) != FIELD_DECL
1637 && TREE_CODE (*node
) != TYPE_DECL
)
1639 warning ("`%s' attribute only applies to functions",
1640 IDENTIFIER_POINTER (name
));
1641 *no_add_attrs
= true;
1647 cst
= TREE_VALUE (args
);
1648 if (TREE_CODE (cst
) != INTEGER_CST
)
1650 warning ("`%s' attribute requires an integer constant argument",
1651 IDENTIFIER_POINTER (name
));
1652 *no_add_attrs
= true;
1654 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
1656 warning ("argument to `%s' attribute larger than %d",
1657 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
1658 *no_add_attrs
= true;
1661 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
1663 error ("fastcall and regparm attributes are not compatible");
1670 /* Return 0 if the attributes for two types are incompatible, 1 if they
1671 are compatible, and 2 if they are nearly compatible (which causes a
1672 warning to be generated). */
1675 ix86_comp_type_attributes (tree type1
, tree type2
)
1677 /* Check for mismatch of non-default calling convention. */
1678 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
1680 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
1683 /* Check for mismatched fastcall types */
1684 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1
))
1685 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2
)))
1688 /* Check for mismatched return types (cdecl vs stdcall). */
1689 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
1690 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
1692 if (ix86_function_regparm (type1
, NULL
)
1693 != ix86_function_regparm (type2
, NULL
))
1698 /* Return the regparm value for a fuctio with the indicated TYPE and DECL.
1699 DECL may be NULL when calling function indirectly
1700 or considering a libcall. */
1703 ix86_function_regparm (tree type
, tree decl
)
1706 int regparm
= ix86_regparm
;
1707 bool user_convention
= false;
1711 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
1714 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
1715 user_convention
= true;
1718 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
1721 user_convention
= true;
1724 /* Use register calling convention for local functions when possible. */
1725 if (!TARGET_64BIT
&& !user_convention
&& decl
1726 && flag_unit_at_a_time
&& !profile_flag
)
1728 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
1731 /* We can't use regparm(3) for nested functions as these use
1732 static chain pointer in third argument. */
1733 if (DECL_CONTEXT (decl
) && !DECL_NO_STATIC_CHAIN (decl
))
1743 /* Return true if EAX is live at the start of the function. Used by
1744 ix86_expand_prologue to determine if we need special help before
1745 calling allocate_stack_worker. */
1748 ix86_eax_live_at_start_p (void)
1750 /* Cheat. Don't bother working forward from ix86_function_regparm
1751 to the function type to whether an actual argument is located in
1752 eax. Instead just look at cfg info, which is still close enough
1753 to correct at this point. This gives false positives for broken
1754 functions that might use uninitialized data that happens to be
1755 allocated in eax, but who cares? */
1756 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR
->global_live_at_end
, 0);
1759 /* Value is the number of bytes of arguments automatically
1760 popped when returning from a subroutine call.
1761 FUNDECL is the declaration node of the function (as a tree),
1762 FUNTYPE is the data type of the function (as a tree),
1763 or for a library call it is an identifier node for the subroutine name.
1764 SIZE is the number of bytes of arguments passed on the stack.
1766 On the 80386, the RTD insn may be used to pop them if the number
1767 of args is fixed, but if the number is variable then the caller
1768 must pop them all. RTD can't be used for library calls now
1769 because the library is compiled with the Unix compiler.
1770 Use of RTD is a selectable option, since it is incompatible with
1771 standard Unix calling sequences. If the option is not selected,
1772 the caller must always pop the args.
1774 The attribute stdcall is equivalent to RTD on a per module basis. */
1777 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
1779 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
1781 /* Cdecl functions override -mrtd, and never pop the stack. */
1782 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
1784 /* Stdcall and fastcall functions will pop the stack if not
1786 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
))
1787 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype
)))
1791 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
1792 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
1793 == void_type_node
)))
1797 /* Lose any fake structure return argument if it is passed on the stack. */
1798 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
1801 int nregs
= ix86_function_regparm (funtype
, fundecl
);
1804 return GET_MODE_SIZE (Pmode
);
1810 /* Argument support functions. */
1812 /* Return true when register may be used to pass function parameters. */
1814 ix86_function_arg_regno_p (int regno
)
1818 return (regno
< REGPARM_MAX
1819 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
1820 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
1822 /* RAX is used as hidden argument to va_arg functions. */
1825 for (i
= 0; i
< REGPARM_MAX
; i
++)
1826 if (regno
== x86_64_int_parameter_registers
[i
])
1831 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1832 for a call to a function whose data type is FNTYPE.
1833 For a library call, FNTYPE is 0. */
1836 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
1837 tree fntype
, /* tree ptr for function decl */
1838 rtx libname
, /* SYMBOL_REF of library name or 0 */
1841 static CUMULATIVE_ARGS zero_cum
;
1842 tree param
, next_param
;
1844 if (TARGET_DEBUG_ARG
)
1846 fprintf (stderr
, "\ninit_cumulative_args (");
1848 fprintf (stderr
, "fntype code = %s, ret code = %s",
1849 tree_code_name
[(int) TREE_CODE (fntype
)],
1850 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
1852 fprintf (stderr
, "no fntype");
1855 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
1860 /* Set up the number of registers to use for passing arguments. */
1862 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
1864 cum
->nregs
= ix86_regparm
;
1865 cum
->sse_nregs
= SSE_REGPARM_MAX
;
1866 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
1867 cum
->warn_sse
= true;
1868 cum
->warn_mmx
= true;
1869 cum
->maybe_vaarg
= false;
1871 /* Use ecx and edx registers if function has fastcall attribute */
1872 if (fntype
&& !TARGET_64BIT
)
1874 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
1882 /* Determine if this function has variable arguments. This is
1883 indicated by the last argument being 'void_type_mode' if there
1884 are no variable arguments. If there are variable arguments, then
1885 we won't pass anything in registers */
1887 if (cum
->nregs
|| !TARGET_MMX
|| !TARGET_SSE
)
1889 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
1890 param
!= 0; param
= next_param
)
1892 next_param
= TREE_CHAIN (param
);
1893 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
1904 cum
->maybe_vaarg
= true;
1908 if ((!fntype
&& !libname
)
1909 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
1910 cum
->maybe_vaarg
= 1;
1912 if (TARGET_DEBUG_ARG
)
1913 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
1918 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1919 of this code is to classify each 8bytes of incoming argument by the register
1920 class and assign registers accordingly. */
1922 /* Return the union class of CLASS1 and CLASS2.
1923 See the x86-64 PS ABI for details. */
1925 static enum x86_64_reg_class
1926 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
1928 /* Rule #1: If both classes are equal, this is the resulting class. */
1929 if (class1
== class2
)
1932 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1934 if (class1
== X86_64_NO_CLASS
)
1936 if (class2
== X86_64_NO_CLASS
)
1939 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1940 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
1941 return X86_64_MEMORY_CLASS
;
1943 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1944 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
1945 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
1946 return X86_64_INTEGERSI_CLASS
;
1947 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
1948 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
1949 return X86_64_INTEGER_CLASS
;
1951 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1952 if (class1
== X86_64_X87_CLASS
|| class1
== X86_64_X87UP_CLASS
1953 || class2
== X86_64_X87_CLASS
|| class2
== X86_64_X87UP_CLASS
)
1954 return X86_64_MEMORY_CLASS
;
1956 /* Rule #6: Otherwise class SSE is used. */
1957 return X86_64_SSE_CLASS
;
1960 /* Classify the argument of type TYPE and mode MODE.
1961 CLASSES will be filled by the register class used to pass each word
1962 of the operand. The number of words is returned. In case the parameter
1963 should be passed in memory, 0 is returned. As a special case for zero
1964 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1966 BIT_OFFSET is used internally for handling records and specifies offset
1967 of the offset in bits modulo 256 to avoid overflow cases.
1969 See the x86-64 PS ABI for details.
1973 classify_argument (enum machine_mode mode
, tree type
,
1974 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
1976 HOST_WIDE_INT bytes
=
1977 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1978 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
1980 /* Variable sized entities are always passed/returned in memory. */
1984 if (mode
!= VOIDmode
1985 && MUST_PASS_IN_STACK (mode
, type
))
1988 if (type
&& AGGREGATE_TYPE_P (type
))
1992 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
1994 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1998 for (i
= 0; i
< words
; i
++)
1999 classes
[i
] = X86_64_NO_CLASS
;
2001 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2002 signalize memory class, so handle it as special case. */
2005 classes
[0] = X86_64_NO_CLASS
;
2009 /* Classify each field of record and merge classes. */
2010 if (TREE_CODE (type
) == RECORD_TYPE
)
2012 /* For classes first merge in the field of the subclasses. */
2013 if (TYPE_BINFO (type
) != NULL
&& TYPE_BINFO_BASETYPES (type
) != NULL
)
2015 tree bases
= TYPE_BINFO_BASETYPES (type
);
2016 int n_bases
= TREE_VEC_LENGTH (bases
);
2019 for (i
= 0; i
< n_bases
; ++i
)
2021 tree binfo
= TREE_VEC_ELT (bases
, i
);
2023 int offset
= tree_low_cst (BINFO_OFFSET (binfo
), 0) * 8;
2024 tree type
= BINFO_TYPE (binfo
);
2026 num
= classify_argument (TYPE_MODE (type
),
2028 (offset
+ bit_offset
) % 256);
2031 for (i
= 0; i
< num
; i
++)
2033 int pos
= (offset
+ (bit_offset
% 64)) / 8 / 8;
2035 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
2039 /* And now merge the fields of structure. */
2040 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
2042 if (TREE_CODE (field
) == FIELD_DECL
)
2046 /* Bitfields are always classified as integer. Handle them
2047 early, since later code would consider them to be
2048 misaligned integers. */
2049 if (DECL_BIT_FIELD (field
))
2051 for (i
= int_bit_position (field
) / 8 / 8;
2052 i
< (int_bit_position (field
)
2053 + tree_low_cst (DECL_SIZE (field
), 0)
2056 merge_classes (X86_64_INTEGER_CLASS
,
2061 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
2062 TREE_TYPE (field
), subclasses
,
2063 (int_bit_position (field
)
2064 + bit_offset
) % 256);
2067 for (i
= 0; i
< num
; i
++)
2070 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
2072 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
2078 /* Arrays are handled as small records. */
2079 else if (TREE_CODE (type
) == ARRAY_TYPE
)
2082 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
2083 TREE_TYPE (type
), subclasses
, bit_offset
);
2087 /* The partial classes are now full classes. */
2088 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
2089 subclasses
[0] = X86_64_SSE_CLASS
;
2090 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
2091 subclasses
[0] = X86_64_INTEGER_CLASS
;
2093 for (i
= 0; i
< words
; i
++)
2094 classes
[i
] = subclasses
[i
% num
];
2096 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2097 else if (TREE_CODE (type
) == UNION_TYPE
2098 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
2100 /* For classes first merge in the field of the subclasses. */
2101 if (TYPE_BINFO (type
) != NULL
&& TYPE_BINFO_BASETYPES (type
) != NULL
)
2103 tree bases
= TYPE_BINFO_BASETYPES (type
);
2104 int n_bases
= TREE_VEC_LENGTH (bases
);
2107 for (i
= 0; i
< n_bases
; ++i
)
2109 tree binfo
= TREE_VEC_ELT (bases
, i
);
2111 int offset
= tree_low_cst (BINFO_OFFSET (binfo
), 0) * 8;
2112 tree type
= BINFO_TYPE (binfo
);
2114 num
= classify_argument (TYPE_MODE (type
),
2116 (offset
+ (bit_offset
% 64)) % 256);
2119 for (i
= 0; i
< num
; i
++)
2121 int pos
= (offset
+ (bit_offset
% 64)) / 8 / 8;
2123 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
2127 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
2129 if (TREE_CODE (field
) == FIELD_DECL
)
2132 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
2133 TREE_TYPE (field
), subclasses
,
2137 for (i
= 0; i
< num
; i
++)
2138 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
2142 else if (TREE_CODE (type
) == SET_TYPE
)
2146 classes
[0] = X86_64_INTEGERSI_CLASS
;
2149 else if (bytes
<= 8)
2151 classes
[0] = X86_64_INTEGER_CLASS
;
2154 else if (bytes
<= 12)
2156 classes
[0] = X86_64_INTEGER_CLASS
;
2157 classes
[1] = X86_64_INTEGERSI_CLASS
;
2162 classes
[0] = X86_64_INTEGER_CLASS
;
2163 classes
[1] = X86_64_INTEGER_CLASS
;
2170 /* Final merger cleanup. */
2171 for (i
= 0; i
< words
; i
++)
2173 /* If one class is MEMORY, everything should be passed in
2175 if (classes
[i
] == X86_64_MEMORY_CLASS
)
2178 /* The X86_64_SSEUP_CLASS should be always preceded by
2179 X86_64_SSE_CLASS. */
2180 if (classes
[i
] == X86_64_SSEUP_CLASS
2181 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
2182 classes
[i
] = X86_64_SSE_CLASS
;
2184 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2185 if (classes
[i
] == X86_64_X87UP_CLASS
2186 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
2187 classes
[i
] = X86_64_SSE_CLASS
;
2192 /* Compute alignment needed. We align all types to natural boundaries with
2193 exception of XFmode that is aligned to 64bits. */
2194 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
2196 int mode_alignment
= GET_MODE_BITSIZE (mode
);
2199 mode_alignment
= 128;
2200 else if (mode
== XCmode
)
2201 mode_alignment
= 256;
2202 /* Misaligned fields are always returned in memory. */
2203 if (bit_offset
% mode_alignment
)
2207 /* Classification of atomic types. */
2217 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
2218 classes
[0] = X86_64_INTEGERSI_CLASS
;
2220 classes
[0] = X86_64_INTEGER_CLASS
;
2224 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
2227 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
2228 classes
[2] = classes
[3] = X86_64_INTEGER_CLASS
;
2231 if (!(bit_offset
% 64))
2232 classes
[0] = X86_64_SSESF_CLASS
;
2234 classes
[0] = X86_64_SSE_CLASS
;
2237 classes
[0] = X86_64_SSEDF_CLASS
;
2240 classes
[0] = X86_64_X87_CLASS
;
2241 classes
[1] = X86_64_X87UP_CLASS
;
2247 classes
[0] = X86_64_X87_CLASS
;
2248 classes
[1] = X86_64_X87UP_CLASS
;
2249 classes
[2] = X86_64_X87_CLASS
;
2250 classes
[3] = X86_64_X87UP_CLASS
;
2253 classes
[0] = X86_64_SSEDF_CLASS
;
2254 classes
[1] = X86_64_SSEDF_CLASS
;
2257 classes
[0] = X86_64_SSE_CLASS
;
2265 classes
[0] = X86_64_SSE_CLASS
;
2266 classes
[1] = X86_64_SSEUP_CLASS
;
2281 /* Examine the argument and return set number of register required in each
2282 class. Return 0 iff parameter should be passed in memory. */
2284 examine_argument (enum machine_mode mode
, tree type
, int in_return
,
2285 int *int_nregs
, int *sse_nregs
)
2287 enum x86_64_reg_class
class[MAX_CLASSES
];
2288 int n
= classify_argument (mode
, type
, class, 0);
2294 for (n
--; n
>= 0; n
--)
2297 case X86_64_INTEGER_CLASS
:
2298 case X86_64_INTEGERSI_CLASS
:
2301 case X86_64_SSE_CLASS
:
2302 case X86_64_SSESF_CLASS
:
2303 case X86_64_SSEDF_CLASS
:
2306 case X86_64_NO_CLASS
:
2307 case X86_64_SSEUP_CLASS
:
2309 case X86_64_X87_CLASS
:
2310 case X86_64_X87UP_CLASS
:
2314 case X86_64_MEMORY_CLASS
:
2319 /* Construct container for the argument used by GCC interface. See
2320 FUNCTION_ARG for the detailed description. */
2322 construct_container (enum machine_mode mode
, tree type
, int in_return
,
2323 int nintregs
, int nsseregs
, const int * intreg
,
2326 enum machine_mode tmpmode
;
2328 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2329 enum x86_64_reg_class
class[MAX_CLASSES
];
2333 int needed_sseregs
, needed_intregs
;
2334 rtx exp
[MAX_CLASSES
];
2337 n
= classify_argument (mode
, type
, class, 0);
2338 if (TARGET_DEBUG_ARG
)
2341 fprintf (stderr
, "Memory class\n");
2344 fprintf (stderr
, "Classes:");
2345 for (i
= 0; i
< n
; i
++)
2347 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
2349 fprintf (stderr
, "\n");
2354 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
, &needed_sseregs
))
2356 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
2359 /* First construct simple cases. Avoid SCmode, since we want to use
2360 single register to pass this type. */
2361 if (n
== 1 && mode
!= SCmode
)
2364 case X86_64_INTEGER_CLASS
:
2365 case X86_64_INTEGERSI_CLASS
:
2366 return gen_rtx_REG (mode
, intreg
[0]);
2367 case X86_64_SSE_CLASS
:
2368 case X86_64_SSESF_CLASS
:
2369 case X86_64_SSEDF_CLASS
:
2370 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
2371 case X86_64_X87_CLASS
:
2372 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
2373 case X86_64_NO_CLASS
:
2374 /* Zero sized array, struct or class. */
2379 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
)
2380 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
2382 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
2383 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
2384 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
2385 && class[1] == X86_64_INTEGER_CLASS
2386 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
2387 && intreg
[0] + 1 == intreg
[1])
2388 return gen_rtx_REG (mode
, intreg
[0]);
2390 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
2391 && class[2] == X86_64_X87_CLASS
&& class[3] == X86_64_X87UP_CLASS
)
2392 return gen_rtx_REG (XCmode
, FIRST_STACK_REG
);
2394 /* Otherwise figure out the entries of the PARALLEL. */
2395 for (i
= 0; i
< n
; i
++)
2399 case X86_64_NO_CLASS
:
2401 case X86_64_INTEGER_CLASS
:
2402 case X86_64_INTEGERSI_CLASS
:
2403 /* Merge TImodes on aligned occasions here too. */
2404 if (i
* 8 + 8 > bytes
)
2405 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
2406 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
2410 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2411 if (tmpmode
== BLKmode
)
2413 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2414 gen_rtx_REG (tmpmode
, *intreg
),
2418 case X86_64_SSESF_CLASS
:
2419 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2420 gen_rtx_REG (SFmode
,
2421 SSE_REGNO (sse_regno
)),
2425 case X86_64_SSEDF_CLASS
:
2426 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2427 gen_rtx_REG (DFmode
,
2428 SSE_REGNO (sse_regno
)),
2432 case X86_64_SSE_CLASS
:
2433 if (i
< n
- 1 && class[i
+ 1] == X86_64_SSEUP_CLASS
)
2437 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2438 gen_rtx_REG (tmpmode
,
2439 SSE_REGNO (sse_regno
)),
2441 if (tmpmode
== TImode
)
2449 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
2450 for (i
= 0; i
< nexps
; i
++)
2451 XVECEXP (ret
, 0, i
) = exp
[i
];
2455 /* Update the data in CUM to advance over an argument
2456 of mode MODE and data type TYPE.
2457 (TYPE is null for libcalls where that information may not be available.) */
2460 function_arg_advance (CUMULATIVE_ARGS
*cum
, /* current arg information */
2461 enum machine_mode mode
, /* current arg mode */
2462 tree type
, /* type of the argument or 0 if lib support */
2463 int named
) /* whether or not the argument was named */
2466 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2467 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2469 if (TARGET_DEBUG_ARG
)
2471 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2472 words
, cum
->words
, cum
->nregs
, cum
->sse_nregs
, GET_MODE_NAME (mode
), named
);
2475 int int_nregs
, sse_nregs
;
2476 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
2477 cum
->words
+= words
;
2478 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
2480 cum
->nregs
-= int_nregs
;
2481 cum
->sse_nregs
-= sse_nregs
;
2482 cum
->regno
+= int_nregs
;
2483 cum
->sse_regno
+= sse_nregs
;
2486 cum
->words
+= words
;
2490 if (TARGET_SSE
&& SSE_REG_MODE_P (mode
)
2491 && (!type
|| !AGGREGATE_TYPE_P (type
)))
2493 cum
->sse_words
+= words
;
2494 cum
->sse_nregs
-= 1;
2495 cum
->sse_regno
+= 1;
2496 if (cum
->sse_nregs
<= 0)
2502 else if (TARGET_MMX
&& MMX_REG_MODE_P (mode
)
2503 && (!type
|| !AGGREGATE_TYPE_P (type
)))
2505 cum
->mmx_words
+= words
;
2506 cum
->mmx_nregs
-= 1;
2507 cum
->mmx_regno
+= 1;
2508 if (cum
->mmx_nregs
<= 0)
2516 cum
->words
+= words
;
2517 cum
->nregs
-= words
;
2518 cum
->regno
+= words
;
2520 if (cum
->nregs
<= 0)
2530 /* Define where to put the arguments to a function.
2531 Value is zero to push the argument on the stack,
2532 or a hard register in which to store the argument.
2534 MODE is the argument's machine mode.
2535 TYPE is the data type of the argument (as a tree).
2536 This is null for libcalls where that information may
2538 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2539 the preceding args and about the function being called.
2540 NAMED is nonzero if this argument is a named parameter
2541 (otherwise it is an extra parameter matching an ellipsis). */
2544 function_arg (CUMULATIVE_ARGS
*cum
, /* current arg information */
2545 enum machine_mode mode
, /* current arg mode */
2546 tree type
, /* type of the argument or 0 if lib support */
2547 int named
) /* != 0 for normal args, == 0 for ... args */
2551 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2552 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2553 static bool warnedsse
, warnedmmx
;
2555 /* Handle a hidden AL argument containing number of registers for varargs
2556 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2558 if (mode
== VOIDmode
)
2561 return GEN_INT (cum
->maybe_vaarg
2562 ? (cum
->sse_nregs
< 0
2570 ret
= construct_container (mode
, type
, 0, cum
->nregs
, cum
->sse_nregs
,
2571 &x86_64_int_parameter_registers
[cum
->regno
],
2576 /* For now, pass fp/complex values on the stack. */
2588 if (words
<= cum
->nregs
)
2590 int regno
= cum
->regno
;
2592 /* Fastcall allocates the first two DWORD (SImode) or
2593 smaller arguments to ECX and EDX. */
2596 if (mode
== BLKmode
|| mode
== DImode
)
2599 /* ECX not EAX is the first allocated register. */
2603 ret
= gen_rtx_REG (mode
, regno
);
2613 if (!type
|| !AGGREGATE_TYPE_P (type
))
2615 if (!TARGET_SSE
&& !warnedmmx
&& cum
->warn_sse
)
2618 warning ("SSE vector argument without SSE enabled "
2622 ret
= gen_rtx_REG (mode
, cum
->sse_regno
+ FIRST_SSE_REG
);
2629 if (!type
|| !AGGREGATE_TYPE_P (type
))
2631 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
2634 warning ("MMX vector argument without MMX enabled "
2638 ret
= gen_rtx_REG (mode
, cum
->mmx_regno
+ FIRST_MMX_REG
);
2643 if (TARGET_DEBUG_ARG
)
2646 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2647 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
2650 print_simple_rtl (stderr
, ret
);
2652 fprintf (stderr
, ", stack");
2654 fprintf (stderr
, " )\n");
2660 /* A C expression that indicates when an argument must be passed by
2661 reference. If nonzero for an argument, a copy of that argument is
2662 made in memory and a pointer to the argument is passed instead of
2663 the argument itself. The pointer is passed in whatever way is
2664 appropriate for passing a pointer to that type. */
2667 function_arg_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
2668 enum machine_mode mode ATTRIBUTE_UNUSED
,
2669 tree type
, int named ATTRIBUTE_UNUSED
)
2674 if (type
&& int_size_in_bytes (type
) == -1)
2676 if (TARGET_DEBUG_ARG
)
2677 fprintf (stderr
, "function_arg_pass_by_reference\n");
2684 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2687 contains_128bit_aligned_vector_p (tree type
)
2689 enum machine_mode mode
= TYPE_MODE (type
);
2690 if (SSE_REG_MODE_P (mode
)
2691 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
2693 if (TYPE_ALIGN (type
) < 128)
2696 if (AGGREGATE_TYPE_P (type
))
2698 /* Walk the aggregates recursively. */
2699 if (TREE_CODE (type
) == RECORD_TYPE
2700 || TREE_CODE (type
) == UNION_TYPE
2701 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
2705 if (TYPE_BINFO (type
) != NULL
2706 && TYPE_BINFO_BASETYPES (type
) != NULL
)
2708 tree bases
= TYPE_BINFO_BASETYPES (type
);
2709 int n_bases
= TREE_VEC_LENGTH (bases
);
2712 for (i
= 0; i
< n_bases
; ++i
)
2714 tree binfo
= TREE_VEC_ELT (bases
, i
);
2715 tree type
= BINFO_TYPE (binfo
);
2717 if (contains_128bit_aligned_vector_p (type
))
2721 /* And now merge the fields of structure. */
2722 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
2724 if (TREE_CODE (field
) == FIELD_DECL
2725 && contains_128bit_aligned_vector_p (TREE_TYPE (field
)))
2729 /* Just for use if some languages passes arrays by value. */
2730 else if (TREE_CODE (type
) == ARRAY_TYPE
)
2732 if (contains_128bit_aligned_vector_p (TREE_TYPE (type
)))
2741 /* Gives the alignment boundary, in bits, of an argument with the
2742 specified mode and type. */
2745 ix86_function_arg_boundary (enum machine_mode mode
, tree type
)
2749 align
= TYPE_ALIGN (type
);
2751 align
= GET_MODE_ALIGNMENT (mode
);
2752 if (align
< PARM_BOUNDARY
)
2753 align
= PARM_BOUNDARY
;
2756 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2757 make an exception for SSE modes since these require 128bit
2760 The handling here differs from field_alignment. ICC aligns MMX
2761 arguments to 4 byte boundaries, while structure fields are aligned
2762 to 8 byte boundaries. */
2765 if (!SSE_REG_MODE_P (mode
))
2766 align
= PARM_BOUNDARY
;
2770 if (!contains_128bit_aligned_vector_p (type
))
2771 align
= PARM_BOUNDARY
;
2779 /* Return true if N is a possible register number of function value. */
2781 ix86_function_value_regno_p (int regno
)
2785 return ((regno
) == 0
2786 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
2787 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
));
2789 return ((regno
) == 0 || (regno
) == FIRST_FLOAT_REG
2790 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
)
2791 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
));
2794 /* Define how to find the value returned by a function.
2795 VALTYPE is the data type of the value (as a tree).
2796 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2797 otherwise, FUNC is 0. */
2799 ix86_function_value (tree valtype
)
2803 rtx ret
= construct_container (TYPE_MODE (valtype
), valtype
, 1,
2804 REGPARM_MAX
, SSE_REGPARM_MAX
,
2805 x86_64_int_return_registers
, 0);
2806 /* For zero sized structures, construct_container return NULL, but we need
2807 to keep rest of compiler happy by returning meaningful value. */
2809 ret
= gen_rtx_REG (TYPE_MODE (valtype
), 0);
2813 return gen_rtx_REG (TYPE_MODE (valtype
),
2814 ix86_value_regno (TYPE_MODE (valtype
)));
2817 /* Return false iff type is returned in memory. */
2819 ix86_return_in_memory (tree type
)
2821 int needed_intregs
, needed_sseregs
, size
;
2822 enum machine_mode mode
= TYPE_MODE (type
);
2825 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
2827 if (mode
== BLKmode
)
2830 size
= int_size_in_bytes (type
);
2832 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
2835 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
2837 /* User-created vectors small enough to fit in EAX. */
2841 /* MMX/3dNow values are returned on the stack, since we've
2842 got to EMMS/FEMMS before returning. */
2846 /* SSE values are returned in XMM0. */
2847 /* ??? Except when it doesn't exist? We have a choice of
2848 either (1) being abi incompatible with a -march switch,
2849 or (2) generating an error here. Given no good solution,
2850 I think the safest thing is one warning. The user won't
2851 be able to use -Werror, but.... */
2862 warning ("SSE vector return without SSE enabled "
2877 /* Define how to find the value returned by a library function
2878 assuming the value has mode MODE. */
2880 ix86_libcall_value (enum machine_mode mode
)
2890 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
2893 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
2898 return gen_rtx_REG (mode
, 0);
2902 return gen_rtx_REG (mode
, ix86_value_regno (mode
));
2905 /* Given a mode, return the register to use for a return value. */
2908 ix86_value_regno (enum machine_mode mode
)
2910 /* Floating point return values in %st(0). */
2911 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
&& TARGET_FLOAT_RETURNS_IN_80387
)
2912 return FIRST_FLOAT_REG
;
2913 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2914 we prevent this case when sse is not available. */
2915 if (mode
== TImode
|| (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
2916 return FIRST_SSE_REG
;
2917 /* Everything else in %eax. */
2921 /* Create the va_list data type. */
2924 ix86_build_builtin_va_list (void)
2926 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
2928 /* For i386 we use plain pointer to argument area. */
2930 return build_pointer_type (char_type_node
);
2932 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
2933 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
2935 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
2936 unsigned_type_node
);
2937 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
2938 unsigned_type_node
);
2939 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
2941 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
2944 DECL_FIELD_CONTEXT (f_gpr
) = record
;
2945 DECL_FIELD_CONTEXT (f_fpr
) = record
;
2946 DECL_FIELD_CONTEXT (f_ovf
) = record
;
2947 DECL_FIELD_CONTEXT (f_sav
) = record
;
2949 TREE_CHAIN (record
) = type_decl
;
2950 TYPE_NAME (record
) = type_decl
;
2951 TYPE_FIELDS (record
) = f_gpr
;
2952 TREE_CHAIN (f_gpr
) = f_fpr
;
2953 TREE_CHAIN (f_fpr
) = f_ovf
;
2954 TREE_CHAIN (f_ovf
) = f_sav
;
2956 layout_type (record
);
2958 /* The correct type is an array type of one element. */
2959 return build_array_type (record
, build_index_type (size_zero_node
));
2962 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
2965 ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
2966 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
2969 CUMULATIVE_ARGS next_cum
;
2970 rtx save_area
= NULL_RTX
, mem
;
2983 /* Indicate to allocate space on the stack for varargs save area. */
2984 ix86_save_varrargs_registers
= 1;
2986 cfun
->stack_alignment_needed
= 128;
2988 fntype
= TREE_TYPE (current_function_decl
);
2989 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
2990 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
2991 != void_type_node
));
2993 /* For varargs, we do not want to skip the dummy va_dcl argument.
2994 For stdargs, we do want to skip the last named argument. */
2997 function_arg_advance (&next_cum
, mode
, type
, 1);
3000 save_area
= frame_pointer_rtx
;
3002 set
= get_varargs_alias_set ();
3004 for (i
= next_cum
.regno
; i
< ix86_regparm
; i
++)
3006 mem
= gen_rtx_MEM (Pmode
,
3007 plus_constant (save_area
, i
* UNITS_PER_WORD
));
3008 set_mem_alias_set (mem
, set
);
3009 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
3010 x86_64_int_parameter_registers
[i
]));
3013 if (next_cum
.sse_nregs
)
3015 /* Now emit code to save SSE registers. The AX parameter contains number
3016 of SSE parameter registers used to call this function. We use
3017 sse_prologue_save insn template that produces computed jump across
3018 SSE saves. We need some preparation work to get this working. */
3020 label
= gen_label_rtx ();
3021 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
3023 /* Compute address to jump to :
3024 label - 5*eax + nnamed_sse_arguments*5 */
3025 tmp_reg
= gen_reg_rtx (Pmode
);
3026 nsse_reg
= gen_reg_rtx (Pmode
);
3027 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
3028 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
3029 gen_rtx_MULT (Pmode
, nsse_reg
,
3031 if (next_cum
.sse_regno
)
3034 gen_rtx_CONST (DImode
,
3035 gen_rtx_PLUS (DImode
,
3037 GEN_INT (next_cum
.sse_regno
* 4))));
3039 emit_move_insn (nsse_reg
, label_ref
);
3040 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
3042 /* Compute address of memory block we save into. We always use pointer
3043 pointing 127 bytes after first byte to store - this is needed to keep
3044 instruction size limited by 4 bytes. */
3045 tmp_reg
= gen_reg_rtx (Pmode
);
3046 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
3047 plus_constant (save_area
,
3048 8 * REGPARM_MAX
+ 127)));
3049 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
3050 set_mem_alias_set (mem
, set
);
3051 set_mem_align (mem
, BITS_PER_WORD
);
3053 /* And finally do the dirty job! */
3054 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
3055 GEN_INT (next_cum
.sse_regno
), label
));
3060 /* Implement va_start. */
3063 ix86_va_start (tree valist
, rtx nextarg
)
3065 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
3066 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
3067 tree gpr
, fpr
, ovf
, sav
, t
;
3069 /* Only 64bit target needs something special. */
3072 std_expand_builtin_va_start (valist
, nextarg
);
3076 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
3077 f_fpr
= TREE_CHAIN (f_gpr
);
3078 f_ovf
= TREE_CHAIN (f_fpr
);
3079 f_sav
= TREE_CHAIN (f_ovf
);
3081 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
3082 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
);
3083 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
);
3084 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
);
3085 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
);
3087 /* Count number of gp and fp argument registers used. */
3088 words
= current_function_args_info
.words
;
3089 n_gpr
= current_function_args_info
.regno
;
3090 n_fpr
= current_function_args_info
.sse_regno
;
3092 if (TARGET_DEBUG_ARG
)
3093 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3094 (int) words
, (int) n_gpr
, (int) n_fpr
);
3096 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
,
3097 build_int_2 (n_gpr
* 8, 0));
3098 TREE_SIDE_EFFECTS (t
) = 1;
3099 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3101 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
,
3102 build_int_2 (n_fpr
* 16 + 8*REGPARM_MAX
, 0));
3103 TREE_SIDE_EFFECTS (t
) = 1;
3104 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3106 /* Find the overflow area. */
3107 t
= make_tree (TREE_TYPE (ovf
), virtual_incoming_args_rtx
);
3109 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), t
,
3110 build_int_2 (words
* UNITS_PER_WORD
, 0));
3111 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
3112 TREE_SIDE_EFFECTS (t
) = 1;
3113 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3115 /* Find the register save area.
3116 Prologue of the function save it right above stack frame. */
3117 t
= make_tree (TREE_TYPE (sav
), frame_pointer_rtx
);
3118 t
= build (MODIFY_EXPR
, TREE_TYPE (sav
), sav
, t
);
3119 TREE_SIDE_EFFECTS (t
) = 1;
3120 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3123 /* Implement va_arg. */
3125 ix86_va_arg (tree valist
, tree type
)
3127 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
3128 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
3129 tree gpr
, fpr
, ovf
, sav
, t
;
3131 rtx lab_false
, lab_over
= NULL_RTX
;
3136 /* Only 64bit target needs something special. */
3139 return std_expand_builtin_va_arg (valist
, type
);
3142 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
3143 f_fpr
= TREE_CHAIN (f_gpr
);
3144 f_ovf
= TREE_CHAIN (f_fpr
);
3145 f_sav
= TREE_CHAIN (f_ovf
);
3147 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
3148 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
);
3149 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
);
3150 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
);
3151 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
);
3153 size
= int_size_in_bytes (type
);
3156 /* Passed by reference. */
3158 type
= build_pointer_type (type
);
3159 size
= int_size_in_bytes (type
);
3161 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3163 container
= construct_container (TYPE_MODE (type
), type
, 0,
3164 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
3166 * Pull the value out of the saved registers ...
3169 addr_rtx
= gen_reg_rtx (Pmode
);
3173 rtx int_addr_rtx
, sse_addr_rtx
;
3174 int needed_intregs
, needed_sseregs
;
3177 lab_over
= gen_label_rtx ();
3178 lab_false
= gen_label_rtx ();
3180 examine_argument (TYPE_MODE (type
), type
, 0,
3181 &needed_intregs
, &needed_sseregs
);
3184 need_temp
= ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
3185 || TYPE_ALIGN (type
) > 128);
3187 /* In case we are passing structure, verify that it is consecutive block
3188 on the register save area. If not we need to do moves. */
3189 if (!need_temp
&& !REG_P (container
))
3191 /* Verify that all registers are strictly consecutive */
3192 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
3196 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
3198 rtx slot
= XVECEXP (container
, 0, i
);
3199 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
3200 || INTVAL (XEXP (slot
, 1)) != i
* 16)
3208 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
3210 rtx slot
= XVECEXP (container
, 0, i
);
3211 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
3212 || INTVAL (XEXP (slot
, 1)) != i
* 8)
3219 int_addr_rtx
= addr_rtx
;
3220 sse_addr_rtx
= addr_rtx
;
3224 int_addr_rtx
= gen_reg_rtx (Pmode
);
3225 sse_addr_rtx
= gen_reg_rtx (Pmode
);
3227 /* First ensure that we fit completely in registers. */
3230 emit_cmp_and_jump_insns (expand_expr
3231 (gpr
, NULL_RTX
, SImode
, EXPAND_NORMAL
),
3232 GEN_INT ((REGPARM_MAX
- needed_intregs
+
3233 1) * 8), GE
, const1_rtx
, SImode
,
3238 emit_cmp_and_jump_insns (expand_expr
3239 (fpr
, NULL_RTX
, SImode
, EXPAND_NORMAL
),
3240 GEN_INT ((SSE_REGPARM_MAX
-
3241 needed_sseregs
+ 1) * 16 +
3242 REGPARM_MAX
* 8), GE
, const1_rtx
,
3243 SImode
, 1, lab_false
);
3246 /* Compute index to start of area used for integer regs. */
3249 t
= build (PLUS_EXPR
, ptr_type_node
, sav
, gpr
);
3250 r
= expand_expr (t
, int_addr_rtx
, Pmode
, EXPAND_NORMAL
);
3251 if (r
!= int_addr_rtx
)
3252 emit_move_insn (int_addr_rtx
, r
);
3256 t
= build (PLUS_EXPR
, ptr_type_node
, sav
, fpr
);
3257 r
= expand_expr (t
, sse_addr_rtx
, Pmode
, EXPAND_NORMAL
);
3258 if (r
!= sse_addr_rtx
)
3259 emit_move_insn (sse_addr_rtx
, r
);
3267 /* Never use the memory itself, as it has the alias set. */
3268 x
= XEXP (assign_temp (type
, 0, 1, 0), 0);
3269 mem
= gen_rtx_MEM (BLKmode
, x
);
3270 force_operand (x
, addr_rtx
);
3271 set_mem_alias_set (mem
, get_varargs_alias_set ());
3272 set_mem_align (mem
, BITS_PER_UNIT
);
3274 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
3276 rtx slot
= XVECEXP (container
, 0, i
);
3277 rtx reg
= XEXP (slot
, 0);
3278 enum machine_mode mode
= GET_MODE (reg
);
3284 if (SSE_REGNO_P (REGNO (reg
)))
3286 src_addr
= sse_addr_rtx
;
3287 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
3291 src_addr
= int_addr_rtx
;
3292 src_offset
= REGNO (reg
) * 8;
3294 src_mem
= gen_rtx_MEM (mode
, src_addr
);
3295 set_mem_alias_set (src_mem
, get_varargs_alias_set ());
3296 src_mem
= adjust_address (src_mem
, mode
, src_offset
);
3297 dest_mem
= adjust_address (mem
, mode
, INTVAL (XEXP (slot
, 1)));
3298 emit_move_insn (dest_mem
, src_mem
);
3305 build (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
3306 build_int_2 (needed_intregs
* 8, 0));
3307 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
, t
);
3308 TREE_SIDE_EFFECTS (t
) = 1;
3309 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3314 build (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
3315 build_int_2 (needed_sseregs
* 16, 0));
3316 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
, t
);
3317 TREE_SIDE_EFFECTS (t
) = 1;
3318 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3321 emit_jump_insn (gen_jump (lab_over
));
3323 emit_label (lab_false
);
3326 /* ... otherwise out of the overflow area. */
3328 /* Care for on-stack alignment if needed. */
3329 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64)
3333 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
3334 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
, build_int_2 (align
- 1, 0));
3335 t
= build (BIT_AND_EXPR
, TREE_TYPE (t
), t
, build_int_2 (-align
, -1));
3339 r
= expand_expr (t
, addr_rtx
, Pmode
, EXPAND_NORMAL
);
3341 emit_move_insn (addr_rtx
, r
);
3344 build (PLUS_EXPR
, TREE_TYPE (t
), t
,
3345 build_int_2 (rsize
* UNITS_PER_WORD
, 0));
3346 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
3347 TREE_SIDE_EFFECTS (t
) = 1;
3348 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3351 emit_label (lab_over
);
3355 r
= gen_rtx_MEM (Pmode
, addr_rtx
);
3356 set_mem_alias_set (r
, get_varargs_alias_set ());
3357 emit_move_insn (addr_rtx
, r
);
3363 /* Return nonzero if OP is either a i387 or SSE fp register. */
3365 any_fp_register_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3367 return ANY_FP_REG_P (op
);
3370 /* Return nonzero if OP is an i387 fp register. */
3372 fp_register_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3374 return FP_REG_P (op
);
3377 /* Return nonzero if OP is a non-fp register_operand. */
3379 register_and_not_any_fp_reg_operand (rtx op
, enum machine_mode mode
)
3381 return register_operand (op
, mode
) && !ANY_FP_REG_P (op
);
3384 /* Return nonzero if OP is a register operand other than an
3385 i387 fp register. */
3387 register_and_not_fp_reg_operand (rtx op
, enum machine_mode mode
)
3389 return register_operand (op
, mode
) && !FP_REG_P (op
);
3392 /* Return nonzero if OP is general operand representable on x86_64. */
3395 x86_64_general_operand (rtx op
, enum machine_mode mode
)
3398 return general_operand (op
, mode
);
3399 if (nonimmediate_operand (op
, mode
))
3401 return x86_64_sign_extended_value (op
);
3404 /* Return nonzero if OP is general operand representable on x86_64
3405 as either sign extended or zero extended constant. */
3408 x86_64_szext_general_operand (rtx op
, enum machine_mode mode
)
3411 return general_operand (op
, mode
);
3412 if (nonimmediate_operand (op
, mode
))
3414 return x86_64_sign_extended_value (op
) || x86_64_zero_extended_value (op
);
3417 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3420 x86_64_nonmemory_operand (rtx op
, enum machine_mode mode
)
3423 return nonmemory_operand (op
, mode
);
3424 if (register_operand (op
, mode
))
3426 return x86_64_sign_extended_value (op
);
3429 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3432 x86_64_movabs_operand (rtx op
, enum machine_mode mode
)
3434 if (!TARGET_64BIT
|| !flag_pic
)
3435 return nonmemory_operand (op
, mode
);
3436 if (register_operand (op
, mode
) || x86_64_sign_extended_value (op
))
3438 if (CONSTANT_P (op
) && !symbolic_reference_mentioned_p (op
))
3443 /* Return nonzero if OPNUM's MEM should be matched
3444 in movabs* patterns. */
3447 ix86_check_movabs (rtx insn
, int opnum
)
3451 set
= PATTERN (insn
);
3452 if (GET_CODE (set
) == PARALLEL
)
3453 set
= XVECEXP (set
, 0, 0);
3454 if (GET_CODE (set
) != SET
)
3456 mem
= XEXP (set
, opnum
);
3457 while (GET_CODE (mem
) == SUBREG
)
3458 mem
= SUBREG_REG (mem
);
3459 if (GET_CODE (mem
) != MEM
)
3461 return (volatile_ok
|| !MEM_VOLATILE_P (mem
));
3464 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3467 x86_64_szext_nonmemory_operand (rtx op
, enum machine_mode mode
)
3470 return nonmemory_operand (op
, mode
);
3471 if (register_operand (op
, mode
))
3473 return x86_64_sign_extended_value (op
) || x86_64_zero_extended_value (op
);
3476 /* Return nonzero if OP is immediate operand representable on x86_64. */
3479 x86_64_immediate_operand (rtx op
, enum machine_mode mode
)
3482 return immediate_operand (op
, mode
);
3483 return x86_64_sign_extended_value (op
);
3486 /* Return nonzero if OP is immediate operand representable on x86_64. */
3489 x86_64_zext_immediate_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3491 return x86_64_zero_extended_value (op
);
3494 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3495 for shift & compare patterns, as shifting by 0 does not change flags),
3496 else return zero. */
3499 const_int_1_31_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3501 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 1 && INTVAL (op
) <= 31);
3504 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3505 reference and a constant. */
3508 symbolic_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3510 switch (GET_CODE (op
))
3518 if (GET_CODE (op
) == SYMBOL_REF
3519 || GET_CODE (op
) == LABEL_REF
3520 || (GET_CODE (op
) == UNSPEC
3521 && (XINT (op
, 1) == UNSPEC_GOT
3522 || XINT (op
, 1) == UNSPEC_GOTOFF
3523 || XINT (op
, 1) == UNSPEC_GOTPCREL
)))
3525 if (GET_CODE (op
) != PLUS
3526 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
3530 if (GET_CODE (op
) == SYMBOL_REF
3531 || GET_CODE (op
) == LABEL_REF
)
3533 /* Only @GOTOFF gets offsets. */
3534 if (GET_CODE (op
) != UNSPEC
3535 || XINT (op
, 1) != UNSPEC_GOTOFF
)
3538 op
= XVECEXP (op
, 0, 0);
3539 if (GET_CODE (op
) == SYMBOL_REF
3540 || GET_CODE (op
) == LABEL_REF
)
3549 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3552 pic_symbolic_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3554 if (GET_CODE (op
) != CONST
)
3559 if (GET_CODE (op
) == UNSPEC
3560 && XINT (op
, 1) == UNSPEC_GOTPCREL
)
3562 if (GET_CODE (op
) == PLUS
3563 && GET_CODE (XEXP (op
, 0)) == UNSPEC
3564 && XINT (XEXP (op
, 0), 1) == UNSPEC_GOTPCREL
)
3569 if (GET_CODE (op
) == UNSPEC
)
3571 if (GET_CODE (op
) != PLUS
3572 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
3575 if (GET_CODE (op
) == UNSPEC
)
3581 /* Return true if OP is a symbolic operand that resolves locally. */
3584 local_symbolic_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3586 if (GET_CODE (op
) == CONST
3587 && GET_CODE (XEXP (op
, 0)) == PLUS
3588 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
3589 op
= XEXP (XEXP (op
, 0), 0);
3591 if (GET_CODE (op
) == LABEL_REF
)
3594 if (GET_CODE (op
) != SYMBOL_REF
)
3597 if (SYMBOL_REF_LOCAL_P (op
))
3600 /* There is, however, a not insubstantial body of code in the rest of
3601 the compiler that assumes it can just stick the results of
3602 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3603 /* ??? This is a hack. Should update the body of the compiler to
3604 always create a DECL an invoke targetm.encode_section_info. */
3605 if (strncmp (XSTR (op
, 0), internal_label_prefix
,
3606 internal_label_prefix_len
) == 0)
3612 /* Test for various thread-local symbols. */
3615 tls_symbolic_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3617 if (GET_CODE (op
) != SYMBOL_REF
)
3619 return SYMBOL_REF_TLS_MODEL (op
);
3623 tls_symbolic_operand_1 (rtx op
, enum tls_model kind
)
3625 if (GET_CODE (op
) != SYMBOL_REF
)
3627 return SYMBOL_REF_TLS_MODEL (op
) == kind
;
3631 global_dynamic_symbolic_operand (rtx op
,
3632 enum machine_mode mode ATTRIBUTE_UNUSED
)
3634 return tls_symbolic_operand_1 (op
, TLS_MODEL_GLOBAL_DYNAMIC
);
3638 local_dynamic_symbolic_operand (rtx op
,
3639 enum machine_mode mode ATTRIBUTE_UNUSED
)
3641 return tls_symbolic_operand_1 (op
, TLS_MODEL_LOCAL_DYNAMIC
);
3645 initial_exec_symbolic_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3647 return tls_symbolic_operand_1 (op
, TLS_MODEL_INITIAL_EXEC
);
3651 local_exec_symbolic_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3653 return tls_symbolic_operand_1 (op
, TLS_MODEL_LOCAL_EXEC
);
3656 /* Test for a valid operand for a call instruction. Don't allow the
3657 arg pointer register or virtual regs since they may decay into
3658 reg + const, which the patterns can't handle. */
3661 call_insn_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3663 /* Disallow indirect through a virtual register. This leads to
3664 compiler aborts when trying to eliminate them. */
3665 if (GET_CODE (op
) == REG
3666 && (op
== arg_pointer_rtx
3667 || op
== frame_pointer_rtx
3668 || (REGNO (op
) >= FIRST_PSEUDO_REGISTER
3669 && REGNO (op
) <= LAST_VIRTUAL_REGISTER
)))
3672 /* Disallow `call 1234'. Due to varying assembler lameness this
3673 gets either rejected or translated to `call .+1234'. */
3674 if (GET_CODE (op
) == CONST_INT
)
3677 /* Explicitly allow SYMBOL_REF even if pic. */
3678 if (GET_CODE (op
) == SYMBOL_REF
)
3681 /* Otherwise we can allow any general_operand in the address. */
3682 return general_operand (op
, Pmode
);
3685 /* Test for a valid operand for a call instruction. Don't allow the
3686 arg pointer register or virtual regs since they may decay into
3687 reg + const, which the patterns can't handle. */
3690 sibcall_insn_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3692 /* Disallow indirect through a virtual register. This leads to
3693 compiler aborts when trying to eliminate them. */
3694 if (GET_CODE (op
) == REG
3695 && (op
== arg_pointer_rtx
3696 || op
== frame_pointer_rtx
3697 || (REGNO (op
) >= FIRST_PSEUDO_REGISTER
3698 && REGNO (op
) <= LAST_VIRTUAL_REGISTER
)))
3701 /* Explicitly allow SYMBOL_REF even if pic. */
3702 if (GET_CODE (op
) == SYMBOL_REF
)
3705 /* Otherwise we can only allow register operands. */
3706 return register_operand (op
, Pmode
);
3710 constant_call_address_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3712 if (GET_CODE (op
) == CONST
3713 && GET_CODE (XEXP (op
, 0)) == PLUS
3714 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
3715 op
= XEXP (XEXP (op
, 0), 0);
3716 return GET_CODE (op
) == SYMBOL_REF
;
3719 /* Match exactly zero and one. */
3722 const0_operand (rtx op
, enum machine_mode mode
)
3724 return op
== CONST0_RTX (mode
);
3728 const1_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3730 return op
== const1_rtx
;
3733 /* Match 2, 4, or 8. Used for leal multiplicands. */
3736 const248_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3738 return (GET_CODE (op
) == CONST_INT
3739 && (INTVAL (op
) == 2 || INTVAL (op
) == 4 || INTVAL (op
) == 8));
3743 const_0_to_3_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3745 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 0 && INTVAL (op
) < 4);
3749 const_0_to_7_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3751 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 0 && INTVAL (op
) < 8);
3755 const_0_to_15_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3757 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 0 && INTVAL (op
) < 16);
3761 const_0_to_255_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3763 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 0 && INTVAL (op
) < 256);
3767 /* True if this is a constant appropriate for an increment or decrement. */
3770 incdec_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3772 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3773 registers, since carry flag is not set. */
3774 if (TARGET_PENTIUM4
&& !optimize_size
)
3776 return op
== const1_rtx
|| op
== constm1_rtx
;
3779 /* Return nonzero if OP is acceptable as operand of DImode shift
3783 shiftdi_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3786 return nonimmediate_operand (op
, mode
);
3788 return register_operand (op
, mode
);
3791 /* Return false if this is the stack pointer, or any other fake
3792 register eliminable to the stack pointer. Otherwise, this is
3795 This is used to prevent esp from being used as an index reg.
3796 Which would only happen in pathological cases. */
3799 reg_no_sp_operand (rtx op
, enum machine_mode mode
)
3802 if (GET_CODE (t
) == SUBREG
)
3804 if (t
== stack_pointer_rtx
|| t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
)
3807 return register_operand (op
, mode
);
3811 mmx_reg_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3813 return MMX_REG_P (op
);
3816 /* Return false if this is any eliminable register. Otherwise
3820 general_no_elim_operand (rtx op
, enum machine_mode mode
)
3823 if (GET_CODE (t
) == SUBREG
)
3825 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
3826 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
3827 || t
== virtual_stack_dynamic_rtx
)
3830 && REGNO (t
) >= FIRST_VIRTUAL_REGISTER
3831 && REGNO (t
) <= LAST_VIRTUAL_REGISTER
)
3834 return general_operand (op
, mode
);
3837 /* Return false if this is any eliminable register. Otherwise
3838 register_operand or const_int. */
3841 nonmemory_no_elim_operand (rtx op
, enum machine_mode mode
)
3844 if (GET_CODE (t
) == SUBREG
)
3846 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
3847 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
3848 || t
== virtual_stack_dynamic_rtx
)
3851 return GET_CODE (op
) == CONST_INT
|| register_operand (op
, mode
);
3854 /* Return false if this is any eliminable register or stack register,
3855 otherwise work like register_operand. */
3858 index_register_operand (rtx op
, enum machine_mode mode
)
3861 if (GET_CODE (t
) == SUBREG
)
3865 if (t
== arg_pointer_rtx
3866 || t
== frame_pointer_rtx
3867 || t
== virtual_incoming_args_rtx
3868 || t
== virtual_stack_vars_rtx
3869 || t
== virtual_stack_dynamic_rtx
3870 || REGNO (t
) == STACK_POINTER_REGNUM
)
3873 return general_operand (op
, mode
);
3876 /* Return true if op is a Q_REGS class register. */
3879 q_regs_operand (rtx op
, enum machine_mode mode
)
3881 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3883 if (GET_CODE (op
) == SUBREG
)
3884 op
= SUBREG_REG (op
);
3885 return ANY_QI_REG_P (op
);
3888 /* Return true if op is an flags register. */
3891 flags_reg_operand (rtx op
, enum machine_mode mode
)
3893 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3895 return REG_P (op
) && REGNO (op
) == FLAGS_REG
&& GET_MODE (op
) != VOIDmode
;
3898 /* Return true if op is a NON_Q_REGS class register. */
3901 non_q_regs_operand (rtx op
, enum machine_mode mode
)
3903 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3905 if (GET_CODE (op
) == SUBREG
)
3906 op
= SUBREG_REG (op
);
3907 return NON_QI_REG_P (op
);
3911 zero_extended_scalar_load_operand (rtx op
,
3912 enum machine_mode mode ATTRIBUTE_UNUSED
)
3915 if (GET_CODE (op
) != MEM
)
3917 op
= maybe_get_pool_constant (op
);
3920 if (GET_CODE (op
) != CONST_VECTOR
)
3923 (GET_MODE_SIZE (GET_MODE (op
)) /
3924 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op
))));
3925 for (n_elts
--; n_elts
> 0; n_elts
--)
3927 rtx elt
= CONST_VECTOR_ELT (op
, n_elts
);
3928 if (elt
!= CONST0_RTX (GET_MODE_INNER (GET_MODE (op
))))
3934 /* Return 1 when OP is operand acceptable for standard SSE move. */
3936 vector_move_operand (rtx op
, enum machine_mode mode
)
3938 if (nonimmediate_operand (op
, mode
))
3940 if (GET_MODE (op
) != mode
&& mode
!= VOIDmode
)
3942 return (op
== CONST0_RTX (GET_MODE (op
)));
3945 /* Return true if op if a valid address, and does not contain
3946 a segment override. */
3949 no_seg_address_operand (rtx op
, enum machine_mode mode
)
3951 struct ix86_address parts
;
3953 if (! address_operand (op
, mode
))
3956 if (! ix86_decompose_address (op
, &parts
))
3959 return parts
.seg
== SEG_DEFAULT
;
3962 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3965 sse_comparison_operator (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
3967 enum rtx_code code
= GET_CODE (op
);
3970 /* Operations supported directly. */
3980 /* These are equivalent to ones above in non-IEEE comparisons. */
3987 return !TARGET_IEEE_FP
;
3992 /* Return 1 if OP is a valid comparison operator in valid mode. */
3994 ix86_comparison_operator (rtx op
, enum machine_mode mode
)
3996 enum machine_mode inmode
;
3997 enum rtx_code code
= GET_CODE (op
);
3998 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
4000 if (!COMPARISON_P (op
))
4002 inmode
= GET_MODE (XEXP (op
, 0));
4004 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
4006 enum rtx_code second_code
, bypass_code
;
4007 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
4008 return (bypass_code
== NIL
&& second_code
== NIL
);
4015 if (inmode
== CCmode
|| inmode
== CCGCmode
4016 || inmode
== CCGOCmode
|| inmode
== CCNOmode
)
4019 case LTU
: case GTU
: case LEU
: case ORDERED
: case UNORDERED
: case GEU
:
4020 if (inmode
== CCmode
)
4024 if (inmode
== CCmode
|| inmode
== CCGCmode
|| inmode
== CCNOmode
)
4032 /* Return 1 if OP is a valid comparison operator testing carry flag
4035 ix86_carry_flag_operator (rtx op
, enum machine_mode mode
)
4037 enum machine_mode inmode
;
4038 enum rtx_code code
= GET_CODE (op
);
4040 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
4042 if (!COMPARISON_P (op
))
4044 inmode
= GET_MODE (XEXP (op
, 0));
4045 if (GET_CODE (XEXP (op
, 0)) != REG
4046 || REGNO (XEXP (op
, 0)) != 17
4047 || XEXP (op
, 1) != const0_rtx
)
4050 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
4052 enum rtx_code second_code
, bypass_code
;
4054 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
4055 if (bypass_code
!= NIL
|| second_code
!= NIL
)
4057 code
= ix86_fp_compare_code_to_integer (code
);
4059 else if (inmode
!= CCmode
)
4064 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
4067 fcmov_comparison_operator (rtx op
, enum machine_mode mode
)
4069 enum machine_mode inmode
;
4070 enum rtx_code code
= GET_CODE (op
);
4072 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
4074 if (!COMPARISON_P (op
))
4076 inmode
= GET_MODE (XEXP (op
, 0));
4077 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
4079 enum rtx_code second_code
, bypass_code
;
4081 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
4082 if (bypass_code
!= NIL
|| second_code
!= NIL
)
4084 code
= ix86_fp_compare_code_to_integer (code
);
4086 /* i387 supports just limited amount of conditional codes. */
4089 case LTU
: case GTU
: case LEU
: case GEU
:
4090 if (inmode
== CCmode
|| inmode
== CCFPmode
|| inmode
== CCFPUmode
)
4093 case ORDERED
: case UNORDERED
:
4101 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4104 promotable_binary_operator (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
4106 switch (GET_CODE (op
))
4109 /* Modern CPUs have same latency for HImode and SImode multiply,
4110 but 386 and 486 do HImode multiply faster. */
4111 return ix86_tune
> PROCESSOR_I486
;
4123 /* Nearly general operand, but accept any const_double, since we wish
4124 to be able to drop them into memory rather than have them get pulled
4128 cmp_fp_expander_operand (rtx op
, enum machine_mode mode
)
4130 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
4132 if (GET_CODE (op
) == CONST_DOUBLE
)
4134 return general_operand (op
, mode
);
4137 /* Match an SI or HImode register for a zero_extract. */
4140 ext_register_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
4143 if ((!TARGET_64BIT
|| GET_MODE (op
) != DImode
)
4144 && GET_MODE (op
) != SImode
&& GET_MODE (op
) != HImode
)
4147 if (!register_operand (op
, VOIDmode
))
4150 /* Be careful to accept only registers having upper parts. */
4151 regno
= REG_P (op
) ? REGNO (op
) : REGNO (SUBREG_REG (op
));
4152 return (regno
> LAST_VIRTUAL_REGISTER
|| regno
< 4);
4155 /* Return 1 if this is a valid binary floating-point operation.
4156 OP is the expression matched, and MODE is its mode. */
4159 binary_fp_operator (rtx op
, enum machine_mode mode
)
4161 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
4164 switch (GET_CODE (op
))
4170 return GET_MODE_CLASS (GET_MODE (op
)) == MODE_FLOAT
;
4178 mult_operator (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
4180 return GET_CODE (op
) == MULT
;
4184 div_operator (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
4186 return GET_CODE (op
) == DIV
;
4190 arith_or_logical_operator (rtx op
, enum machine_mode mode
)
4192 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
4193 && ARITHMETIC_P (op
));
4196 /* Returns 1 if OP is memory operand with a displacement. */
4199 memory_displacement_operand (rtx op
, enum machine_mode mode
)
4201 struct ix86_address parts
;
4203 if (! memory_operand (op
, mode
))
4206 if (! ix86_decompose_address (XEXP (op
, 0), &parts
))
4209 return parts
.disp
!= NULL_RTX
;
4212 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4213 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4215 ??? It seems likely that this will only work because cmpsi is an
4216 expander, and no actual insns use this. */
4219 cmpsi_operand (rtx op
, enum machine_mode mode
)
4221 if (nonimmediate_operand (op
, mode
))
4224 if (GET_CODE (op
) == AND
4225 && GET_MODE (op
) == SImode
4226 && GET_CODE (XEXP (op
, 0)) == ZERO_EXTRACT
4227 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
4228 && GET_CODE (XEXP (XEXP (op
, 0), 2)) == CONST_INT
4229 && INTVAL (XEXP (XEXP (op
, 0), 1)) == 8
4230 && INTVAL (XEXP (XEXP (op
, 0), 2)) == 8
4231 && GET_CODE (XEXP (op
, 1)) == CONST_INT
)
4237 /* Returns 1 if OP is memory operand that can not be represented by the
4241 long_memory_operand (rtx op
, enum machine_mode mode
)
4243 if (! memory_operand (op
, mode
))
4246 return memory_address_length (op
) != 0;
4249 /* Return nonzero if the rtx is known aligned. */
4252 aligned_operand (rtx op
, enum machine_mode mode
)
4254 struct ix86_address parts
;
4256 if (!general_operand (op
, mode
))
4259 /* Registers and immediate operands are always "aligned". */
4260 if (GET_CODE (op
) != MEM
)
4263 /* Don't even try to do any aligned optimizations with volatiles. */
4264 if (MEM_VOLATILE_P (op
))
4269 /* Pushes and pops are only valid on the stack pointer. */
4270 if (GET_CODE (op
) == PRE_DEC
4271 || GET_CODE (op
) == POST_INC
)
4274 /* Decode the address. */
4275 if (! ix86_decompose_address (op
, &parts
))
4278 /* Look for some component that isn't known to be aligned. */
4282 && REGNO_POINTER_ALIGN (REGNO (parts
.index
)) < 32)
4287 if (REGNO_POINTER_ALIGN (REGNO (parts
.base
)) < 32)
4292 if (GET_CODE (parts
.disp
) != CONST_INT
4293 || (INTVAL (parts
.disp
) & 3) != 0)
4297 /* Didn't find one -- this must be an aligned address. */
4301 /* Initialize the table of extra 80387 mathematical constants. */
4304 init_ext_80387_constants (void)
4306 static const char * cst
[5] =
4308 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4309 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4310 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4311 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4312 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4316 for (i
= 0; i
< 5; i
++)
4318 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
4319 /* Ensure each constant is rounded to XFmode precision. */
4320 real_convert (&ext_80387_constants_table
[i
],
4321 XFmode
, &ext_80387_constants_table
[i
]);
4324 ext_80387_constants_init
= 1;
4327 /* Return true if the constant is something that can be loaded with
4328 a special instruction. */
4331 standard_80387_constant_p (rtx x
)
4333 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
4336 if (x
== CONST0_RTX (GET_MODE (x
)))
4338 if (x
== CONST1_RTX (GET_MODE (x
)))
4341 /* For XFmode constants, try to find a special 80387 instruction when
4342 optimizing for size or on those CPUs that benefit from them. */
4343 if (GET_MODE (x
) == XFmode
4344 && (optimize_size
|| x86_ext_80387_constants
& TUNEMASK
))
4349 if (! ext_80387_constants_init
)
4350 init_ext_80387_constants ();
4352 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
4353 for (i
= 0; i
< 5; i
++)
4354 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
4361 /* Return the opcode of the special instruction to be used to load
4365 standard_80387_constant_opcode (rtx x
)
4367 switch (standard_80387_constant_p (x
))
4387 /* Return the CONST_DOUBLE representing the 80387 constant that is
4388 loaded by the specified special instruction. The argument IDX
4389 matches the return value from standard_80387_constant_p. */
4392 standard_80387_constant_rtx (int idx
)
4396 if (! ext_80387_constants_init
)
4397 init_ext_80387_constants ();
4413 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
4417 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4420 standard_sse_constant_p (rtx x
)
4422 if (x
== const0_rtx
)
4424 return (x
== CONST0_RTX (GET_MODE (x
)));
4427 /* Returns 1 if OP contains a symbol reference */
4430 symbolic_reference_mentioned_p (rtx op
)
4435 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
4438 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
4439 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
4445 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
4446 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
4450 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
4457 /* Return 1 if it is appropriate to emit `ret' instructions in the
4458 body of a function. Do this only if the epilogue is simple, needing a
4459 couple of insns. Prior to reloading, we can't tell how many registers
4460 must be saved, so return 0 then. Return 0 if there is no frame
4461 marker to de-allocate.
4463 If NON_SAVING_SETJMP is defined and true, then it is not possible
4464 for the epilogue to be simple, so return 0. This is a special case
4465 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4466 until final, but jump_optimize may need to know sooner if a
4470 ix86_can_use_return_insn_p (void)
4472 struct ix86_frame frame
;
4474 #ifdef NON_SAVING_SETJMP
4475 if (NON_SAVING_SETJMP
&& current_function_calls_setjmp
)
4479 if (! reload_completed
|| frame_pointer_needed
)
4482 /* Don't allow more than 32 pop, since that's all we can do
4483 with one instruction. */
4484 if (current_function_pops_args
4485 && current_function_args_size
>= 32768)
4488 ix86_compute_frame_layout (&frame
);
4489 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
4492 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4494 x86_64_sign_extended_value (rtx value
)
4496 switch (GET_CODE (value
))
4498 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4499 to be at least 32 and this all acceptable constants are
4500 represented as CONST_INT. */
4502 if (HOST_BITS_PER_WIDE_INT
== 32)
4506 HOST_WIDE_INT val
= trunc_int_for_mode (INTVAL (value
), DImode
);
4507 return trunc_int_for_mode (val
, SImode
) == val
;
4511 /* For certain code models, the symbolic references are known to fit.
4512 in CM_SMALL_PIC model we know it fits if it is local to the shared
4513 library. Don't count TLS SYMBOL_REFs here, since they should fit
4514 only if inside of UNSPEC handled below. */
4516 /* TLS symbols are not constant. */
4517 if (tls_symbolic_operand (value
, Pmode
))
4519 return (ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_KERNEL
);
4521 /* For certain code models, the code is near as well. */
4523 return (ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
4524 || ix86_cmodel
== CM_KERNEL
);
4526 /* We also may accept the offsetted memory references in certain special
4529 if (GET_CODE (XEXP (value
, 0)) == UNSPEC
)
4530 switch (XINT (XEXP (value
, 0), 1))
4532 case UNSPEC_GOTPCREL
:
4534 case UNSPEC_GOTNTPOFF
:
4540 if (GET_CODE (XEXP (value
, 0)) == PLUS
)
4542 rtx op1
= XEXP (XEXP (value
, 0), 0);
4543 rtx op2
= XEXP (XEXP (value
, 0), 1);
4544 HOST_WIDE_INT offset
;
4546 if (ix86_cmodel
== CM_LARGE
)
4548 if (GET_CODE (op2
) != CONST_INT
)
4550 offset
= trunc_int_for_mode (INTVAL (op2
), DImode
);
4551 switch (GET_CODE (op1
))
4554 /* For CM_SMALL assume that latest object is 16MB before
4555 end of 31bits boundary. We may also accept pretty
4556 large negative constants knowing that all objects are
4557 in the positive half of address space. */
4558 if (ix86_cmodel
== CM_SMALL
4559 && offset
< 16*1024*1024
4560 && trunc_int_for_mode (offset
, SImode
) == offset
)
4562 /* For CM_KERNEL we know that all object resist in the
4563 negative half of 32bits address space. We may not
4564 accept negative offsets, since they may be just off
4565 and we may accept pretty large positive ones. */
4566 if (ix86_cmodel
== CM_KERNEL
4568 && trunc_int_for_mode (offset
, SImode
) == offset
)
4572 /* These conditions are similar to SYMBOL_REF ones, just the
4573 constraints for code models differ. */
4574 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
4575 && offset
< 16*1024*1024
4576 && trunc_int_for_mode (offset
, SImode
) == offset
)
4578 if (ix86_cmodel
== CM_KERNEL
4580 && trunc_int_for_mode (offset
, SImode
) == offset
)
4584 switch (XINT (op1
, 1))
4589 && trunc_int_for_mode (offset
, SImode
) == offset
)
4603 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4605 x86_64_zero_extended_value (rtx value
)
4607 switch (GET_CODE (value
))
4610 if (HOST_BITS_PER_WIDE_INT
== 32)
4611 return (GET_MODE (value
) == VOIDmode
4612 && !CONST_DOUBLE_HIGH (value
));
4616 if (HOST_BITS_PER_WIDE_INT
== 32)
4617 return INTVAL (value
) >= 0;
4619 return !(INTVAL (value
) & ~(HOST_WIDE_INT
) 0xffffffff);
4622 /* For certain code models, the symbolic references are known to fit. */
4624 /* TLS symbols are not constant. */
4625 if (tls_symbolic_operand (value
, Pmode
))
4627 return ix86_cmodel
== CM_SMALL
;
4629 /* For certain code models, the code is near as well. */
4631 return ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
;
4633 /* We also may accept the offsetted memory references in certain special
4636 if (GET_CODE (XEXP (value
, 0)) == PLUS
)
4638 rtx op1
= XEXP (XEXP (value
, 0), 0);
4639 rtx op2
= XEXP (XEXP (value
, 0), 1);
4641 if (ix86_cmodel
== CM_LARGE
)
4643 switch (GET_CODE (op1
))
4647 /* For small code model we may accept pretty large positive
4648 offsets, since one bit is available for free. Negative
4649 offsets are limited by the size of NULL pointer area
4650 specified by the ABI. */
4651 if (ix86_cmodel
== CM_SMALL
4652 && GET_CODE (op2
) == CONST_INT
4653 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
4654 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
4657 /* ??? For the kernel, we may accept adjustment of
4658 -0x10000000, since we know that it will just convert
4659 negative address space to positive, but perhaps this
4660 is not worthwhile. */
4663 /* These conditions are similar to SYMBOL_REF ones, just the
4664 constraints for code models differ. */
4665 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
4666 && GET_CODE (op2
) == CONST_INT
4667 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
4668 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
4682 /* Value should be nonzero if functions must have frame pointers.
4683 Zero means the frame pointer need not be set up (and parms may
4684 be accessed via the stack pointer) in functions that seem suitable. */
4687 ix86_frame_pointer_required (void)
4689 /* If we accessed previous frames, then the generated code expects
4690 to be able to access the saved ebp value in our frame. */
4691 if (cfun
->machine
->accesses_prev_frame
)
4694 /* Several x86 os'es need a frame pointer for other reasons,
4695 usually pertaining to setjmp. */
4696 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
4699 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4700 the frame pointer by default. Turn it back on now if we've not
4701 got a leaf function. */
4702 if (TARGET_OMIT_LEAF_FRAME_POINTER
4703 && (!current_function_is_leaf
))
4706 if (current_function_profile
)
4712 /* Record that the current function accesses previous call frames. */
4715 ix86_setup_frame_addresses (void)
4717 cfun
->machine
->accesses_prev_frame
= 1;
#if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

/* Bitmask: bit REGNO is set when a pc-load thunk for that register has
   been requested and must be emitted at end of file.  */
static int pic_labels_used;
4728 /* Fills in the label name that should be used for a pc thunk for
4729 the given register. */
4732 get_pc_thunk_name (char name
[32], unsigned int regno
)
4734 if (USE_HIDDEN_LINKONCE
)
4735 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
4737 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
4741 /* This function generates code for -fpic that loads %ebx with
4742 the return address of the caller and then returns. */
4745 ix86_file_end (void)
4750 for (regno
= 0; regno
< 8; ++regno
)
4754 if (! ((pic_labels_used
>> regno
) & 1))
4757 get_pc_thunk_name (name
, regno
);
4759 if (USE_HIDDEN_LINKONCE
)
4763 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
4765 TREE_PUBLIC (decl
) = 1;
4766 TREE_STATIC (decl
) = 1;
4767 DECL_ONE_ONLY (decl
) = 1;
4769 (*targetm
.asm_out
.unique_section
) (decl
, 0);
4770 named_section (decl
, NULL
, 0);
4772 (*targetm
.asm_out
.globalize_label
) (asm_out_file
, name
);
4773 fputs ("\t.hidden\t", asm_out_file
);
4774 assemble_name (asm_out_file
, name
);
4775 fputc ('\n', asm_out_file
);
4776 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
4781 ASM_OUTPUT_LABEL (asm_out_file
, name
);
4784 xops
[0] = gen_rtx_REG (SImode
, regno
);
4785 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
4786 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
4787 output_asm_insn ("ret", xops
);
4790 if (NEED_INDICATE_EXEC_STACK
)
4791 file_end_indicate_exec_stack ();
4794 /* Emit code for the SET_GOT patterns. */
4797 output_set_got (rtx dest
)
4802 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
4804 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
4806 xops
[2] = gen_rtx_LABEL_REF (Pmode
, gen_label_rtx ());
4809 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
4811 output_asm_insn ("call\t%a2", xops
);
4814 /* Output the "canonical" label name ("Lxx$pb") here too. This
4815 is what will be referred to by the Mach-O PIC subsystem. */
4816 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
4818 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
4819 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
4822 output_asm_insn ("pop{l}\t%0", xops
);
4827 get_pc_thunk_name (name
, REGNO (dest
));
4828 pic_labels_used
|= 1 << REGNO (dest
);
4830 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
4831 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
4832 output_asm_insn ("call\t%X2", xops
);
4835 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
4836 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
4837 else if (!TARGET_MACHO
)
4838 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops
);
4843 /* Generate an "push" pattern for input ARG. */
4848 return gen_rtx_SET (VOIDmode
,
4850 gen_rtx_PRE_DEC (Pmode
,
4851 stack_pointer_rtx
)),
4855 /* Return >= 0 if there is an unused call-clobbered register available
4856 for the entire function. */
4859 ix86_select_alt_pic_regnum (void)
4861 if (current_function_is_leaf
&& !current_function_profile
)
4864 for (i
= 2; i
>= 0; --i
)
4865 if (!regs_ever_live
[i
])
4869 return INVALID_REGNUM
;
4872 /* Return 1 if we need to save REGNO. */
4874 ix86_save_reg (unsigned int regno
, int maybe_eh_return
)
4876 if (pic_offset_table_rtx
4877 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
4878 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
4879 || current_function_profile
4880 || current_function_calls_eh_return
4881 || current_function_uses_const_pool
))
4883 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
4888 if (current_function_calls_eh_return
&& maybe_eh_return
)
4893 unsigned test
= EH_RETURN_DATA_REGNO (i
);
4894 if (test
== INVALID_REGNUM
)
4901 return (regs_ever_live
[regno
]
4902 && !call_used_regs
[regno
]
4903 && !fixed_regs
[regno
]
4904 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
4907 /* Return number of registers to be saved on the stack. */
4910 ix86_nsaved_regs (void)
4915 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
4916 if (ix86_save_reg (regno
, true))
4921 /* Return the offset between two registers, one to be eliminated, and the other
4922 its replacement, at the start of a routine. */
4925 ix86_initial_elimination_offset (int from
, int to
)
4927 struct ix86_frame frame
;
4928 ix86_compute_frame_layout (&frame
);
4930 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4931 return frame
.hard_frame_pointer_offset
;
4932 else if (from
== FRAME_POINTER_REGNUM
4933 && to
== HARD_FRAME_POINTER_REGNUM
)
4934 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
4937 if (to
!= STACK_POINTER_REGNUM
)
4939 else if (from
== ARG_POINTER_REGNUM
)
4940 return frame
.stack_pointer_offset
;
4941 else if (from
!= FRAME_POINTER_REGNUM
)
4944 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
4948 /* Fill structure ix86_frame about frame of currently computed function. */
4951 ix86_compute_frame_layout (struct ix86_frame
*frame
)
4953 HOST_WIDE_INT total_size
;
4954 int stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
4955 HOST_WIDE_INT offset
;
4956 int preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
4957 HOST_WIDE_INT size
= get_frame_size ();
4959 frame
->nregs
= ix86_nsaved_regs ();
4962 /* During reload iteration the amount of registers saved can change.
4963 Recompute the value as needed. Do not recompute when amount of registers
4964 didn't change as reload does mutiple calls to the function and does not
4965 expect the decision to change within single iteration. */
4967 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
4969 int count
= frame
->nregs
;
4971 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
4972 /* The fast prologue uses move instead of push to save registers. This
4973 is significantly longer, but also executes faster as modern hardware
4974 can execute the moves in parallel, but can't do that for push/pop.
4976 Be careful about choosing what prologue to emit: When function takes
4977 many instructions to execute we may use slow version as well as in
4978 case function is known to be outside hot spot (this is known with
4979 feedback only). Weight the size of function by number of registers
4980 to save as it is cheap to use one or two push instructions but very
4981 slow to use many of them. */
4983 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
4984 if (cfun
->function_frequency
< FUNCTION_FREQUENCY_NORMAL
4985 || (flag_branch_probabilities
4986 && cfun
->function_frequency
< FUNCTION_FREQUENCY_HOT
))
4987 cfun
->machine
->use_fast_prologue_epilogue
= false;
4989 cfun
->machine
->use_fast_prologue_epilogue
4990 = !expensive_function_p (count
);
4992 if (TARGET_PROLOGUE_USING_MOVE
4993 && cfun
->machine
->use_fast_prologue_epilogue
)
4994 frame
->save_regs_using_mov
= true;
4996 frame
->save_regs_using_mov
= false;
4999 /* Skip return address and saved base pointer. */
5000 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
5002 frame
->hard_frame_pointer_offset
= offset
;
5004 /* Do some sanity checking of stack_alignment_needed and
5005 preferred_alignment, since i386 port is the only using those features
5006 that may break easily. */
5008 if (size
&& !stack_alignment_needed
)
5010 if (preferred_alignment
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
5012 if (preferred_alignment
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
5014 if (stack_alignment_needed
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
5017 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
5018 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
5020 /* Register save area */
5021 offset
+= frame
->nregs
* UNITS_PER_WORD
;
5024 if (ix86_save_varrargs_registers
)
5026 offset
+= X86_64_VARARGS_SIZE
;
5027 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
5030 frame
->va_arg_size
= 0;
5032 /* Align start of frame for local function. */
5033 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
5034 & -stack_alignment_needed
) - offset
;
5036 offset
+= frame
->padding1
;
5038 /* Frame pointer points here. */
5039 frame
->frame_pointer_offset
= offset
;
5043 /* Add outgoing arguments area. Can be skipped if we eliminated
5044 all the function calls as dead code.
5045 Skipping is however impossible when function calls alloca. Alloca
5046 expander assumes that last current_function_outgoing_args_size
5047 of stack frame are unused. */
5048 if (ACCUMULATE_OUTGOING_ARGS
5049 && (!current_function_is_leaf
|| current_function_calls_alloca
))
5051 offset
+= current_function_outgoing_args_size
;
5052 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
5055 frame
->outgoing_arguments_size
= 0;
5057 /* Align stack boundary. Only needed if we're calling another function
5059 if (!current_function_is_leaf
|| current_function_calls_alloca
)
5060 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
5061 & -preferred_alignment
) - offset
;
5063 frame
->padding2
= 0;
5065 offset
+= frame
->padding2
;
5067 /* We've reached end of stack frame. */
5068 frame
->stack_pointer_offset
= offset
;
5070 /* Size prologue needs to allocate. */
5071 frame
->to_allocate
=
5072 (size
+ frame
->padding1
+ frame
->padding2
5073 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
5075 if ((!frame
->to_allocate
&& frame
->nregs
<= 1)
5076 || (TARGET_64BIT
&& frame
->to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
5077 frame
->save_regs_using_mov
= false;
5079 if (TARGET_RED_ZONE
&& current_function_sp_is_unchanging
5080 && current_function_is_leaf
)
5082 frame
->red_zone_size
= frame
->to_allocate
;
5083 if (frame
->save_regs_using_mov
)
5084 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
5085 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
5086 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
5089 frame
->red_zone_size
= 0;
5090 frame
->to_allocate
-= frame
->red_zone_size
;
5091 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
5093 fprintf (stderr
, "nregs: %i\n", frame
->nregs
);
5094 fprintf (stderr
, "size: %i\n", size
);
5095 fprintf (stderr
, "alignment1: %i\n", stack_alignment_needed
);
5096 fprintf (stderr
, "padding1: %i\n", frame
->padding1
);
5097 fprintf (stderr
, "va_arg: %i\n", frame
->va_arg_size
);
5098 fprintf (stderr
, "padding2: %i\n", frame
->padding2
);
5099 fprintf (stderr
, "to_allocate: %i\n", frame
->to_allocate
);
5100 fprintf (stderr
, "red_zone_size: %i\n", frame
->red_zone_size
);
5101 fprintf (stderr
, "frame_pointer_offset: %i\n", frame
->frame_pointer_offset
);
5102 fprintf (stderr
, "hard_frame_pointer_offset: %i\n",
5103 frame
->hard_frame_pointer_offset
);
5104 fprintf (stderr
, "stack_pointer_offset: %i\n", frame
->stack_pointer_offset
);
5108 /* Emit code to save registers in the prologue. */
5111 ix86_emit_save_regs (void)
5116 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
5117 if (ix86_save_reg (regno
, true))
5119 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
5120 RTX_FRAME_RELATED_P (insn
) = 1;
5124 /* Emit code to save registers using MOV insns. First register
5125 is restored from POINTER + OFFSET. */
5127 ix86_emit_save_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
)
5132 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5133 if (ix86_save_reg (regno
, true))
5135 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
5137 gen_rtx_REG (Pmode
, regno
));
5138 RTX_FRAME_RELATED_P (insn
) = 1;
5139 offset
+= UNITS_PER_WORD
;
5143 /* Expand prologue or epilogue stack adjustment.
5144 The pattern exist to put a dependency on all ebp-based memory accesses.
5145 STYLE should be negative if instructions should be marked as frame related,
5146 zero if %r11 register is live and cannot be freely used and positive
5150 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
, int style
)
5155 insn
= emit_insn (gen_pro_epilogue_adjust_stack_1 (dest
, src
, offset
));
5156 else if (x86_64_immediate_operand (offset
, DImode
))
5157 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest
, src
, offset
));
5161 /* r11 is used by indirect sibcall return as well, set before the
5162 epilogue and used after the epilogue. ATM indirect sibcall
5163 shouldn't be used together with huge frame sizes in one
5164 function because of the frame_size check in sibcall.c. */
5167 r11
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
5168 insn
= emit_insn (gen_rtx_SET (DImode
, r11
, offset
));
5170 RTX_FRAME_RELATED_P (insn
) = 1;
5171 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest
, src
, r11
,
5175 RTX_FRAME_RELATED_P (insn
) = 1;
5178 /* Expand the prologue into a bunch of separate insns. */
5181 ix86_expand_prologue (void)
5185 struct ix86_frame frame
;
5186 HOST_WIDE_INT allocate
;
5188 ix86_compute_frame_layout (&frame
);
5190 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5191 slower on all targets. Also sdb doesn't like it. */
5193 if (frame_pointer_needed
)
5195 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
5196 RTX_FRAME_RELATED_P (insn
) = 1;
5198 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
5199 RTX_FRAME_RELATED_P (insn
) = 1;
5202 allocate
= frame
.to_allocate
;
5204 if (!frame
.save_regs_using_mov
)
5205 ix86_emit_save_regs ();
5207 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
5209 /* When using red zone we may start register saving before allocating
5210 the stack frame saving one cycle of the prologue. */
5211 if (TARGET_RED_ZONE
&& frame
.save_regs_using_mov
)
5212 ix86_emit_save_regs_using_mov (frame_pointer_needed
? hard_frame_pointer_rtx
5213 : stack_pointer_rtx
,
5214 -frame
.nregs
* UNITS_PER_WORD
);
5218 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
5219 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5220 GEN_INT (-allocate
), -1);
5223 /* Only valid for Win32. */
5224 rtx eax
= gen_rtx_REG (SImode
, 0);
5225 bool eax_live
= ix86_eax_live_at_start_p ();
5232 emit_insn (gen_push (eax
));
5236 insn
= emit_move_insn (eax
, GEN_INT (allocate
));
5237 RTX_FRAME_RELATED_P (insn
) = 1;
5239 insn
= emit_insn (gen_allocate_stack_worker (eax
));
5240 RTX_FRAME_RELATED_P (insn
) = 1;
5244 rtx t
= plus_constant (stack_pointer_rtx
, allocate
);
5245 emit_move_insn (eax
, gen_rtx_MEM (SImode
, t
));
5249 if (frame
.save_regs_using_mov
&& !TARGET_RED_ZONE
)
5251 if (!frame_pointer_needed
|| !frame
.to_allocate
)
5252 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
5254 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
5255 -frame
.nregs
* UNITS_PER_WORD
);
5258 pic_reg_used
= false;
5259 if (pic_offset_table_rtx
5260 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5261 || current_function_profile
))
5263 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
5265 if (alt_pic_reg_used
!= INVALID_REGNUM
)
5266 REGNO (pic_offset_table_rtx
) = alt_pic_reg_used
;
5268 pic_reg_used
= true;
5273 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
5275 /* Even with accurate pre-reload life analysis, we can wind up
5276 deleting all references to the pic register after reload.
5277 Consider if cross-jumping unifies two sides of a branch
5278 controlled by a comparison vs the only read from a global.
5279 In which case, allow the set_got to be deleted, though we're
5280 too late to do anything about the ebx save in the prologue. */
5281 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
5284 /* Prevent function calls from be scheduled before the call to mcount.
5285 In the pic_reg_used case, make sure that the got load isn't deleted. */
5286 if (current_function_profile
)
5287 emit_insn (gen_blockage (pic_reg_used
? pic_offset_table_rtx
: const0_rtx
));
5290 /* Emit code to restore saved registers using MOV insns. First register
5291 is restored from POINTER + OFFSET. */
5293 ix86_emit_restore_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
,
5294 int maybe_eh_return
)
5297 rtx base_address
= gen_rtx_MEM (Pmode
, pointer
);
5299 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5300 if (ix86_save_reg (regno
, maybe_eh_return
))
5302 /* Ensure that adjust_address won't be forced to produce pointer
5303 out of range allowed by x86-64 instruction set. */
5304 if (TARGET_64BIT
&& offset
!= trunc_int_for_mode (offset
, SImode
))
5308 r11
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
5309 emit_move_insn (r11
, GEN_INT (offset
));
5310 emit_insn (gen_adddi3 (r11
, r11
, pointer
));
5311 base_address
= gen_rtx_MEM (Pmode
, r11
);
5314 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
5315 adjust_address (base_address
, Pmode
, offset
));
5316 offset
+= UNITS_PER_WORD
;
5320 /* Restore function stack, frame, and registers. */
5323 ix86_expand_epilogue (int style
)
5326 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
5327 struct ix86_frame frame
;
5328 HOST_WIDE_INT offset
;
5330 ix86_compute_frame_layout (&frame
);
5332 /* Calculate start of saved registers relative to ebp. Special care
5333 must be taken for the normal return case of a function using
5334 eh_return: the eax and edx registers are marked as saved, but not
5335 restored along this path. */
5336 offset
= frame
.nregs
;
5337 if (current_function_calls_eh_return
&& style
!= 2)
5339 offset
*= -UNITS_PER_WORD
;
5341 /* If we're only restoring one register and sp is not valid then
5342 using a move instruction to restore the register since it's
5343 less work than reloading sp and popping the register.
5345 The default code result in stack adjustment using add/lea instruction,
5346 while this code results in LEAVE instruction (or discrete equivalent),
5347 so it is profitable in some other cases as well. Especially when there
5348 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5349 and there is exactly one register to pop. This heuristic may need some
5350 tuning in future. */
5351 if ((!sp_valid
&& frame
.nregs
<= 1)
5352 || (TARGET_EPILOGUE_USING_MOVE
5353 && cfun
->machine
->use_fast_prologue_epilogue
5354 && (frame
.nregs
> 1 || frame
.to_allocate
))
5355 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
5356 || (frame_pointer_needed
&& TARGET_USE_LEAVE
5357 && cfun
->machine
->use_fast_prologue_epilogue
5358 && frame
.nregs
== 1)
5359 || current_function_calls_eh_return
)
5361 /* Restore registers. We can use ebp or esp to address the memory
5362 locations. If both are available, default to ebp, since offsets
5363 are known to be small. Only exception is esp pointing directly to the
5364 end of block of saved registers, where we may simplify addressing
5367 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
5368 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
5369 frame
.to_allocate
, style
== 2);
5371 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
5372 offset
, style
== 2);
5374 /* eh_return epilogues need %ecx added to the stack pointer. */
5377 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
5379 if (frame_pointer_needed
)
5381 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
5382 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
5383 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
5385 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
5386 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
5388 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
5393 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
5394 tmp
= plus_constant (tmp
, (frame
.to_allocate
5395 + frame
.nregs
* UNITS_PER_WORD
));
5396 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
5399 else if (!frame_pointer_needed
)
5400 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5401 GEN_INT (frame
.to_allocate
5402 + frame
.nregs
* UNITS_PER_WORD
),
5404 /* If not an i386, mov & pop is faster than "leave". */
5405 else if (TARGET_USE_LEAVE
|| optimize_size
5406 || !cfun
->machine
->use_fast_prologue_epilogue
)
5407 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
5410 pro_epilogue_adjust_stack (stack_pointer_rtx
,
5411 hard_frame_pointer_rtx
,
5414 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
5416 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
5421 /* First step is to deallocate the stack frame so that we can
5422 pop the registers. */
5425 if (!frame_pointer_needed
)
5427 pro_epilogue_adjust_stack (stack_pointer_rtx
,
5428 hard_frame_pointer_rtx
,
5429 GEN_INT (offset
), style
);
5431 else if (frame
.to_allocate
)
5432 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5433 GEN_INT (frame
.to_allocate
), style
);
5435 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5436 if (ix86_save_reg (regno
, false))
5439 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
5441 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
5443 if (frame_pointer_needed
)
5445 /* Leave results in shorter dependency chains on CPUs that are
5446 able to grok it fast. */
5447 if (TARGET_USE_LEAVE
)
5448 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
5449 else if (TARGET_64BIT
)
5450 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
5452 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
5456 /* Sibcall epilogues don't want a return instruction. */
5460 if (current_function_pops_args
&& current_function_args_size
)
5462 rtx popc
= GEN_INT (current_function_pops_args
);
5464 /* i386 can only pop 64K bytes. If asked to pop more, pop
5465 return address, do explicit add, and jump indirectly to the
5468 if (current_function_pops_args
>= 65536)
5470 rtx ecx
= gen_rtx_REG (SImode
, 2);
5472 /* There is no "pascal" calling convention in 64bit ABI. */
5476 emit_insn (gen_popsi1 (ecx
));
5477 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
5478 emit_jump_insn (gen_return_indirect_internal (ecx
));
5481 emit_jump_insn (gen_return_pop_internal (popc
));
5484 emit_jump_insn (gen_return_internal ());
5487 /* Reset from the function's potential modifications. */
5490 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
5491 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
5493 if (pic_offset_table_rtx
)
5494 REGNO (pic_offset_table_rtx
) = REAL_PIC_OFFSET_TABLE_REGNUM
;
5497 /* Extract the parts of an RTL expression that is a valid memory address
5498 for an instruction. Return 0 if the structure of the address is
5499 grossly off. Return -1 if the address contains ASHIFT, so it is not
5500 strictly valid, but still used for computing length of lea instruction. */
5503 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
5505 rtx base
= NULL_RTX
;
5506 rtx index
= NULL_RTX
;
5507 rtx disp
= NULL_RTX
;
5508 HOST_WIDE_INT scale
= 1;
5509 rtx scale_rtx
= NULL_RTX
;
5511 enum ix86_address_seg seg
= SEG_DEFAULT
;
5513 if (GET_CODE (addr
) == REG
|| GET_CODE (addr
) == SUBREG
)
5515 else if (GET_CODE (addr
) == PLUS
)
5525 addends
[n
++] = XEXP (op
, 1);
5528 while (GET_CODE (op
) == PLUS
);
5533 for (i
= n
; i
>= 0; --i
)
5536 switch (GET_CODE (op
))
5541 index
= XEXP (op
, 0);
5542 scale_rtx
= XEXP (op
, 1);
5546 if (XINT (op
, 1) == UNSPEC_TP
5547 && TARGET_TLS_DIRECT_SEG_REFS
5548 && seg
== SEG_DEFAULT
)
5549 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
5578 else if (GET_CODE (addr
) == MULT
)
5580 index
= XEXP (addr
, 0); /* index*scale */
5581 scale_rtx
= XEXP (addr
, 1);
5583 else if (GET_CODE (addr
) == ASHIFT
)
5587 /* We're called for lea too, which implements ashift on occasion. */
5588 index
= XEXP (addr
, 0);
5589 tmp
= XEXP (addr
, 1);
5590 if (GET_CODE (tmp
) != CONST_INT
)
5592 scale
= INTVAL (tmp
);
5593 if ((unsigned HOST_WIDE_INT
) scale
> 3)
5599 disp
= addr
; /* displacement */
5601 /* Extract the integral value of scale. */
5604 if (GET_CODE (scale_rtx
) != CONST_INT
)
5606 scale
= INTVAL (scale_rtx
);
5609 /* Allow arg pointer and stack pointer as index if there is not scaling. */
5610 if (base
&& index
&& scale
== 1
5611 && (index
== arg_pointer_rtx
5612 || index
== frame_pointer_rtx
5613 || (REG_P (index
) && REGNO (index
) == STACK_POINTER_REGNUM
)))
5620 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5621 if ((base
== hard_frame_pointer_rtx
5622 || base
== frame_pointer_rtx
5623 || base
== arg_pointer_rtx
) && !disp
)
5626 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5627 Avoid this by transforming to [%esi+0]. */
5628 if (ix86_tune
== PROCESSOR_K6
&& !optimize_size
5629 && base
&& !index
&& !disp
5631 && REGNO_REG_CLASS (REGNO (base
)) == SIREG
)
5634 /* Special case: encode reg+reg instead of reg*2. */
5635 if (!base
&& index
&& scale
&& scale
== 2)
5636 base
= index
, scale
= 1;
5638 /* Special case: scaling cannot be encoded without base or displacement. */
5639 if (!base
&& !disp
&& index
&& scale
!= 1)
5651 /* Return cost of the memory address x.
5652 For i386, it is better to use a complex address than let gcc copy
5653 the address into a reg and make a new pseudo. But not if the address
5654 requires to two regs - that would mean more pseudos with longer
5657 ix86_address_cost (rtx x
)
5659 struct ix86_address parts
;
5662 if (!ix86_decompose_address (x
, &parts
))
5665 /* More complex memory references are better. */
5666 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
5668 if (parts
.seg
!= SEG_DEFAULT
)
5671 /* Attempt to minimize number of registers in the address. */
5673 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
5675 && (!REG_P (parts
.index
)
5676 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
5680 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
5682 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
5683 && parts
.base
!= parts
.index
)
5686 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5687 since it's predecode logic can't detect the length of instructions
5688 and it degenerates to vector decoded. Increase cost of such
5689 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5690 to split such addresses or even refuse such addresses at all.
5692 Following addressing modes are affected:
5697 The first and last case may be avoidable by explicitly coding the zero in
5698 memory address, but I don't have AMD-K6 machine handy to check this
5702 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
5703 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
5704 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
5710 /* If X is a machine specific address (i.e. a symbol or label being
5711 referenced as a displacement from the GOT implemented using an
5712 UNSPEC), then return the base term. Otherwise return X. */
5715 ix86_find_base_term (rtx x
)
5721 if (GET_CODE (x
) != CONST
)
5724 if (GET_CODE (term
) == PLUS
5725 && (GET_CODE (XEXP (term
, 1)) == CONST_INT
5726 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
5727 term
= XEXP (term
, 0);
5728 if (GET_CODE (term
) != UNSPEC
5729 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
5732 term
= XVECEXP (term
, 0, 0);
5734 if (GET_CODE (term
) != SYMBOL_REF
5735 && GET_CODE (term
) != LABEL_REF
)
5741 term
= ix86_delegitimize_address (x
);
5743 if (GET_CODE (term
) != SYMBOL_REF
5744 && GET_CODE (term
) != LABEL_REF
)
5750 /* Determine if a given RTX is a valid constant. We already know this
5751 satisfies CONSTANT_P. */
5754 legitimate_constant_p (rtx x
)
5758 switch (GET_CODE (x
))
5761 /* TLS symbols are not constant. */
5762 if (tls_symbolic_operand (x
, Pmode
))
5767 inner
= XEXP (x
, 0);
5769 /* Offsets of TLS symbols are never valid.
5770 Discourage CSE from creating them. */
5771 if (GET_CODE (inner
) == PLUS
5772 && tls_symbolic_operand (XEXP (inner
, 0), Pmode
))
5775 if (GET_CODE (inner
) == PLUS
)
5777 if (GET_CODE (XEXP (inner
, 1)) != CONST_INT
)
5779 inner
= XEXP (inner
, 0);
5782 /* Only some unspecs are valid as "constants". */
5783 if (GET_CODE (inner
) == UNSPEC
)
5784 switch (XINT (inner
, 1))
5788 return local_exec_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
5790 return local_dynamic_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
5800 /* Otherwise we handle everything else in the move patterns. */
5804 /* Determine if it's legal to put X into the constant pool. This
5805 is not possible for the address of thread-local symbols, which
5806 is checked above. */
5809 ix86_cannot_force_const_mem (rtx x
)
5811 return !legitimate_constant_p (x
);
5814 /* Determine if a given RTX is a valid constant address. */
5817 constant_address_p (rtx x
)
5819 return CONSTANT_P (x
) && legitimate_address_p (Pmode
, x
, 1);
5822 /* Nonzero if the constant value X is a legitimate general operand
5823 when generating PIC code. It is given that flag_pic is on and
5824 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5827 legitimate_pic_operand_p (rtx x
)
5831 switch (GET_CODE (x
))
5834 inner
= XEXP (x
, 0);
5836 /* Only some unspecs are valid as "constants". */
5837 if (GET_CODE (inner
) == UNSPEC
)
5838 switch (XINT (inner
, 1))
5841 return local_exec_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
5849 return legitimate_pic_address_disp_p (x
);
5856 /* Determine if a given CONST RTX is a valid memory displacement
5860 legitimate_pic_address_disp_p (rtx disp
)
5864 /* In 64bit mode we can allow direct addresses of symbols and labels
5865 when they are not dynamic symbols. */
5868 /* TLS references should always be enclosed in UNSPEC. */
5869 if (tls_symbolic_operand (disp
, GET_MODE (disp
)))
5871 if (GET_CODE (disp
) == SYMBOL_REF
5872 && ix86_cmodel
== CM_SMALL_PIC
5873 && SYMBOL_REF_LOCAL_P (disp
))
5875 if (GET_CODE (disp
) == LABEL_REF
)
5877 if (GET_CODE (disp
) == CONST
5878 && GET_CODE (XEXP (disp
, 0)) == PLUS
)
5880 rtx op0
= XEXP (XEXP (disp
, 0), 0);
5881 rtx op1
= XEXP (XEXP (disp
, 0), 1);
5883 /* TLS references should always be enclosed in UNSPEC. */
5884 if (tls_symbolic_operand (op0
, GET_MODE (op0
)))
5886 if (((GET_CODE (op0
) == SYMBOL_REF
5887 && ix86_cmodel
== CM_SMALL_PIC
5888 && SYMBOL_REF_LOCAL_P (op0
))
5889 || GET_CODE (op0
) == LABEL_REF
)
5890 && GET_CODE (op1
) == CONST_INT
5891 && INTVAL (op1
) < 16*1024*1024
5892 && INTVAL (op1
) >= -16*1024*1024)
5896 if (GET_CODE (disp
) != CONST
)
5898 disp
= XEXP (disp
, 0);
5902 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5903 of GOT tables. We should not need these anyway. */
5904 if (GET_CODE (disp
) != UNSPEC
5905 || XINT (disp
, 1) != UNSPEC_GOTPCREL
)
5908 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
5909 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
5915 if (GET_CODE (disp
) == PLUS
)
5917 if (GET_CODE (XEXP (disp
, 1)) != CONST_INT
)
5919 disp
= XEXP (disp
, 0);
5923 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5924 if (TARGET_MACHO
&& GET_CODE (disp
) == MINUS
)
5926 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
5927 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
5928 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
5930 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
5931 if (! strcmp (sym_name
, "<pic base>"))
5936 if (GET_CODE (disp
) != UNSPEC
)
5939 switch (XINT (disp
, 1))
5944 return GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
;
5946 if (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
5947 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
5948 return local_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5950 case UNSPEC_GOTTPOFF
:
5951 case UNSPEC_GOTNTPOFF
:
5952 case UNSPEC_INDNTPOFF
:
5955 return initial_exec_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5957 return local_exec_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5959 return local_dynamic_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5965 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5966 memory address for an instruction. The MODE argument is the machine mode
5967 for the MEM expression that wants to use this address.
5969 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5970 convert common non-canonical forms to canonical form so that they will
5974 legitimate_address_p (enum machine_mode mode
, rtx addr
, int strict
)
5976 struct ix86_address parts
;
5977 rtx base
, index
, disp
;
5978 HOST_WIDE_INT scale
;
5979 const char *reason
= NULL
;
5980 rtx reason_rtx
= NULL_RTX
;
5982 if (TARGET_DEBUG_ADDR
)
5985 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5986 GET_MODE_NAME (mode
), strict
);
5990 if (ix86_decompose_address (addr
, &parts
) <= 0)
5992 reason
= "decomposition failed";
5997 index
= parts
.index
;
5999 scale
= parts
.scale
;
6001 /* Validate base register.
6003 Don't allow SUBREG's here, it can lead to spill failures when the base
6004 is one word out of a two word structure, which is represented internally
6011 if (GET_CODE (base
) != REG
)
6013 reason
= "base is not a register";
6017 if (GET_MODE (base
) != Pmode
)
6019 reason
= "base is not in Pmode";
6023 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (base
))
6024 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (base
)))
6026 reason
= "base is not valid";
6031 /* Validate index register.
6033 Don't allow SUBREG's here, it can lead to spill failures when the index
6034 is one word out of a two word structure, which is represented internally
6041 if (GET_CODE (index
) != REG
)
6043 reason
= "index is not a register";
6047 if (GET_MODE (index
) != Pmode
)
6049 reason
= "index is not in Pmode";
6053 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (index
))
6054 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (index
)))
6056 reason
= "index is not valid";
6061 /* Validate scale factor. */
6064 reason_rtx
= GEN_INT (scale
);
6067 reason
= "scale without index";
6071 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
6073 reason
= "scale is not a valid multiplier";
6078 /* Validate displacement. */
6083 if (GET_CODE (disp
) == CONST
6084 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
6085 switch (XINT (XEXP (disp
, 0), 1))
6089 case UNSPEC_GOTPCREL
:
6092 goto is_legitimate_pic
;
6094 case UNSPEC_GOTTPOFF
:
6095 case UNSPEC_GOTNTPOFF
:
6096 case UNSPEC_INDNTPOFF
:
6102 reason
= "invalid address unspec";
6106 else if (flag_pic
&& (SYMBOLIC_CONST (disp
)
6108 && !machopic_operand_p (disp
)
6113 if (TARGET_64BIT
&& (index
|| base
))
6115 /* foo@dtpoff(%rX) is ok. */
6116 if (GET_CODE (disp
) != CONST
6117 || GET_CODE (XEXP (disp
, 0)) != PLUS
6118 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
6119 || GET_CODE (XEXP (XEXP (disp
, 0), 1)) != CONST_INT
6120 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
6121 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
6123 reason
= "non-constant pic memory reference";
6127 else if (! legitimate_pic_address_disp_p (disp
))
6129 reason
= "displacement is an invalid pic construct";
6133 /* This code used to verify that a symbolic pic displacement
6134 includes the pic_offset_table_rtx register.
6136 While this is good idea, unfortunately these constructs may
6137 be created by "adds using lea" optimization for incorrect
6146 This code is nonsensical, but results in addressing
6147 GOT table with pic_offset_table_rtx base. We can't
6148 just refuse it easily, since it gets matched by
6149 "addsi3" pattern, that later gets split to lea in the
6150 case output register differs from input. While this
6151 can be handled by separate addsi pattern for this case
6152 that never results in lea, this seems to be easier and
6153 correct fix for crash to disable this test. */
6155 else if (GET_CODE (disp
) != LABEL_REF
6156 && GET_CODE (disp
) != CONST_INT
6157 && (GET_CODE (disp
) != CONST
6158 || !legitimate_constant_p (disp
))
6159 && (GET_CODE (disp
) != SYMBOL_REF
6160 || !legitimate_constant_p (disp
)))
6162 reason
= "displacement is not constant";
6165 else if (TARGET_64BIT
&& !x86_64_sign_extended_value (disp
))
6167 reason
= "displacement is out of range";
6172 /* Everything looks valid. */
6173 if (TARGET_DEBUG_ADDR
)
6174 fprintf (stderr
, "Success.\n");
6178 if (TARGET_DEBUG_ADDR
)
6180 fprintf (stderr
, "Error: %s\n", reason
);
6181 debug_rtx (reason_rtx
);
6186 /* Return an unique alias set for the GOT. */
6188 static HOST_WIDE_INT
6189 ix86_GOT_alias_set (void)
6191 static HOST_WIDE_INT set
= -1;
6193 set
= new_alias_set ();
6197 /* Return a legitimate reference for ORIG (an address) using the
6198 register REG. If REG is 0, a new pseudo is generated.
6200 There are two types of references that must be handled:
6202 1. Global data references must load the address from the GOT, via
6203 the PIC reg. An insn is emitted to do this load, and the reg is
6206 2. Static data references, constant pool addresses, and code labels
6207 compute the address as an offset from the GOT, whose base is in
6208 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6209 differentiate them from global data objects. The returned
6210 address is the PIC reg + an unspec constant.
6212 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6213 reg also appears in the address. */
6216 legitimize_pic_address (rtx orig
, rtx reg
)
6224 reg
= gen_reg_rtx (Pmode
);
6225 /* Use the generic Mach-O PIC machinery. */
6226 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
6229 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
6231 else if (!TARGET_64BIT
&& local_symbolic_operand (addr
, Pmode
))
6233 /* This symbol may be referenced via a displacement from the PIC
6234 base address (@GOTOFF). */
6236 if (reload_in_progress
)
6237 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6238 if (GET_CODE (addr
) == CONST
)
6239 addr
= XEXP (addr
, 0);
6240 if (GET_CODE (addr
) == PLUS
)
6242 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
6243 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
6246 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
6247 new = gen_rtx_CONST (Pmode
, new);
6248 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6252 emit_move_insn (reg
, new);
6256 else if (GET_CODE (addr
) == SYMBOL_REF
)
6260 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
6261 new = gen_rtx_CONST (Pmode
, new);
6262 new = gen_rtx_MEM (Pmode
, new);
6263 RTX_UNCHANGING_P (new) = 1;
6264 set_mem_alias_set (new, ix86_GOT_alias_set ());
6267 reg
= gen_reg_rtx (Pmode
);
6268 /* Use directly gen_movsi, otherwise the address is loaded
6269 into register for CSE. We don't want to CSE this addresses,
6270 instead we CSE addresses from the GOT table, so skip this. */
6271 emit_insn (gen_movsi (reg
, new));
6276 /* This symbol must be referenced via a load from the
6277 Global Offset Table (@GOT). */
6279 if (reload_in_progress
)
6280 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6281 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
6282 new = gen_rtx_CONST (Pmode
, new);
6283 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6284 new = gen_rtx_MEM (Pmode
, new);
6285 RTX_UNCHANGING_P (new) = 1;
6286 set_mem_alias_set (new, ix86_GOT_alias_set ());
6289 reg
= gen_reg_rtx (Pmode
);
6290 emit_move_insn (reg
, new);
6296 if (GET_CODE (addr
) == CONST
)
6298 addr
= XEXP (addr
, 0);
6300 /* We must match stuff we generate before. Assume the only
6301 unspecs that can get here are ours. Not that we could do
6302 anything with them anyway.... */
6303 if (GET_CODE (addr
) == UNSPEC
6304 || (GET_CODE (addr
) == PLUS
6305 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
6307 if (GET_CODE (addr
) != PLUS
)
6310 if (GET_CODE (addr
) == PLUS
)
6312 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
6314 /* Check first to see if this is a constant offset from a @GOTOFF
6315 symbol reference. */
6316 if (local_symbolic_operand (op0
, Pmode
)
6317 && GET_CODE (op1
) == CONST_INT
)
6321 if (reload_in_progress
)
6322 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6323 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
6325 new = gen_rtx_PLUS (Pmode
, new, op1
);
6326 new = gen_rtx_CONST (Pmode
, new);
6327 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6331 emit_move_insn (reg
, new);
6337 if (INTVAL (op1
) < -16*1024*1024
6338 || INTVAL (op1
) >= 16*1024*1024)
6339 new = gen_rtx_PLUS (Pmode
, op0
, force_reg (Pmode
, op1
));
6344 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
6345 new = legitimize_pic_address (XEXP (addr
, 1),
6346 base
== reg
? NULL_RTX
: reg
);
6348 if (GET_CODE (new) == CONST_INT
)
6349 new = plus_constant (base
, INTVAL (new));
6352 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
6354 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
6355 new = XEXP (new, 1);
6357 new = gen_rtx_PLUS (Pmode
, base
, new);
6365 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6368 get_thread_pointer (int to_reg
)
6372 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
6376 reg
= gen_reg_rtx (Pmode
);
6377 insn
= gen_rtx_SET (VOIDmode
, reg
, tp
);
6378 insn
= emit_insn (insn
);
6383 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6384 false if we expect this to be used for a memory address and true if
6385 we expect to load the address into a register. */
6388 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
6390 rtx dest
, base
, off
, pic
;
6395 case TLS_MODEL_GLOBAL_DYNAMIC
:
6396 dest
= gen_reg_rtx (Pmode
);
6399 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
6402 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
6403 insns
= get_insns ();
6406 emit_libcall_block (insns
, dest
, rax
, x
);
6409 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
6412 case TLS_MODEL_LOCAL_DYNAMIC
:
6413 base
= gen_reg_rtx (Pmode
);
6416 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
6419 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
6420 insns
= get_insns ();
6423 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
6424 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
6425 emit_libcall_block (insns
, base
, rax
, note
);
6428 emit_insn (gen_tls_local_dynamic_base_32 (base
));
6430 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
6431 off
= gen_rtx_CONST (Pmode
, off
);
6433 return gen_rtx_PLUS (Pmode
, base
, off
);
6435 case TLS_MODEL_INITIAL_EXEC
:
6439 type
= UNSPEC_GOTNTPOFF
;
6443 if (reload_in_progress
)
6444 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6445 pic
= pic_offset_table_rtx
;
6446 type
= TARGET_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
6448 else if (!TARGET_GNU_TLS
)
6450 pic
= gen_reg_rtx (Pmode
);
6451 emit_insn (gen_set_got (pic
));
6452 type
= UNSPEC_GOTTPOFF
;
6457 type
= UNSPEC_INDNTPOFF
;
6460 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
6461 off
= gen_rtx_CONST (Pmode
, off
);
6463 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
6464 off
= gen_rtx_MEM (Pmode
, off
);
6465 RTX_UNCHANGING_P (off
) = 1;
6466 set_mem_alias_set (off
, ix86_GOT_alias_set ());
6468 if (TARGET_64BIT
|| TARGET_GNU_TLS
)
6470 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
6471 off
= force_reg (Pmode
, off
);
6472 return gen_rtx_PLUS (Pmode
, base
, off
);
6476 base
= get_thread_pointer (true);
6477 dest
= gen_reg_rtx (Pmode
);
6478 emit_insn (gen_subsi3 (dest
, base
, off
));
6482 case TLS_MODEL_LOCAL_EXEC
:
6483 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
6484 (TARGET_64BIT
|| TARGET_GNU_TLS
)
6485 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
6486 off
= gen_rtx_CONST (Pmode
, off
);
6488 if (TARGET_64BIT
|| TARGET_GNU_TLS
)
6490 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
6491 return gen_rtx_PLUS (Pmode
, base
, off
);
6495 base
= get_thread_pointer (true);
6496 dest
= gen_reg_rtx (Pmode
);
6497 emit_insn (gen_subsi3 (dest
, base
, off
));
6508 /* Try machine-dependent ways of modifying an illegitimate address
6509 to be legitimate. If we find one, return the new, valid address.
6510 This macro is used in only one place: `memory_address' in explow.c.
6512 OLDX is the address as it was before break_out_memory_refs was called.
6513 In some cases it is useful to look at this to decide what needs to be done.
6515 MODE and WIN are passed so that this macro can use
6516 GO_IF_LEGITIMATE_ADDRESS.
6518 It is always safe for this macro to do nothing. It exists to recognize
6519 opportunities to optimize the output.
6521 For the 80386, we handle X+REG by loading X into a register R and
6522 using R+REG. R will go in a general reg and indexing will be used.
6523 However, if REG is a broken-out memory address or multiplication,
6524 nothing needs to be done because REG can certainly go in a general reg.
6526 When -fpic is used, special handling is needed for symbolic references.
6527 See comments by legitimize_pic_address in i386.c for details. */
6530 legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
, enum machine_mode mode
)
6535 if (TARGET_DEBUG_ADDR
)
6537 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6538 GET_MODE_NAME (mode
));
6542 log
= tls_symbolic_operand (x
, mode
);
6544 return legitimize_tls_address (x
, log
, false);
6546 if (flag_pic
&& SYMBOLIC_CONST (x
))
6547 return legitimize_pic_address (x
, 0);
6549 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6550 if (GET_CODE (x
) == ASHIFT
6551 && GET_CODE (XEXP (x
, 1)) == CONST_INT
6552 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (x
, 1)))) < 4)
6555 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
6556 GEN_INT (1 << log
));
6559 if (GET_CODE (x
) == PLUS
)
6561 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6563 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
6564 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
6565 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)))) < 4)
6568 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
6569 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
6570 GEN_INT (1 << log
));
6573 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
6574 && GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
6575 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x
, 1), 1)))) < 4)
6578 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
6579 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
6580 GEN_INT (1 << log
));
6583 /* Put multiply first if it isn't already. */
6584 if (GET_CODE (XEXP (x
, 1)) == MULT
)
6586 rtx tmp
= XEXP (x
, 0);
6587 XEXP (x
, 0) = XEXP (x
, 1);
6592 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6593 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6594 created by virtual register instantiation, register elimination, and
6595 similar optimizations. */
6596 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
6599 x
= gen_rtx_PLUS (Pmode
,
6600 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
6601 XEXP (XEXP (x
, 1), 0)),
6602 XEXP (XEXP (x
, 1), 1));
6606 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6607 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6608 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
6609 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
6610 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
6611 && CONSTANT_P (XEXP (x
, 1)))
6614 rtx other
= NULL_RTX
;
6616 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
6618 constant
= XEXP (x
, 1);
6619 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
6621 else if (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 1), 1)) == CONST_INT
)
6623 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
6624 other
= XEXP (x
, 1);
6632 x
= gen_rtx_PLUS (Pmode
,
6633 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
6634 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
6635 plus_constant (other
, INTVAL (constant
)));
6639 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
6642 if (GET_CODE (XEXP (x
, 0)) == MULT
)
6645 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
6648 if (GET_CODE (XEXP (x
, 1)) == MULT
)
6651 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
6655 && GET_CODE (XEXP (x
, 1)) == REG
6656 && GET_CODE (XEXP (x
, 0)) == REG
)
6659 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
6662 x
= legitimize_pic_address (x
, 0);
6665 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
6668 if (GET_CODE (XEXP (x
, 0)) == REG
)
6670 rtx temp
= gen_reg_rtx (Pmode
);
6671 rtx val
= force_operand (XEXP (x
, 1), temp
);
6673 emit_move_insn (temp
, val
);
6679 else if (GET_CODE (XEXP (x
, 1)) == REG
)
6681 rtx temp
= gen_reg_rtx (Pmode
);
6682 rtx val
= force_operand (XEXP (x
, 0), temp
);
6684 emit_move_insn (temp
, val
);
6694 /* Print an integer constant expression in assembler syntax. Addition
6695 and subtraction are the only arithmetic that may appear in these
6696 expressions. FILE is the stdio stream to write to, X is the rtx, and
6697 CODE is the operand print code from the output string. */
6700 output_pic_addr_const (FILE *file
, rtx x
, int code
)
6704 switch (GET_CODE (x
))
6714 assemble_name (file
, XSTR (x
, 0));
6715 if (!TARGET_MACHO
&& code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
6716 fputs ("@PLT", file
);
6723 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
6724 assemble_name (asm_out_file
, buf
);
6728 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
6732 /* This used to output parentheses around the expression,
6733 but that does not work on the 386 (either ATT or BSD assembler). */
6734 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6738 if (GET_MODE (x
) == VOIDmode
)
6740 /* We can use %d if the number is <32 bits and positive. */
6741 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
6742 fprintf (file
, "0x%lx%08lx",
6743 (unsigned long) CONST_DOUBLE_HIGH (x
),
6744 (unsigned long) CONST_DOUBLE_LOW (x
));
6746 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
6749 /* We can't handle floating point constants;
6750 PRINT_OPERAND must handle them. */
6751 output_operand_lossage ("floating constant misused");
6755 /* Some assemblers need integer constants to appear first. */
6756 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
)
6758 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6760 output_pic_addr_const (file
, XEXP (x
, 1), code
);
6762 else if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
6764 output_pic_addr_const (file
, XEXP (x
, 1), code
);
6766 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6774 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
6775 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6777 output_pic_addr_const (file
, XEXP (x
, 1), code
);
6779 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
6783 if (XVECLEN (x
, 0) != 1)
6785 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
6786 switch (XINT (x
, 1))
6789 fputs ("@GOT", file
);
6792 fputs ("@GOTOFF", file
);
6794 case UNSPEC_GOTPCREL
:
6795 fputs ("@GOTPCREL(%rip)", file
);
6797 case UNSPEC_GOTTPOFF
:
6798 /* FIXME: This might be @TPOFF in Sun ld too. */
6799 fputs ("@GOTTPOFF", file
);
6802 fputs ("@TPOFF", file
);
6806 fputs ("@TPOFF", file
);
6808 fputs ("@NTPOFF", file
);
6811 fputs ("@DTPOFF", file
);
6813 case UNSPEC_GOTNTPOFF
:
6815 fputs ("@GOTTPOFF(%rip)", file
);
6817 fputs ("@GOTNTPOFF", file
);
6819 case UNSPEC_INDNTPOFF
:
6820 fputs ("@INDNTPOFF", file
);
6823 output_operand_lossage ("invalid UNSPEC as operand");
6829 output_operand_lossage ("invalid expression as operand");
6833 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6834 We need to handle our special PIC relocations. */
6837 i386_dwarf_output_addr_const (FILE *file
, rtx x
)
6840 fprintf (file
, "%s", TARGET_64BIT
? ASM_QUAD
: ASM_LONG
);
6844 fprintf (file
, "%s", ASM_LONG
);
6847 output_pic_addr_const (file
, x
, '\0');
6849 output_addr_const (file
, x
);
6853 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6854 We need to emit DTP-relative relocations. */
6857 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
6859 fputs (ASM_LONG
, file
);
6860 output_addr_const (file
, x
);
6861 fputs ("@DTPOFF", file
);
6867 fputs (", 0", file
);
6874 /* In the name of slightly smaller debug output, and to cater to
6875 general assembler losage, recognize PIC+GOTOFF and turn it back
6876 into a direct symbol reference. */
6879 ix86_delegitimize_address (rtx orig_x
)
6883 if (GET_CODE (x
) == MEM
)
6888 if (GET_CODE (x
) != CONST
6889 || GET_CODE (XEXP (x
, 0)) != UNSPEC
6890 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
6891 || GET_CODE (orig_x
) != MEM
)
6893 return XVECEXP (XEXP (x
, 0), 0, 0);
6896 if (GET_CODE (x
) != PLUS
6897 || GET_CODE (XEXP (x
, 1)) != CONST
)
6900 if (GET_CODE (XEXP (x
, 0)) == REG
6901 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
6902 /* %ebx + GOT/GOTOFF */
6904 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
6906 /* %ebx + %reg * scale + GOT/GOTOFF */
6908 if (GET_CODE (XEXP (y
, 0)) == REG
6909 && REGNO (XEXP (y
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
6911 else if (GET_CODE (XEXP (y
, 1)) == REG
6912 && REGNO (XEXP (y
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
6916 if (GET_CODE (y
) != REG
6917 && GET_CODE (y
) != MULT
6918 && GET_CODE (y
) != ASHIFT
)
6924 x
= XEXP (XEXP (x
, 1), 0);
6925 if (GET_CODE (x
) == UNSPEC
6926 && ((XINT (x
, 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
6927 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& GET_CODE (orig_x
) != MEM
)))
6930 return gen_rtx_PLUS (Pmode
, y
, XVECEXP (x
, 0, 0));
6931 return XVECEXP (x
, 0, 0);
6934 if (GET_CODE (x
) == PLUS
6935 && GET_CODE (XEXP (x
, 0)) == UNSPEC
6936 && GET_CODE (XEXP (x
, 1)) == CONST_INT
6937 && ((XINT (XEXP (x
, 0), 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
6938 || (XINT (XEXP (x
, 0), 1) == UNSPEC_GOTOFF
6939 && GET_CODE (orig_x
) != MEM
)))
6941 x
= gen_rtx_PLUS (VOIDmode
, XVECEXP (XEXP (x
, 0), 0, 0), XEXP (x
, 1));
6943 return gen_rtx_PLUS (Pmode
, y
, x
);
6951 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
6956 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
6958 enum rtx_code second_code
, bypass_code
;
6959 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
6960 if (bypass_code
!= NIL
|| second_code
!= NIL
)
6962 code
= ix86_fp_compare_code_to_integer (code
);
6966 code
= reverse_condition (code
);
6977 if (mode
!= CCmode
&& mode
!= CCNOmode
&& mode
!= CCGCmode
)
6982 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6983 Those same assemblers have the same but opposite losage on cmov. */
6986 suffix
= fp
? "nbe" : "a";
6989 if (mode
== CCNOmode
|| mode
== CCGOCmode
)
6991 else if (mode
== CCmode
|| mode
== CCGCmode
)
7002 if (mode
== CCNOmode
|| mode
== CCGOCmode
)
7004 else if (mode
== CCmode
|| mode
== CCGCmode
)
7013 suffix
= fp
? "nb" : "ae";
7016 if (mode
!= CCmode
&& mode
!= CCGCmode
&& mode
!= CCNOmode
)
7026 suffix
= fp
? "u" : "p";
7029 suffix
= fp
? "nu" : "np";
7034 fputs (suffix
, file
);
7037 /* Print the name of register X to FILE based on its machine mode and number.
7038 If CODE is 'w', pretend the mode is HImode.
7039 If CODE is 'b', pretend the mode is QImode.
7040 If CODE is 'k', pretend the mode is SImode.
7041 If CODE is 'q', pretend the mode is DImode.
7042 If CODE is 'h', pretend the reg is the `high' byte register.
7043 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7046 print_reg (rtx x
, int code
, FILE *file
)
7048 if (REGNO (x
) == ARG_POINTER_REGNUM
7049 || REGNO (x
) == FRAME_POINTER_REGNUM
7050 || REGNO (x
) == FLAGS_REG
7051 || REGNO (x
) == FPSR_REG
)
7054 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
7057 if (code
== 'w' || MMX_REG_P (x
))
7059 else if (code
== 'b')
7061 else if (code
== 'k')
7063 else if (code
== 'q')
7065 else if (code
== 'y')
7067 else if (code
== 'h')
7070 code
= GET_MODE_SIZE (GET_MODE (x
));
7072 /* Irritatingly, AMD extended registers use different naming convention
7073 from the normal registers. */
7074 if (REX_INT_REG_P (x
))
7081 error ("extended registers have no high halves");
7084 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7087 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7090 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7093 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7096 error ("unsupported operand size for extended register");
7104 if (STACK_TOP_P (x
))
7106 fputs ("st(0)", file
);
7113 if (! ANY_FP_REG_P (x
))
7114 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
7119 fputs (hi_reg_name
[REGNO (x
)], file
);
7122 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
7124 fputs (qi_reg_name
[REGNO (x
)], file
);
7127 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
7129 fputs (qi_high_reg_name
[REGNO (x
)], file
);
7136 /* Locate some local-dynamic symbol still in use by this function
7137 so that we can print its name in some tls_local_dynamic_base
7141 get_some_local_dynamic_name (void)
7145 if (cfun
->machine
->some_ld_name
)
7146 return cfun
->machine
->some_ld_name
;
7148 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
7150 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
7151 return cfun
->machine
->some_ld_name
;
7157 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
7161 if (GET_CODE (x
) == SYMBOL_REF
7162 && local_dynamic_symbolic_operand (x
, Pmode
))
7164 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
7172 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7173 C -- print opcode suffix for set/cmov insn.
7174 c -- like C, but print reversed condition
7175 F,f -- likewise, but for floating-point.
7176 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7178 R -- print the prefix for register names.
7179 z -- print the opcode suffix for the size of the current operand.
7180 * -- print a star (in certain assembler syntax)
7181 A -- print an absolute memory reference.
7182 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7183 s -- print a shift double count, followed by the assemblers argument
7185 b -- print the QImode name of the register for the indicated operand.
7186 %b0 would print %al if operands[0] is reg 0.
7187 w -- likewise, print the HImode name of the register.
7188 k -- likewise, print the SImode name of the register.
7189 q -- likewise, print the DImode name of the register.
7190 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7191 y -- print "st(0)" instead of "st" as a register.
7192 D -- print condition for SSE cmp instruction.
7193 P -- if PIC, print an @PLT suffix.
7194 X -- don't print any sort of PIC '@' suffix for a symbol.
7195 & -- print some in-use local-dynamic symbol name.
7199 print_operand (FILE *file
, rtx x
, int code
)
7206 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7211 assemble_name (file
, get_some_local_dynamic_name ());
7215 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7217 else if (ASSEMBLER_DIALECT
== ASM_INTEL
)
7219 /* Intel syntax. For absolute addresses, registers should not
7220 be surrounded by braces. */
7221 if (GET_CODE (x
) != REG
)
7224 PRINT_OPERAND (file
, x
, 0);
7232 PRINT_OPERAND (file
, x
, 0);
7237 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7242 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7247 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7252 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7257 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7262 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7267 /* 387 opcodes don't get size suffixes if the operands are
7269 if (STACK_REG_P (x
))
7272 /* Likewise if using Intel opcodes. */
7273 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
7276 /* This is the size of op from size of operand. */
7277 switch (GET_MODE_SIZE (GET_MODE (x
)))
7280 #ifdef HAVE_GAS_FILDS_FISTS
7286 if (GET_MODE (x
) == SFmode
)
7301 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
7303 #ifdef GAS_MNEMONICS
7329 if (GET_CODE (x
) == CONST_INT
|| ! SHIFT_DOUBLE_OMITS_COUNT
)
7331 PRINT_OPERAND (file
, x
, 0);
7337 /* Little bit of braindamage here. The SSE compare instructions
7338 does use completely different names for the comparisons that the
7339 fp conditional moves. */
7340 switch (GET_CODE (x
))
7355 fputs ("unord", file
);
7359 fputs ("neq", file
);
7363 fputs ("nlt", file
);
7367 fputs ("nle", file
);
7370 fputs ("ord", file
);
7378 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7379 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7381 switch (GET_MODE (x
))
7383 case HImode
: putc ('w', file
); break;
7385 case SFmode
: putc ('l', file
); break;
7387 case DFmode
: putc ('q', file
); break;
7395 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
7398 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7399 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7402 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
7405 /* Like above, but reverse condition */
7407 /* Check to see if argument to %c is really a constant
7408 and not a condition code which needs to be reversed. */
7409 if (!COMPARISON_P (x
))
7411 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7414 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
7417 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7418 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7421 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
7427 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
7430 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
7433 int pred_val
= INTVAL (XEXP (x
, 0));
7435 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
7436 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
7438 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
7439 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
7441 /* Emit hints only in the case default branch prediction
7442 heuristics would fail. */
7443 if (taken
!= cputaken
)
7445 /* We use 3e (DS) prefix for taken branches and
7446 2e (CS) prefix for not taken branches. */
7448 fputs ("ds ; ", file
);
7450 fputs ("cs ; ", file
);
7457 output_operand_lossage ("invalid operand code `%c'", code
);
7461 if (GET_CODE (x
) == REG
)
7462 print_reg (x
, code
, file
);
7464 else if (GET_CODE (x
) == MEM
)
7466 /* No `byte ptr' prefix for call instructions. */
7467 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
7470 switch (GET_MODE_SIZE (GET_MODE (x
)))
7472 case 1: size
= "BYTE"; break;
7473 case 2: size
= "WORD"; break;
7474 case 4: size
= "DWORD"; break;
7475 case 8: size
= "QWORD"; break;
7476 case 12: size
= "XWORD"; break;
7477 case 16: size
= "XMMWORD"; break;
7482 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7485 else if (code
== 'w')
7487 else if (code
== 'k')
7491 fputs (" PTR ", file
);
7495 /* Avoid (%rip) for call operands. */
7496 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
7497 && GET_CODE (x
) != CONST_INT
)
7498 output_addr_const (file
, x
);
7499 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
7500 output_operand_lossage ("invalid constraints for operand");
7505 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
7510 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
7511 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
7513 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7515 fprintf (file
, "0x%08lx", l
);
7518 /* These float cases don't actually occur as immediate operands. */
7519 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
7523 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
7524 fprintf (file
, "%s", dstr
);
7527 else if (GET_CODE (x
) == CONST_DOUBLE
7528 && GET_MODE (x
) == XFmode
)
7532 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
7533 fprintf (file
, "%s", dstr
);
7540 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
7542 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7545 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
7546 || GET_CODE (x
) == LABEL_REF
)
7548 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7551 fputs ("OFFSET FLAT:", file
);
7554 if (GET_CODE (x
) == CONST_INT
)
7555 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
7557 output_pic_addr_const (file
, x
, code
);
7559 output_addr_const (file
, x
);
7563 /* Print a memory operand whose address is ADDR. */
7566 print_operand_address (FILE *file
, rtx addr
)
7568 struct ix86_address parts
;
7569 rtx base
, index
, disp
;
7572 if (! ix86_decompose_address (addr
, &parts
))
7576 index
= parts
.index
;
7578 scale
= parts
.scale
;
7586 if (USER_LABEL_PREFIX
[0] == 0)
7588 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
7594 if (!base
&& !index
)
7596 /* Displacement only requires special attention. */
7598 if (GET_CODE (disp
) == CONST_INT
)
7600 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
7602 if (USER_LABEL_PREFIX
[0] == 0)
7604 fputs ("ds:", file
);
7606 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
7609 output_pic_addr_const (file
, disp
, 0);
7611 output_addr_const (file
, disp
);
7613 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7615 && ((GET_CODE (disp
) == SYMBOL_REF
7616 && ! tls_symbolic_operand (disp
, GET_MODE (disp
)))
7617 || GET_CODE (disp
) == LABEL_REF
7618 || (GET_CODE (disp
) == CONST
7619 && GET_CODE (XEXP (disp
, 0)) == PLUS
7620 && (GET_CODE (XEXP (XEXP (disp
, 0), 0)) == SYMBOL_REF
7621 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) == LABEL_REF
)
7622 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)))
7623 fputs ("(%rip)", file
);
7627 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7632 output_pic_addr_const (file
, disp
, 0);
7633 else if (GET_CODE (disp
) == LABEL_REF
)
7634 output_asm_label (disp
);
7636 output_addr_const (file
, disp
);
7641 print_reg (base
, 0, file
);
7645 print_reg (index
, 0, file
);
7647 fprintf (file
, ",%d", scale
);
7653 rtx offset
= NULL_RTX
;
7657 /* Pull out the offset of a symbol; print any symbol itself. */
7658 if (GET_CODE (disp
) == CONST
7659 && GET_CODE (XEXP (disp
, 0)) == PLUS
7660 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
7662 offset
= XEXP (XEXP (disp
, 0), 1);
7663 disp
= gen_rtx_CONST (VOIDmode
,
7664 XEXP (XEXP (disp
, 0), 0));
7668 output_pic_addr_const (file
, disp
, 0);
7669 else if (GET_CODE (disp
) == LABEL_REF
)
7670 output_asm_label (disp
);
7671 else if (GET_CODE (disp
) == CONST_INT
)
7674 output_addr_const (file
, disp
);
7680 print_reg (base
, 0, file
);
7683 if (INTVAL (offset
) >= 0)
7685 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
7689 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
7696 print_reg (index
, 0, file
);
7698 fprintf (file
, "*%d", scale
);
7706 output_addr_const_extra (FILE *file
, rtx x
)
7710 if (GET_CODE (x
) != UNSPEC
)
7713 op
= XVECEXP (x
, 0, 0);
7714 switch (XINT (x
, 1))
7716 case UNSPEC_GOTTPOFF
:
7717 output_addr_const (file
, op
);
7718 /* FIXME: This might be @TPOFF in Sun ld. */
7719 fputs ("@GOTTPOFF", file
);
7722 output_addr_const (file
, op
);
7723 fputs ("@TPOFF", file
);
7726 output_addr_const (file
, op
);
7728 fputs ("@TPOFF", file
);
7730 fputs ("@NTPOFF", file
);
7733 output_addr_const (file
, op
);
7734 fputs ("@DTPOFF", file
);
7736 case UNSPEC_GOTNTPOFF
:
7737 output_addr_const (file
, op
);
7739 fputs ("@GOTTPOFF(%rip)", file
);
7741 fputs ("@GOTNTPOFF", file
);
7743 case UNSPEC_INDNTPOFF
:
7744 output_addr_const (file
, op
);
7745 fputs ("@INDNTPOFF", file
);
7755 /* Split one or more DImode RTL references into pairs of SImode
7756 references. The RTL can be REG, offsettable MEM, integer constant, or
7757 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7758 split and "num" is its length. lo_half and hi_half are output arrays
7759 that parallel "operands". */
7762 split_di (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
7766 rtx op
= operands
[num
];
7768 /* simplify_subreg refuse to split volatile memory addresses,
7769 but we still have to handle it. */
7770 if (GET_CODE (op
) == MEM
)
7772 lo_half
[num
] = adjust_address (op
, SImode
, 0);
7773 hi_half
[num
] = adjust_address (op
, SImode
, 4);
7777 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
7778 GET_MODE (op
) == VOIDmode
7779 ? DImode
: GET_MODE (op
), 0);
7780 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
7781 GET_MODE (op
) == VOIDmode
7782 ? DImode
: GET_MODE (op
), 4);
7786 /* Split one or more TImode RTL references into pairs of SImode
7787 references. The RTL can be REG, offsettable MEM, integer constant, or
7788 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7789 split and "num" is its length. lo_half and hi_half are output arrays
7790 that parallel "operands". */
7793 split_ti (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
7797 rtx op
= operands
[num
];
7799 /* simplify_subreg refuse to split volatile memory addresses, but we
7800 still have to handle it. */
7801 if (GET_CODE (op
) == MEM
)
7803 lo_half
[num
] = adjust_address (op
, DImode
, 0);
7804 hi_half
[num
] = adjust_address (op
, DImode
, 8);
7808 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
7809 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
7814 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7815 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7816 is the expression of the binary operation. The output may either be
7817 emitted here, or returned to the caller, like all output_* functions.
7819 There is no guarantee that the operands are the same mode, as they
7820 might be within FLOAT or FLOAT_EXTEND expressions. */
7822 #ifndef SYSV386_COMPAT
7823 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7824 wants to fix the assemblers because that causes incompatibility
7825 with gcc. No-one wants to fix gcc because that causes
7826 incompatibility with assemblers... You can use the option of
7827 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7828 #define SYSV386_COMPAT 1
7832 output_387_binary_op (rtx insn
, rtx
*operands
)
7834 static char buf
[30];
7837 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]) | SSE_REG_P (operands
[2]);
7839 #ifdef ENABLE_CHECKING
7840 /* Even if we do not want to check the inputs, this documents input
7841 constraints. Which helps in understanding the following code. */
7842 if (STACK_REG_P (operands
[0])
7843 && ((REG_P (operands
[1])
7844 && REGNO (operands
[0]) == REGNO (operands
[1])
7845 && (STACK_REG_P (operands
[2]) || GET_CODE (operands
[2]) == MEM
))
7846 || (REG_P (operands
[2])
7847 && REGNO (operands
[0]) == REGNO (operands
[2])
7848 && (STACK_REG_P (operands
[1]) || GET_CODE (operands
[1]) == MEM
)))
7849 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
7855 switch (GET_CODE (operands
[3]))
7858 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7859 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7867 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7868 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7876 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7877 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7885 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7886 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7900 if (GET_MODE (operands
[0]) == SFmode
)
7901 strcat (buf
, "ss\t{%2, %0|%0, %2}");
7903 strcat (buf
, "sd\t{%2, %0|%0, %2}");
7908 switch (GET_CODE (operands
[3]))
7912 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
7914 rtx temp
= operands
[2];
7915 operands
[2] = operands
[1];
7919 /* know operands[0] == operands[1]. */
7921 if (GET_CODE (operands
[2]) == MEM
)
7927 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
7929 if (STACK_TOP_P (operands
[0]))
7930 /* How is it that we are storing to a dead operand[2]?
7931 Well, presumably operands[1] is dead too. We can't
7932 store the result to st(0) as st(0) gets popped on this
7933 instruction. Instead store to operands[2] (which I
7934 think has to be st(1)). st(1) will be popped later.
7935 gcc <= 2.8.1 didn't have this check and generated
7936 assembly code that the Unixware assembler rejected. */
7937 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7939 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7943 if (STACK_TOP_P (operands
[0]))
7944 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7946 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7951 if (GET_CODE (operands
[1]) == MEM
)
7957 if (GET_CODE (operands
[2]) == MEM
)
7963 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
7966 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7967 derived assemblers, confusingly reverse the direction of
7968 the operation for fsub{r} and fdiv{r} when the
7969 destination register is not st(0). The Intel assembler
7970 doesn't have this brain damage. Read !SYSV386_COMPAT to
7971 figure out what the hardware really does. */
7972 if (STACK_TOP_P (operands
[0]))
7973 p
= "{p\t%0, %2|rp\t%2, %0}";
7975 p
= "{rp\t%2, %0|p\t%0, %2}";
7977 if (STACK_TOP_P (operands
[0]))
7978 /* As above for fmul/fadd, we can't store to st(0). */
7979 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7981 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7986 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
7989 if (STACK_TOP_P (operands
[0]))
7990 p
= "{rp\t%0, %1|p\t%1, %0}";
7992 p
= "{p\t%1, %0|rp\t%0, %1}";
7994 if (STACK_TOP_P (operands
[0]))
7995 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7997 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8002 if (STACK_TOP_P (operands
[0]))
8004 if (STACK_TOP_P (operands
[1]))
8005 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8007 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8010 else if (STACK_TOP_P (operands
[1]))
8013 p
= "{\t%1, %0|r\t%0, %1}";
8015 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8021 p
= "{r\t%2, %0|\t%0, %2}";
8023 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8036 /* Output code to initialize control word copies used by
8037 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
8038 is set to control word rounding downwards. */
8040 emit_i387_cw_initialization (rtx normal
, rtx round_down
)
8042 rtx reg
= gen_reg_rtx (HImode
);
8044 emit_insn (gen_x86_fnstcw_1 (normal
));
8045 emit_move_insn (reg
, normal
);
8046 if (!TARGET_PARTIAL_REG_STALL
&& !optimize_size
8048 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
8050 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0xc00)));
8051 emit_move_insn (round_down
, reg
);
8054 /* Output code for INSN to convert a float to a signed int. OPERANDS
8055 are the insn operands. The output may be [HSD]Imode and the input
8056 operand may be [SDX]Fmode. */
8059 output_fix_trunc (rtx insn
, rtx
*operands
)
8061 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
8062 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
8064 /* Jump through a hoop or two for DImode, since the hardware has no
8065 non-popping instruction. We used to do this a different way, but
8066 that was somewhat fragile and broke with post-reload splitters. */
8067 if (dimode_p
&& !stack_top_dies
)
8068 output_asm_insn ("fld\t%y1", operands
);
8070 if (!STACK_TOP_P (operands
[1]))
8073 if (GET_CODE (operands
[0]) != MEM
)
8076 output_asm_insn ("fldcw\t%3", operands
);
8077 if (stack_top_dies
|| dimode_p
)
8078 output_asm_insn ("fistp%z0\t%0", operands
);
8080 output_asm_insn ("fist%z0\t%0", operands
);
8081 output_asm_insn ("fldcw\t%2", operands
);
8086 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8087 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8088 when fucom should be used. */
8091 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
8094 rtx cmp_op0
= operands
[0];
8095 rtx cmp_op1
= operands
[1];
8096 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]);
8101 cmp_op1
= operands
[2];
8105 if (GET_MODE (operands
[0]) == SFmode
)
8107 return "ucomiss\t{%1, %0|%0, %1}";
8109 return "comiss\t{%1, %0|%0, %1}";
8112 return "ucomisd\t{%1, %0|%0, %1}";
8114 return "comisd\t{%1, %0|%0, %1}";
8117 if (! STACK_TOP_P (cmp_op0
))
8120 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
8122 if (STACK_REG_P (cmp_op1
)
8124 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
8125 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
8127 /* If both the top of the 387 stack dies, and the other operand
8128 is also a stack register that dies, then this must be a
8129 `fcompp' float compare */
8133 /* There is no double popping fcomi variant. Fortunately,
8134 eflags is immune from the fstp's cc clobbering. */
8136 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
8138 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
8146 return "fucompp\n\tfnstsw\t%0";
8148 return "fcompp\n\tfnstsw\t%0";
8161 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8163 static const char * const alt
[24] =
8175 "fcomi\t{%y1, %0|%0, %y1}",
8176 "fcomip\t{%y1, %0|%0, %y1}",
8177 "fucomi\t{%y1, %0|%0, %y1}",
8178 "fucomip\t{%y1, %0|%0, %y1}",
8185 "fcom%z2\t%y2\n\tfnstsw\t%0",
8186 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8187 "fucom%z2\t%y2\n\tfnstsw\t%0",
8188 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8190 "ficom%z2\t%y2\n\tfnstsw\t%0",
8191 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8199 mask
= eflags_p
<< 3;
8200 mask
|= (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
) << 2;
8201 mask
|= unordered_p
<< 1;
8202 mask
|= stack_top_dies
;
8215 ix86_output_addr_vec_elt (FILE *file
, int value
)
8217 const char *directive
= ASM_LONG
;
8222 directive
= ASM_QUAD
;
8228 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
8232 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
8235 fprintf (file
, "%s%s%d-%s%d\n",
8236 ASM_LONG
, LPREFIX
, value
, LPREFIX
, rel
);
8237 else if (HAVE_AS_GOTOFF_IN_DATA
)
8238 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
8240 else if (TARGET_MACHO
)
8242 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
8243 machopic_output_function_base_name (file
);
8244 fprintf(file
, "\n");
8248 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
8249 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
8252 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8256 ix86_expand_clear (rtx dest
)
8260 /* We play register width games, which are only valid after reload. */
8261 if (!reload_completed
)
8264 /* Avoid HImode and its attendant prefix byte. */
8265 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
8266 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
8268 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
8270 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8271 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
8273 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, 17));
8274 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
8280 /* X is an unchanging MEM. If it is a constant pool reference, return
8281 the constant pool rtx, else NULL. */
8284 maybe_get_pool_constant (rtx x
)
8286 x
= ix86_delegitimize_address (XEXP (x
, 0));
8288 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8289 return get_pool_constant (x
);
8295 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
8297 int strict
= (reload_in_progress
|| reload_completed
);
8299 enum tls_model model
;
8304 model
= tls_symbolic_operand (op1
, Pmode
);
8307 op1
= legitimize_tls_address (op1
, model
, true);
8308 op1
= force_operand (op1
, op0
);
8313 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
8318 rtx temp
= ((reload_in_progress
8319 || ((op0
&& GET_CODE (op0
) == REG
)
8321 ? op0
: gen_reg_rtx (Pmode
));
8322 op1
= machopic_indirect_data_reference (op1
, temp
);
8323 op1
= machopic_legitimize_pic_address (op1
, mode
,
8324 temp
== op1
? 0 : temp
);
8326 else if (MACHOPIC_INDIRECT
)
8327 op1
= machopic_indirect_data_reference (op1
, 0);
8331 if (GET_CODE (op0
) == MEM
)
8332 op1
= force_reg (Pmode
, op1
);
8336 if (GET_CODE (temp
) != REG
)
8337 temp
= gen_reg_rtx (Pmode
);
8338 temp
= legitimize_pic_address (op1
, temp
);
8343 #endif /* TARGET_MACHO */
8347 if (GET_CODE (op0
) == MEM
8348 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
8349 || !push_operand (op0
, mode
))
8350 && GET_CODE (op1
) == MEM
)
8351 op1
= force_reg (mode
, op1
);
8353 if (push_operand (op0
, mode
)
8354 && ! general_no_elim_operand (op1
, mode
))
8355 op1
= copy_to_mode_reg (mode
, op1
);
8357 /* Force large constants in 64bit compilation into register
8358 to get them CSEed. */
8359 if (TARGET_64BIT
&& mode
== DImode
8360 && immediate_operand (op1
, mode
)
8361 && !x86_64_zero_extended_value (op1
)
8362 && !register_operand (op0
, mode
)
8363 && optimize
&& !reload_completed
&& !reload_in_progress
)
8364 op1
= copy_to_mode_reg (mode
, op1
);
8366 if (FLOAT_MODE_P (mode
))
8368 /* If we are loading a floating point constant to a register,
8369 force the value to memory now, since we'll get better code
8370 out the back end. */
8374 else if (GET_CODE (op1
) == CONST_DOUBLE
)
8376 op1
= validize_mem (force_const_mem (mode
, op1
));
8377 if (!register_operand (op0
, mode
))
8379 rtx temp
= gen_reg_rtx (mode
);
8380 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
8381 emit_move_insn (op0
, temp
);
8388 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
8392 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
8394 /* Force constants other than zero into memory. We do not know how
8395 the instructions used to build constants modify the upper 64 bits
8396 of the register, once we have that information we may be able
8397 to handle some of them more efficiently. */
8398 if ((reload_in_progress
| reload_completed
) == 0
8399 && register_operand (operands
[0], mode
)
8400 && CONSTANT_P (operands
[1]) && operands
[1] != CONST0_RTX (mode
))
8401 operands
[1] = validize_mem (force_const_mem (mode
, operands
[1]));
8403 /* Make operand1 a register if it isn't already. */
8405 && !register_operand (operands
[0], mode
)
8406 && !register_operand (operands
[1], mode
))
8408 rtx temp
= force_reg (GET_MODE (operands
[1]), operands
[1]);
8409 emit_move_insn (operands
[0], temp
);
8413 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], operands
[1]));
8416 /* Attempt to expand a binary operator. Make the expansion closer to the
8417 actual machine, then just general_operand, which will allow 3 separate
8418 memory references (one output, two input) in a single insn. */
8421 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
8424 int matching_memory
;
8425 rtx src1
, src2
, dst
, op
, clob
;
8431 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8432 if (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
8433 && (rtx_equal_p (dst
, src2
)
8434 || immediate_operand (src1
, mode
)))
8441 /* If the destination is memory, and we do not have matching source
8442 operands, do things in registers. */
8443 matching_memory
= 0;
8444 if (GET_CODE (dst
) == MEM
)
8446 if (rtx_equal_p (dst
, src1
))
8447 matching_memory
= 1;
8448 else if (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
8449 && rtx_equal_p (dst
, src2
))
8450 matching_memory
= 2;
8452 dst
= gen_reg_rtx (mode
);
8455 /* Both source operands cannot be in memory. */
8456 if (GET_CODE (src1
) == MEM
&& GET_CODE (src2
) == MEM
)
8458 if (matching_memory
!= 2)
8459 src2
= force_reg (mode
, src2
);
8461 src1
= force_reg (mode
, src1
);
8464 /* If the operation is not commutable, source 1 cannot be a constant
8465 or non-matching memory. */
8466 if ((CONSTANT_P (src1
)
8467 || (!matching_memory
&& GET_CODE (src1
) == MEM
))
8468 && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
8469 src1
= force_reg (mode
, src1
);
8471 /* If optimizing, copy to regs to improve CSE */
8472 if (optimize
&& ! no_new_pseudos
)
8474 if (GET_CODE (dst
) == MEM
)
8475 dst
= gen_reg_rtx (mode
);
8476 if (GET_CODE (src1
) == MEM
)
8477 src1
= force_reg (mode
, src1
);
8478 if (GET_CODE (src2
) == MEM
)
8479 src2
= force_reg (mode
, src2
);
8482 /* Emit the instruction. */
8484 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
8485 if (reload_in_progress
)
8487 /* Reload doesn't know about the flags register, and doesn't know that
8488 it doesn't want to clobber it. We can only do this with PLUS. */
8495 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
8496 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
8499 /* Fix up the destination if needed. */
8500 if (dst
!= operands
[0])
8501 emit_move_insn (operands
[0], dst
);
8504 /* Return TRUE or FALSE depending on whether the binary operator meets the
8505 appropriate constraints. */
8508 ix86_binary_operator_ok (enum rtx_code code
,
8509 enum machine_mode mode ATTRIBUTE_UNUSED
,
8512 /* Both source operands cannot be in memory. */
8513 if (GET_CODE (operands
[1]) == MEM
&& GET_CODE (operands
[2]) == MEM
)
8515 /* If the operation is not commutable, source 1 cannot be a constant. */
8516 if (CONSTANT_P (operands
[1]) && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
8518 /* If the destination is memory, we must have a matching source operand. */
8519 if (GET_CODE (operands
[0]) == MEM
8520 && ! (rtx_equal_p (operands
[0], operands
[1])
8521 || (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
8522 && rtx_equal_p (operands
[0], operands
[2]))))
8524 /* If the operation is not commutable and the source 1 is memory, we must
8525 have a matching destination. */
8526 if (GET_CODE (operands
[1]) == MEM
8527 && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
8528 && ! rtx_equal_p (operands
[0], operands
[1]))
8533 /* Attempt to expand a unary operator. Make the expansion closer to the
8534 actual machine, then just general_operand, which will allow 2 separate
8535 memory references (one output, one input) in a single insn. */
8538 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
8541 int matching_memory
;
8542 rtx src
, dst
, op
, clob
;
8547 /* If the destination is memory, and we do not have matching source
8548 operands, do things in registers. */
8549 matching_memory
= 0;
8550 if (GET_CODE (dst
) == MEM
)
8552 if (rtx_equal_p (dst
, src
))
8553 matching_memory
= 1;
8555 dst
= gen_reg_rtx (mode
);
8558 /* When source operand is memory, destination must match. */
8559 if (!matching_memory
&& GET_CODE (src
) == MEM
)
8560 src
= force_reg (mode
, src
);
8562 /* If optimizing, copy to regs to improve CSE */
8563 if (optimize
&& ! no_new_pseudos
)
8565 if (GET_CODE (dst
) == MEM
)
8566 dst
= gen_reg_rtx (mode
);
8567 if (GET_CODE (src
) == MEM
)
8568 src
= force_reg (mode
, src
);
8571 /* Emit the instruction. */
8573 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
8574 if (reload_in_progress
|| code
== NOT
)
8576 /* Reload doesn't know about the flags register, and doesn't know that
8577 it doesn't want to clobber it. */
8584 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
8585 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
8588 /* Fix up the destination if needed. */
8589 if (dst
!= operands
[0])
8590 emit_move_insn (operands
[0], dst
);
8593 /* Return TRUE or FALSE depending on whether the unary operator meets the
8594 appropriate constraints. */
8597 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
8598 enum machine_mode mode ATTRIBUTE_UNUSED
,
8599 rtx operands
[2] ATTRIBUTE_UNUSED
)
8601 /* If one of operands is memory, source and destination must match. */
8602 if ((GET_CODE (operands
[0]) == MEM
8603 || GET_CODE (operands
[1]) == MEM
)
8604 && ! rtx_equal_p (operands
[0], operands
[1]))
8609 /* Return TRUE or FALSE depending on whether the first SET in INSN
8610 has source and destination with matching CC modes, and that the
8611 CC mode is at least as constrained as REQ_MODE. */
8614 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
8617 enum machine_mode set_mode
;
8619 set
= PATTERN (insn
);
8620 if (GET_CODE (set
) == PARALLEL
)
8621 set
= XVECEXP (set
, 0, 0);
8622 if (GET_CODE (set
) != SET
)
8624 if (GET_CODE (SET_SRC (set
)) != COMPARE
)
8627 set_mode
= GET_MODE (SET_DEST (set
));
8631 if (req_mode
!= CCNOmode
8632 && (req_mode
!= CCmode
8633 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
8637 if (req_mode
== CCGCmode
)
8641 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
8645 if (req_mode
== CCZmode
)
8655 return (GET_MODE (SET_SRC (set
)) == set_mode
);
8658 /* Generate insn patterns to do an integer compare of OPERANDS. */
8661 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
8663 enum machine_mode cmpmode
;
8666 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
8667 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
8669 /* This is very simple, but making the interface the same as in the
8670 FP case makes the rest of the code easier. */
8671 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
8672 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
8674 /* Return the test that should be put into the flags user, i.e.
8675 the bcc, scc, or cmov instruction. */
8676 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
8679 /* Figure out whether to use ordered or unordered fp comparisons.
8680 Return the appropriate mode to use. */
8683 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
8685 /* ??? In order to make all comparisons reversible, we do all comparisons
8686 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8687 all forms trapping and nontrapping comparisons, we can make inequality
8688 comparisons trapping again, since it results in better code when using
8689 FCOM based compares. */
8690 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
8694 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
8696 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
8697 return ix86_fp_compare_mode (code
);
8700 /* Only zero flag is needed. */
8702 case NE
: /* ZF!=0 */
8704 /* Codes needing carry flag. */
8705 case GEU
: /* CF=0 */
8706 case GTU
: /* CF=0 & ZF=0 */
8707 case LTU
: /* CF=1 */
8708 case LEU
: /* CF=1 | ZF=1 */
8710 /* Codes possibly doable only with sign flag when
8711 comparing against zero. */
8712 case GE
: /* SF=OF or SF=0 */
8713 case LT
: /* SF<>OF or SF=1 */
8714 if (op1
== const0_rtx
)
8717 /* For other cases Carry flag is not required. */
8719 /* Codes doable only with sign flag when comparing
8720 against zero, but we miss jump instruction for it
8721 so we need to use relational tests against overflow
8722 that thus needs to be zero. */
8723 case GT
: /* ZF=0 & SF=OF */
8724 case LE
: /* ZF=1 | SF<>OF */
8725 if (op1
== const0_rtx
)
8729 /* strcmp pattern do (use flags) and combine may ask us for proper
8738 /* Return the fixed registers used for condition codes. */
8741 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
8748 /* If two condition code modes are compatible, return a condition code
8749 mode which is compatible with both. Otherwise, return
8752 static enum machine_mode
8753 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
8758 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
8761 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
8762 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
8790 /* These are only compatible with themselves, which we already
8796 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8799 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED
)
8801 enum rtx_code swapped_code
= swap_condition (code
);
8802 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
8803 || (ix86_fp_comparison_cost (swapped_code
)
8804 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
8807 /* Swap, force into registers, or otherwise massage the two operands
8808 to a fp comparison. The operands are updated in place; the new
8809 comparison code is returned. */
8811 static enum rtx_code
8812 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
8814 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
8815 rtx op0
= *pop0
, op1
= *pop1
;
8816 enum machine_mode op_mode
= GET_MODE (op0
);
8817 int is_sse
= SSE_REG_P (op0
) | SSE_REG_P (op1
);
8819 /* All of the unordered compare instructions only work on registers.
8820 The same is true of the XFmode compare instructions. The same is
8821 true of the fcomi compare instructions. */
8824 && (fpcmp_mode
== CCFPUmode
8825 || op_mode
== XFmode
8826 || ix86_use_fcomi_compare (code
)))
8828 op0
= force_reg (op_mode
, op0
);
8829 op1
= force_reg (op_mode
, op1
);
8833 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8834 things around if they appear profitable, otherwise force op0
8837 if (standard_80387_constant_p (op0
) == 0
8838 || (GET_CODE (op0
) == MEM
8839 && ! (standard_80387_constant_p (op1
) == 0
8840 || GET_CODE (op1
) == MEM
)))
8843 tmp
= op0
, op0
= op1
, op1
= tmp
;
8844 code
= swap_condition (code
);
8847 if (GET_CODE (op0
) != REG
)
8848 op0
= force_reg (op_mode
, op0
);
8850 if (CONSTANT_P (op1
))
8852 if (standard_80387_constant_p (op1
))
8853 op1
= force_reg (op_mode
, op1
);
8855 op1
= validize_mem (force_const_mem (op_mode
, op1
));
8859 /* Try to rearrange the comparison to make it cheaper. */
8860 if (ix86_fp_comparison_cost (code
)
8861 > ix86_fp_comparison_cost (swap_condition (code
))
8862 && (GET_CODE (op1
) == REG
|| !no_new_pseudos
))
8865 tmp
= op0
, op0
= op1
, op1
= tmp
;
8866 code
= swap_condition (code
);
8867 if (GET_CODE (op0
) != REG
)
8868 op0
= force_reg (op_mode
, op0
);
8876 /* Convert comparison codes we use to represent FP comparison to integer
8877 code that will result in proper branch. Return UNKNOWN if no such code
8879 static enum rtx_code
8880 ix86_fp_compare_code_to_integer (enum rtx_code code
)
8909 /* Split comparison code CODE into comparisons we can do using branch
8910 instructions. BYPASS_CODE is comparison code for branch that will
8911 branch around FIRST_CODE and SECOND_CODE. If some of branches
8912 is not required, set value to NIL.
8913 We never require more than two branches. */
8915 ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*bypass_code
,
8916 enum rtx_code
*first_code
,
8917 enum rtx_code
*second_code
)
8923 /* The fcomi comparison sets flags as follows:
8933 case GT
: /* GTU - CF=0 & ZF=0 */
8934 case GE
: /* GEU - CF=0 */
8935 case ORDERED
: /* PF=0 */
8936 case UNORDERED
: /* PF=1 */
8937 case UNEQ
: /* EQ - ZF=1 */
8938 case UNLT
: /* LTU - CF=1 */
8939 case UNLE
: /* LEU - CF=1 | ZF=1 */
8940 case LTGT
: /* EQ - ZF=0 */
8942 case LT
: /* LTU - CF=1 - fails on unordered */
8944 *bypass_code
= UNORDERED
;
8946 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
8948 *bypass_code
= UNORDERED
;
8950 case EQ
: /* EQ - ZF=1 - fails on unordered */
8952 *bypass_code
= UNORDERED
;
8954 case NE
: /* NE - ZF=0 - fails on unordered */
8956 *second_code
= UNORDERED
;
8958 case UNGE
: /* GEU - CF=0 - fails on unordered */
8960 *second_code
= UNORDERED
;
8962 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
8964 *second_code
= UNORDERED
;
8969 if (!TARGET_IEEE_FP
)
8976 /* Return cost of comparison done fcom + arithmetics operations on AX.
8977 All following functions do use number of instructions as a cost metrics.
8978 In future this should be tweaked to compute bytes for optimize_size and
8979 take into account performance of various instructions on various CPUs. */
8981 ix86_fp_comparison_arithmetics_cost (enum rtx_code code
)
8983 if (!TARGET_IEEE_FP
)
8985 /* The cost of code output by ix86_expand_fp_compare. */
9013 /* Return cost of comparison done using fcomi operation.
9014 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9016 ix86_fp_comparison_fcomi_cost (enum rtx_code code
)
9018 enum rtx_code bypass_code
, first_code
, second_code
;
9019 /* Return arbitrarily high cost when instruction is not supported - this
9020 prevents gcc from using it. */
9023 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
9024 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 2;
9027 /* Return cost of comparison done using sahf operation.
9028 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9030 ix86_fp_comparison_sahf_cost (enum rtx_code code
)
9032 enum rtx_code bypass_code
, first_code
, second_code
;
9033 /* Return arbitrarily high cost when instruction is not preferred - this
9034 avoids gcc from using it. */
9035 if (!TARGET_USE_SAHF
&& !optimize_size
)
9037 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
9038 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 3;
9041 /* Compute cost of the comparison done using any method.
9042 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9044 ix86_fp_comparison_cost (enum rtx_code code
)
9046 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
9049 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
9050 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
9052 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
9053 if (min
> sahf_cost
)
9055 if (min
> fcomi_cost
)
9060 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9063 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
,
9064 rtx
*second_test
, rtx
*bypass_test
)
9066 enum machine_mode fpcmp_mode
, intcmp_mode
;
9068 int cost
= ix86_fp_comparison_cost (code
);
9069 enum rtx_code bypass_code
, first_code
, second_code
;
9071 fpcmp_mode
= ix86_fp_compare_mode (code
);
9072 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
9075 *second_test
= NULL_RTX
;
9077 *bypass_test
= NULL_RTX
;
9079 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
9081 /* Do fcomi/sahf based test when profitable. */
9082 if ((bypass_code
== NIL
|| bypass_test
)
9083 && (second_code
== NIL
|| second_test
)
9084 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
9088 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
9089 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
9095 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
9096 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
9098 scratch
= gen_reg_rtx (HImode
);
9099 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
9100 emit_insn (gen_x86_sahf_1 (scratch
));
9103 /* The FP codes work out to act like unsigned. */
9104 intcmp_mode
= fpcmp_mode
;
9106 if (bypass_code
!= NIL
)
9107 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
9108 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
9110 if (second_code
!= NIL
)
9111 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
9112 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
9117 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9118 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
9119 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
9121 scratch
= gen_reg_rtx (HImode
);
9122 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
9124 /* In the unordered case, we have to check C2 for NaN's, which
9125 doesn't happen to work out to anything nice combination-wise.
9126 So do some bit twiddling on the value we've got in AH to come
9127 up with an appropriate set of condition codes. */
9129 intcmp_mode
= CCNOmode
;
9134 if (code
== GT
|| !TARGET_IEEE_FP
)
9136 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
9141 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
9142 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
9143 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
9144 intcmp_mode
= CCmode
;
9150 if (code
== LT
&& TARGET_IEEE_FP
)
9152 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
9153 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
9154 intcmp_mode
= CCmode
;
9159 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
9165 if (code
== GE
|| !TARGET_IEEE_FP
)
9167 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
9172 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
9173 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
9180 if (code
== LE
&& TARGET_IEEE_FP
)
9182 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
9183 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
9184 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
9185 intcmp_mode
= CCmode
;
9190 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
9196 if (code
== EQ
&& TARGET_IEEE_FP
)
9198 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
9199 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
9200 intcmp_mode
= CCmode
;
9205 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
9212 if (code
== NE
&& TARGET_IEEE_FP
)
9214 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
9215 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
9221 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
9227 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
9231 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
9240 /* Return the test that should be put into the flags user, i.e.
9241 the bcc, scc, or cmov instruction. */
9242 return gen_rtx_fmt_ee (code
, VOIDmode
,
9243 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
9248 ix86_expand_compare (enum rtx_code code
, rtx
*second_test
, rtx
*bypass_test
)
9251 op0
= ix86_compare_op0
;
9252 op1
= ix86_compare_op1
;
9255 *second_test
= NULL_RTX
;
9257 *bypass_test
= NULL_RTX
;
9259 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
9260 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
9261 second_test
, bypass_test
);
9263 ret
= ix86_expand_int_compare (code
, op0
, op1
);
9268 /* Return true if the CODE will result in nontrivial jump sequence. */
9270 ix86_fp_jump_nontrivial_p (enum rtx_code code
)
9272 enum rtx_code bypass_code
, first_code
, second_code
;
9275 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
9276 return bypass_code
!= NIL
|| second_code
!= NIL
;
9280 ix86_expand_branch (enum rtx_code code
, rtx label
)
9284 switch (GET_MODE (ix86_compare_op0
))
9290 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
9291 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
9292 gen_rtx_LABEL_REF (VOIDmode
, label
),
9294 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
9303 enum rtx_code bypass_code
, first_code
, second_code
;
9305 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
9308 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
9310 /* Check whether we will use the natural sequence with one jump. If
9311 so, we can expand jump early. Otherwise delay expansion by
9312 creating compound insn to not confuse optimizers. */
9313 if (bypass_code
== NIL
&& second_code
== NIL
9316 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
9317 gen_rtx_LABEL_REF (VOIDmode
, label
),
9322 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
9323 ix86_compare_op0
, ix86_compare_op1
);
9324 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
9325 gen_rtx_LABEL_REF (VOIDmode
, label
),
9327 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
9329 use_fcomi
= ix86_use_fcomi_compare (code
);
9330 vec
= rtvec_alloc (3 + !use_fcomi
);
9331 RTVEC_ELT (vec
, 0) = tmp
;
9333 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
9335 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
9338 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
9340 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
9348 /* Expand DImode branch into multiple compare+branch. */
9350 rtx lo
[2], hi
[2], label2
;
9351 enum rtx_code code1
, code2
, code3
;
9353 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
9355 tmp
= ix86_compare_op0
;
9356 ix86_compare_op0
= ix86_compare_op1
;
9357 ix86_compare_op1
= tmp
;
9358 code
= swap_condition (code
);
9360 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
9361 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
9363 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9364 avoid two branches. This costs one extra insn, so disable when
9365 optimizing for size. */
9367 if ((code
== EQ
|| code
== NE
)
9369 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
9374 if (hi
[1] != const0_rtx
)
9375 xor1
= expand_binop (SImode
, xor_optab
, xor1
, hi
[1],
9376 NULL_RTX
, 0, OPTAB_WIDEN
);
9379 if (lo
[1] != const0_rtx
)
9380 xor0
= expand_binop (SImode
, xor_optab
, xor0
, lo
[1],
9381 NULL_RTX
, 0, OPTAB_WIDEN
);
9383 tmp
= expand_binop (SImode
, ior_optab
, xor1
, xor0
,
9384 NULL_RTX
, 0, OPTAB_WIDEN
);
9386 ix86_compare_op0
= tmp
;
9387 ix86_compare_op1
= const0_rtx
;
9388 ix86_expand_branch (code
, label
);
9392 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9393 op1 is a constant and the low word is zero, then we can just
9394 examine the high word. */
9396 if (GET_CODE (hi
[1]) == CONST_INT
&& lo
[1] == const0_rtx
)
9399 case LT
: case LTU
: case GE
: case GEU
:
9400 ix86_compare_op0
= hi
[0];
9401 ix86_compare_op1
= hi
[1];
9402 ix86_expand_branch (code
, label
);
9408 /* Otherwise, we need two or three jumps. */
9410 label2
= gen_label_rtx ();
9413 code2
= swap_condition (code
);
9414 code3
= unsigned_condition (code
);
9418 case LT
: case GT
: case LTU
: case GTU
:
9421 case LE
: code1
= LT
; code2
= GT
; break;
9422 case GE
: code1
= GT
; code2
= LT
; break;
9423 case LEU
: code1
= LTU
; code2
= GTU
; break;
9424 case GEU
: code1
= GTU
; code2
= LTU
; break;
9426 case EQ
: code1
= NIL
; code2
= NE
; break;
9427 case NE
: code2
= NIL
; break;
9435 * if (hi(a) < hi(b)) goto true;
9436 * if (hi(a) > hi(b)) goto false;
9437 * if (lo(a) < lo(b)) goto true;
9441 ix86_compare_op0
= hi
[0];
9442 ix86_compare_op1
= hi
[1];
9445 ix86_expand_branch (code1
, label
);
9447 ix86_expand_branch (code2
, label2
);
9449 ix86_compare_op0
= lo
[0];
9450 ix86_compare_op1
= lo
[1];
9451 ix86_expand_branch (code3
, label
);
9454 emit_label (label2
);
9463 /* Split branch based on floating point condition. */
9465 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
9466 rtx target1
, rtx target2
, rtx tmp
)
9469 rtx label
= NULL_RTX
;
9471 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
9474 if (target2
!= pc_rtx
)
9477 code
= reverse_condition_maybe_unordered (code
);
9482 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
9483 tmp
, &second
, &bypass
);
9485 if (split_branch_probability
>= 0)
9487 /* Distribute the probabilities across the jumps.
9488 Assume the BYPASS and SECOND to be always test
9490 probability
= split_branch_probability
;
9492 /* Value of 1 is low enough to make no need for probability
9493 to be updated. Later we may run some experiments and see
9494 if unordered values are more frequent in practice. */
9496 bypass_probability
= 1;
9498 second_probability
= 1;
9500 if (bypass
!= NULL_RTX
)
9502 label
= gen_label_rtx ();
9503 i
= emit_jump_insn (gen_rtx_SET
9505 gen_rtx_IF_THEN_ELSE (VOIDmode
,
9507 gen_rtx_LABEL_REF (VOIDmode
,
9510 if (bypass_probability
>= 0)
9512 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
9513 GEN_INT (bypass_probability
),
9516 i
= emit_jump_insn (gen_rtx_SET
9518 gen_rtx_IF_THEN_ELSE (VOIDmode
,
9519 condition
, target1
, target2
)));
9520 if (probability
>= 0)
9522 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
9523 GEN_INT (probability
),
9525 if (second
!= NULL_RTX
)
9527 i
= emit_jump_insn (gen_rtx_SET
9529 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
9531 if (second_probability
>= 0)
9533 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
9534 GEN_INT (second_probability
),
9537 if (label
!= NULL_RTX
)
9542 ix86_expand_setcc (enum rtx_code code
, rtx dest
)
9544 rtx ret
, tmp
, tmpreg
, equiv
;
9545 rtx second_test
, bypass_test
;
9547 if (GET_MODE (ix86_compare_op0
) == DImode
9549 return 0; /* FAIL */
9551 if (GET_MODE (dest
) != QImode
)
9554 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9555 PUT_MODE (ret
, QImode
);
9560 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
9561 if (bypass_test
|| second_test
)
9563 rtx test
= second_test
;
9565 rtx tmp2
= gen_reg_rtx (QImode
);
9572 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
9574 PUT_MODE (test
, QImode
);
9575 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
9578 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
9580 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
9583 /* Attach a REG_EQUAL note describing the comparison result. */
9584 equiv
= simplify_gen_relational (code
, QImode
,
9585 GET_MODE (ix86_compare_op0
),
9586 ix86_compare_op0
, ix86_compare_op1
);
9587 set_unique_reg_note (get_last_insn (), REG_EQUAL
, equiv
);
9589 return 1; /* DONE */
9592 /* Expand comparison setting or clearing carry flag. Return true when
9593 successful and set pop for the operation. */
9595 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
9597 enum machine_mode mode
=
9598 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
9600 /* Do not handle DImode compares that go trought special path. Also we can't
9601 deal with FP compares yet. This is possible to add. */
9602 if ((mode
== DImode
&& !TARGET_64BIT
))
9604 if (FLOAT_MODE_P (mode
))
9606 rtx second_test
= NULL
, bypass_test
= NULL
;
9607 rtx compare_op
, compare_seq
;
9609 /* Shortcut: following common codes never translate into carry flag compares. */
9610 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
9611 || code
== ORDERED
|| code
== UNORDERED
)
9614 /* These comparisons require zero flag; swap operands so they won't. */
9615 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
9621 code
= swap_condition (code
);
9624 /* Try to expand the comparison and verify that we end up with carry flag
9625 based comparison. This is fails to be true only when we decide to expand
9626 comparison using arithmetic that is not too common scenario. */
9628 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
9629 &second_test
, &bypass_test
);
9630 compare_seq
= get_insns ();
9633 if (second_test
|| bypass_test
)
9635 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
9636 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
9637 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
9639 code
= GET_CODE (compare_op
);
9640 if (code
!= LTU
&& code
!= GEU
)
9642 emit_insn (compare_seq
);
9646 if (!INTEGRAL_MODE_P (mode
))
9654 /* Convert a==0 into (unsigned)a<1. */
9657 if (op1
!= const0_rtx
)
9660 code
= (code
== EQ
? LTU
: GEU
);
9663 /* Convert a>b into b<a or a>=b-1. */
9666 if (GET_CODE (op1
) == CONST_INT
)
9668 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
9669 /* Bail out on overflow. We still can swap operands but that
9670 would force loading of the constant into register. */
9671 if (op1
== const0_rtx
9672 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
9674 code
= (code
== GTU
? GEU
: LTU
);
9681 code
= (code
== GTU
? LTU
: GEU
);
9685 /* Convert a>=0 into (unsigned)a<0x80000000. */
9688 if (mode
== DImode
|| op1
!= const0_rtx
)
9690 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
9691 code
= (code
== LT
? GEU
: LTU
);
9695 if (mode
== DImode
|| op1
!= constm1_rtx
)
9697 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
9698 code
= (code
== LE
? GEU
: LTU
);
9704 /* Swapping operands may cause constant to appear as first operand. */
9705 if (!nonimmediate_operand (op0
, VOIDmode
))
9709 op0
= force_reg (mode
, op0
);
9711 ix86_compare_op0
= op0
;
9712 ix86_compare_op1
= op1
;
9713 *pop
= ix86_expand_compare (code
, NULL
, NULL
);
9714 if (GET_CODE (*pop
) != LTU
&& GET_CODE (*pop
) != GEU
)
9720 ix86_expand_int_movcc (rtx operands
[])
9722 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
9723 rtx compare_seq
, compare_op
;
9724 rtx second_test
, bypass_test
;
9725 enum machine_mode mode
= GET_MODE (operands
[0]);
9726 bool sign_bit_compare_p
= false;;
9729 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9730 compare_seq
= get_insns ();
9733 compare_code
= GET_CODE (compare_op
);
9735 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
9736 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
9737 sign_bit_compare_p
= true;
9739 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9740 HImode insns, we'd be swallowed in word prefix ops. */
9742 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
9743 && (mode
!= DImode
|| TARGET_64BIT
)
9744 && GET_CODE (operands
[2]) == CONST_INT
9745 && GET_CODE (operands
[3]) == CONST_INT
)
9747 rtx out
= operands
[0];
9748 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
9749 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
9753 /* Sign bit compares are better done using shifts than we do by using
9755 if (sign_bit_compare_p
9756 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
9757 ix86_compare_op1
, &compare_op
))
9759 /* Detect overlap between destination and compare sources. */
9762 if (!sign_bit_compare_p
)
9766 compare_code
= GET_CODE (compare_op
);
9768 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
9769 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
9772 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
9775 /* To simplify rest of code, restrict to the GEU case. */
9776 if (compare_code
== LTU
)
9778 HOST_WIDE_INT tmp
= ct
;
9781 compare_code
= reverse_condition (compare_code
);
9782 code
= reverse_condition (code
);
9787 PUT_CODE (compare_op
,
9788 reverse_condition_maybe_unordered
9789 (GET_CODE (compare_op
)));
9791 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
9795 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
9796 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
9797 tmp
= gen_reg_rtx (mode
);
9800 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
9802 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
9806 if (code
== GT
|| code
== GE
)
9807 code
= reverse_condition (code
);
9810 HOST_WIDE_INT tmp
= ct
;
9815 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
9816 ix86_compare_op1
, VOIDmode
, 0, -1);
9829 tmp
= expand_simple_binop (mode
, PLUS
,
9831 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
9842 tmp
= expand_simple_binop (mode
, IOR
,
9844 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
9846 else if (diff
== -1 && ct
)
9856 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
9858 tmp
= expand_simple_binop (mode
, PLUS
,
9859 copy_rtx (tmp
), GEN_INT (cf
),
9860 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
9868 * andl cf - ct, dest
9878 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
9881 tmp
= expand_simple_binop (mode
, AND
,
9883 gen_int_mode (cf
- ct
, mode
),
9884 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
9886 tmp
= expand_simple_binop (mode
, PLUS
,
9887 copy_rtx (tmp
), GEN_INT (ct
),
9888 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
9891 if (!rtx_equal_p (tmp
, out
))
9892 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
9894 return 1; /* DONE */
9900 tmp
= ct
, ct
= cf
, cf
= tmp
;
9902 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
9904 /* We may be reversing unordered compare to normal compare, that
9905 is not valid in general (we may convert non-trapping condition
9906 to trapping one), however on i386 we currently emit all
9907 comparisons unordered. */
9908 compare_code
= reverse_condition_maybe_unordered (compare_code
);
9909 code
= reverse_condition_maybe_unordered (code
);
9913 compare_code
= reverse_condition (compare_code
);
9914 code
= reverse_condition (code
);
9919 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
9920 && GET_CODE (ix86_compare_op1
) == CONST_INT
)
9922 if (ix86_compare_op1
== const0_rtx
9923 && (code
== LT
|| code
== GE
))
9924 compare_code
= code
;
9925 else if (ix86_compare_op1
== constm1_rtx
)
9929 else if (code
== GT
)
9934 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9935 if (compare_code
!= NIL
9936 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
9937 && (cf
== -1 || ct
== -1))
9939 /* If lea code below could be used, only optimize
9940 if it results in a 2 insn sequence. */
9942 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
9943 || diff
== 3 || diff
== 5 || diff
== 9)
9944 || (compare_code
== LT
&& ct
== -1)
9945 || (compare_code
== GE
&& cf
== -1))
9948 * notl op1 (if necessary)
9956 code
= reverse_condition (code
);
9959 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9960 ix86_compare_op1
, VOIDmode
, 0, -1);
9962 out
= expand_simple_binop (mode
, IOR
,
9964 out
, 1, OPTAB_DIRECT
);
9965 if (out
!= operands
[0])
9966 emit_move_insn (operands
[0], out
);
9968 return 1; /* DONE */
9973 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
9974 || diff
== 3 || diff
== 5 || diff
== 9)
9975 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
9976 && (mode
!= DImode
|| x86_64_sign_extended_value (GEN_INT (cf
))))
9982 * lea cf(dest*(ct-cf)),dest
9986 * This also catches the degenerate setcc-only case.
9992 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9993 ix86_compare_op1
, VOIDmode
, 0, 1);
9996 /* On x86_64 the lea instruction operates on Pmode, so we need
9997 to get arithmetics done in proper mode to match. */
9999 tmp
= copy_rtx (out
);
10003 out1
= copy_rtx (out
);
10004 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
10008 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
10014 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
10017 if (!rtx_equal_p (tmp
, out
))
10020 out
= force_operand (tmp
, copy_rtx (out
));
10022 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
10024 if (!rtx_equal_p (out
, operands
[0]))
10025 emit_move_insn (operands
[0], copy_rtx (out
));
10027 return 1; /* DONE */
10031 * General case: Jumpful:
10032 * xorl dest,dest cmpl op1, op2
10033 * cmpl op1, op2 movl ct, dest
10034 * setcc dest jcc 1f
10035 * decl dest movl cf, dest
10036 * andl (cf-ct),dest 1:
10039 * Size 20. Size 14.
10041 * This is reasonably steep, but branch mispredict costs are
10042 * high on modern cpus, so consider failing only if optimizing
10046 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
10047 && BRANCH_COST
>= 2)
10053 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
10054 /* We may be reversing unordered compare to normal compare,
10055 that is not valid in general (we may convert non-trapping
10056 condition to trapping one), however on i386 we currently
10057 emit all comparisons unordered. */
10058 code
= reverse_condition_maybe_unordered (code
);
10061 code
= reverse_condition (code
);
10062 if (compare_code
!= NIL
)
10063 compare_code
= reverse_condition (compare_code
);
10067 if (compare_code
!= NIL
)
10069 /* notl op1 (if needed)
10074 For x < 0 (resp. x <= -1) there will be no notl,
10075 so if possible swap the constants to get rid of the
10077 True/false will be -1/0 while code below (store flag
10078 followed by decrement) is 0/-1, so the constants need
10079 to be exchanged once more. */
10081 if (compare_code
== GE
|| !cf
)
10083 code
= reverse_condition (code
);
10088 HOST_WIDE_INT tmp
= cf
;
10093 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
10094 ix86_compare_op1
, VOIDmode
, 0, -1);
10098 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
10099 ix86_compare_op1
, VOIDmode
, 0, 1);
10101 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
10102 copy_rtx (out
), 1, OPTAB_DIRECT
);
10105 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
10106 gen_int_mode (cf
- ct
, mode
),
10107 copy_rtx (out
), 1, OPTAB_DIRECT
);
10109 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
10110 copy_rtx (out
), 1, OPTAB_DIRECT
);
10111 if (!rtx_equal_p (out
, operands
[0]))
10112 emit_move_insn (operands
[0], copy_rtx (out
));
10114 return 1; /* DONE */
10118 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
10120 /* Try a few things more with specific constants and a variable. */
10123 rtx var
, orig_out
, out
, tmp
;
10125 if (BRANCH_COST
<= 2)
10126 return 0; /* FAIL */
10128 /* If one of the two operands is an interesting constant, load a
10129 constant with the above and mask it in with a logical operation. */
10131 if (GET_CODE (operands
[2]) == CONST_INT
)
10134 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
10135 operands
[3] = constm1_rtx
, op
= and_optab
;
10136 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
10137 operands
[3] = const0_rtx
, op
= ior_optab
;
10139 return 0; /* FAIL */
10141 else if (GET_CODE (operands
[3]) == CONST_INT
)
10144 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
10145 operands
[2] = constm1_rtx
, op
= and_optab
;
10146 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
10147 operands
[2] = const0_rtx
, op
= ior_optab
;
10149 return 0; /* FAIL */
10152 return 0; /* FAIL */
10154 orig_out
= operands
[0];
10155 tmp
= gen_reg_rtx (mode
);
10158 /* Recurse to get the constant loaded. */
10159 if (ix86_expand_int_movcc (operands
) == 0)
10160 return 0; /* FAIL */
10162 /* Mask in the interesting variable. */
10163 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
10165 if (!rtx_equal_p (out
, orig_out
))
10166 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
10168 return 1; /* DONE */
10172 * For comparison with above,
10182 if (! nonimmediate_operand (operands
[2], mode
))
10183 operands
[2] = force_reg (mode
, operands
[2]);
10184 if (! nonimmediate_operand (operands
[3], mode
))
10185 operands
[3] = force_reg (mode
, operands
[3]);
10187 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
10189 rtx tmp
= gen_reg_rtx (mode
);
10190 emit_move_insn (tmp
, operands
[3]);
10193 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
10195 rtx tmp
= gen_reg_rtx (mode
);
10196 emit_move_insn (tmp
, operands
[2]);
10200 if (! register_operand (operands
[2], VOIDmode
)
10202 || ! register_operand (operands
[3], VOIDmode
)))
10203 operands
[2] = force_reg (mode
, operands
[2]);
10206 && ! register_operand (operands
[3], VOIDmode
))
10207 operands
[3] = force_reg (mode
, operands
[3]);
10209 emit_insn (compare_seq
);
10210 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
10211 gen_rtx_IF_THEN_ELSE (mode
,
10212 compare_op
, operands
[2],
10215 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
10216 gen_rtx_IF_THEN_ELSE (mode
,
10218 copy_rtx (operands
[3]),
10219 copy_rtx (operands
[0]))));
10221 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
10222 gen_rtx_IF_THEN_ELSE (mode
,
10224 copy_rtx (operands
[2]),
10225 copy_rtx (operands
[0]))));
10227 return 1; /* DONE */
10231 ix86_expand_fp_movcc (rtx operands
[])
10233 enum rtx_code code
;
10235 rtx compare_op
, second_test
, bypass_test
;
10237 /* For SF/DFmode conditional moves based on comparisons
10238 in same mode, we may want to use SSE min/max instructions. */
10239 if (((TARGET_SSE_MATH
&& GET_MODE (operands
[0]) == SFmode
)
10240 || (TARGET_SSE2
&& TARGET_SSE_MATH
&& GET_MODE (operands
[0]) == DFmode
))
10241 && GET_MODE (ix86_compare_op0
) == GET_MODE (operands
[0])
10242 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
10243 && (!TARGET_IEEE_FP
10244 || (GET_CODE (operands
[1]) != LTGT
&& GET_CODE (operands
[1]) != UNEQ
))
10245 /* We may be called from the post-reload splitter. */
10246 && (!REG_P (operands
[0])
10247 || SSE_REG_P (operands
[0])
10248 || REGNO (operands
[0]) >= FIRST_PSEUDO_REGISTER
))
10250 rtx op0
= ix86_compare_op0
, op1
= ix86_compare_op1
;
10251 code
= GET_CODE (operands
[1]);
10253 /* See if we have (cross) match between comparison operands and
10254 conditional move operands. */
10255 if (rtx_equal_p (operands
[2], op1
))
10260 code
= reverse_condition_maybe_unordered (code
);
10262 if (rtx_equal_p (operands
[2], op0
) && rtx_equal_p (operands
[3], op1
))
10264 /* Check for min operation. */
10265 if (code
== LT
|| code
== UNLE
)
10273 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
10274 if (memory_operand (op0
, VOIDmode
))
10275 op0
= force_reg (GET_MODE (operands
[0]), op0
);
10276 if (GET_MODE (operands
[0]) == SFmode
)
10277 emit_insn (gen_minsf3 (operands
[0], op0
, op1
));
10279 emit_insn (gen_mindf3 (operands
[0], op0
, op1
));
10282 /* Check for max operation. */
10283 if (code
== GT
|| code
== UNGE
)
10291 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
10292 if (memory_operand (op0
, VOIDmode
))
10293 op0
= force_reg (GET_MODE (operands
[0]), op0
);
10294 if (GET_MODE (operands
[0]) == SFmode
)
10295 emit_insn (gen_maxsf3 (operands
[0], op0
, op1
));
10297 emit_insn (gen_maxdf3 (operands
[0], op0
, op1
));
10301 /* Manage condition to be sse_comparison_operator. In case we are
10302 in non-ieee mode, try to canonicalize the destination operand
10303 to be first in the comparison - this helps reload to avoid extra
10305 if (!sse_comparison_operator (operands
[1], VOIDmode
)
10306 || (rtx_equal_p (operands
[0], ix86_compare_op1
) && !TARGET_IEEE_FP
))
10308 rtx tmp
= ix86_compare_op0
;
10309 ix86_compare_op0
= ix86_compare_op1
;
10310 ix86_compare_op1
= tmp
;
10311 operands
[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands
[1])),
10312 VOIDmode
, ix86_compare_op0
,
10315 /* Similarly try to manage result to be first operand of conditional
10316 move. We also don't support the NE comparison on SSE, so try to
10318 if ((rtx_equal_p (operands
[0], operands
[3])
10319 && (!TARGET_IEEE_FP
|| GET_CODE (operands
[1]) != EQ
))
10320 || (GET_CODE (operands
[1]) == NE
&& TARGET_IEEE_FP
))
10322 rtx tmp
= operands
[2];
10323 operands
[2] = operands
[3];
10325 operands
[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10326 (GET_CODE (operands
[1])),
10327 VOIDmode
, ix86_compare_op0
,
10330 if (GET_MODE (operands
[0]) == SFmode
)
10331 emit_insn (gen_sse_movsfcc (operands
[0], operands
[1],
10332 operands
[2], operands
[3],
10333 ix86_compare_op0
, ix86_compare_op1
));
10335 emit_insn (gen_sse_movdfcc (operands
[0], operands
[1],
10336 operands
[2], operands
[3],
10337 ix86_compare_op0
, ix86_compare_op1
));
10341 /* The floating point conditional move instructions don't directly
10342 support conditions resulting from a signed integer comparison. */
10344 code
= GET_CODE (operands
[1]);
10345 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
10347 /* The floating point conditional move instructions don't directly
10348 support signed integer comparisons. */
10350 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
10352 if (second_test
!= NULL
|| bypass_test
!= NULL
)
10354 tmp
= gen_reg_rtx (QImode
);
10355 ix86_expand_setcc (code
, tmp
);
10357 ix86_compare_op0
= tmp
;
10358 ix86_compare_op1
= const0_rtx
;
10359 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
10361 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
10363 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
10364 emit_move_insn (tmp
, operands
[3]);
10367 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
10369 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
10370 emit_move_insn (tmp
, operands
[2]);
10374 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
10375 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
10380 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
10381 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
10386 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
10387 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
10395 /* Expand conditional increment or decrement using adb/sbb instructions.
10396 The default case using setcc followed by the conditional move can be
10397 done by generic code. */
10399 ix86_expand_int_addcc (rtx operands
[])
10401 enum rtx_code code
= GET_CODE (operands
[1]);
10403 rtx val
= const0_rtx
;
10404 bool fpcmp
= false;
10405 enum machine_mode mode
= GET_MODE (operands
[0]);
10407 if (operands
[3] != const1_rtx
10408 && operands
[3] != constm1_rtx
)
10410 if (!ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
10411 ix86_compare_op1
, &compare_op
))
10413 code
= GET_CODE (compare_op
);
10415 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
10416 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
10419 code
= ix86_fp_compare_code_to_integer (code
);
10426 PUT_CODE (compare_op
,
10427 reverse_condition_maybe_unordered
10428 (GET_CODE (compare_op
)));
10430 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
10432 PUT_MODE (compare_op
, mode
);
10434 /* Construct either adc or sbb insn. */
10435 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
10437 switch (GET_MODE (operands
[0]))
10440 emit_insn (gen_subqi3_carry (operands
[0], operands
[2], val
, compare_op
));
10443 emit_insn (gen_subhi3_carry (operands
[0], operands
[2], val
, compare_op
));
10446 emit_insn (gen_subsi3_carry (operands
[0], operands
[2], val
, compare_op
));
10449 emit_insn (gen_subdi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
10457 switch (GET_MODE (operands
[0]))
10460 emit_insn (gen_addqi3_carry (operands
[0], operands
[2], val
, compare_op
));
10463 emit_insn (gen_addhi3_carry (operands
[0], operands
[2], val
, compare_op
));
10466 emit_insn (gen_addsi3_carry (operands
[0], operands
[2], val
, compare_op
));
10469 emit_insn (gen_adddi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
10475 return 1; /* DONE */
10479 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10480 works for floating pointer parameters and nonoffsetable memories.
10481 For pushes, it returns just stack offsets; the values will be saved
10482 in the right order. Maximally three parts are generated. */
10485 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
10490 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
10492 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
10494 if (GET_CODE (operand
) == REG
&& MMX_REGNO_P (REGNO (operand
)))
10496 if (size
< 2 || size
> 3)
10499 /* Optimize constant pool reference to immediates. This is used by fp
10500 moves, that force all constants to memory to allow combining. */
10501 if (GET_CODE (operand
) == MEM
&& RTX_UNCHANGING_P (operand
))
10503 rtx tmp
= maybe_get_pool_constant (operand
);
10508 if (GET_CODE (operand
) == MEM
&& !offsettable_memref_p (operand
))
10510 /* The only non-offsetable memories we handle are pushes. */
10511 if (! push_operand (operand
, VOIDmode
))
10514 operand
= copy_rtx (operand
);
10515 PUT_MODE (operand
, Pmode
);
10516 parts
[0] = parts
[1] = parts
[2] = operand
;
10518 else if (!TARGET_64BIT
)
10520 if (mode
== DImode
)
10521 split_di (&operand
, 1, &parts
[0], &parts
[1]);
10524 if (REG_P (operand
))
10526 if (!reload_completed
)
10528 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
10529 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
10531 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
10533 else if (offsettable_memref_p (operand
))
10535 operand
= adjust_address (operand
, SImode
, 0);
10536 parts
[0] = operand
;
10537 parts
[1] = adjust_address (operand
, SImode
, 4);
10539 parts
[2] = adjust_address (operand
, SImode
, 8);
10541 else if (GET_CODE (operand
) == CONST_DOUBLE
)
10546 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
10550 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
10551 parts
[2] = gen_int_mode (l
[2], SImode
);
10554 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
10559 parts
[1] = gen_int_mode (l
[1], SImode
);
10560 parts
[0] = gen_int_mode (l
[0], SImode
);
10568 if (mode
== TImode
)
10569 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
10570 if (mode
== XFmode
|| mode
== TFmode
)
10572 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
10573 if (REG_P (operand
))
10575 if (!reload_completed
)
10577 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
10578 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
10580 else if (offsettable_memref_p (operand
))
10582 operand
= adjust_address (operand
, DImode
, 0);
10583 parts
[0] = operand
;
10584 parts
[1] = adjust_address (operand
, upper_mode
, 8);
10586 else if (GET_CODE (operand
) == CONST_DOUBLE
)
10591 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
10592 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
10593 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10594 if (HOST_BITS_PER_WIDE_INT
>= 64)
10597 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
10598 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
10601 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
10602 if (upper_mode
== SImode
)
10603 parts
[1] = gen_int_mode (l
[2], SImode
);
10604 else if (HOST_BITS_PER_WIDE_INT
>= 64)
10607 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
10608 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
10611 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
10621 /* Emit insns to perform a move or push of DI, DF, and XF values.
10622 Return false when normal moves are needed; true when all required
10623 insns have been emitted. Operands 2-4 contain the input values
10624 int the correct order; operands 5-7 contain the output values. */
10627 ix86_split_long_move (rtx operands
[])
10632 int collisions
= 0;
10633 enum machine_mode mode
= GET_MODE (operands
[0]);
10635 /* The DFmode expanders may ask us to move double.
10636 For 64bit target this is single move. By hiding the fact
10637 here we simplify i386.md splitters. */
10638 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
10640 /* Optimize constant pool reference to immediates. This is used by
10641 fp moves, that force all constants to memory to allow combining. */
10643 if (GET_CODE (operands
[1]) == MEM
10644 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
10645 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
10646 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
10647 if (push_operand (operands
[0], VOIDmode
))
10649 operands
[0] = copy_rtx (operands
[0]);
10650 PUT_MODE (operands
[0], Pmode
);
10653 operands
[0] = gen_lowpart (DImode
, operands
[0]);
10654 operands
[1] = gen_lowpart (DImode
, operands
[1]);
10655 emit_move_insn (operands
[0], operands
[1]);
10659 /* The only non-offsettable memory we handle is push. */
10660 if (push_operand (operands
[0], VOIDmode
))
10662 else if (GET_CODE (operands
[0]) == MEM
10663 && ! offsettable_memref_p (operands
[0]))
10666 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
10667 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
10669 /* When emitting push, take care for source operands on the stack. */
10670 if (push
&& GET_CODE (operands
[1]) == MEM
10671 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
10674 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
10675 XEXP (part
[1][2], 0));
10676 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
10677 XEXP (part
[1][1], 0));
10680 /* We need to do copy in the right order in case an address register
10681 of the source overlaps the destination. */
10682 if (REG_P (part
[0][0]) && GET_CODE (part
[1][0]) == MEM
)
10684 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
10686 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
10689 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
10692 /* Collision in the middle part can be handled by reordering. */
10693 if (collisions
== 1 && nparts
== 3
10694 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
10697 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
10698 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
10701 /* If there are more collisions, we can't handle it by reordering.
10702 Do an lea to the last part and use only one colliding move. */
10703 else if (collisions
> 1)
10709 base
= part
[0][nparts
- 1];
10711 /* Handle the case when the last part isn't valid for lea.
10712 Happens in 64-bit mode storing the 12-byte XFmode. */
10713 if (GET_MODE (base
) != Pmode
)
10714 base
= gen_rtx_REG (Pmode
, REGNO (base
));
10716 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
10717 part
[1][0] = replace_equiv_address (part
[1][0], base
);
10718 part
[1][1] = replace_equiv_address (part
[1][1],
10719 plus_constant (base
, UNITS_PER_WORD
));
10721 part
[1][2] = replace_equiv_address (part
[1][2],
10722 plus_constant (base
, 8));
10732 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
10733 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, GEN_INT (-4)));
10734 emit_move_insn (part
[0][2], part
[1][2]);
10739 /* In 64bit mode we don't have 32bit push available. In case this is
10740 register, it is OK - we will just use larger counterpart. We also
10741 retype memory - these comes from attempt to avoid REX prefix on
10742 moving of second half of TFmode value. */
10743 if (GET_MODE (part
[1][1]) == SImode
)
10745 if (GET_CODE (part
[1][1]) == MEM
)
10746 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
10747 else if (REG_P (part
[1][1]))
10748 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
10751 if (GET_MODE (part
[1][0]) == SImode
)
10752 part
[1][0] = part
[1][1];
10755 emit_move_insn (part
[0][1], part
[1][1]);
10756 emit_move_insn (part
[0][0], part
[1][0]);
10760 /* Choose correct order to not overwrite the source before it is copied. */
10761 if ((REG_P (part
[0][0])
10762 && REG_P (part
[1][1])
10763 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
10765 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
10767 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
10771 operands
[2] = part
[0][2];
10772 operands
[3] = part
[0][1];
10773 operands
[4] = part
[0][0];
10774 operands
[5] = part
[1][2];
10775 operands
[6] = part
[1][1];
10776 operands
[7] = part
[1][0];
10780 operands
[2] = part
[0][1];
10781 operands
[3] = part
[0][0];
10782 operands
[5] = part
[1][1];
10783 operands
[6] = part
[1][0];
10790 operands
[2] = part
[0][0];
10791 operands
[3] = part
[0][1];
10792 operands
[4] = part
[0][2];
10793 operands
[5] = part
[1][0];
10794 operands
[6] = part
[1][1];
10795 operands
[7] = part
[1][2];
10799 operands
[2] = part
[0][0];
10800 operands
[3] = part
[0][1];
10801 operands
[5] = part
[1][0];
10802 operands
[6] = part
[1][1];
10805 emit_move_insn (operands
[2], operands
[5]);
10806 emit_move_insn (operands
[3], operands
[6]);
10808 emit_move_insn (operands
[4], operands
[7]);
10814 ix86_split_ashldi (rtx
*operands
, rtx scratch
)
10816 rtx low
[2], high
[2];
10819 if (GET_CODE (operands
[2]) == CONST_INT
)
10821 split_di (operands
, 2, low
, high
);
10822 count
= INTVAL (operands
[2]) & 63;
10826 emit_move_insn (high
[0], low
[1]);
10827 emit_move_insn (low
[0], const0_rtx
);
10830 emit_insn (gen_ashlsi3 (high
[0], high
[0], GEN_INT (count
- 32)));
10834 if (!rtx_equal_p (operands
[0], operands
[1]))
10835 emit_move_insn (operands
[0], operands
[1]);
10836 emit_insn (gen_x86_shld_1 (high
[0], low
[0], GEN_INT (count
)));
10837 emit_insn (gen_ashlsi3 (low
[0], low
[0], GEN_INT (count
)));
10842 if (!rtx_equal_p (operands
[0], operands
[1]))
10843 emit_move_insn (operands
[0], operands
[1]);
10845 split_di (operands
, 1, low
, high
);
10847 emit_insn (gen_x86_shld_1 (high
[0], low
[0], operands
[2]));
10848 emit_insn (gen_ashlsi3 (low
[0], low
[0], operands
[2]));
10850 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
10852 if (! no_new_pseudos
)
10853 scratch
= force_reg (SImode
, const0_rtx
);
10855 emit_move_insn (scratch
, const0_rtx
);
10857 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2],
10861 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
10866 ix86_split_ashrdi (rtx
*operands
, rtx scratch
)
10868 rtx low
[2], high
[2];
10871 if (GET_CODE (operands
[2]) == CONST_INT
)
10873 split_di (operands
, 2, low
, high
);
10874 count
= INTVAL (operands
[2]) & 63;
10878 emit_move_insn (high
[0], high
[1]);
10879 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (31)));
10880 emit_move_insn (low
[0], high
[0]);
10883 else if (count
>= 32)
10885 emit_move_insn (low
[0], high
[1]);
10887 if (! reload_completed
)
10888 emit_insn (gen_ashrsi3 (high
[0], low
[0], GEN_INT (31)));
10891 emit_move_insn (high
[0], low
[0]);
10892 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (31)));
10896 emit_insn (gen_ashrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
10900 if (!rtx_equal_p (operands
[0], operands
[1]))
10901 emit_move_insn (operands
[0], operands
[1]);
10902 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
10903 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (count
)));
10908 if (!rtx_equal_p (operands
[0], operands
[1]))
10909 emit_move_insn (operands
[0], operands
[1]);
10911 split_di (operands
, 1, low
, high
);
10913 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
10914 emit_insn (gen_ashrsi3 (high
[0], high
[0], operands
[2]));
10916 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
10918 if (! no_new_pseudos
)
10919 scratch
= gen_reg_rtx (SImode
);
10920 emit_move_insn (scratch
, high
[0]);
10921 emit_insn (gen_ashrsi3 (scratch
, scratch
, GEN_INT (31)));
10922 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
10926 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
10931 ix86_split_lshrdi (rtx
*operands
, rtx scratch
)
10933 rtx low
[2], high
[2];
10936 if (GET_CODE (operands
[2]) == CONST_INT
)
10938 split_di (operands
, 2, low
, high
);
10939 count
= INTVAL (operands
[2]) & 63;
10943 emit_move_insn (low
[0], high
[1]);
10944 emit_move_insn (high
[0], const0_rtx
);
10947 emit_insn (gen_lshrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
10951 if (!rtx_equal_p (operands
[0], operands
[1]))
10952 emit_move_insn (operands
[0], operands
[1]);
10953 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
10954 emit_insn (gen_lshrsi3 (high
[0], high
[0], GEN_INT (count
)));
10959 if (!rtx_equal_p (operands
[0], operands
[1]))
10960 emit_move_insn (operands
[0], operands
[1]);
10962 split_di (operands
, 1, low
, high
);
10964 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
10965 emit_insn (gen_lshrsi3 (high
[0], high
[0], operands
[2]));
10967 /* Heh. By reversing the arguments, we can reuse this pattern. */
10968 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
10970 if (! no_new_pseudos
)
10971 scratch
= force_reg (SImode
, const0_rtx
);
10973 emit_move_insn (scratch
, const0_rtx
);
10975 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
10979 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
10983 /* Helper function for the string operations below. Dest VARIABLE whether
10984 it is aligned to VALUE bytes. If true, jump to the label. */
10986 ix86_expand_aligntest (rtx variable
, int value
)
10988 rtx label
= gen_label_rtx ();
10989 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
10990 if (GET_MODE (variable
) == DImode
)
10991 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
10993 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
10994 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
10999 /* Adjust COUNTER by the VALUE. */
11001 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
11003 if (GET_MODE (countreg
) == DImode
)
11004 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
11006 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
11009 /* Zero extend possibly SImode EXP to Pmode register. */
11011 ix86_zero_extend_to_Pmode (rtx exp
)
11014 if (GET_MODE (exp
) == VOIDmode
)
11015 return force_reg (Pmode
, exp
);
11016 if (GET_MODE (exp
) == Pmode
)
11017 return copy_to_mode_reg (Pmode
, exp
);
11018 r
= gen_reg_rtx (Pmode
);
11019 emit_insn (gen_zero_extendsidi2 (r
, exp
));
11023 /* Expand string move (memcpy) operation. Use i386 string operations when
11024 profitable. expand_clrstr contains similar code. */
11026 ix86_expand_movstr (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
)
11028 rtx srcreg
, destreg
, countreg
, srcexp
, destexp
;
11029 enum machine_mode counter_mode
;
11030 HOST_WIDE_INT align
= 0;
11031 unsigned HOST_WIDE_INT count
= 0;
11033 if (GET_CODE (align_exp
) == CONST_INT
)
11034 align
= INTVAL (align_exp
);
11036 /* Can't use any of this if the user has appropriated esi or edi. */
11037 if (global_regs
[4] || global_regs
[5])
11040 /* This simple hack avoids all inlining code and simplifies code below. */
11041 if (!TARGET_ALIGN_STRINGOPS
)
11044 if (GET_CODE (count_exp
) == CONST_INT
)
11046 count
= INTVAL (count_exp
);
11047 if (!TARGET_INLINE_ALL_STRINGOPS
&& count
> 64)
11051 /* Figure out proper mode for counter. For 32bits it is always SImode,
11052 for 64bits use SImode when possible, otherwise DImode.
11053 Set count to number of bytes copied when known at compile time. */
11054 if (!TARGET_64BIT
|| GET_MODE (count_exp
) == SImode
11055 || x86_64_zero_extended_value (count_exp
))
11056 counter_mode
= SImode
;
11058 counter_mode
= DImode
;
11060 if (counter_mode
!= SImode
&& counter_mode
!= DImode
)
11063 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
11064 if (destreg
!= XEXP (dst
, 0))
11065 dst
= replace_equiv_address_nv (dst
, destreg
);
11066 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
11067 if (srcreg
!= XEXP (src
, 0))
11068 src
= replace_equiv_address_nv (src
, srcreg
);
11070 /* When optimizing for size emit simple rep ; movsb instruction for
11071 counts not divisible by 4. */
11073 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
11075 emit_insn (gen_cld ());
11076 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
11077 destexp
= gen_rtx_PLUS (Pmode
, destreg
, countreg
);
11078 srcexp
= gen_rtx_PLUS (Pmode
, srcreg
, countreg
);
11079 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
, countreg
,
11083 /* For constant aligned (or small unaligned) copies use rep movsl
11084 followed by code copying the rest. For PentiumPro ensure 8 byte
11085 alignment to allow rep movsl acceleration. */
11087 else if (count
!= 0
11089 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
11090 || optimize_size
|| count
< (unsigned int) 64))
11092 unsigned HOST_WIDE_INT offset
= 0;
11093 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
11094 rtx srcmem
, dstmem
;
11096 emit_insn (gen_cld ());
11097 if (count
& ~(size
- 1))
11099 countreg
= copy_to_mode_reg (counter_mode
,
11100 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
11101 & (TARGET_64BIT
? -1 : 0x3fffffff)));
11102 countreg
= ix86_zero_extend_to_Pmode (countreg
);
11104 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
11105 GEN_INT (size
== 4 ? 2 : 3));
11106 srcexp
= gen_rtx_PLUS (Pmode
, destexp
, srcreg
);
11107 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
11109 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
,
11110 countreg
, destexp
, srcexp
));
11111 offset
= count
& ~(size
- 1);
11113 if (size
== 8 && (count
& 0x04))
11115 srcmem
= adjust_automodify_address_nv (src
, SImode
, srcreg
,
11117 dstmem
= adjust_automodify_address_nv (dst
, SImode
, destreg
,
11119 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11124 srcmem
= adjust_automodify_address_nv (src
, HImode
, srcreg
,
11126 dstmem
= adjust_automodify_address_nv (dst
, HImode
, destreg
,
11128 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11133 srcmem
= adjust_automodify_address_nv (src
, QImode
, srcreg
,
11135 dstmem
= adjust_automodify_address_nv (dst
, QImode
, destreg
,
11137 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11140 /* The generic code based on the glibc implementation:
11141 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11142 allowing accelerated copying there)
11143 - copy the data using rep movsl
11144 - copy the rest. */
11149 rtx srcmem
, dstmem
;
11150 int desired_alignment
= (TARGET_PENTIUMPRO
11151 && (count
== 0 || count
>= (unsigned int) 260)
11152 ? 8 : UNITS_PER_WORD
);
11153 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11154 dst
= change_address (dst
, BLKmode
, destreg
);
11155 src
= change_address (src
, BLKmode
, srcreg
);
11157 /* In case we don't know anything about the alignment, default to
11158 library version, since it is usually equally fast and result in
11161 Also emit call when we know that the count is large and call overhead
11162 will not be important. */
11163 if (!TARGET_INLINE_ALL_STRINGOPS
11164 && (align
< UNITS_PER_WORD
|| !TARGET_REP_MOVL_OPTIMAL
))
11167 if (TARGET_SINGLE_STRINGOP
)
11168 emit_insn (gen_cld ());
11170 countreg2
= gen_reg_rtx (Pmode
);
11171 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
11173 /* We don't use loops to align destination and to copy parts smaller
11174 than 4 bytes, because gcc is able to optimize such code better (in
11175 the case the destination or the count really is aligned, gcc is often
11176 able to predict the branches) and also it is friendlier to the
11177 hardware branch prediction.
11179 Using loops is beneficial for generic case, because we can
11180 handle small counts using the loops. Many CPUs (such as Athlon)
11181 have large REP prefix setup costs.
11183 This is quite costly. Maybe we can revisit this decision later or
11184 add some customizability to this code. */
11186 if (count
== 0 && align
< desired_alignment
)
11188 label
= gen_label_rtx ();
11189 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
11190 LEU
, 0, counter_mode
, 1, label
);
11194 rtx label
= ix86_expand_aligntest (destreg
, 1);
11195 srcmem
= change_address (src
, QImode
, srcreg
);
11196 dstmem
= change_address (dst
, QImode
, destreg
);
11197 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11198 ix86_adjust_counter (countreg
, 1);
11199 emit_label (label
);
11200 LABEL_NUSES (label
) = 1;
11204 rtx label
= ix86_expand_aligntest (destreg
, 2);
11205 srcmem
= change_address (src
, HImode
, srcreg
);
11206 dstmem
= change_address (dst
, HImode
, destreg
);
11207 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11208 ix86_adjust_counter (countreg
, 2);
11209 emit_label (label
);
11210 LABEL_NUSES (label
) = 1;
11212 if (align
<= 4 && desired_alignment
> 4)
11214 rtx label
= ix86_expand_aligntest (destreg
, 4);
11215 srcmem
= change_address (src
, SImode
, srcreg
);
11216 dstmem
= change_address (dst
, SImode
, destreg
);
11217 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11218 ix86_adjust_counter (countreg
, 4);
11219 emit_label (label
);
11220 LABEL_NUSES (label
) = 1;
11223 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
11225 emit_label (label
);
11226 LABEL_NUSES (label
) = 1;
11229 if (!TARGET_SINGLE_STRINGOP
)
11230 emit_insn (gen_cld ());
11233 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
11235 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, GEN_INT (3));
11239 emit_insn (gen_lshrsi3 (countreg2
, countreg
, const2_rtx
));
11240 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, const2_rtx
);
11242 srcexp
= gen_rtx_PLUS (Pmode
, destexp
, srcreg
);
11243 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
11244 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
,
11245 countreg2
, destexp
, srcexp
));
11249 emit_label (label
);
11250 LABEL_NUSES (label
) = 1;
11252 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
11254 srcmem
= change_address (src
, SImode
, srcreg
);
11255 dstmem
= change_address (dst
, SImode
, destreg
);
11256 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11258 if ((align
<= 4 || count
== 0) && TARGET_64BIT
)
11260 rtx label
= ix86_expand_aligntest (countreg
, 4);
11261 srcmem
= change_address (src
, SImode
, srcreg
);
11262 dstmem
= change_address (dst
, SImode
, destreg
);
11263 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11264 emit_label (label
);
11265 LABEL_NUSES (label
) = 1;
11267 if (align
> 2 && count
!= 0 && (count
& 2))
11269 srcmem
= change_address (src
, HImode
, srcreg
);
11270 dstmem
= change_address (dst
, HImode
, destreg
);
11271 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11273 if (align
<= 2 || count
== 0)
11275 rtx label
= ix86_expand_aligntest (countreg
, 2);
11276 srcmem
= change_address (src
, HImode
, srcreg
);
11277 dstmem
= change_address (dst
, HImode
, destreg
);
11278 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11279 emit_label (label
);
11280 LABEL_NUSES (label
) = 1;
11282 if (align
> 1 && count
!= 0 && (count
& 1))
11284 srcmem
= change_address (src
, QImode
, srcreg
);
11285 dstmem
= change_address (dst
, QImode
, destreg
);
11286 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11288 if (align
<= 1 || count
== 0)
11290 rtx label
= ix86_expand_aligntest (countreg
, 1);
11291 srcmem
= change_address (src
, QImode
, srcreg
);
11292 dstmem
= change_address (dst
, QImode
, destreg
);
11293 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
11294 emit_label (label
);
11295 LABEL_NUSES (label
) = 1;
11302 /* Expand string clear operation (bzero). Use i386 string operations when
11303 profitable. expand_movstr contains similar code. */
11305 ix86_expand_clrstr (rtx dst
, rtx count_exp
, rtx align_exp
)
11307 rtx destreg
, zeroreg
, countreg
, destexp
;
11308 enum machine_mode counter_mode
;
11309 HOST_WIDE_INT align
= 0;
11310 unsigned HOST_WIDE_INT count
= 0;
11312 if (GET_CODE (align_exp
) == CONST_INT
)
11313 align
= INTVAL (align_exp
);
11315 /* Can't use any of this if the user has appropriated esi. */
11316 if (global_regs
[4])
11319 /* This simple hack avoids all inlining code and simplifies code below. */
11320 if (!TARGET_ALIGN_STRINGOPS
)
11323 if (GET_CODE (count_exp
) == CONST_INT
)
11325 count
= INTVAL (count_exp
);
11326 if (!TARGET_INLINE_ALL_STRINGOPS
&& count
> 64)
11329 /* Figure out proper mode for counter. For 32bits it is always SImode,
11330 for 64bits use SImode when possible, otherwise DImode.
11331 Set count to number of bytes copied when known at compile time. */
11332 if (!TARGET_64BIT
|| GET_MODE (count_exp
) == SImode
11333 || x86_64_zero_extended_value (count_exp
))
11334 counter_mode
= SImode
;
11336 counter_mode
= DImode
;
11338 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
11339 if (destreg
!= XEXP (dst
, 0))
11340 dst
= replace_equiv_address_nv (dst
, destreg
);
11342 emit_insn (gen_cld ());
11344 /* When optimizing for size emit simple rep ; movsb instruction for
11345 counts not divisible by 4. */
11347 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
11349 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
11350 zeroreg
= copy_to_mode_reg (QImode
, const0_rtx
);
11351 destexp
= gen_rtx_PLUS (Pmode
, destreg
, countreg
);
11352 emit_insn (gen_rep_stos (destreg
, countreg
, dst
, zeroreg
, destexp
));
11354 else if (count
!= 0
11356 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
11357 || optimize_size
|| count
< (unsigned int) 64))
11359 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
11360 unsigned HOST_WIDE_INT offset
= 0;
11362 zeroreg
= copy_to_mode_reg (size
== 4 ? SImode
: DImode
, const0_rtx
);
11363 if (count
& ~(size
- 1))
11365 countreg
= copy_to_mode_reg (counter_mode
,
11366 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
11367 & (TARGET_64BIT
? -1 : 0x3fffffff)));
11368 countreg
= ix86_zero_extend_to_Pmode (countreg
);
11369 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
, GEN_INT (size
== 4 ? 2 : 3));
11370 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
11371 emit_insn (gen_rep_stos (destreg
, countreg
, dst
, zeroreg
, destexp
));
11372 offset
= count
& ~(size
- 1);
11374 if (size
== 8 && (count
& 0x04))
11376 rtx mem
= adjust_automodify_address_nv (dst
, SImode
, destreg
,
11378 emit_insn (gen_strset (destreg
, mem
,
11379 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
11384 rtx mem
= adjust_automodify_address_nv (dst
, HImode
, destreg
,
11386 emit_insn (gen_strset (destreg
, mem
,
11387 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
11392 rtx mem
= adjust_automodify_address_nv (dst
, QImode
, destreg
,
11394 emit_insn (gen_strset (destreg
, mem
,
11395 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
11402 /* Compute desired alignment of the string operation. */
11403 int desired_alignment
= (TARGET_PENTIUMPRO
11404 && (count
== 0 || count
>= (unsigned int) 260)
11405 ? 8 : UNITS_PER_WORD
);
11407 /* In case we don't know anything about the alignment, default to
11408 library version, since it is usually equally fast and result in
11411 Also emit call when we know that the count is large and call overhead
11412 will not be important. */
11413 if (!TARGET_INLINE_ALL_STRINGOPS
11414 && (align
< UNITS_PER_WORD
|| !TARGET_REP_MOVL_OPTIMAL
))
11417 if (TARGET_SINGLE_STRINGOP
)
11418 emit_insn (gen_cld ());
11420 countreg2
= gen_reg_rtx (Pmode
);
11421 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
11422 zeroreg
= copy_to_mode_reg (Pmode
, const0_rtx
);
11423 /* Get rid of MEM_OFFSET, it won't be accurate. */
11424 dst
= change_address (dst
, BLKmode
, destreg
);
11426 if (count
== 0 && align
< desired_alignment
)
11428 label
= gen_label_rtx ();
11429 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
11430 LEU
, 0, counter_mode
, 1, label
);
11434 rtx label
= ix86_expand_aligntest (destreg
, 1);
11435 emit_insn (gen_strset (destreg
, dst
,
11436 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
11437 ix86_adjust_counter (countreg
, 1);
11438 emit_label (label
);
11439 LABEL_NUSES (label
) = 1;
11443 rtx label
= ix86_expand_aligntest (destreg
, 2);
11444 emit_insn (gen_strset (destreg
, dst
,
11445 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
11446 ix86_adjust_counter (countreg
, 2);
11447 emit_label (label
);
11448 LABEL_NUSES (label
) = 1;
11450 if (align
<= 4 && desired_alignment
> 4)
11452 rtx label
= ix86_expand_aligntest (destreg
, 4);
11453 emit_insn (gen_strset (destreg
, dst
,
11455 ? gen_rtx_SUBREG (SImode
, zeroreg
, 0)
11457 ix86_adjust_counter (countreg
, 4);
11458 emit_label (label
);
11459 LABEL_NUSES (label
) = 1;
11462 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
11464 emit_label (label
);
11465 LABEL_NUSES (label
) = 1;
11469 if (!TARGET_SINGLE_STRINGOP
)
11470 emit_insn (gen_cld ());
11473 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
11475 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, GEN_INT (3));
11479 emit_insn (gen_lshrsi3 (countreg2
, countreg
, const2_rtx
));
11480 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, const2_rtx
);
11482 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
11483 emit_insn (gen_rep_stos (destreg
, countreg2
, dst
, zeroreg
, destexp
));
11487 emit_label (label
);
11488 LABEL_NUSES (label
) = 1;
11491 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
11492 emit_insn (gen_strset (destreg
, dst
,
11493 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
11494 if (TARGET_64BIT
&& (align
<= 4 || count
== 0))
11496 rtx label
= ix86_expand_aligntest (countreg
, 4);
11497 emit_insn (gen_strset (destreg
, dst
,
11498 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
11499 emit_label (label
);
11500 LABEL_NUSES (label
) = 1;
11502 if (align
> 2 && count
!= 0 && (count
& 2))
11503 emit_insn (gen_strset (destreg
, dst
,
11504 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
11505 if (align
<= 2 || count
== 0)
11507 rtx label
= ix86_expand_aligntest (countreg
, 2);
11508 emit_insn (gen_strset (destreg
, dst
,
11509 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
11510 emit_label (label
);
11511 LABEL_NUSES (label
) = 1;
11513 if (align
> 1 && count
!= 0 && (count
& 1))
11514 emit_insn (gen_strset (destreg
, dst
,
11515 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
11516 if (align
<= 1 || count
== 0)
11518 rtx label
= ix86_expand_aligntest (countreg
, 1);
11519 emit_insn (gen_strset (destreg
, dst
,
11520 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
11521 emit_label (label
);
11522 LABEL_NUSES (label
) = 1;
11528 /* Expand strlen. */
/* NOTE(review): this extract is garbled -- each statement is split across
   physical lines and some guard lines (e.g. the TARGET_64BIT tests that
   select the di/si insn variants) appear to be missing.  Comments below
   describe only what the visible calls show; confirm against upstream
   i386.c before relying on them.

   Expands strlen(SRC) into OUT.  EOSCHAR is the end-of-string character
   (only const0_rtx takes the unrolled path); ALIGN is the known alignment
   of SRC as an rtx.  Two strategies are visible:
     1. the unrolled word-at-a-time scan via ix86_expand_strlensi_unroll_1,
        taken when TARGET_UNROLL_STRLEN, eoschar == 0, optimizing, and the
        alignment is unknown or < 4;
     2. a repnz-scasb style scan via gen_strlenqi_1 otherwise.  */
11530 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
11532 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
11534 /* The generic case of strlen expander is long. Avoid it's
11535 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11537 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
11538 && !TARGET_INLINE_ALL_STRINGOPS
11540 && (GET_CODE (align
) != CONST_INT
|| INTVAL (align
) < 4))
/* Common setup: materialize the string address in a register and grab a
   scratch pseudo.  */
11543 addr
= force_reg (Pmode
, XEXP (src
, 0));
11544 scratch1
= gen_reg_rtx (Pmode
);
11546 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
11549 /* Well it seems that some optimizer does not combine a call like
11550 foo(strlen(bar), strlen(bar));
11551 when the move and the subtraction is done here. It does calculate
11552 the length just once when these instructions are done inside of
11553 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11554 often used and I use one fewer register for the lifetime of
11555 output_strlen_unroll() this is better. */
11557 emit_move_insn (out
, addr
);
11559 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
11561 /* strlensi_unroll_1 returns the address of the zero at the end of
11562 the string, like memchr(), so compute the length by subtracting
11563 the start address. */
11565 emit_insn (gen_subdi3 (out
, out
, addr
));
11567 emit_insn (gen_subsi3 (out
, out
, addr
));
/* Fallback path: scan with the scas-based strlenqi_1 pattern.  scratch4
   holds -1 (the maximal count for the rep prefix).  */
11572 scratch2
= gen_reg_rtx (Pmode
);
11573 scratch3
= gen_reg_rtx (Pmode
);
11574 scratch4
= force_reg (Pmode
, constm1_rtx
);
11576 emit_move_insn (scratch3
, addr
);
11577 eoschar
= force_reg (QImode
, eoschar
);
11579 emit_insn (gen_cld ());
11580 src
= replace_equiv_address_nv (src
, scratch3
);
11582 /* If .md starts supporting :P, this can be done in .md. */
11583 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
11584 scratch4
), UNSPEC_SCAS
);
11585 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
/* scas leaves the negated length-plus-one in scratch1; recover the
   length as (~scratch1) - 1.  The di/si pair presumably corresponds to a
   missing TARGET_64BIT test -- confirm against upstream.  */
11588 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
11589 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
11593 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
11594 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
11600 /* Expand the appropriate insns for doing strlen if not just doing
11603 out = result, initialized with the start address
11604 align_rtx = alignment of the address.
11605 scratch = scratch register, initialized with the startaddress when
11606 not aligned, otherwise undefined
11608 This is just the body. It needs the initializations mentioned above and
11609 some address computing at the end. These things are done in i386.md. */
/* NOTE(review): garbled extract -- statements are split across lines and
   branch/guard lines are missing.  Visible structure: align the pointer by
   checking up to 3 leading bytes for NUL, then loop reading 4 bytes at a
   time and detect a zero byte with the classic
   (x - 0x01010101) & ~x & 0x80808080 trick, then fix up OUT to point at
   the exact NUL byte.  */
11612 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
11616 rtx align_2_label
= NULL_RTX
;
11617 rtx align_3_label
= NULL_RTX
;
11618 rtx align_4_label
= gen_label_rtx ();
11619 rtx end_0_label
= gen_label_rtx ();
11621 rtx tmpreg
= gen_reg_rtx (SImode
);
11622 rtx scratch
= gen_reg_rtx (SImode
);
11626 if (GET_CODE (align_rtx
) == CONST_INT
)
11627 align
= INTVAL (align_rtx
);
11629 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11631 /* Is there a known alignment and is it less than 4? */
11634 rtx scratch1
= gen_reg_rtx (Pmode
);
11635 emit_move_insn (scratch1
, out
);
11636 /* Is there a known alignment and is it not 2? */
11639 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
11640 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
11642 /* Leave just the 3 lower bits. */
11643 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
11644 NULL_RTX
, 0, OPTAB_WIDEN
);
/* Dispatch on (addr & 3): 0 -> already aligned, 2 -> two bytes to check,
   >2 -> three bytes to check.  */
11646 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
11647 Pmode
, 1, align_4_label
);
11648 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
11649 Pmode
, 1, align_2_label
);
11650 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
11651 Pmode
, 1, align_3_label
);
11655 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11656 check if is aligned to 4 - byte. */
11658 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
11659 NULL_RTX
, 0, OPTAB_WIDEN
);
11661 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
11662 Pmode
, 1, align_4_label
);
11665 mem
= change_address (src
, QImode
, out
);
11667 /* Now compare the bytes. */
11669 /* Compare the first n unaligned byte on a byte per byte basis. */
11670 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
11671 QImode
, 1, end_0_label
);
11673 /* Increment the address. */
/* di/si increment pair -- presumably guarded by a missing TARGET_64BIT
   test; confirm against upstream.  */
11675 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
11677 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
11679 /* Not needed with an alignment of 2 */
11682 emit_label (align_2_label
);
11684 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
11688 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
11690 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
11692 emit_label (align_3_label
);
11695 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
11699 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
11701 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
11704 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11705 align this loop. It gives only huge programs, but does not help to
11707 emit_label (align_4_label
);
11709 mem
= change_address (src
, SImode
, out
);
11710 emit_move_insn (scratch
, mem
);
11712 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
11714 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
11716 /* This formula yields a nonzero result iff one of the bytes is zero.
11717 This saves three branches inside loop and many cycles. */
11719 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
11720 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
11721 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
11722 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
11723 gen_int_mode (0x80808080, SImode
)));
11724 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
/* A zero byte was seen in this word.  Branch-free fixup path: use cmove
   to step OUT past the halfword that does not contain the zero.  */
11729 rtx reg
= gen_reg_rtx (SImode
);
11730 rtx reg2
= gen_reg_rtx (Pmode
);
11731 emit_move_insn (reg
, tmpreg
);
11732 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
11734 /* If zero is not in the first two bytes, move two bytes forward. */
11735 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
11736 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
11737 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
11738 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
11739 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
11742 /* Emit lea manually to avoid clobbering of flags. */
11743 emit_insn (gen_rtx_SET (SImode
, reg2
,
11744 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
11746 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
11747 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
11748 emit_insn (gen_rtx_SET (VOIDmode
, out
,
11749 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
/* Alternate (branching) fixup path for targets without cmove.  */
11756 rtx end_2_label
= gen_label_rtx ();
11757 /* Is zero in the first two bytes? */
11759 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
11760 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
11761 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
11762 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11763 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
11765 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
11766 JUMP_LABEL (tmp
) = end_2_label
;
11768 /* Not in the first two. Move two bytes forward. */
11769 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
11771 emit_insn (gen_adddi3 (out
, out
, const2_rtx
));
11773 emit_insn (gen_addsi3 (out
, out
, const2_rtx
));
11775 emit_label (end_2_label
);
11779 /* Avoid branch in fixing the byte. */
/* Shift the zero-flag bit of the low byte into the carry flag, then
   subtract (3 + carry) so OUT lands exactly on the NUL byte.  */
11780 tmpreg
= gen_lowpart (QImode
, tmpreg
);
11781 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
11782 cmp
= gen_rtx_LTU (Pmode
, gen_rtx_REG (CCmode
, 17), const0_rtx
);
11784 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3), cmp
));
11786 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3), cmp
));
11788 emit_label (end_0_label
);
/* Expand a (sibling) call.  RETVAL is the value-receiving rtx or NULL,
   FNADDR is the MEM holding the callee address, CALLARG1 is the argument
   bytes rtx, CALLARG2 carries the SSE-register count for the x86-64
   varargs convention (loaded into %al when >= 0), POP is the number of
   bytes the callee pops, SIBCALL is nonzero for a tail call.
   NOTE(review): garbled extract -- statement bodies and some guards are
   missing; the machopic_* call is presumably inside #if TARGET_MACHO
   (see the stray #endif below).  Confirm against upstream i386.c.  */
11792 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
11793 rtx callarg2 ATTRIBUTE_UNUSED
,
11794 rtx pop
, int sibcall
)
11796 rtx use
= NULL
, call
;
11798 if (pop
== const0_rtx
)
11800 if (TARGET_64BIT
&& pop
)
11804 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
11805 fnaddr
= machopic_indirect_call_target (fnaddr
);
11807 /* Static functions and indirect calls don't need the pic register. */
11808 if (! TARGET_64BIT
&& flag_pic
11809 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
11810 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
11811 use_reg (&use
, pic_offset_table_rtx
);
/* x86-64 varargs: %al tells the callee how many SSE regs hold args.  */
11813 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
11815 rtx al
= gen_rtx_REG (QImode
, 0);
11816 emit_move_insn (al
, callarg2
);
11817 use_reg (&use
, al
);
11819 #endif /* TARGET_MACHO */
11821 if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
11823 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
11824 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
/* 64-bit sibcalls through a non-constant address must go via R11, the
   only call-clobbered register not used for argument passing.  */
11826 if (sibcall
&& TARGET_64BIT
11827 && !constant_call_address_operand (XEXP (fnaddr
, 0), Pmode
))
11830 addr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
11831 fnaddr
= gen_rtx_REG (Pmode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
11832 emit_move_insn (fnaddr
, addr
);
11833 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
11836 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
11838 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
/* Callee-pop: bundle the stack adjustment into the call PARALLEL.  */
11841 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
11842 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
11843 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
11846 call
= emit_call_insn (call
);
11848 CALL_INSN_FUNCTION_USAGE (call
) = use
;
11852 /* Clear stack slot assignments remembered from previous functions.
11853 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocates a zeroed per-function machine_function record via GC; the
   -1 marks use_fast_prologue_epilogue_nregs as not-yet-computed.
   NOTE(review): the closing return statement is missing from this
   extract.  */
11856 static struct machine_function
*
11857 ix86_init_machine_status (void)
11859 struct machine_function
*f
;
11861 f
= ggc_alloc_cleared (sizeof (struct machine_function
));
11862 f
->use_fast_prologue_epilogue_nregs
= -1;
11867 /* Return a MEM corresponding to a stack slot with mode MODE.
11868 Allocate a new slot if necessary.
11870 The RTL for a function can have several slots available: N is
11871 which slot to use. */
/* Slots are cached per (mode, n) pair in the GC-allocated
   ix86_stack_locals linked list, so repeated requests reuse the same
   stack slot.  NOTE(review): garbled extract -- the early-return of a
   cached slot and the final return are missing from view.  */
11874 assign_386_stack_local (enum machine_mode mode
, int n
)
11876 struct stack_local_entry
*s
;
11878 if (n
< 0 || n
>= MAX_386_STACK_LOCALS
)
/* Linear search of previously assigned slots.  */
11881 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
11882 if (s
->mode
== mode
&& s
->n
== n
)
11885 s
= (struct stack_local_entry
*)
11886 ggc_alloc (sizeof (struct stack_local_entry
));
11889 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
/* Push the new entry on the front of the cache list.  */
11891 s
->next
= ix86_stack_locals
;
11892 ix86_stack_locals
= s
11896 /* Construct the SYMBOL_REF for the tls_get_addr function. */
/* Lazily built and cached in a GC root; the GNU TLS dialect on 32-bit
   uses the triple-underscore entry point, everything else uses
   __tls_get_addr.  */
11898 static GTY(()) rtx ix86_tls_symbol
;
11900 ix86_tls_get_addr (void)
11903 if (!ix86_tls_symbol
)
11905 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
11906 (TARGET_GNU_TLS
&& !TARGET_64BIT
)
11907 ? "___tls_get_addr"
11908 : "__tls_get_addr");
11911 return ix86_tls_symbol
;
11914 /* Calculate the length of the memory address in the instruction
11915 encoding. Does not include the one-byte modrm, opcode, or prefix. */
/* NOTE(review): garbled extract -- the return statements carrying the
   actual byte counts are missing, so only the case analysis is visible:
   register-indirect, direct (disp-only), and base+index+disp forms, with
   special handling for esp (needs SIB) and ebp (needs displacement).  */
11918 memory_address_length (rtx addr
)
11920 struct ix86_address parts
;
11921 rtx base
, index
, disp
;
/* Auto-modified addresses have no encoded displacement/SIB to count.  */
11924 if (GET_CODE (addr
) == PRE_DEC
11925 || GET_CODE (addr
) == POST_INC
11926 || GET_CODE (addr
) == PRE_MODIFY
11927 || GET_CODE (addr
) == POST_MODIFY
)
11930 if (! ix86_decompose_address (addr
, &parts
))
11934 index
= parts
.index
;
11939 - esp as the base always wants an index,
11940 - ebp as the base always wants a displacement. */
11942 /* Register Indirect. */
11943 if (base
&& !index
&& !disp
)
11945 /* esp (for its index) and ebp (for its displacement) need
11946 the two-byte modrm form. */
11947 if (addr
== stack_pointer_rtx
11948 || addr
== arg_pointer_rtx
11949 || addr
== frame_pointer_rtx
11950 || addr
== hard_frame_pointer_rtx
)
11954 /* Direct Addressing. */
11955 else if (disp
&& !base
&& !index
)
11960 /* Find the length of the displacement constant. */
/* 'K' constraint = signed 8-bit immediate, i.e. a disp8 encoding.  */
11963 if (GET_CODE (disp
) == CONST_INT
11964 && CONST_OK_FOR_LETTER_P (INTVAL (disp
), 'K')
11970 /* ebp always wants a displacement. */
11971 else if (base
== hard_frame_pointer_rtx
)
11974 /* An index requires the two-byte modrm form.... */
11976 /* ...like esp, which always wants an index. */
11977 || base
== stack_pointer_rtx
11978 || base
== arg_pointer_rtx
11979 || base
== frame_pointer_rtx
)
11986 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11987 is set, expect that insn have 8bit immediate alternative. */
/* Scans the recognized insn's operands for constants; with SHORTFORM, a
   CONST_INT satisfying the 'K' (signed 8-bit) constraint is counted as a
   1-byte immediate, otherwise the immediate size is derived from the
   insn's mode attribute.  NOTE(review): the returns per mode are missing
   from this garbled extract.  */
11989 ix86_attr_length_immediate_default (rtx insn
, int shortform
)
11993 extract_insn_cached (insn
);
11994 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
11995 if (CONSTANT_P (recog_data
.operand
[i
]))
12000 && GET_CODE (recog_data
.operand
[i
]) == CONST_INT
12001 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data
.operand
[i
]), 'K'))
12005 switch (get_attr_mode (insn
))
12016 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
12021 fatal_insn ("unknown insn mode", insn
);
12027 /* Compute default value for "length_address" attribute. */
/* For an lea, measure the address that is the SET_SRC of its pattern;
   otherwise measure the first MEM operand found.  Delegates the actual
   byte count to memory_address_length.  */
12029 ix86_attr_length_address_default (rtx insn
)
12033 if (get_attr_type (insn
) == TYPE_LEA
)
12035 rtx set
= PATTERN (insn
);
12036 if (GET_CODE (set
) == SET
)
/* lea may be wrapped in a PARALLEL (e.g. with a flags clobber).  */
12038 else if (GET_CODE (set
) == PARALLEL
12039 && GET_CODE (XVECEXP (set
, 0, 0)) == SET
)
12040 set
= XVECEXP (set
, 0, 0);
12043 #ifdef ENABLE_CHECKING
12049 return memory_address_length (SET_SRC (set
));
12052 extract_insn_cached (insn
);
12053 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
12054 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
12056 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
12062 /* Return the maximum number of instructions a cpu can issue. */
/* Used by the scheduler; dispatches on ix86_tune.  NOTE(review): the
   per-case return values are missing from this garbled extract.  */
12065 ix86_issue_rate (void)
12069 case PROCESSOR_PENTIUM
:
12073 case PROCESSOR_PENTIUMPRO
:
12074 case PROCESSOR_PENTIUM4
:
12075 case PROCESSOR_ATHLON
:
12084 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
12085 by DEP_INSN and nothing set by DEP_INSN. */
/* Only setcc/icmov/fcmov/ibr insns can consume flags; for DEP_INSN we
   accept either a single_set or a two-element PARALLEL of SETs, and
   require the (first) destination to be the flags register.  */
12088 ix86_flags_dependant (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
12092 /* Simplify the test for uninteresting insns. */
12093 if (insn_type
!= TYPE_SETCC
12094 && insn_type
!= TYPE_ICMOV
12095 && insn_type
!= TYPE_FCMOV
12096 && insn_type
!= TYPE_IBR
)
12099 if ((set
= single_set (dep_insn
)) != 0)
12101 set
= SET_DEST (set
);
12104 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
12105 && XVECLEN (PATTERN (dep_insn
), 0) == 2
12106 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
12107 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
12109 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
/* NOTE(review): set2 is taken from PARALLEL element 0, the same element
   as set, even though the guard above validated element 1.  This looks
   like it was meant to be (..., 0, 1) -- confirm against upstream
   i386.c before changing.  */
12110 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
12115 if (GET_CODE (set
) != REG
|| REGNO (set
) != FLAGS_REG
)
12118 /* This test is true if the dependent insn reads the flags but
12119 not any other potentially set register. */
12120 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
12123 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
12129 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12130 address with operands set by DEP_INSN. */
/* For lea, the "address" is the SET_SRC of its pattern; for other insns,
   it is the address of the first MEM operand found.  The final
   modified_in_p decides whether DEP_INSN writes any register that the
   address reads (the classic AGI stall condition).  */
12133 ix86_agi_dependant (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
12137 if (insn_type
== TYPE_LEA
12140 addr
= PATTERN (insn
);
12141 if (GET_CODE (addr
) == SET
)
12143 else if (GET_CODE (addr
) == PARALLEL
12144 && GET_CODE (XVECEXP (addr
, 0, 0)) == SET
)
12145 addr
= XVECEXP (addr
, 0, 0);
12148 addr
= SET_SRC (addr
);
12153 extract_insn_cached (insn
);
12154 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
12155 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
12157 addr
= XEXP (recog_data
.operand
[i
], 0);
12164 return modified_in_p (addr
, dep_insn
);
/* TARGET_SCHED_ADJUST_COST hook: tweak the scheduler's latency COST for
   the dependence LINK between DEP_INSN (producer) and INSN (consumer),
   per tuning target.  NOTE(review): garbled extract -- the switch head,
   several cost adjustments and the final return are missing; comments
   describe only the visible tests.  */
12168 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
12170 enum attr_type insn_type
, dep_insn_type
;
12171 enum attr_memory memory
, dep_memory
;
12173 int dep_insn_code_number
;
12175 /* Anti and output dependencies have zero cost on all CPUs. */
12176 if (REG_NOTE_KIND (link
) != 0)
12179 dep_insn_code_number
= recog_memoized (dep_insn
);
12181 /* If we can't recognize the insns, we can't really do anything. */
12182 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
12185 insn_type
= get_attr_type (insn
);
12186 dep_insn_type
= get_attr_type (dep_insn
);
12190 case PROCESSOR_PENTIUM
:
12191 /* Address Generation Interlock adds a cycle of latency. */
12192 if (ix86_agi_dependant (insn
, dep_insn
, insn_type
))
12195 /* ??? Compares pair with jump/setcc. */
12196 if (ix86_flags_dependant (insn
, dep_insn
, insn_type
))
12199 /* Floating point stores require value to be ready one cycle earlier. */
12200 if (insn_type
== TYPE_FMOV
12201 && get_attr_memory (insn
) == MEMORY_STORE
12202 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
12206 case PROCESSOR_PENTIUMPRO
:
12207 memory
= get_attr_memory (insn
);
12208 dep_memory
= get_attr_memory (dep_insn
);
12210 /* Since we can't represent delayed latencies of load+operation,
12211 increase the cost here for non-imov insns. */
12212 if (dep_insn_type
!= TYPE_IMOV
12213 && dep_insn_type
!= TYPE_FMOV
12214 && (dep_memory
== MEMORY_LOAD
|| dep_memory
== MEMORY_BOTH
))
12217 /* INT->FP conversion is expensive. */
12218 if (get_attr_fp_int_src (dep_insn
))
12221 /* There is one cycle extra latency between an FP op and a store. */
12222 if (insn_type
== TYPE_FMOV
12223 && (set
= single_set (dep_insn
)) != NULL_RTX
12224 && (set2
= single_set (insn
)) != NULL_RTX
12225 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
12226 && GET_CODE (SET_DEST (set2
)) == MEM
)
12229 /* Show ability of reorder buffer to hide latency of load by executing
12230 in parallel with previous instruction in case
12231 previous instruction is not needed to compute the address. */
12232 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
12233 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
12235 /* Claim moves to take one cycle, as core can issue one load
12236 at time and the next load can start cycle later. */
12237 if (dep_insn_type
== TYPE_IMOV
12238 || dep_insn_type
== TYPE_FMOV
)
/* NOTE(review): the case label between here and the push/pop test is
   missing in this extract (presumably PROCESSOR_K6 upstream).  */
12246 memory
= get_attr_memory (insn
);
12247 dep_memory
= get_attr_memory (dep_insn
);
12248 /* The esp dependency is resolved before the instruction is really
12250 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
12251 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
12254 /* Since we can't represent delayed latencies of load+operation,
12255 increase the cost here for non-imov insns. */
12256 if (dep_memory
== MEMORY_LOAD
|| dep_memory
== MEMORY_BOTH
)
12257 cost
+= (dep_insn_type
!= TYPE_IMOV
) ? 2 : 1;
12259 /* INT->FP conversion is expensive. */
12260 if (get_attr_fp_int_src (dep_insn
))
12263 /* Show ability of reorder buffer to hide latency of load by executing
12264 in parallel with previous instruction in case
12265 previous instruction is not needed to compute the address. */
12266 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
12267 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
12269 /* Claim moves to take one cycle, as core can issue one load
12270 at time and the next load can start cycle later. */
12271 if (dep_insn_type
== TYPE_IMOV
12272 || dep_insn_type
== TYPE_FMOV
)
12281 case PROCESSOR_ATHLON
:
12283 memory
= get_attr_memory (insn
);
12284 dep_memory
= get_attr_memory (dep_insn
);
12286 /* Show ability of reorder buffer to hide latency of load by executing
12287 in parallel with previous instruction in case
12288 previous instruction is not needed to compute the address. */
12289 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
12290 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
12292 enum attr_unit unit
= get_attr_unit (insn
);
12295 /* Because of the difference between the length of integer and
12296 floating unit pipeline preparation stages, the memory operands
12297 for floating point are cheaper.
12299 ??? For Athlon it the difference is most probably 2. */
12300 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
12303 loadcost
= TARGET_ATHLON
? 2 : 0;
12305 if (cost
>= loadcost
)
/* Scheduler hook: nonzero when the tuning target has a DFA pipeline
   description.  NOTE(review): the first disjunct and the return bodies
   are missing from this garbled extract.  */
12319 ia32_use_dfa_pipeline_interface (void)
12322 || TARGET_PENTIUMPRO
12323 || TARGET_ATHLON_K8
)
12328 /* How many alternative schedules to try. This should be as wide as the
12329 scheduling freedom in the DFA, but no wider. Making this value too
12330 large results extra work for the scheduler. */
/* Per-tuning lookahead depth; NOTE(review): the returned values and the
   default return are missing from this garbled extract.  */
12333 ia32_multipass_dfa_lookahead (void)
12335 if (ix86_tune
== PROCESSOR_PENTIUM
)
12338 if (ix86_tune
== PROCESSOR_PENTIUMPRO
)
12346 /* Compute the alignment given to a constant that is being placed in memory.
12347 EXP is the constant and ALIGN is the alignment that the object would
12349 The value of this function is used instead of that alignment to align
/* Bumps doubles to 64-bit and 128-bit-mode constants to 128-bit
   alignment; long string constants get word alignment unless optimizing
   for size.  NOTE(review): the raised-alignment return values for the
   REAL_CST branches are missing from this extract.  */
12353 ix86_constant_alignment (tree exp
, int align
)
12355 if (TREE_CODE (exp
) == REAL_CST
)
12357 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
12359 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
12362 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
12363 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
12364 return BITS_PER_WORD
;
12369 /* Compute the alignment for a static variable.
12370 TYPE is the data type, and ALIGN is the alignment that
12371 the object would ordinarily have. The value of this function is used
12372 instead of that alignment to align the object. */
/* NOTE(review): garbled extract -- the raised-alignment return values
   are missing; only the type-classification tests are visible.  Large
   aggregates, DFmode/DCmode data and 128-bit-mode data are the cases
   whose alignment is raised.  */
12375 ix86_data_alignment (tree type
, int align
)
12377 if (AGGREGATE_TYPE_P (type
)
12378 && TYPE_SIZE (type
)
12379 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
12380 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 256
12381 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 256)
12384 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12385 to 16byte boundary. */
12388 if (AGGREGATE_TYPE_P (type
)
12389 && TYPE_SIZE (type
)
12390 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
12391 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
12392 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
/* Element-type based bumps for arrays, complex values, records (first
   field's mode) and scalar reals/vectors/integers.  */
12396 if (TREE_CODE (type
) == ARRAY_TYPE
)
12398 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
12400 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
12403 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
12406 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
12408 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
12411 else if ((TREE_CODE (type
) == RECORD_TYPE
12412 || TREE_CODE (type
) == UNION_TYPE
12413 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
12414 && TYPE_FIELDS (type
))
12416 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
12418 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
12421 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
12422 || TREE_CODE (type
) == INTEGER_TYPE
)
12424 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
12426 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
12433 /* Compute the alignment for a local variable.
12434 TYPE is the data type, and ALIGN is the alignment that
12435 the object would ordinarily have. The value of this macro is used
12436 instead of that alignment to align the object. */
/* Mirrors ix86_data_alignment but for stack locals (smaller aggregate
   threshold: 16 bytes).  NOTE(review): garbled extract -- the raised
   return values are missing; only the classification tests remain.  */
12439 ix86_local_alignment (tree type
, int align
)
12441 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12442 to 16byte boundary. */
12445 if (AGGREGATE_TYPE_P (type
)
12446 && TYPE_SIZE (type
)
12447 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
12448 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
12449 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
12452 if (TREE_CODE (type
) == ARRAY_TYPE
)
12454 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
12456 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
12459 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
12461 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
12463 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
12466 else if ((TREE_CODE (type
) == RECORD_TYPE
12467 || TREE_CODE (type
) == UNION_TYPE
12468 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
12469 && TYPE_FIELDS (type
))
12471 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
12473 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
12476 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
12477 || TREE_CODE (type
) == INTEGER_TYPE
)
12480 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
12482 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
12488 /* Emit RTL insns to initialize the variable parts of a trampoline.
12489 FNADDR is an RTX for the address of the function's pure code.
12490 CXT is an RTX for the static chain value for the function. */
/* 32-bit layout (visible below):
     0xb9 <cxt:4>       mov $cxt, %ecx      (static chain in %ecx)
     0xe9 <disp:4>      jmp fnaddr          (relative, hence the subtract
                                             of tramp+10)
   64-bit layout: load fnaddr into r11 (movl 0x41bb when it fits in 32
   bits, else movabs 0x49bb), movabs the chain into r10 (0x49ba), then
   jmp *%r11 (0x49 0xff 0xe3).  NOTE(review): garbled extract -- the
   TARGET_64BIT guard, the offset updates and the abort on overflow body
   are missing; confirm byte offsets against upstream i386.c.  */
12492 x86_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
12496 /* Compute offset from the end of the jmp to the target function. */
12497 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
12498 plus_constant (tramp
, 10),
12499 NULL_RTX
, 1, OPTAB_DIRECT
);
12500 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
12501 gen_int_mode (0xb9, QImode
));
12502 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
12503 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
12504 gen_int_mode (0xe9, QImode
));
12505 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
12510 /* Try to load address using shorter movl instead of movabs.
12511 We may want to support movq for kernel mode, but kernel does not use
12512 trampolines at the moment. */
12513 if (x86_64_zero_extended_value (fnaddr
))
12515 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
12516 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
12517 gen_int_mode (0xbb41, HImode
));
12518 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
12519 gen_lowpart (SImode
, fnaddr
));
12524 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
12525 gen_int_mode (0xbb49, HImode
));
12526 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
12530 /* Load static chain using movabs to r10. */
12531 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
12532 gen_int_mode (0xba49, HImode
));
12533 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
12536 /* Jump to the r11 */
12537 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
12538 gen_int_mode (0xff49, HImode
));
12539 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
12540 gen_int_mode (0xe3, QImode
));
/* Sanity check that the emitted bytes fit the reserved trampoline.  */
12542 if (offset
> TRAMPOLINE_SIZE
)
12546 #ifdef TRANSFER_FROM_TRAMPOLINE
/* Some platforms need the stack made executable for trampolines.  */
12547 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
12548 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
/* Register a target builtin NAME with function type TYPE and code CODE,
   but only when the ISA bits in MASK are enabled in target_flags (and a
   MASK_64BIT requirement is satisfied by TARGET_64BIT).  NOTE(review):
   garbled extract -- the do { ... } while (0) wrapper lines appear to be
   missing from this macro.  */
12552 #define def_builtin(MASK, NAME, TYPE, CODE) \
12554 if ((MASK) & target_flags \
12555 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12556 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12557 NULL, NULL_TREE); \
/* Table-entry type describing one machine builtin: the ISA mask gating
   it, the insn pattern implementing it, its user-visible name, its
   IX86_BUILTIN_* code, and (for comparison builtins) the rtx comparison
   code plus an extra flag.  */
12560 struct builtin_description
12562 const unsigned int mask
;
12563 const enum insn_code icode
;
12564 const char *const name
;
12565 const enum ix86_builtins code
;
12566 const enum rtx_code comparison
;
12567 const unsigned int flag
;
/* comiss/ucomiss (SSE) and comisd/ucomisd (SSE2) scalar-compare
   builtins.  The rtx comparison codes use the unordered variants (UNEQ,
   UNLT, UNLE, LTGT) where the instruction's flag results require it.
   NOTE(review): the closing "};" of this array was lost in extraction.  */
12570 static const struct builtin_description bdesc_comi
[] =
12572 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
12573 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
12574 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
12575 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
12576 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
12577 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
12578 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
12579 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
12580 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
12581 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
12582 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
12583 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
12584 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
12585 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
12586 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
12587 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
12588 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
12589 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
12590 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
12591 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
12592 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
12593 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
12594 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
12595 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
12598 static const struct builtin_description bdesc_2arg
[] =
12601 { MASK_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
12602 { MASK_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
12603 { MASK_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
12604 { MASK_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
12605 { MASK_SSE
, CODE_FOR_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
12606 { MASK_SSE
, CODE_FOR_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
12607 { MASK_SSE
, CODE_FOR_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
12608 { MASK_SSE
, CODE_FOR_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
12610 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
12611 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
12612 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
12613 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, 1 },
12614 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, 1 },
12615 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
12616 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, EQ
, 0 },
12617 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, LT
, 0 },
12618 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, LE
, 0 },
12619 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, LT
, 1 },
12620 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, LE
, 1 },
12621 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, UNORDERED
, 0 },
12622 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
12623 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
12624 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
12625 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
12626 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, EQ
, 0 },
12627 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, LT
, 0 },
12628 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, LE
, 0 },
12629 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
12631 { MASK_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
12632 { MASK_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
12633 { MASK_SSE
, CODE_FOR_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
12634 { MASK_SSE
, CODE_FOR_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
12636 { MASK_SSE
, CODE_FOR_sse_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
12637 { MASK_SSE
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
12638 { MASK_SSE
, CODE_FOR_sse_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
12639 { MASK_SSE
, CODE_FOR_sse_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
12641 { MASK_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
12642 { MASK_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
12643 { MASK_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
12644 { MASK_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
12645 { MASK_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
12648 { MASK_MMX
, CODE_FOR_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
12649 { MASK_MMX
, CODE_FOR_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
12650 { MASK_MMX
, CODE_FOR_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
12651 { MASK_MMX
, CODE_FOR_mmx_adddi3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, 0, 0 },
12652 { MASK_MMX
, CODE_FOR_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
12653 { MASK_MMX
, CODE_FOR_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
12654 { MASK_MMX
, CODE_FOR_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
12655 { MASK_MMX
, CODE_FOR_mmx_subdi3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, 0, 0 },
12657 { MASK_MMX
, CODE_FOR_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
12658 { MASK_MMX
, CODE_FOR_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
12659 { MASK_MMX
, CODE_FOR_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
12660 { MASK_MMX
, CODE_FOR_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
12661 { MASK_MMX
, CODE_FOR_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
12662 { MASK_MMX
, CODE_FOR_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
12663 { MASK_MMX
, CODE_FOR_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
12664 { MASK_MMX
, CODE_FOR_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
12666 { MASK_MMX
, CODE_FOR_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
12667 { MASK_MMX
, CODE_FOR_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
12668 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
12670 { MASK_MMX
, CODE_FOR_mmx_anddi3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
12671 { MASK_MMX
, CODE_FOR_mmx_nanddi3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
12672 { MASK_MMX
, CODE_FOR_mmx_iordi3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
12673 { MASK_MMX
, CODE_FOR_mmx_xordi3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
12675 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
12676 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
12678 { MASK_MMX
, CODE_FOR_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
12679 { MASK_MMX
, CODE_FOR_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
12680 { MASK_MMX
, CODE_FOR_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
12681 { MASK_MMX
, CODE_FOR_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
12682 { MASK_MMX
, CODE_FOR_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
12683 { MASK_MMX
, CODE_FOR_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
12685 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
12686 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
12687 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
12688 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
12690 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
12691 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
12692 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
12693 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
12694 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
12695 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
12698 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
12699 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
12700 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
12702 { MASK_SSE
, CODE_FOR_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
12703 { MASK_SSE
, CODE_FOR_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
12704 { MASK_SSE
| MASK_64BIT
, CODE_FOR_cvtsi2ssq
, 0, IX86_BUILTIN_CVTSI642SS
, 0, 0 },
12706 { MASK_MMX
, CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
12707 { MASK_MMX
, CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
12708 { MASK_MMX
, CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
12709 { MASK_MMX
, CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
12710 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
12711 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
12713 { MASK_MMX
, CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
12714 { MASK_MMX
, CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
12715 { MASK_MMX
, CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
12716 { MASK_MMX
, CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
12717 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
12718 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
12720 { MASK_MMX
, CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
12721 { MASK_MMX
, CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
12722 { MASK_MMX
, CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
12723 { MASK_MMX
, CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
12725 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
12726 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
12729 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
12730 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
12731 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
12732 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
12733 { MASK_SSE2
, CODE_FOR_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
12734 { MASK_SSE2
, CODE_FOR_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
12735 { MASK_SSE2
, CODE_FOR_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
12736 { MASK_SSE2
, CODE_FOR_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
12738 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
12739 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
12740 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
12741 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, 1 },
12742 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, 1 },
12743 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
12744 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, EQ
, 0 },
12745 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, LT
, 0 },
12746 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, LE
, 0 },
12747 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, LT
, 1 },
12748 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, LE
, 1 },
12749 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, UNORDERED
, 0 },
12750 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
12751 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
12752 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
12753 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
12754 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, EQ
, 0 },
12755 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, LT
, 0 },
12756 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, LE
, 0 },
12757 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, UNORDERED
, 0 },
12759 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
12760 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
12761 { MASK_SSE2
, CODE_FOR_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
12762 { MASK_SSE2
, CODE_FOR_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
12764 { MASK_SSE2
, CODE_FOR_sse2_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
12765 { MASK_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
12766 { MASK_SSE2
, CODE_FOR_sse2_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
12767 { MASK_SSE2
, CODE_FOR_sse2_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
12769 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
12770 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
12771 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
12774 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
12775 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
12776 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
12777 { MASK_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
12778 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
12779 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
12780 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
12781 { MASK_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
12783 { MASK_MMX
, CODE_FOR_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
12784 { MASK_MMX
, CODE_FOR_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
12785 { MASK_MMX
, CODE_FOR_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
12786 { MASK_MMX
, CODE_FOR_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
12787 { MASK_MMX
, CODE_FOR_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
12788 { MASK_MMX
, CODE_FOR_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
12789 { MASK_MMX
, CODE_FOR_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
12790 { MASK_MMX
, CODE_FOR_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
12792 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
12793 { MASK_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
12794 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ
, 0, 0 },
12795 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128
, 0, 0 },
12797 { MASK_SSE2
, CODE_FOR_sse2_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
12798 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
12799 { MASK_SSE2
, CODE_FOR_sse2_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
12800 { MASK_SSE2
, CODE_FOR_sse2_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
12802 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
12803 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
12805 { MASK_SSE2
, CODE_FOR_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
12806 { MASK_SSE2
, CODE_FOR_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
12807 { MASK_SSE2
, CODE_FOR_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
12808 { MASK_SSE2
, CODE_FOR_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
12809 { MASK_SSE2
, CODE_FOR_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
12810 { MASK_SSE2
, CODE_FOR_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
12812 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
12813 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
12814 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
12815 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
12817 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
12818 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
12819 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
12820 { MASK_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, 0, 0 },
12821 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
12822 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
12823 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
12824 { MASK_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, 0, 0 },
12826 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
12827 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
12828 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
12830 { MASK_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
12831 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
12833 { MASK_SSE2
, CODE_FOR_ashlv8hi3_ti
, 0, IX86_BUILTIN_PSLLW128
, 0, 0 },
12834 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
12835 { MASK_SSE2
, CODE_FOR_ashlv4si3_ti
, 0, IX86_BUILTIN_PSLLD128
, 0, 0 },
12836 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
12837 { MASK_SSE2
, CODE_FOR_ashlv2di3_ti
, 0, IX86_BUILTIN_PSLLQ128
, 0, 0 },
12838 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
12840 { MASK_SSE2
, CODE_FOR_lshrv8hi3_ti
, 0, IX86_BUILTIN_PSRLW128
, 0, 0 },
12841 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
12842 { MASK_SSE2
, CODE_FOR_lshrv4si3_ti
, 0, IX86_BUILTIN_PSRLD128
, 0, 0 },
12843 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
12844 { MASK_SSE2
, CODE_FOR_lshrv2di3_ti
, 0, IX86_BUILTIN_PSRLQ128
, 0, 0 },
12845 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
12847 { MASK_SSE2
, CODE_FOR_ashrv8hi3_ti
, 0, IX86_BUILTIN_PSRAW128
, 0, 0 },
12848 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
12849 { MASK_SSE2
, CODE_FOR_ashrv4si3_ti
, 0, IX86_BUILTIN_PSRAD128
, 0, 0 },
12850 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
12852 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
12854 { MASK_SSE2
, CODE_FOR_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
12855 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_cvtsi2sdq
, 0, IX86_BUILTIN_CVTSI642SD
, 0, 0 },
12856 { MASK_SSE2
, CODE_FOR_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
12857 { MASK_SSE2
, CODE_FOR_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 },
12860 { MASK_SSE3
, CODE_FOR_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, 0, 0 },
12861 { MASK_SSE3
, CODE_FOR_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, 0, 0 },
12862 { MASK_SSE3
, CODE_FOR_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, 0, 0 },
12863 { MASK_SSE3
, CODE_FOR_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, 0, 0 },
12864 { MASK_SSE3
, CODE_FOR_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, 0, 0 },
12865 { MASK_SSE3
, CODE_FOR_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, 0, 0 }
12868 static const struct builtin_description bdesc_1arg
[] =
12870 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
12871 { MASK_SSE
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
12873 { MASK_SSE
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
12874 { MASK_SSE
, CODE_FOR_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
12875 { MASK_SSE
, CODE_FOR_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
12877 { MASK_SSE
, CODE_FOR_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
12878 { MASK_SSE
, CODE_FOR_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
12879 { MASK_SSE
| MASK_64BIT
, CODE_FOR_cvtss2siq
, 0, IX86_BUILTIN_CVTSS2SI64
, 0, 0 },
12880 { MASK_SSE
, CODE_FOR_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
12881 { MASK_SSE
, CODE_FOR_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 },
12882 { MASK_SSE
| MASK_64BIT
, CODE_FOR_cvttss2siq
, 0, IX86_BUILTIN_CVTTSS2SI64
, 0, 0 },
12884 { MASK_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, 0, 0 },
12885 { MASK_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, 0, 0 },
12886 { MASK_SSE2
, CODE_FOR_sse2_movq2dq
, 0, IX86_BUILTIN_MOVQ2DQ
, 0, 0 },
12887 { MASK_SSE2
, CODE_FOR_sse2_movdq2q
, 0, IX86_BUILTIN_MOVDQ2Q
, 0, 0 },
12889 { MASK_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, 0, 0 },
12891 { MASK_SSE2
, CODE_FOR_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, 0, 0 },
12892 { MASK_SSE2
, CODE_FOR_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, 0, 0 },
12894 { MASK_SSE2
, CODE_FOR_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, 0, 0 },
12895 { MASK_SSE2
, CODE_FOR_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, 0, 0 },
12896 { MASK_SSE2
, CODE_FOR_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, 0, 0 },
12897 { MASK_SSE2
, CODE_FOR_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, 0, 0 },
12898 { MASK_SSE2
, CODE_FOR_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, 0, 0 },
12900 { MASK_SSE2
, CODE_FOR_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, 0, 0 },
12902 { MASK_SSE2
, CODE_FOR_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, 0, 0 },
12903 { MASK_SSE2
, CODE_FOR_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, 0, 0 },
12904 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_cvtsd2siq
, 0, IX86_BUILTIN_CVTSD2SI64
, 0, 0 },
12905 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_cvttsd2siq
, 0, IX86_BUILTIN_CVTTSD2SI64
, 0, 0 },
12907 { MASK_SSE2
, CODE_FOR_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, 0, 0 },
12908 { MASK_SSE2
, CODE_FOR_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, 0, 0 },
12909 { MASK_SSE2
, CODE_FOR_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, 0, 0 },
12911 { MASK_SSE2
, CODE_FOR_sse2_movq
, 0, IX86_BUILTIN_MOVQ
, 0, 0 },
12914 { MASK_SSE3
, CODE_FOR_movshdup
, 0, IX86_BUILTIN_MOVSHDUP
, 0, 0 },
12915 { MASK_SSE3
, CODE_FOR_movsldup
, 0, IX86_BUILTIN_MOVSLDUP
, 0, 0 },
12916 { MASK_SSE3
, CODE_FOR_movddup
, 0, IX86_BUILTIN_MOVDDUP
, 0, 0 }
12920 ix86_init_builtins (void)
12923 ix86_init_mmx_sse_builtins ();
/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */
12930 ix86_init_mmx_sse_builtins (void)
12932 const struct builtin_description
* d
;
12935 tree V16QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V16QImode
);
12936 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
12937 tree V2SF_type_node
= build_vector_type_for_mode (float_type_node
, V2SFmode
);
12938 tree V2DI_type_node
= build_vector_type_for_mode (intDI_type_node
, V2DImode
);
12939 tree V2DF_type_node
= build_vector_type_for_mode (double_type_node
, V2DFmode
);
12940 tree V4SF_type_node
= build_vector_type_for_mode (float_type_node
, V4SFmode
);
12941 tree V4SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V4SImode
);
12942 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
12943 tree V8QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V8QImode
);
12944 tree V8HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V8HImode
);
12946 tree pchar_type_node
= build_pointer_type (char_type_node
);
12947 tree pcchar_type_node
= build_pointer_type (
12948 build_type_variant (char_type_node
, 1, 0));
12949 tree pfloat_type_node
= build_pointer_type (float_type_node
);
12950 tree pcfloat_type_node
= build_pointer_type (
12951 build_type_variant (float_type_node
, 1, 0));
12952 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
12953 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
12954 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
12957 tree int_ftype_v4sf_v4sf
12958 = build_function_type_list (integer_type_node
,
12959 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12960 tree v4si_ftype_v4sf_v4sf
12961 = build_function_type_list (V4SI_type_node
,
12962 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12963 /* MMX/SSE/integer conversions. */
12964 tree int_ftype_v4sf
12965 = build_function_type_list (integer_type_node
,
12966 V4SF_type_node
, NULL_TREE
);
12967 tree int64_ftype_v4sf
12968 = build_function_type_list (long_long_integer_type_node
,
12969 V4SF_type_node
, NULL_TREE
);
12970 tree int_ftype_v8qi
12971 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
12972 tree v4sf_ftype_v4sf_int
12973 = build_function_type_list (V4SF_type_node
,
12974 V4SF_type_node
, integer_type_node
, NULL_TREE
);
12975 tree v4sf_ftype_v4sf_int64
12976 = build_function_type_list (V4SF_type_node
,
12977 V4SF_type_node
, long_long_integer_type_node
,
12979 tree v4sf_ftype_v4sf_v2si
12980 = build_function_type_list (V4SF_type_node
,
12981 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
12982 tree int_ftype_v4hi_int
12983 = build_function_type_list (integer_type_node
,
12984 V4HI_type_node
, integer_type_node
, NULL_TREE
);
12985 tree v4hi_ftype_v4hi_int_int
12986 = build_function_type_list (V4HI_type_node
, V4HI_type_node
,
12987 integer_type_node
, integer_type_node
,
12989 /* Miscellaneous. */
12990 tree v8qi_ftype_v4hi_v4hi
12991 = build_function_type_list (V8QI_type_node
,
12992 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
12993 tree v4hi_ftype_v2si_v2si
12994 = build_function_type_list (V4HI_type_node
,
12995 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
12996 tree v4sf_ftype_v4sf_v4sf_int
12997 = build_function_type_list (V4SF_type_node
,
12998 V4SF_type_node
, V4SF_type_node
,
12999 integer_type_node
, NULL_TREE
);
13000 tree v2si_ftype_v4hi_v4hi
13001 = build_function_type_list (V2SI_type_node
,
13002 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
13003 tree v4hi_ftype_v4hi_int
13004 = build_function_type_list (V4HI_type_node
,
13005 V4HI_type_node
, integer_type_node
, NULL_TREE
);
13006 tree v4hi_ftype_v4hi_di
13007 = build_function_type_list (V4HI_type_node
,
13008 V4HI_type_node
, long_long_unsigned_type_node
,
13010 tree v2si_ftype_v2si_di
13011 = build_function_type_list (V2SI_type_node
,
13012 V2SI_type_node
, long_long_unsigned_type_node
,
13014 tree void_ftype_void
13015 = build_function_type (void_type_node
, void_list_node
);
13016 tree void_ftype_unsigned
13017 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
13018 tree void_ftype_unsigned_unsigned
13019 = build_function_type_list (void_type_node
, unsigned_type_node
,
13020 unsigned_type_node
, NULL_TREE
);
13021 tree void_ftype_pcvoid_unsigned_unsigned
13022 = build_function_type_list (void_type_node
, const_ptr_type_node
,
13023 unsigned_type_node
, unsigned_type_node
,
13025 tree unsigned_ftype_void
13026 = build_function_type (unsigned_type_node
, void_list_node
);
13028 = build_function_type (long_long_unsigned_type_node
, void_list_node
);
13029 tree v4sf_ftype_void
13030 = build_function_type (V4SF_type_node
, void_list_node
);
13031 tree v2si_ftype_v4sf
13032 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
13033 /* Loads/stores. */
13034 tree void_ftype_v8qi_v8qi_pchar
13035 = build_function_type_list (void_type_node
,
13036 V8QI_type_node
, V8QI_type_node
,
13037 pchar_type_node
, NULL_TREE
);
13038 tree v4sf_ftype_pcfloat
13039 = build_function_type_list (V4SF_type_node
, pcfloat_type_node
, NULL_TREE
);
13040 /* @@@ the type is bogus */
13041 tree v4sf_ftype_v4sf_pv2si
13042 = build_function_type_list (V4SF_type_node
,
13043 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
13044 tree void_ftype_pv2si_v4sf
13045 = build_function_type_list (void_type_node
,
13046 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
13047 tree void_ftype_pfloat_v4sf
13048 = build_function_type_list (void_type_node
,
13049 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
13050 tree void_ftype_pdi_di
13051 = build_function_type_list (void_type_node
,
13052 pdi_type_node
, long_long_unsigned_type_node
,
13054 tree void_ftype_pv2di_v2di
13055 = build_function_type_list (void_type_node
,
13056 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
13057 /* Normal vector unops. */
13058 tree v4sf_ftype_v4sf
13059 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
13061 /* Normal vector binops. */
13062 tree v4sf_ftype_v4sf_v4sf
13063 = build_function_type_list (V4SF_type_node
,
13064 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
13065 tree v8qi_ftype_v8qi_v8qi
13066 = build_function_type_list (V8QI_type_node
,
13067 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
13068 tree v4hi_ftype_v4hi_v4hi
13069 = build_function_type_list (V4HI_type_node
,
13070 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
13071 tree v2si_ftype_v2si_v2si
13072 = build_function_type_list (V2SI_type_node
,
13073 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
13074 tree di_ftype_di_di
13075 = build_function_type_list (long_long_unsigned_type_node
,
13076 long_long_unsigned_type_node
,
13077 long_long_unsigned_type_node
, NULL_TREE
);
13079 tree v2si_ftype_v2sf
13080 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
13081 tree v2sf_ftype_v2si
13082 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
13083 tree v2si_ftype_v2si
13084 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
13085 tree v2sf_ftype_v2sf
13086 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
13087 tree v2sf_ftype_v2sf_v2sf
13088 = build_function_type_list (V2SF_type_node
,
13089 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
13090 tree v2si_ftype_v2sf_v2sf
13091 = build_function_type_list (V2SI_type_node
,
13092 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
13093 tree pint_type_node
= build_pointer_type (integer_type_node
);
13094 tree pcint_type_node
= build_pointer_type (
13095 build_type_variant (integer_type_node
, 1, 0));
13096 tree pdouble_type_node
= build_pointer_type (double_type_node
);
13097 tree pcdouble_type_node
= build_pointer_type (
13098 build_type_variant (double_type_node
, 1, 0));
13099 tree int_ftype_v2df_v2df
13100 = build_function_type_list (integer_type_node
,
13101 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
13104 = build_function_type (intTI_type_node
, void_list_node
);
13105 tree v2di_ftype_void
13106 = build_function_type (V2DI_type_node
, void_list_node
);
13107 tree ti_ftype_ti_ti
13108 = build_function_type_list (intTI_type_node
,
13109 intTI_type_node
, intTI_type_node
, NULL_TREE
);
13110 tree void_ftype_pcvoid
13111 = build_function_type_list (void_type_node
, const_ptr_type_node
, NULL_TREE
);
13113 = build_function_type_list (V2DI_type_node
,
13114 long_long_unsigned_type_node
, NULL_TREE
);
13116 = build_function_type_list (long_long_unsigned_type_node
,
13117 V2DI_type_node
, NULL_TREE
);
13118 tree v4sf_ftype_v4si
13119 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
13120 tree v4si_ftype_v4sf
13121 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
13122 tree v2df_ftype_v4si
13123 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
13124 tree v4si_ftype_v2df
13125 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
13126 tree v2si_ftype_v2df
13127 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
13128 tree v4sf_ftype_v2df
13129 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
13130 tree v2df_ftype_v2si
13131 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
13132 tree v2df_ftype_v4sf
13133 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
13134 tree int_ftype_v2df
13135 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
13136 tree int64_ftype_v2df
13137 = build_function_type_list (long_long_integer_type_node
,
13138 V2DF_type_node
, NULL_TREE
);
13139 tree v2df_ftype_v2df_int
13140 = build_function_type_list (V2DF_type_node
,
13141 V2DF_type_node
, integer_type_node
, NULL_TREE
);
13142 tree v2df_ftype_v2df_int64
13143 = build_function_type_list (V2DF_type_node
,
13144 V2DF_type_node
, long_long_integer_type_node
,
13146 tree v4sf_ftype_v4sf_v2df
13147 = build_function_type_list (V4SF_type_node
,
13148 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
13149 tree v2df_ftype_v2df_v4sf
13150 = build_function_type_list (V2DF_type_node
,
13151 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
13152 tree v2df_ftype_v2df_v2df_int
13153 = build_function_type_list (V2DF_type_node
,
13154 V2DF_type_node
, V2DF_type_node
,
13157 tree v2df_ftype_v2df_pv2si
13158 = build_function_type_list (V2DF_type_node
,
13159 V2DF_type_node
, pv2si_type_node
, NULL_TREE
);
13160 tree void_ftype_pv2si_v2df
13161 = build_function_type_list (void_type_node
,
13162 pv2si_type_node
, V2DF_type_node
, NULL_TREE
);
13163 tree void_ftype_pdouble_v2df
13164 = build_function_type_list (void_type_node
,
13165 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
13166 tree void_ftype_pint_int
13167 = build_function_type_list (void_type_node
,
13168 pint_type_node
, integer_type_node
, NULL_TREE
);
13169 tree void_ftype_v16qi_v16qi_pchar
13170 = build_function_type_list (void_type_node
,
13171 V16QI_type_node
, V16QI_type_node
,
13172 pchar_type_node
, NULL_TREE
);
13173 tree v2df_ftype_pcdouble
13174 = build_function_type_list (V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
13175 tree v2df_ftype_v2df_v2df
13176 = build_function_type_list (V2DF_type_node
,
13177 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
13178 tree v16qi_ftype_v16qi_v16qi
13179 = build_function_type_list (V16QI_type_node
,
13180 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
13181 tree v8hi_ftype_v8hi_v8hi
13182 = build_function_type_list (V8HI_type_node
,
13183 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
13184 tree v4si_ftype_v4si_v4si
13185 = build_function_type_list (V4SI_type_node
,
13186 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
13187 tree v2di_ftype_v2di_v2di
13188 = build_function_type_list (V2DI_type_node
,
13189 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
13190 tree v2di_ftype_v2df_v2df
13191 = build_function_type_list (V2DI_type_node
,
13192 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
13193 tree v2df_ftype_v2df
13194 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
13195 tree v2df_ftype_double
13196 = build_function_type_list (V2DF_type_node
, double_type_node
, NULL_TREE
);
13197 tree v2df_ftype_double_double
13198 = build_function_type_list (V2DF_type_node
,
13199 double_type_node
, double_type_node
, NULL_TREE
);
13200 tree int_ftype_v8hi_int
13201 = build_function_type_list (integer_type_node
,
13202 V8HI_type_node
, integer_type_node
, NULL_TREE
);
13203 tree v8hi_ftype_v8hi_int_int
13204 = build_function_type_list (V8HI_type_node
,
13205 V8HI_type_node
, integer_type_node
,
13206 integer_type_node
, NULL_TREE
);
13207 tree v2di_ftype_v2di_int
13208 = build_function_type_list (V2DI_type_node
,
13209 V2DI_type_node
, integer_type_node
, NULL_TREE
);
13210 tree v4si_ftype_v4si_int
13211 = build_function_type_list (V4SI_type_node
,
13212 V4SI_type_node
, integer_type_node
, NULL_TREE
);
13213 tree v8hi_ftype_v8hi_int
13214 = build_function_type_list (V8HI_type_node
,
13215 V8HI_type_node
, integer_type_node
, NULL_TREE
);
13216 tree v8hi_ftype_v8hi_v2di
13217 = build_function_type_list (V8HI_type_node
,
13218 V8HI_type_node
, V2DI_type_node
, NULL_TREE
);
13219 tree v4si_ftype_v4si_v2di
13220 = build_function_type_list (V4SI_type_node
,
13221 V4SI_type_node
, V2DI_type_node
, NULL_TREE
);
13222 tree v4si_ftype_v8hi_v8hi
13223 = build_function_type_list (V4SI_type_node
,
13224 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
13225 tree di_ftype_v8qi_v8qi
13226 = build_function_type_list (long_long_unsigned_type_node
,
13227 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
13228 tree v2di_ftype_v16qi_v16qi
13229 = build_function_type_list (V2DI_type_node
,
13230 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
13231 tree int_ftype_v16qi
13232 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
13233 tree v16qi_ftype_pcchar
13234 = build_function_type_list (V16QI_type_node
, pcchar_type_node
, NULL_TREE
);
13235 tree void_ftype_pchar_v16qi
13236 = build_function_type_list (void_type_node
,
13237 pchar_type_node
, V16QI_type_node
, NULL_TREE
);
13238 tree v4si_ftype_pcint
13239 = build_function_type_list (V4SI_type_node
, pcint_type_node
, NULL_TREE
);
13240 tree void_ftype_pcint_v4si
13241 = build_function_type_list (void_type_node
,
13242 pcint_type_node
, V4SI_type_node
, NULL_TREE
);
13243 tree v2di_ftype_v2di
13244 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
13247 tree float128_type
;
13249 /* The __float80 type. */
13250 if (TYPE_MODE (long_double_type_node
) == XFmode
)
13251 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
13255 /* The __float80 type. */
13256 float80_type
= make_node (REAL_TYPE
);
13257 TYPE_PRECISION (float80_type
) = 96;
13258 layout_type (float80_type
);
13259 (*lang_hooks
.types
.register_builtin_type
) (float80_type
, "__float80");
13262 float128_type
= make_node (REAL_TYPE
);
13263 TYPE_PRECISION (float128_type
) = 128;
13264 layout_type (float128_type
);
13265 (*lang_hooks
.types
.register_builtin_type
) (float128_type
, "__float128");
13267 /* Add all builtins that are more or less simple operations on two
13269 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
13271 /* Use one of the operands; the target can have a different mode for
13272 mask-generating compares. */
13273 enum machine_mode mode
;
13278 mode
= insn_data
[d
->icode
].operand
[1].mode
;
13283 type
= v16qi_ftype_v16qi_v16qi
;
13286 type
= v8hi_ftype_v8hi_v8hi
;
13289 type
= v4si_ftype_v4si_v4si
;
13292 type
= v2di_ftype_v2di_v2di
;
13295 type
= v2df_ftype_v2df_v2df
;
13298 type
= ti_ftype_ti_ti
;
13301 type
= v4sf_ftype_v4sf_v4sf
;
13304 type
= v8qi_ftype_v8qi_v8qi
;
13307 type
= v4hi_ftype_v4hi_v4hi
;
13310 type
= v2si_ftype_v2si_v2si
;
13313 type
= di_ftype_di_di
;
13320 /* Override for comparisons. */
13321 if (d
->icode
== CODE_FOR_maskcmpv4sf3
13322 || d
->icode
== CODE_FOR_maskncmpv4sf3
13323 || d
->icode
== CODE_FOR_vmmaskcmpv4sf3
13324 || d
->icode
== CODE_FOR_vmmaskncmpv4sf3
)
13325 type
= v4si_ftype_v4sf_v4sf
;
13327 if (d
->icode
== CODE_FOR_maskcmpv2df3
13328 || d
->icode
== CODE_FOR_maskncmpv2df3
13329 || d
->icode
== CODE_FOR_vmmaskcmpv2df3
13330 || d
->icode
== CODE_FOR_vmmaskncmpv2df3
)
13331 type
= v2di_ftype_v2df_v2df
;
13333 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
13336 /* Add the remaining MMX insns with somewhat more complicated types. */
13337 def_builtin (MASK_MMX
, "__builtin_ia32_mmx_zero", di_ftype_void
, IX86_BUILTIN_MMX_ZERO
);
13338 def_builtin (MASK_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
13339 def_builtin (MASK_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
13340 def_builtin (MASK_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
13341 def_builtin (MASK_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
13343 def_builtin (MASK_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
13344 def_builtin (MASK_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
13345 def_builtin (MASK_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
13347 def_builtin (MASK_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
13348 def_builtin (MASK_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
13350 def_builtin (MASK_MMX
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
13351 def_builtin (MASK_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
13353 /* comi/ucomi insns. */
13354 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
13355 if (d
->mask
== MASK_SSE2
)
13356 def_builtin (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
13358 def_builtin (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
13360 def_builtin (MASK_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
13361 def_builtin (MASK_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
13362 def_builtin (MASK_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
13364 def_builtin (MASK_SSE
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
13365 def_builtin (MASK_SSE
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
13366 def_builtin (MASK_SSE
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
13367 def_builtin (MASK_SSE
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
13368 def_builtin (MASK_SSE
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
13369 def_builtin (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64
, IX86_BUILTIN_CVTSI642SS
);
13370 def_builtin (MASK_SSE
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
13371 def_builtin (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI64
);
13372 def_builtin (MASK_SSE
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
13373 def_builtin (MASK_SSE
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
13374 def_builtin (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI64
);
13376 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pextrw", int_ftype_v4hi_int
, IX86_BUILTIN_PEXTRW
);
13377 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int
, IX86_BUILTIN_PINSRW
);
13379 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
13381 def_builtin (MASK_SSE
, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADAPS
);
13382 def_builtin (MASK_SSE
, "__builtin_ia32_loadups", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADUPS
);
13383 def_builtin (MASK_SSE
, "__builtin_ia32_loadss", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADSS
);
13384 def_builtin (MASK_SSE
, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREAPS
);
13385 def_builtin (MASK_SSE
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
13386 def_builtin (MASK_SSE
, "__builtin_ia32_storess", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STORESS
);
13388 def_builtin (MASK_SSE
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
13389 def_builtin (MASK_SSE
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
13390 def_builtin (MASK_SSE
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
13391 def_builtin (MASK_SSE
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
13393 def_builtin (MASK_SSE
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
13394 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
13395 def_builtin (MASK_SSE
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
13396 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
13398 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
13400 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
13402 def_builtin (MASK_SSE
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
13403 def_builtin (MASK_SSE
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
13404 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
13405 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
13406 def_builtin (MASK_SSE
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
13407 def_builtin (MASK_SSE
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
13409 def_builtin (MASK_SSE
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
13411 /* Original 3DNow! */
13412 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
13413 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
13414 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
13415 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
13416 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
13417 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
13418 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
13419 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
13420 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
13421 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
13422 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
13423 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
13424 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
13425 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
13426 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
13427 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
13428 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
13429 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
13430 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
13431 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
13433 /* 3DNow! extension as used in the Athlon CPU. */
13434 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
13435 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
13436 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
13437 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
13438 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
13439 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
13441 def_builtin (MASK_SSE
, "__builtin_ia32_setzerops", v4sf_ftype_void
, IX86_BUILTIN_SSE_ZERO
);
13444 def_builtin (MASK_SSE2
, "__builtin_ia32_pextrw128", int_ftype_v8hi_int
, IX86_BUILTIN_PEXTRW128
);
13445 def_builtin (MASK_SSE2
, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int
, IX86_BUILTIN_PINSRW128
);
13447 def_builtin (MASK_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
13448 def_builtin (MASK_SSE2
, "__builtin_ia32_movq2dq", v2di_ftype_di
, IX86_BUILTIN_MOVQ2DQ
);
13449 def_builtin (MASK_SSE2
, "__builtin_ia32_movdq2q", di_ftype_v2di
, IX86_BUILTIN_MOVDQ2Q
);
13451 def_builtin (MASK_SSE2
, "__builtin_ia32_loadapd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADAPD
);
13452 def_builtin (MASK_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADUPD
);
13453 def_builtin (MASK_SSE2
, "__builtin_ia32_loadsd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADSD
);
13454 def_builtin (MASK_SSE2
, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREAPD
);
13455 def_builtin (MASK_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
13456 def_builtin (MASK_SSE2
, "__builtin_ia32_storesd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STORESD
);
13458 def_builtin (MASK_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si
, IX86_BUILTIN_LOADHPD
);
13459 def_builtin (MASK_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si
, IX86_BUILTIN_LOADLPD
);
13460 def_builtin (MASK_SSE2
, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df
, IX86_BUILTIN_STOREHPD
);
13461 def_builtin (MASK_SSE2
, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df
, IX86_BUILTIN_STORELPD
);
13463 def_builtin (MASK_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
13464 def_builtin (MASK_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
13465 def_builtin (MASK_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
13466 def_builtin (MASK_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
13467 def_builtin (MASK_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
13469 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
13470 def_builtin (MASK_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
13471 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
13472 def_builtin (MASK_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
13474 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
13475 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
13477 def_builtin (MASK_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
13479 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
13480 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
13482 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
13483 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
13484 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
13485 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
13486 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
13488 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
13490 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
13491 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
13492 def_builtin (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTSD2SI64
);
13493 def_builtin (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI64
);
13495 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
13496 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
13497 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
13499 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
13500 def_builtin (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64
, IX86_BUILTIN_CVTSI642SD
);
13501 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
13502 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
13504 def_builtin (MASK_SSE2
, "__builtin_ia32_setpd1", v2df_ftype_double
, IX86_BUILTIN_SETPD1
);
13505 def_builtin (MASK_SSE2
, "__builtin_ia32_setpd", v2df_ftype_double_double
, IX86_BUILTIN_SETPD
);
13506 def_builtin (MASK_SSE2
, "__builtin_ia32_setzeropd", ti_ftype_void
, IX86_BUILTIN_CLRPD
);
13507 def_builtin (MASK_SSE2
, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADPD1
);
13508 def_builtin (MASK_SSE2
, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADRPD
);
13509 def_builtin (MASK_SSE2
, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREPD1
);
13510 def_builtin (MASK_SSE2
, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STORERPD
);
13512 def_builtin (MASK_SSE2
, "__builtin_ia32_clflush", void_ftype_pcvoid
, IX86_BUILTIN_CLFLUSH
);
13513 def_builtin (MASK_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
13514 def_builtin (MASK_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
13516 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQA
);
13517 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQU
);
13518 def_builtin (MASK_SSE2
, "__builtin_ia32_loadd", v4si_ftype_pcint
, IX86_BUILTIN_LOADD
);
13519 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQA
);
13520 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQU
);
13521 def_builtin (MASK_SSE2
, "__builtin_ia32_stored", void_ftype_pcint_v4si
, IX86_BUILTIN_STORED
);
13522 def_builtin (MASK_SSE2
, "__builtin_ia32_movq", v2di_ftype_v2di
, IX86_BUILTIN_MOVQ
);
13524 def_builtin (MASK_SSE
, "__builtin_ia32_setzero128", v2di_ftype_void
, IX86_BUILTIN_CLRTI
);
13526 def_builtin (MASK_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSLLW128
);
13527 def_builtin (MASK_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSLLD128
);
13528 def_builtin (MASK_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
13530 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRLW128
);
13531 def_builtin (MASK_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRLD128
);
13532 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
13534 def_builtin (MASK_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRAW128
);
13535 def_builtin (MASK_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRAD128
);
13537 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLDQI128
);
13538 def_builtin (MASK_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
13539 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
13540 def_builtin (MASK_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
13542 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
13543 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
13544 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
13545 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
13547 def_builtin (MASK_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
13548 def_builtin (MASK_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
13550 def_builtin (MASK_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
13552 /* Prescott New Instructions. */
13553 def_builtin (MASK_SSE3
, "__builtin_ia32_monitor",
13554 void_ftype_pcvoid_unsigned_unsigned
,
13555 IX86_BUILTIN_MONITOR
);
13556 def_builtin (MASK_SSE3
, "__builtin_ia32_mwait",
13557 void_ftype_unsigned_unsigned
,
13558 IX86_BUILTIN_MWAIT
);
13559 def_builtin (MASK_SSE3
, "__builtin_ia32_movshdup",
13561 IX86_BUILTIN_MOVSHDUP
);
13562 def_builtin (MASK_SSE3
, "__builtin_ia32_movsldup",
13564 IX86_BUILTIN_MOVSLDUP
);
13565 def_builtin (MASK_SSE3
, "__builtin_ia32_lddqu",
13566 v16qi_ftype_pcchar
, IX86_BUILTIN_LDDQU
);
13567 def_builtin (MASK_SSE3
, "__builtin_ia32_loadddup",
13568 v2df_ftype_pcdouble
, IX86_BUILTIN_LOADDDUP
);
13569 def_builtin (MASK_SSE3
, "__builtin_ia32_movddup",
13570 v2df_ftype_v2df
, IX86_BUILTIN_MOVDDUP
);
13573 /* Errors in the source file can cause expand_expr to return const0_rtx
13574 where we expect a vector. To avoid crashing, use one of the vector
13575 clear instructions. */
13577 safe_vector_operand (rtx x
, enum machine_mode mode
)
13579 if (x
!= const0_rtx
)
13581 x
= gen_reg_rtx (mode
);
13583 if (VALID_MMX_REG_MODE (mode
) || VALID_MMX_REG_MODE_3DNOW (mode
))
13584 emit_insn (gen_mmx_clrdi (mode
== DImode
? x
13585 : gen_rtx_SUBREG (DImode
, x
, 0)));
13587 emit_insn (gen_sse_clrv4sf (mode
== V4SFmode
? x
13588 : gen_rtx_SUBREG (V4SFmode
, x
, 0),
13589 CONST0_RTX (V4SFmode
)));
13593 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13596 ix86_expand_binop_builtin (enum insn_code icode
, tree arglist
, rtx target
)
13599 tree arg0
= TREE_VALUE (arglist
);
13600 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13601 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13602 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13603 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
13604 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
13605 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
13607 if (VECTOR_MODE_P (mode0
))
13608 op0
= safe_vector_operand (op0
, mode0
);
13609 if (VECTOR_MODE_P (mode1
))
13610 op1
= safe_vector_operand (op1
, mode1
);
13613 || GET_MODE (target
) != tmode
13614 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13615 target
= gen_reg_rtx (tmode
);
13617 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
13619 rtx x
= gen_reg_rtx (V4SImode
);
13620 emit_insn (gen_sse2_loadd (x
, op1
));
13621 op1
= gen_lowpart (TImode
, x
);
13624 /* In case the insn wants input operands in modes different from
13625 the result, abort. */
13626 if ((GET_MODE (op0
) != mode0
&& GET_MODE (op0
) != VOIDmode
)
13627 || (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
))
13630 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13631 op0
= copy_to_mode_reg (mode0
, op0
);
13632 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13633 op1
= copy_to_mode_reg (mode1
, op1
);
13635 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13636 yet one of the two must not be a memory. This is normally enforced
13637 by expanders, but we didn't bother to create one here. */
13638 if (GET_CODE (op0
) == MEM
&& GET_CODE (op1
) == MEM
)
13639 op0
= copy_to_mode_reg (mode0
, op0
);
13641 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13648 /* Subroutine of ix86_expand_builtin to take care of stores. */
13651 ix86_expand_store_builtin (enum insn_code icode
, tree arglist
)
13654 tree arg0
= TREE_VALUE (arglist
);
13655 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13656 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13657 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13658 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
13659 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
13661 if (VECTOR_MODE_P (mode1
))
13662 op1
= safe_vector_operand (op1
, mode1
);
13664 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
13665 op1
= copy_to_mode_reg (mode1
, op1
);
13667 pat
= GEN_FCN (icode
) (op0
, op1
);
13673 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13676 ix86_expand_unop_builtin (enum insn_code icode
, tree arglist
,
13677 rtx target
, int do_load
)
13680 tree arg0
= TREE_VALUE (arglist
);
13681 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13682 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
13683 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
13686 || GET_MODE (target
) != tmode
13687 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13688 target
= gen_reg_rtx (tmode
);
13690 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
13693 if (VECTOR_MODE_P (mode0
))
13694 op0
= safe_vector_operand (op0
, mode0
);
13696 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13697 op0
= copy_to_mode_reg (mode0
, op0
);
13700 pat
= GEN_FCN (icode
) (target
, op0
);
13707 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13708 sqrtss, rsqrtss, rcpss. */
13711 ix86_expand_unop1_builtin (enum insn_code icode
, tree arglist
, rtx target
)
13714 tree arg0
= TREE_VALUE (arglist
);
13715 rtx op1
, op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13716 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
13717 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
13720 || GET_MODE (target
) != tmode
13721 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13722 target
= gen_reg_rtx (tmode
);
13724 if (VECTOR_MODE_P (mode0
))
13725 op0
= safe_vector_operand (op0
, mode0
);
13727 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13728 op0
= copy_to_mode_reg (mode0
, op0
);
13731 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
13732 op1
= copy_to_mode_reg (mode0
, op1
);
13734 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13741 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13744 ix86_expand_sse_compare (const struct builtin_description
*d
, tree arglist
,
13748 tree arg0
= TREE_VALUE (arglist
);
13749 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13750 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13751 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13753 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
13754 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
13755 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
13756 enum rtx_code comparison
= d
->comparison
;
13758 if (VECTOR_MODE_P (mode0
))
13759 op0
= safe_vector_operand (op0
, mode0
);
13760 if (VECTOR_MODE_P (mode1
))
13761 op1
= safe_vector_operand (op1
, mode1
);
13763 /* Swap operands if we have a comparison that isn't available in
13767 rtx tmp
= gen_reg_rtx (mode1
);
13768 emit_move_insn (tmp
, op1
);
13774 || GET_MODE (target
) != tmode
13775 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
13776 target
= gen_reg_rtx (tmode
);
13778 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
13779 op0
= copy_to_mode_reg (mode0
, op0
);
13780 if (! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
13781 op1
= copy_to_mode_reg (mode1
, op1
);
13783 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
13784 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
13791 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13794 ix86_expand_sse_comi (const struct builtin_description
*d
, tree arglist
,
13798 tree arg0
= TREE_VALUE (arglist
);
13799 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13800 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13801 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13803 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
13804 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
13805 enum rtx_code comparison
= d
->comparison
;
13807 if (VECTOR_MODE_P (mode0
))
13808 op0
= safe_vector_operand (op0
, mode0
);
13809 if (VECTOR_MODE_P (mode1
))
13810 op1
= safe_vector_operand (op1
, mode1
);
13812 /* Swap operands if we have a comparison that isn't available in
13821 target
= gen_reg_rtx (SImode
);
13822 emit_move_insn (target
, const0_rtx
);
13823 target
= gen_rtx_SUBREG (QImode
, target
, 0);
13825 if (! (*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
13826 op0
= copy_to_mode_reg (mode0
, op0
);
13827 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
13828 op1
= copy_to_mode_reg (mode1
, op1
);
13830 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
13831 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
13835 emit_insn (gen_rtx_SET (VOIDmode
,
13836 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
13837 gen_rtx_fmt_ee (comparison
, QImode
,
13841 return SUBREG_REG (target
);
13844 /* Expand an expression EXP that calls a built-in function,
13845 with result going to TARGET if that's convenient
13846 (and in mode MODE if that's convenient).
13847 SUBTARGET may be used as the target for computing one of EXP's operands.
13848 IGNORE is nonzero if the value is to be ignored. */
13851 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
13852 enum machine_mode mode ATTRIBUTE_UNUSED
,
13853 int ignore ATTRIBUTE_UNUSED
)
13855 const struct builtin_description
*d
;
13857 enum insn_code icode
;
13858 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
13859 tree arglist
= TREE_OPERAND (exp
, 1);
13860 tree arg0
, arg1
, arg2
;
13861 rtx op0
, op1
, op2
, pat
;
13862 enum machine_mode tmode
, mode0
, mode1
, mode2
;
13863 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
13867 case IX86_BUILTIN_EMMS
:
13868 emit_insn (gen_emms ());
13871 case IX86_BUILTIN_SFENCE
:
13872 emit_insn (gen_sfence ());
13875 case IX86_BUILTIN_PEXTRW
:
13876 case IX86_BUILTIN_PEXTRW128
:
13877 icode
= (fcode
== IX86_BUILTIN_PEXTRW
13878 ? CODE_FOR_mmx_pextrw
13879 : CODE_FOR_sse2_pextrw
);
13880 arg0
= TREE_VALUE (arglist
);
13881 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13882 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13883 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13884 tmode
= insn_data
[icode
].operand
[0].mode
;
13885 mode0
= insn_data
[icode
].operand
[1].mode
;
13886 mode1
= insn_data
[icode
].operand
[2].mode
;
13888 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13889 op0
= copy_to_mode_reg (mode0
, op0
);
13890 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13892 error ("selector must be an integer constant in the range 0..%i",
13893 fcode
== IX86_BUILTIN_PEXTRW
? 3:7);
13894 return gen_reg_rtx (tmode
);
13897 || GET_MODE (target
) != tmode
13898 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13899 target
= gen_reg_rtx (tmode
);
13900 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13906 case IX86_BUILTIN_PINSRW
:
13907 case IX86_BUILTIN_PINSRW128
:
13908 icode
= (fcode
== IX86_BUILTIN_PINSRW
13909 ? CODE_FOR_mmx_pinsrw
13910 : CODE_FOR_sse2_pinsrw
);
13911 arg0
= TREE_VALUE (arglist
);
13912 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13913 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
13914 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13915 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13916 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
13917 tmode
= insn_data
[icode
].operand
[0].mode
;
13918 mode0
= insn_data
[icode
].operand
[1].mode
;
13919 mode1
= insn_data
[icode
].operand
[2].mode
;
13920 mode2
= insn_data
[icode
].operand
[3].mode
;
13922 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13923 op0
= copy_to_mode_reg (mode0
, op0
);
13924 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13925 op1
= copy_to_mode_reg (mode1
, op1
);
13926 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
13928 error ("selector must be an integer constant in the range 0..%i",
13929 fcode
== IX86_BUILTIN_PINSRW
? 15:255);
13933 || GET_MODE (target
) != tmode
13934 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13935 target
= gen_reg_rtx (tmode
);
13936 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
13942 case IX86_BUILTIN_MASKMOVQ
:
13943 case IX86_BUILTIN_MASKMOVDQU
:
13944 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
13945 ? (TARGET_64BIT
? CODE_FOR_mmx_maskmovq_rex
: CODE_FOR_mmx_maskmovq
)
13946 : (TARGET_64BIT
? CODE_FOR_sse2_maskmovdqu_rex64
13947 : CODE_FOR_sse2_maskmovdqu
));
13948 /* Note the arg order is different from the operand order. */
13949 arg1
= TREE_VALUE (arglist
);
13950 arg2
= TREE_VALUE (TREE_CHAIN (arglist
));
13951 arg0
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
13952 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13953 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13954 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
13955 mode0
= insn_data
[icode
].operand
[0].mode
;
13956 mode1
= insn_data
[icode
].operand
[1].mode
;
13957 mode2
= insn_data
[icode
].operand
[2].mode
;
13959 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
13960 op0
= copy_to_mode_reg (mode0
, op0
);
13961 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
13962 op1
= copy_to_mode_reg (mode1
, op1
);
13963 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
13964 op2
= copy_to_mode_reg (mode2
, op2
);
13965 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
13971 case IX86_BUILTIN_SQRTSS
:
13972 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2
, arglist
, target
);
13973 case IX86_BUILTIN_RSQRTSS
:
13974 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2
, arglist
, target
);
13975 case IX86_BUILTIN_RCPSS
:
13976 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2
, arglist
, target
);
13978 case IX86_BUILTIN_LOADAPS
:
13979 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps
, arglist
, target
, 1);
13981 case IX86_BUILTIN_LOADUPS
:
13982 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, arglist
, target
, 1);
13984 case IX86_BUILTIN_STOREAPS
:
13985 return ix86_expand_store_builtin (CODE_FOR_sse_movaps
, arglist
);
13987 case IX86_BUILTIN_STOREUPS
:
13988 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, arglist
);
13990 case IX86_BUILTIN_LOADSS
:
13991 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss
, arglist
, target
, 1);
13993 case IX86_BUILTIN_STORESS
:
13994 return ix86_expand_store_builtin (CODE_FOR_sse_storess
, arglist
);
13996 case IX86_BUILTIN_LOADHPS
:
13997 case IX86_BUILTIN_LOADLPS
:
13998 case IX86_BUILTIN_LOADHPD
:
13999 case IX86_BUILTIN_LOADLPD
:
14000 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_movhps
14001 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_movlps
14002 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_movhpd
14003 : CODE_FOR_sse2_movsd
);
14004 arg0
= TREE_VALUE (arglist
);
14005 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14006 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14007 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14008 tmode
= insn_data
[icode
].operand
[0].mode
;
14009 mode0
= insn_data
[icode
].operand
[1].mode
;
14010 mode1
= insn_data
[icode
].operand
[2].mode
;
14012 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
14013 op0
= copy_to_mode_reg (mode0
, op0
);
14014 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
14016 || GET_MODE (target
) != tmode
14017 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14018 target
= gen_reg_rtx (tmode
);
14019 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
14025 case IX86_BUILTIN_STOREHPS
:
14026 case IX86_BUILTIN_STORELPS
:
14027 case IX86_BUILTIN_STOREHPD
:
14028 case IX86_BUILTIN_STORELPD
:
14029 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_movhps
14030 : fcode
== IX86_BUILTIN_STORELPS
? CODE_FOR_sse_movlps
14031 : fcode
== IX86_BUILTIN_STOREHPD
? CODE_FOR_sse2_movhpd
14032 : CODE_FOR_sse2_movsd
);
14033 arg0
= TREE_VALUE (arglist
);
14034 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14035 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14036 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14037 mode0
= insn_data
[icode
].operand
[1].mode
;
14038 mode1
= insn_data
[icode
].operand
[2].mode
;
14040 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
14041 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
14042 op1
= copy_to_mode_reg (mode1
, op1
);
14044 pat
= GEN_FCN (icode
) (op0
, op0
, op1
);
14050 case IX86_BUILTIN_MOVNTPS
:
14051 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, arglist
);
14052 case IX86_BUILTIN_MOVNTQ
:
14053 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, arglist
);
14055 case IX86_BUILTIN_LDMXCSR
:
14056 op0
= expand_expr (TREE_VALUE (arglist
), NULL_RTX
, VOIDmode
, 0);
14057 target
= assign_386_stack_local (SImode
, 0);
14058 emit_move_insn (target
, op0
);
14059 emit_insn (gen_ldmxcsr (target
));
14062 case IX86_BUILTIN_STMXCSR
:
14063 target
= assign_386_stack_local (SImode
, 0);
14064 emit_insn (gen_stmxcsr (target
));
14065 return copy_to_mode_reg (SImode
, target
);
14067 case IX86_BUILTIN_SHUFPS
:
14068 case IX86_BUILTIN_SHUFPD
:
14069 icode
= (fcode
== IX86_BUILTIN_SHUFPS
14070 ? CODE_FOR_sse_shufps
14071 : CODE_FOR_sse2_shufpd
);
14072 arg0
= TREE_VALUE (arglist
);
14073 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14074 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
14075 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14076 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14077 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
14078 tmode
= insn_data
[icode
].operand
[0].mode
;
14079 mode0
= insn_data
[icode
].operand
[1].mode
;
14080 mode1
= insn_data
[icode
].operand
[2].mode
;
14081 mode2
= insn_data
[icode
].operand
[3].mode
;
14083 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
14084 op0
= copy_to_mode_reg (mode0
, op0
);
14085 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
14086 op1
= copy_to_mode_reg (mode1
, op1
);
14087 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
14089 /* @@@ better error message */
14090 error ("mask must be an immediate");
14091 return gen_reg_rtx (tmode
);
14094 || GET_MODE (target
) != tmode
14095 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14096 target
= gen_reg_rtx (tmode
);
14097 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
14103 case IX86_BUILTIN_PSHUFW
:
14104 case IX86_BUILTIN_PSHUFD
:
14105 case IX86_BUILTIN_PSHUFHW
:
14106 case IX86_BUILTIN_PSHUFLW
:
14107 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
14108 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
14109 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
14110 : CODE_FOR_mmx_pshufw
);
14111 arg0
= TREE_VALUE (arglist
);
14112 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14113 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14114 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14115 tmode
= insn_data
[icode
].operand
[0].mode
;
14116 mode1
= insn_data
[icode
].operand
[1].mode
;
14117 mode2
= insn_data
[icode
].operand
[2].mode
;
14119 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
14120 op0
= copy_to_mode_reg (mode1
, op0
);
14121 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
14123 /* @@@ better error message */
14124 error ("mask must be an immediate");
14128 || GET_MODE (target
) != tmode
14129 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14130 target
= gen_reg_rtx (tmode
);
14131 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
14137 case IX86_BUILTIN_PSLLDQI128
:
14138 case IX86_BUILTIN_PSRLDQI128
:
14139 icode
= ( fcode
== IX86_BUILTIN_PSLLDQI128
? CODE_FOR_sse2_ashlti3
14140 : CODE_FOR_sse2_lshrti3
);
14141 arg0
= TREE_VALUE (arglist
);
14142 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14143 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14144 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14145 tmode
= insn_data
[icode
].operand
[0].mode
;
14146 mode1
= insn_data
[icode
].operand
[1].mode
;
14147 mode2
= insn_data
[icode
].operand
[2].mode
;
14149 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
14151 op0
= copy_to_reg (op0
);
14152 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
14154 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
14156 error ("shift must be an immediate");
14159 target
= gen_reg_rtx (V2DImode
);
14160 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, V2DImode
, 0), op0
, op1
);
14166 case IX86_BUILTIN_FEMMS
:
14167 emit_insn (gen_femms ());
14170 case IX86_BUILTIN_PAVGUSB
:
14171 return ix86_expand_binop_builtin (CODE_FOR_pavgusb
, arglist
, target
);
14173 case IX86_BUILTIN_PF2ID
:
14174 return ix86_expand_unop_builtin (CODE_FOR_pf2id
, arglist
, target
, 0);
14176 case IX86_BUILTIN_PFACC
:
14177 return ix86_expand_binop_builtin (CODE_FOR_pfacc
, arglist
, target
);
14179 case IX86_BUILTIN_PFADD
:
14180 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3
, arglist
, target
);
14182 case IX86_BUILTIN_PFCMPEQ
:
14183 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3
, arglist
, target
);
14185 case IX86_BUILTIN_PFCMPGE
:
14186 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3
, arglist
, target
);
14188 case IX86_BUILTIN_PFCMPGT
:
14189 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3
, arglist
, target
);
14191 case IX86_BUILTIN_PFMAX
:
14192 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3
, arglist
, target
);
14194 case IX86_BUILTIN_PFMIN
:
14195 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3
, arglist
, target
);
14197 case IX86_BUILTIN_PFMUL
:
14198 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3
, arglist
, target
);
14200 case IX86_BUILTIN_PFRCP
:
14201 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2
, arglist
, target
, 0);
14203 case IX86_BUILTIN_PFRCPIT1
:
14204 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3
, arglist
, target
);
14206 case IX86_BUILTIN_PFRCPIT2
:
14207 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3
, arglist
, target
);
14209 case IX86_BUILTIN_PFRSQIT1
:
14210 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3
, arglist
, target
);
14212 case IX86_BUILTIN_PFRSQRT
:
14213 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2
, arglist
, target
, 0);
14215 case IX86_BUILTIN_PFSUB
:
14216 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3
, arglist
, target
);
14218 case IX86_BUILTIN_PFSUBR
:
14219 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3
, arglist
, target
);
14221 case IX86_BUILTIN_PI2FD
:
14222 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2
, arglist
, target
, 0);
14224 case IX86_BUILTIN_PMULHRW
:
14225 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3
, arglist
, target
);
14227 case IX86_BUILTIN_PF2IW
:
14228 return ix86_expand_unop_builtin (CODE_FOR_pf2iw
, arglist
, target
, 0);
14230 case IX86_BUILTIN_PFNACC
:
14231 return ix86_expand_binop_builtin (CODE_FOR_pfnacc
, arglist
, target
);
14233 case IX86_BUILTIN_PFPNACC
:
14234 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc
, arglist
, target
);
14236 case IX86_BUILTIN_PI2FW
:
14237 return ix86_expand_unop_builtin (CODE_FOR_pi2fw
, arglist
, target
, 0);
14239 case IX86_BUILTIN_PSWAPDSI
:
14240 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2
, arglist
, target
, 0);
14242 case IX86_BUILTIN_PSWAPDSF
:
14243 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2
, arglist
, target
, 0);
14245 case IX86_BUILTIN_SSE_ZERO
:
14246 target
= gen_reg_rtx (V4SFmode
);
14247 emit_insn (gen_sse_clrv4sf (target
, CONST0_RTX (V4SFmode
)));
14250 case IX86_BUILTIN_MMX_ZERO
:
14251 target
= gen_reg_rtx (DImode
);
14252 emit_insn (gen_mmx_clrdi (target
));
14255 case IX86_BUILTIN_CLRTI
:
14256 target
= gen_reg_rtx (V2DImode
);
14257 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode
, target
, V2DImode
, 0)));
14261 case IX86_BUILTIN_SQRTSD
:
14262 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2
, arglist
, target
);
14263 case IX86_BUILTIN_LOADAPD
:
14264 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd
, arglist
, target
, 1);
14265 case IX86_BUILTIN_LOADUPD
:
14266 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, arglist
, target
, 1);
14268 case IX86_BUILTIN_STOREAPD
:
14269 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
14270 case IX86_BUILTIN_STOREUPD
:
14271 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, arglist
);
14273 case IX86_BUILTIN_LOADSD
:
14274 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd
, arglist
, target
, 1);
14276 case IX86_BUILTIN_STORESD
:
14277 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd
, arglist
);
14279 case IX86_BUILTIN_SETPD1
:
14280 target
= assign_386_stack_local (DFmode
, 0);
14281 arg0
= TREE_VALUE (arglist
);
14282 emit_move_insn (adjust_address (target
, DFmode
, 0),
14283 expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0));
14284 op0
= gen_reg_rtx (V2DFmode
);
14285 emit_insn (gen_sse2_loadsd (op0
, adjust_address (target
, V2DFmode
, 0)));
14286 emit_insn (gen_sse2_shufpd (op0
, op0
, op0
, const0_rtx
));
14289 case IX86_BUILTIN_SETPD
:
14290 target
= assign_386_stack_local (V2DFmode
, 0);
14291 arg0
= TREE_VALUE (arglist
);
14292 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14293 emit_move_insn (adjust_address (target
, DFmode
, 0),
14294 expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0));
14295 emit_move_insn (adjust_address (target
, DFmode
, 8),
14296 expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0));
14297 op0
= gen_reg_rtx (V2DFmode
);
14298 emit_insn (gen_sse2_movapd (op0
, target
));
14301 case IX86_BUILTIN_LOADRPD
:
14302 target
= ix86_expand_unop_builtin (CODE_FOR_sse2_movapd
, arglist
,
14303 gen_reg_rtx (V2DFmode
), 1);
14304 emit_insn (gen_sse2_shufpd (target
, target
, target
, const1_rtx
));
14307 case IX86_BUILTIN_LOADPD1
:
14308 target
= ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd
, arglist
,
14309 gen_reg_rtx (V2DFmode
), 1);
14310 emit_insn (gen_sse2_shufpd (target
, target
, target
, const0_rtx
));
14313 case IX86_BUILTIN_STOREPD1
:
14314 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
14315 case IX86_BUILTIN_STORERPD
:
14316 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
14318 case IX86_BUILTIN_CLRPD
:
14319 target
= gen_reg_rtx (V2DFmode
);
14320 emit_insn (gen_sse_clrv2df (target
));
14323 case IX86_BUILTIN_MFENCE
:
14324 emit_insn (gen_sse2_mfence ());
14326 case IX86_BUILTIN_LFENCE
:
14327 emit_insn (gen_sse2_lfence ());
14330 case IX86_BUILTIN_CLFLUSH
:
14331 arg0
= TREE_VALUE (arglist
);
14332 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14333 icode
= CODE_FOR_sse2_clflush
;
14334 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, Pmode
))
14335 op0
= copy_to_mode_reg (Pmode
, op0
);
14337 emit_insn (gen_sse2_clflush (op0
));
14340 case IX86_BUILTIN_MOVNTPD
:
14341 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, arglist
);
14342 case IX86_BUILTIN_MOVNTDQ
:
14343 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, arglist
);
14344 case IX86_BUILTIN_MOVNTI
:
14345 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, arglist
);
14347 case IX86_BUILTIN_LOADDQA
:
14348 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa
, arglist
, target
, 1);
14349 case IX86_BUILTIN_LOADDQU
:
14350 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu
, arglist
, target
, 1);
14351 case IX86_BUILTIN_LOADD
:
14352 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd
, arglist
, target
, 1);
14354 case IX86_BUILTIN_STOREDQA
:
14355 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa
, arglist
);
14356 case IX86_BUILTIN_STOREDQU
:
14357 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu
, arglist
);
14358 case IX86_BUILTIN_STORED
:
14359 return ix86_expand_store_builtin (CODE_FOR_sse2_stored
, arglist
);
14361 case IX86_BUILTIN_MONITOR
:
14362 arg0
= TREE_VALUE (arglist
);
14363 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14364 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
14365 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14366 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14367 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
14369 op0
= copy_to_mode_reg (SImode
, op0
);
14371 op1
= copy_to_mode_reg (SImode
, op1
);
14373 op2
= copy_to_mode_reg (SImode
, op2
);
14374 emit_insn (gen_monitor (op0
, op1
, op2
));
14377 case IX86_BUILTIN_MWAIT
:
14378 arg0
= TREE_VALUE (arglist
);
14379 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
14380 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
14381 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
14383 op0
= copy_to_mode_reg (SImode
, op0
);
14385 op1
= copy_to_mode_reg (SImode
, op1
);
14386 emit_insn (gen_mwait (op0
, op1
));
14389 case IX86_BUILTIN_LOADDDUP
:
14390 return ix86_expand_unop_builtin (CODE_FOR_loadddup
, arglist
, target
, 1);
14392 case IX86_BUILTIN_LDDQU
:
14393 return ix86_expand_unop_builtin (CODE_FOR_lddqu
, arglist
, target
,
14400 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
14401 if (d
->code
== fcode
)
14403 /* Compares are treated specially. */
14404 if (d
->icode
== CODE_FOR_maskcmpv4sf3
14405 || d
->icode
== CODE_FOR_vmmaskcmpv4sf3
14406 || d
->icode
== CODE_FOR_maskncmpv4sf3
14407 || d
->icode
== CODE_FOR_vmmaskncmpv4sf3
14408 || d
->icode
== CODE_FOR_maskcmpv2df3
14409 || d
->icode
== CODE_FOR_vmmaskcmpv2df3
14410 || d
->icode
== CODE_FOR_maskncmpv2df3
14411 || d
->icode
== CODE_FOR_vmmaskncmpv2df3
)
14412 return ix86_expand_sse_compare (d
, arglist
, target
);
14414 return ix86_expand_binop_builtin (d
->icode
, arglist
, target
);
14417 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
14418 if (d
->code
== fcode
)
14419 return ix86_expand_unop_builtin (d
->icode
, arglist
, target
, 0);
14421 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
14422 if (d
->code
== fcode
)
14423 return ix86_expand_sse_comi (d
, arglist
, target
);
14425 /* @@@ Should really do something sensible here. */
14429 /* Store OPERAND to the memory after reload is completed. This means
14430 that we can't easily use assign_stack_local. */
14432 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
14435 if (!reload_completed
)
14437 if (TARGET_RED_ZONE
)
14439 result
= gen_rtx_MEM (mode
,
14440 gen_rtx_PLUS (Pmode
,
14442 GEN_INT (-RED_ZONE_SIZE
)));
14443 emit_move_insn (result
, operand
);
14445 else if (!TARGET_RED_ZONE
&& TARGET_64BIT
)
14451 operand
= gen_lowpart (DImode
, operand
);
14455 gen_rtx_SET (VOIDmode
,
14456 gen_rtx_MEM (DImode
,
14457 gen_rtx_PRE_DEC (DImode
,
14458 stack_pointer_rtx
)),
14464 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
14473 split_di (&operand
, 1, operands
, operands
+ 1);
14475 gen_rtx_SET (VOIDmode
,
14476 gen_rtx_MEM (SImode
,
14477 gen_rtx_PRE_DEC (Pmode
,
14478 stack_pointer_rtx
)),
14481 gen_rtx_SET (VOIDmode
,
14482 gen_rtx_MEM (SImode
,
14483 gen_rtx_PRE_DEC (Pmode
,
14484 stack_pointer_rtx
)),
14489 /* It is better to store HImodes as SImodes. */
14490 if (!TARGET_PARTIAL_REG_STALL
)
14491 operand
= gen_lowpart (SImode
, operand
);
14495 gen_rtx_SET (VOIDmode
,
14496 gen_rtx_MEM (GET_MODE (operand
),
14497 gen_rtx_PRE_DEC (SImode
,
14498 stack_pointer_rtx
)),
14504 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
14509 /* Free operand from the memory. */
14511 ix86_free_from_memory (enum machine_mode mode
)
14513 if (!TARGET_RED_ZONE
)
14517 if (mode
== DImode
|| TARGET_64BIT
)
14519 else if (mode
== HImode
&& TARGET_PARTIAL_REG_STALL
)
14523 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14524 to pop or add instruction if registers are available. */
14525 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
14526 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
14531 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14532 QImode must go into class Q_REGS.
14533 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14534 movdf to do mem-to-mem moves through integer regs. */
14536 ix86_preferred_reload_class (rtx x
, enum reg_class
class)
14538 if (GET_CODE (x
) == CONST_VECTOR
&& x
!= CONST0_RTX (GET_MODE (x
)))
14540 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
14542 /* SSE can't load any constant directly yet. */
14543 if (SSE_CLASS_P (class))
14545 /* Floats can load 0 and 1. */
14546 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x
))
14548 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14549 if (MAYBE_SSE_CLASS_P (class))
14550 return (reg_class_subset_p (class, GENERAL_REGS
)
14551 ? GENERAL_REGS
: FLOAT_REGS
);
14555 /* General regs can load everything. */
14556 if (reg_class_subset_p (class, GENERAL_REGS
))
14557 return GENERAL_REGS
;
14558 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14559 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14562 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x
))
14564 if (GET_MODE (x
) == QImode
&& ! reg_class_subset_p (class, Q_REGS
))
14569 /* If we are copying between general and FP registers, we need a memory
14570 location. The same is true for SSE and MMX registers.
14572 The macro can't work reliably when one of the CLASSES is class containing
14573 registers from multiple units (SSE, MMX, integer). We avoid this by never
14574 combining those units in single alternative in the machine description.
14575 Ensure that this constraint holds to avoid unexpected surprises.
14577 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14578 enforce these sanity checks. */
14580 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
14581 enum machine_mode mode
, int strict
)
14583 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
14584 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
14585 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
14586 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
14587 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
14588 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
14595 return (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
)
14596 || ((SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
)
14597 || MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
14598 && ((mode
!= SImode
&& (mode
!= DImode
|| !TARGET_64BIT
))
14599 || (!TARGET_INTER_UNIT_MOVES
&& !optimize_size
))));
14601 /* Return the cost of moving data from a register in class CLASS1 to
14602 one in class CLASS2.
14604 It is not required that the cost always equal 2 when FROM is the same as TO;
14605 on some machines it is expensive to move between registers if they are not
14606 general registers. */
14608 ix86_register_move_cost (enum machine_mode mode
, enum reg_class class1
,
14609 enum reg_class class2
)
14611 /* In case we require secondary memory, compute cost of the store followed
14612 by load. In order to avoid bad register allocation choices, we need
14613 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14615 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
14619 cost
+= MAX (MEMORY_MOVE_COST (mode
, class1
, 0),
14620 MEMORY_MOVE_COST (mode
, class1
, 1));
14621 cost
+= MAX (MEMORY_MOVE_COST (mode
, class2
, 0),
14622 MEMORY_MOVE_COST (mode
, class2
, 1));
14624 /* In case of copying from general_purpose_register we may emit multiple
14625 stores followed by single load causing memory size mismatch stall.
14626 Count this as arbitrarily high cost of 20. */
14627 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
14630 /* In the case of FP/MMX moves, the registers actually overlap, and we
14631 have to switch modes in order to treat them differently. */
14632 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
14633 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
14639 /* Moves between SSE/MMX and integer unit are expensive. */
14640 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
14641 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
14642 return ix86_cost
->mmxsse_to_integer
;
14643 if (MAYBE_FLOAT_CLASS_P (class1
))
14644 return ix86_cost
->fp_move
;
14645 if (MAYBE_SSE_CLASS_P (class1
))
14646 return ix86_cost
->sse_move
;
14647 if (MAYBE_MMX_CLASS_P (class1
))
14648 return ix86_cost
->mmx_move
;
14652 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14654 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
14656 /* Flags and only flags can only hold CCmode values. */
14657 if (CC_REGNO_P (regno
))
14658 return GET_MODE_CLASS (mode
) == MODE_CC
;
14659 if (GET_MODE_CLASS (mode
) == MODE_CC
14660 || GET_MODE_CLASS (mode
) == MODE_RANDOM
14661 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
14663 if (FP_REGNO_P (regno
))
14664 return VALID_FP_MODE_P (mode
);
14665 if (SSE_REGNO_P (regno
))
14666 return (TARGET_SSE
? VALID_SSE_REG_MODE (mode
) : 0);
14667 if (MMX_REGNO_P (regno
))
14669 ? VALID_MMX_REG_MODE (mode
) || VALID_MMX_REG_MODE_3DNOW (mode
) : 0);
14670 /* We handle both integer and floats in the general purpose registers.
14671 In future we should be able to handle vector modes as well. */
14672 if (!VALID_INT_MODE_P (mode
) && !VALID_FP_MODE_P (mode
))
14674 /* Take care for QImode values - they can be in non-QI regs, but then
14675 they do cause partial register stalls. */
14676 if (regno
< 4 || mode
!= QImode
|| TARGET_64BIT
)
14678 return reload_in_progress
|| reload_completed
|| !TARGET_PARTIAL_REG_STALL
;
14681 /* Return the cost of moving data of mode M between a
14682 register and memory. A value of 2 is the default; this cost is
14683 relative to those in `REGISTER_MOVE_COST'.
14685 If moving between registers and memory is more expensive than
14686 between two registers, you should define this macro to express the
14689 Model also increased moving costs of QImode registers in non
14693 ix86_memory_move_cost (enum machine_mode mode
, enum reg_class
class, int in
)
14695 if (FLOAT_CLASS_P (class))
14712 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
14714 if (SSE_CLASS_P (class))
14717 switch (GET_MODE_SIZE (mode
))
14731 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
14733 if (MMX_CLASS_P (class))
14736 switch (GET_MODE_SIZE (mode
))
14747 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
14749 switch (GET_MODE_SIZE (mode
))
14753 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
14754 : ix86_cost
->movzbl_load
);
14756 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
14757 : ix86_cost
->int_store
[0] + 4);
14760 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
14762 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14763 if (mode
== TFmode
)
14765 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
14766 * (((int) GET_MODE_SIZE (mode
)
14767 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
14771 /* Compute a (partial) cost for rtx X. Return true if the complete
14772 cost has been computed, and false if subexpressions should be
14773 scanned. In either case, *TOTAL contains the cost result. */
14776 ix86_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
14778 enum machine_mode mode
= GET_MODE (x
);
14786 if (TARGET_64BIT
&& !x86_64_sign_extended_value (x
))
14788 else if (TARGET_64BIT
&& !x86_64_zero_extended_value (x
))
14790 else if (flag_pic
&& SYMBOLIC_CONST (x
)
14792 || (!GET_CODE (x
) != LABEL_REF
14793 && (GET_CODE (x
) != SYMBOL_REF
14794 || !SYMBOL_REF_LOCAL_P (x
)))))
14801 if (mode
== VOIDmode
)
14804 switch (standard_80387_constant_p (x
))
14809 default: /* Other constants */
14814 /* Start with (MEM (SYMBOL_REF)), since that's where
14815 it'll probably end up. Add a penalty for size. */
14816 *total
= (COSTS_N_INSNS (1)
14817 + (flag_pic
!= 0 && !TARGET_64BIT
)
14818 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
14824 /* The zero extensions is often completely free on x86_64, so make
14825 it as cheap as possible. */
14826 if (TARGET_64BIT
&& mode
== DImode
14827 && GET_MODE (XEXP (x
, 0)) == SImode
)
14829 else if (TARGET_ZERO_EXTEND_WITH_AND
)
14830 *total
= COSTS_N_INSNS (ix86_cost
->add
);
14832 *total
= COSTS_N_INSNS (ix86_cost
->movzx
);
14836 *total
= COSTS_N_INSNS (ix86_cost
->movsx
);
14840 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
14841 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
14843 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
14846 *total
= COSTS_N_INSNS (ix86_cost
->add
);
14849 if ((value
== 2 || value
== 3)
14850 && !TARGET_DECOMPOSE_LEA
14851 && ix86_cost
->lea
<= ix86_cost
->shift_const
)
14853 *total
= COSTS_N_INSNS (ix86_cost
->lea
);
14863 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
14865 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
14867 if (INTVAL (XEXP (x
, 1)) > 32)
14868 *total
= COSTS_N_INSNS(ix86_cost
->shift_const
+ 2);
14870 *total
= COSTS_N_INSNS(ix86_cost
->shift_const
* 2);
14874 if (GET_CODE (XEXP (x
, 1)) == AND
)
14875 *total
= COSTS_N_INSNS(ix86_cost
->shift_var
* 2);
14877 *total
= COSTS_N_INSNS(ix86_cost
->shift_var
* 6 + 2);
14882 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
14883 *total
= COSTS_N_INSNS (ix86_cost
->shift_const
);
14885 *total
= COSTS_N_INSNS (ix86_cost
->shift_var
);
14890 if (FLOAT_MODE_P (mode
))
14892 *total
= COSTS_N_INSNS (ix86_cost
->fmul
);
14897 rtx op0
= XEXP (x
, 0);
14898 rtx op1
= XEXP (x
, 1);
14900 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
14902 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
14903 for (nbits
= 0; value
!= 0; value
&= value
- 1)
14907 /* This is arbitrary. */
14910 /* Compute costs correctly for widening multiplication. */
14911 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op1
) == ZERO_EXTEND
)
14912 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
14913 == GET_MODE_SIZE (mode
))
14915 int is_mulwiden
= 0;
14916 enum machine_mode inner_mode
= GET_MODE (op0
);
14918 if (GET_CODE (op0
) == GET_CODE (op1
))
14919 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
14920 else if (GET_CODE (op1
) == CONST_INT
)
14922 if (GET_CODE (op0
) == SIGN_EXTEND
)
14923 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
14926 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
14930 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
14933 *total
= COSTS_N_INSNS (ix86_cost
->mult_init
[MODE_INDEX (mode
)]
14934 + nbits
* ix86_cost
->mult_bit
)
14935 + rtx_cost (op0
, outer_code
) + rtx_cost (op1
, outer_code
);
14944 if (FLOAT_MODE_P (mode
))
14945 *total
= COSTS_N_INSNS (ix86_cost
->fdiv
);
14947 *total
= COSTS_N_INSNS (ix86_cost
->divide
[MODE_INDEX (mode
)]);
14951 if (FLOAT_MODE_P (mode
))
14952 *total
= COSTS_N_INSNS (ix86_cost
->fadd
);
14953 else if (!TARGET_DECOMPOSE_LEA
14954 && GET_MODE_CLASS (mode
) == MODE_INT
14955 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
14957 if (GET_CODE (XEXP (x
, 0)) == PLUS
14958 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
14959 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == CONST_INT
14960 && CONSTANT_P (XEXP (x
, 1)))
14962 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
14963 if (val
== 2 || val
== 4 || val
== 8)
14965 *total
= COSTS_N_INSNS (ix86_cost
->lea
);
14966 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
14967 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
14969 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
14973 else if (GET_CODE (XEXP (x
, 0)) == MULT
14974 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)
14976 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
14977 if (val
== 2 || val
== 4 || val
== 8)
14979 *total
= COSTS_N_INSNS (ix86_cost
->lea
);
14980 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
14981 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
14985 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
14987 *total
= COSTS_N_INSNS (ix86_cost
->lea
);
14988 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
14989 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
14990 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
14997 if (FLOAT_MODE_P (mode
))
14999 *total
= COSTS_N_INSNS (ix86_cost
->fadd
);
15007 if (!TARGET_64BIT
&& mode
== DImode
)
15009 *total
= (COSTS_N_INSNS (ix86_cost
->add
) * 2
15010 + (rtx_cost (XEXP (x
, 0), outer_code
)
15011 << (GET_MODE (XEXP (x
, 0)) != DImode
))
15012 + (rtx_cost (XEXP (x
, 1), outer_code
)
15013 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
15019 if (FLOAT_MODE_P (mode
))
15021 *total
= COSTS_N_INSNS (ix86_cost
->fchs
);
15027 if (!TARGET_64BIT
&& mode
== DImode
)
15028 *total
= COSTS_N_INSNS (ix86_cost
->add
* 2);
15030 *total
= COSTS_N_INSNS (ix86_cost
->add
);
15034 if (!TARGET_SSE_MATH
|| !VALID_SSE_REG_MODE (mode
))
15039 if (FLOAT_MODE_P (mode
))
15040 *total
= COSTS_N_INSNS (ix86_cost
->fabs
);
15044 if (FLOAT_MODE_P (mode
))
15045 *total
= COSTS_N_INSNS (ix86_cost
->fsqrt
);
15049 if (XINT (x
, 1) == UNSPEC_TP
)
15058 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
15060 ix86_svr3_asm_out_constructor (rtx symbol
, int priority ATTRIBUTE_UNUSED
)
15063 fputs ("\tpushl $", asm_out_file
);
15064 assemble_name (asm_out_file
, XSTR (symbol
, 0));
15065 fputc ('\n', asm_out_file
);
15071 static int current_machopic_label_num
;
15073 /* Given a symbol name and its associated stub, write out the
15074 definition of the stub. */
15077 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
15079 unsigned int length
;
15080 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
15081 int label
= ++current_machopic_label_num
;
15083 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15084 symb
= (*targetm
.strip_name_encoding
) (symb
);
15086 length
= strlen (stub
);
15087 binder_name
= alloca (length
+ 32);
15088 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
15090 length
= strlen (symb
);
15091 symbol_name
= alloca (length
+ 32);
15092 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
15094 sprintf (lazy_ptr_name
, "L%d$lz", label
);
15097 machopic_picsymbol_stub_section ();
15099 machopic_symbol_stub_section ();
15101 fprintf (file
, "%s:\n", stub
);
15102 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
15106 fprintf (file
, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label
, label
);
15107 fprintf (file
, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name
, label
);
15108 fprintf (file
, "\tjmp %%edx\n");
15111 fprintf (file
, "\tjmp *%s\n", lazy_ptr_name
);
15113 fprintf (file
, "%s:\n", binder_name
);
15117 fprintf (file
, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name
, label
);
15118 fprintf (file
, "\tpushl %%eax\n");
15121 fprintf (file
, "\t pushl $%s\n", lazy_ptr_name
);
15123 fprintf (file
, "\tjmp dyld_stub_binding_helper\n");
15125 machopic_lazy_symbol_ptr_section ();
15126 fprintf (file
, "%s:\n", lazy_ptr_name
);
15127 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
15128 fprintf (file
, "\t.long %s\n", binder_name
);
15130 #endif /* TARGET_MACHO */
15132 /* Order the registers for register allocator. */
15135 x86_order_regs_for_local_alloc (void)
15140 /* First allocate the local general purpose registers. */
15141 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
15142 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
15143 reg_alloc_order
[pos
++] = i
;
15145 /* Global general purpose registers. */
15146 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
15147 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
15148 reg_alloc_order
[pos
++] = i
;
15150 /* x87 registers come first in case we are doing FP math
15152 if (!TARGET_SSE_MATH
)
15153 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
15154 reg_alloc_order
[pos
++] = i
;
15156 /* SSE registers. */
15157 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
15158 reg_alloc_order
[pos
++] = i
;
15159 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
15160 reg_alloc_order
[pos
++] = i
;
15162 /* x87 registers. */
15163 if (TARGET_SSE_MATH
)
15164 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
15165 reg_alloc_order
[pos
++] = i
;
15167 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
15168 reg_alloc_order
[pos
++] = i
;
15170 /* Initialize the rest of array as we do not allocate some registers
15172 while (pos
< FIRST_PSEUDO_REGISTER
)
15173 reg_alloc_order
[pos
++] = 0;
15176 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15177 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15180 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15181 struct attribute_spec.handler. */
15183 ix86_handle_struct_attribute (tree
*node
, tree name
,
15184 tree args ATTRIBUTE_UNUSED
,
15185 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
15188 if (DECL_P (*node
))
15190 if (TREE_CODE (*node
) == TYPE_DECL
)
15191 type
= &TREE_TYPE (*node
);
15196 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
15197 || TREE_CODE (*type
) == UNION_TYPE
)))
15199 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name
));
15200 *no_add_attrs
= true;
15203 else if ((is_attribute_p ("ms_struct", name
)
15204 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
15205 || ((is_attribute_p ("gcc_struct", name
)
15206 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
15208 warning ("`%s' incompatible attribute ignored",
15209 IDENTIFIER_POINTER (name
));
15210 *no_add_attrs
= true;
15217 ix86_ms_bitfield_layout_p (tree record_type
)
15219 return (TARGET_USE_MS_BITFIELD_LAYOUT
&&
15220 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
15221 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
15224 /* Returns an expression indicating where the this parameter is
15225 located on entry to the FUNCTION. */
15228 x86_this_parameter (tree function
)
15230 tree type
= TREE_TYPE (function
);
15234 int n
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
15235 return gen_rtx_REG (DImode
, x86_64_int_parameter_registers
[n
]);
15238 if (ix86_function_regparm (type
, function
) > 0)
15242 parm
= TYPE_ARG_TYPES (type
);
15243 /* Figure out whether or not the function has a variable number of
15245 for (; parm
; parm
= TREE_CHAIN (parm
))
15246 if (TREE_VALUE (parm
) == void_type_node
)
15248 /* If not, the this parameter is in the first argument. */
15252 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
15254 return gen_rtx_REG (SImode
, regno
);
15258 if (aggregate_value_p (TREE_TYPE (type
), type
))
15259 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 8));
15261 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 4));
15264 /* Determine whether x86_output_mi_thunk can succeed. */
15267 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED
,
15268 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
15269 HOST_WIDE_INT vcall_offset
, tree function
)
15271 /* 64-bit can handle anything. */
15275 /* For 32-bit, everything's fine if we have one free register. */
15276 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
15279 /* Need a free register for vcall_offset. */
15283 /* Need a free register for GOT references. */
15284 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
15287 /* Otherwise ok. */
15291 /* Output the assembler code for a thunk function. THUNK_DECL is the
15292 declaration for the thunk function itself, FUNCTION is the decl for
15293 the target function. DELTA is an immediate constant offset to be
15294 added to THIS. If VCALL_OFFSET is nonzero, the word at
15295 *(*this + vcall_offset) should be added to THIS. */
15298 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED
,
15299 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
15300 HOST_WIDE_INT vcall_offset
, tree function
)
15303 rtx
this = x86_this_parameter (function
);
15306 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15307 pull it in now and let DELTA benefit. */
15310 else if (vcall_offset
)
15312 /* Put the this parameter into %eax. */
15314 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, 0);
15315 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
15318 this_reg
= NULL_RTX
;
15320 /* Adjust the this parameter by a fixed constant. */
15323 xops
[0] = GEN_INT (delta
);
15324 xops
[1] = this_reg
? this_reg
: this;
15327 if (!x86_64_general_operand (xops
[0], DImode
))
15329 tmp
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 2 /* R10 */);
15331 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
15335 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
15338 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
15341 /* Adjust the this parameter by a value stored in the vtable. */
15345 tmp
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 2 /* R10 */);
15348 int tmp_regno
= 2 /* ECX */;
15349 if (lookup_attribute ("fastcall",
15350 TYPE_ATTRIBUTES (TREE_TYPE (function
))))
15351 tmp_regno
= 0 /* EAX */;
15352 tmp
= gen_rtx_REG (SImode
, tmp_regno
);
15355 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
15358 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
15360 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
15362 /* Adjust the this parameter. */
15363 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
15364 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
15366 rtx tmp2
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
15367 xops
[0] = GEN_INT (vcall_offset
);
15369 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
15370 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
15372 xops
[1] = this_reg
;
15374 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
15376 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
15379 /* If necessary, drop THIS back to its stack slot. */
15380 if (this_reg
&& this_reg
!= this)
15382 xops
[0] = this_reg
;
15384 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
15387 xops
[0] = XEXP (DECL_RTL (function
), 0);
15390 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
15391 output_asm_insn ("jmp\t%P0", xops
);
15394 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, xops
[0]), UNSPEC_GOTPCREL
);
15395 tmp
= gen_rtx_CONST (Pmode
, tmp
);
15396 tmp
= gen_rtx_MEM (QImode
, tmp
);
15398 output_asm_insn ("jmp\t%A0", xops
);
15403 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
15404 output_asm_insn ("jmp\t%P0", xops
);
15409 const char *ip
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function
));
15410 tmp
= gen_rtx_SYMBOL_REF (Pmode
, machopic_stub_name (ip
));
15411 tmp
= gen_rtx_MEM (QImode
, tmp
);
15413 output_asm_insn ("jmp\t%0", xops
);
15416 #endif /* TARGET_MACHO */
15418 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
15419 output_set_got (tmp
);
15422 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
15423 output_asm_insn ("jmp\t{*}%1", xops
);
15429 x86_file_start (void)
15431 default_file_start ();
15432 if (X86_FILE_START_VERSION_DIRECTIVE
)
15433 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
15434 if (X86_FILE_START_FLTUSED
)
15435 fputs ("\t.global\t__fltused\n", asm_out_file
);
15436 if (ix86_asm_dialect
== ASM_INTEL
)
15437 fputs ("\t.intel_syntax\n", asm_out_file
);
15441 x86_field_alignment (tree field
, int computed
)
15443 enum machine_mode mode
;
15444 tree type
= TREE_TYPE (field
);
15446 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
15448 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
15449 ? get_inner_array_type (type
) : type
);
15450 if (mode
== DFmode
|| mode
== DCmode
15451 || GET_MODE_CLASS (mode
) == MODE_INT
15452 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
15453 return MIN (32, computed
);
15457 /* Output assembler code to FILE to increment profiler label # LABELNO
15458 for profiling a function entry. */
15460 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
15465 #ifndef NO_PROFILE_COUNTERS
15466 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
15468 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
15472 #ifndef NO_PROFILE_COUNTERS
15473 fprintf (file
, "\tmovq\t$%sP%d,%%r11\n", LPREFIX
, labelno
);
15475 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
15479 #ifndef NO_PROFILE_COUNTERS
15480 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15481 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
15483 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
15487 #ifndef NO_PROFILE_COUNTERS
15488 fprintf (file
, "\tmovl\t$%sP%d,%%%s\n", LPREFIX
, labelno
,
15489 PROFILE_COUNT_REGISTER
);
15491 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
15495 /* We don't have exact information about the insn sizes, but we may assume
15496 quite safely that we are informed about all 1 byte insns and memory
15497 address sizes. This is enough to eliminate unnecessary padding in
15501 min_insn_size (rtx insn
)
15505 if (!INSN_P (insn
) || !active_insn_p (insn
))
15508 /* Discard alignments we've emit and jump instructions. */
15509 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
15510 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
15512 if (GET_CODE (insn
) == JUMP_INSN
15513 && (GET_CODE (PATTERN (insn
)) == ADDR_VEC
15514 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
))
15517 /* Important case - calls are always 5 bytes.
15518 It is common to have many calls in the row. */
15519 if (GET_CODE (insn
) == CALL_INSN
15520 && symbolic_reference_mentioned_p (PATTERN (insn
))
15521 && !SIBLING_CALL_P (insn
))
15523 if (get_attr_length (insn
) <= 1)
15526 /* For normal instructions we may rely on the sizes of addresses
15527 and the presence of symbol to require 4 bytes of encoding.
15528 This is not the case for jumps where references are PC relative. */
15529 if (GET_CODE (insn
) != JUMP_INSN
)
15531 l
= get_attr_length_address (insn
);
15532 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
15541 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
15545 ix86_avoid_jump_misspredicts (void)
15547 rtx insn
, start
= get_insns ();
15548 int nbytes
= 0, njumps
= 0;
15551 /* Look for all minimal intervals of instructions containing 4 jumps.
15552 The intervals are bounded by START and INSN. NBYTES is the total
15553 size of instructions in the interval including INSN and not including
15554 START. When the NBYTES is smaller than 16 bytes, it is possible
15555 that the end of START and INSN ends up in the same 16byte page.
15557 The smallest offset in the page INSN can start is the case where START
15558 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
15559 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
15561 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
15564 nbytes
+= min_insn_size (insn
);
15566 fprintf(dump_file
, "Insn %i estimated to %i bytes\n",
15567 INSN_UID (insn
), min_insn_size (insn
));
15568 if ((GET_CODE (insn
) == JUMP_INSN
15569 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
15570 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
15571 || GET_CODE (insn
) == CALL_INSN
)
15578 start
= NEXT_INSN (start
);
15579 if ((GET_CODE (start
) == JUMP_INSN
15580 && GET_CODE (PATTERN (start
)) != ADDR_VEC
15581 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
15582 || GET_CODE (start
) == CALL_INSN
)
15583 njumps
--, isjump
= 1;
15586 nbytes
-= min_insn_size (start
);
15591 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
15592 INSN_UID (start
), INSN_UID (insn
), nbytes
);
15594 if (njumps
== 3 && isjump
&& nbytes
< 16)
15596 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
15599 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
15600 INSN_UID (insn
), padsize
);
15601 emit_insn_before (gen_align (GEN_INT (padsize
)), insn
);
15606 /* AMD Athlon works faster
15607 when RET is not destination of conditional jump or directly preceded
15608 by other jump instruction. We avoid the penalty by inserting NOP just
15609 before the RET instructions in such cases. */
15611 ix86_pad_returns (void)
15615 for (e
= EXIT_BLOCK_PTR
->pred
; e
; e
= e
->pred_next
)
15617 basic_block bb
= e
->src
;
15618 rtx ret
= BB_END (bb
);
15620 bool replace
= false;
15622 if (GET_CODE (ret
) != JUMP_INSN
|| GET_CODE (PATTERN (ret
)) != RETURN
15623 || !maybe_hot_bb_p (bb
))
15625 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
15626 if (active_insn_p (prev
) || GET_CODE (prev
) == CODE_LABEL
)
15628 if (prev
&& GET_CODE (prev
) == CODE_LABEL
)
15631 for (e
= bb
->pred
; e
; e
= e
->pred_next
)
15632 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
15633 && !(e
->flags
& EDGE_FALLTHRU
))
15638 prev
= prev_active_insn (ret
);
15640 && ((GET_CODE (prev
) == JUMP_INSN
&& any_condjump_p (prev
))
15641 || GET_CODE (prev
) == CALL_INSN
))
15643 /* Empty functions get branch mispredict even when the jump destination
15644 is not visible to us. */
15645 if (!prev
&& cfun
->function_frequency
> FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
15650 emit_insn_before (gen_return_internal_long (), ret
);
15656 /* Implement machine specific optimizations. We implement padding of returns
15657 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
15661 if (TARGET_ATHLON_K8
&& optimize
&& !optimize_size
)
15662 ix86_pad_returns ();
15663 if (TARGET_FOUR_JUMP_LIMIT
&& optimize
&& !optimize_size
)
15664 ix86_avoid_jump_misspredicts ();
15667 /* Return nonzero when QImode register that must be represented via REX prefix
15670 x86_extended_QIreg_mentioned_p (rtx insn
)
15673 extract_insn_cached (insn
);
15674 for (i
= 0; i
< recog_data
.n_operands
; i
++)
15675 if (REG_P (recog_data
.operand
[i
])
15676 && REGNO (recog_data
.operand
[i
]) >= 4)
15681 /* Return nonzero when P points to register encoded via REX prefix.
15682 Called via for_each_rtx. */
15684 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
15686 unsigned int regno
;
15689 regno
= REGNO (*p
);
15690 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
15693 /* Return true when INSN mentions register that must be encoded using REX
15696 x86_extended_reg_mentioned_p (rtx insn
)
15698 return for_each_rtx (&PATTERN (insn
), extended_reg_mentioned_1
, NULL
);
15701 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15702 optabs would emit if we didn't have TFmode patterns. */
15705 x86_emit_floatuns (rtx operands
[2])
15707 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
15708 enum machine_mode mode
, inmode
;
15710 inmode
= GET_MODE (operands
[1]);
15711 if (inmode
!= SImode
15712 && inmode
!= DImode
)
15716 in
= force_reg (inmode
, operands
[1]);
15717 mode
= GET_MODE (out
);
15718 neglab
= gen_label_rtx ();
15719 donelab
= gen_label_rtx ();
15720 i1
= gen_reg_rtx (Pmode
);
15721 f0
= gen_reg_rtx (mode
);
15723 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, Pmode
, 0, neglab
);
15725 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_FLOAT (mode
, in
)));
15726 emit_jump_insn (gen_jump (donelab
));
15729 emit_label (neglab
);
15731 i0
= expand_simple_binop (Pmode
, LSHIFTRT
, in
, const1_rtx
, NULL
, 1, OPTAB_DIRECT
);
15732 i1
= expand_simple_binop (Pmode
, AND
, in
, const1_rtx
, NULL
, 1, OPTAB_DIRECT
);
15733 i0
= expand_simple_binop (Pmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
15734 expand_float (f0
, i0
, 0);
15735 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
15737 emit_label (donelab
);
15740 /* Return if we do not know how to pass TYPE solely in registers. */
15742 ix86_must_pass_in_stack (enum machine_mode mode
, tree type
)
15744 if (default_must_pass_in_stack (mode
, type
))
15746 return (!TARGET_64BIT
&& type
&& mode
== TImode
);
15749 /* Initialize vector TARGET via VALS. */
15751 ix86_expand_vector_init (rtx target
, rtx vals
)
15753 enum machine_mode mode
= GET_MODE (target
);
15754 int elt_size
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
15755 int n_elts
= (GET_MODE_SIZE (mode
) / elt_size
);
15758 for (i
= n_elts
- 1; i
>= 0; i
--)
15759 if (GET_CODE (XVECEXP (vals
, 0, i
)) != CONST_INT
15760 && GET_CODE (XVECEXP (vals
, 0, i
)) != CONST_DOUBLE
)
15763 /* Few special cases first...
15764 ... constants are best loaded from constant pool. */
15767 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
15771 /* ... values where only first field is non-constant are best loaded
15772 from the pool and overwriten via move later. */
15775 rtx op
= simplify_gen_subreg (mode
, XVECEXP (vals
, 0, 0),
15776 GET_MODE_INNER (mode
), 0);
15778 op
= force_reg (mode
, op
);
15779 XVECEXP (vals
, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode
));
15780 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
15781 switch (GET_MODE (target
))
15784 emit_insn (gen_sse2_movsd (target
, target
, op
));
15787 emit_insn (gen_sse_movss (target
, target
, op
));
15795 /* And the busy sequence doing rotations. */
15796 switch (GET_MODE (target
))
15801 simplify_gen_subreg (V2DFmode
, XVECEXP (vals
, 0, 0), DFmode
, 0);
15803 simplify_gen_subreg (V2DFmode
, XVECEXP (vals
, 0, 1), DFmode
, 0);
15805 vecop0
= force_reg (V2DFmode
, vecop0
);
15806 vecop1
= force_reg (V2DFmode
, vecop1
);
15807 emit_insn (gen_sse2_unpcklpd (target
, vecop0
, vecop1
));
15813 simplify_gen_subreg (V4SFmode
, XVECEXP (vals
, 0, 0), SFmode
, 0);
15815 simplify_gen_subreg (V4SFmode
, XVECEXP (vals
, 0, 1), SFmode
, 0);
15817 simplify_gen_subreg (V4SFmode
, XVECEXP (vals
, 0, 2), SFmode
, 0);
15819 simplify_gen_subreg (V4SFmode
, XVECEXP (vals
, 0, 3), SFmode
, 0);
15820 rtx tmp1
= gen_reg_rtx (V4SFmode
);
15821 rtx tmp2
= gen_reg_rtx (V4SFmode
);
15823 vecop0
= force_reg (V4SFmode
, vecop0
);
15824 vecop1
= force_reg (V4SFmode
, vecop1
);
15825 vecop2
= force_reg (V4SFmode
, vecop2
);
15826 vecop3
= force_reg (V4SFmode
, vecop3
);
15827 emit_insn (gen_sse_unpcklps (tmp1
, vecop1
, vecop3
));
15828 emit_insn (gen_sse_unpcklps (tmp2
, vecop0
, vecop2
));
15829 emit_insn (gen_sse_unpcklps (target
, tmp2
, tmp1
));
15837 /* Worker function for TARGET_MD_ASM_CLOBBERS.
15839 We do this in the new i386 backend to maintain source compatibility
15840 with the old cc0-based compiler. */
15843 ix86_md_asm_clobbers (tree clobbers
)
15845 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
15847 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
15849 clobbers
= tree_cons (NULL_TREE
, build_string (7, "dirflag"),
15854 /* Worker function for REVERSE_CONDITION. */
15857 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
15859 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
15860 ? reverse_condition (code
)
15861 : reverse_condition_maybe_unordered (code
));
15864 #include "gt-i386.h"