1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "except.h"
38 #include "function.h"
39 #include "recog.h"
40 #include "expr.h"
41 #include "optabs.h"
42 #include "toplev.h"
43 #include "basic-block.h"
44 #include "ggc.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "langhooks.h"
48 #include "cgraph.h"
50 #ifndef CHECK_STACK_LIMIT
51 #define CHECK_STACK_LIMIT (-1)
52 #endif
54 /* Return index of given mode in mult and division cost tables. */
55 #define MODE_INDEX(mode) \
56 ((mode) == QImode ? 0 \
57 : (mode) == HImode ? 1 \
58 : (mode) == SImode ? 2 \
59 : (mode) == DImode ? 3 \
60 : 4)
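/* For example, MODE_INDEX (SImode) is 2 and any mode not listed above falls
   through to index 4, so the SImode entry of a cost table below is picked as
   something like ix86_cost->mult_init[MODE_INDEX (SImode)].  (Illustrative
   sketch; the field name mult_init is assumed from the "cost of starting a
   multiply" entries below and is not quoted from i386.h.)  */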
62 /* Processor costs (relative to an add) */
63 static const
64 struct processor_costs size_cost = { /* costs for tuning for size */
65 2, /* cost of an add instruction */
66 3, /* cost of a lea instruction */
67 2, /* variable shift costs */
68 3, /* constant shift costs */
69 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
70 0, /* cost of multiply per each bit set */
71 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
72 3, /* cost of movsx */
73 3, /* cost of movzx */
74 0, /* "large" insn */
75 2, /* MOVE_RATIO */
76 2, /* cost for loading QImode using movzbl */
77 {2, 2, 2}, /* cost of loading integer registers
78 in QImode, HImode and SImode.
79 Relative to reg-reg move (2). */
80 {2, 2, 2}, /* cost of storing integer registers */
81 2, /* cost of reg,reg fld/fst */
82 {2, 2, 2}, /* cost of loading fp registers
83 in SFmode, DFmode and XFmode */
84 {2, 2, 2}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
85 3, /* cost of moving MMX register */
86 {3, 3}, /* cost of loading MMX registers
87 in SImode and DImode */
88 {3, 3}, /* cost of storing MMX registers
89 in SImode and DImode */
90 3, /* cost of moving SSE register */
91 {3, 3, 3}, /* cost of loading SSE registers
92 in SImode, DImode and TImode */
93 {3, 3, 3}, /* cost of storing SSE registers
94 in SImode, DImode and TImode */
95 3, /* MMX or SSE register to integer */
96 0, /* size of prefetch block */
97 0, /* number of parallel prefetches */
98 1, /* Branch cost */
99 2, /* cost of FADD and FSUB insns. */
100 2, /* cost of FMUL instruction. */
101 2, /* cost of FDIV instruction. */
102 2, /* cost of FABS instruction. */
103 2, /* cost of FCHS instruction. */
104 2, /* cost of FSQRT instruction. */
107 /* Processor costs (relative to an add) */
108 static const
109 struct processor_costs i386_cost = { /* 386 specific costs */
110 1, /* cost of an add instruction */
111 1, /* cost of a lea instruction */
112 3, /* variable shift costs */
113 2, /* constant shift costs */
114 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
115 1, /* cost of multiply per each bit set */
116 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
117 3, /* cost of movsx */
118 2, /* cost of movzx */
119 15, /* "large" insn */
120 3, /* MOVE_RATIO */
121 4, /* cost for loading QImode using movzbl */
122 {2, 4, 2}, /* cost of loading integer registers
123 in QImode, HImode and SImode.
124 Relative to reg-reg move (2). */
125 {2, 4, 2}, /* cost of storing integer registers */
126 2, /* cost of reg,reg fld/fst */
127 {8, 8, 8}, /* cost of loading fp registers
128 in SFmode, DFmode and XFmode */
129 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
130 2, /* cost of moving MMX register */
131 {4, 8}, /* cost of loading MMX registers
132 in SImode and DImode */
133 {4, 8}, /* cost of storing MMX registers
134 in SImode and DImode */
135 2, /* cost of moving SSE register */
136 {4, 8, 16}, /* cost of loading SSE registers
137 in SImode, DImode and TImode */
138 {4, 8, 16}, /* cost of storing SSE registers
139 in SImode, DImode and TImode */
140 3, /* MMX or SSE register to integer */
141 0, /* size of prefetch block */
142 0, /* number of parallel prefetches */
143 1, /* Branch cost */
144 23, /* cost of FADD and FSUB insns. */
145 27, /* cost of FMUL instruction. */
146 88, /* cost of FDIV instruction. */
147 22, /* cost of FABS instruction. */
148 24, /* cost of FCHS instruction. */
149 122, /* cost of FSQRT instruction. */
152 static const
153 struct processor_costs i486_cost = { /* 486 specific costs */
154 1, /* cost of an add instruction */
155 1, /* cost of a lea instruction */
156 3, /* variable shift costs */
157 2, /* constant shift costs */
158 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
159 1, /* cost of multiply per each bit set */
160 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
161 3, /* cost of movsx */
162 2, /* cost of movzx */
163 15, /* "large" insn */
164 3, /* MOVE_RATIO */
165 4, /* cost for loading QImode using movzbl */
166 {2, 4, 2}, /* cost of loading integer registers
167 in QImode, HImode and SImode.
168 Relative to reg-reg move (2). */
169 {2, 4, 2}, /* cost of storing integer registers */
170 2, /* cost of reg,reg fld/fst */
171 {8, 8, 8}, /* cost of loading fp registers
172 in SFmode, DFmode and XFmode */
173 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
174 2, /* cost of moving MMX register */
175 {4, 8}, /* cost of loading MMX registers
176 in SImode and DImode */
177 {4, 8}, /* cost of storing MMX registers
178 in SImode and DImode */
179 2, /* cost of moving SSE register */
180 {4, 8, 16}, /* cost of loading SSE registers
181 in SImode, DImode and TImode */
182 {4, 8, 16}, /* cost of storing SSE registers
183 in SImode, DImode and TImode */
184 3, /* MMX or SSE register to integer */
185 0, /* size of prefetch block */
186 0, /* number of parallel prefetches */
187 1, /* Branch cost */
188 8, /* cost of FADD and FSUB insns. */
189 16, /* cost of FMUL instruction. */
190 73, /* cost of FDIV instruction. */
191 3, /* cost of FABS instruction. */
192 3, /* cost of FCHS instruction. */
193 83, /* cost of FSQRT instruction. */
196 static const
197 struct processor_costs pentium_cost = {
198 1, /* cost of an add instruction */
199 1, /* cost of a lea instruction */
200 4, /* variable shift costs */
201 1, /* constant shift costs */
202 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
203 0, /* cost of multiply per each bit set */
204 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
205 3, /* cost of movsx */
206 2, /* cost of movzx */
207 8, /* "large" insn */
208 6, /* MOVE_RATIO */
209 6, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {2, 2, 6}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
218 8, /* cost of moving MMX register */
219 {8, 8}, /* cost of loading MMX registers
220 in SImode and DImode */
221 {8, 8}, /* cost of storing MMX registers
222 in SImode and DImode */
223 2, /* cost of moving SSE register */
224 {4, 8, 16}, /* cost of loading SSE registers
225 in SImode, DImode and TImode */
226 {4, 8, 16}, /* cost of storing SSE registers
227 in SImode, DImode and TImode */
228 3, /* MMX or SSE register to integer */
229 0, /* size of prefetch block */
230 0, /* number of parallel prefetches */
231 2, /* Branch cost */
232 3, /* cost of FADD and FSUB insns. */
233 3, /* cost of FMUL instruction. */
234 39, /* cost of FDIV instruction. */
235 1, /* cost of FABS instruction. */
236 1, /* cost of FCHS instruction. */
237 70, /* cost of FSQRT instruction. */
240 static const
241 struct processor_costs pentiumpro_cost = {
242 1, /* cost of an add instruction */
243 1, /* cost of a lea instruction */
244 1, /* variable shift costs */
245 1, /* constant shift costs */
246 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
247 0, /* cost of multiply per each bit set */
248 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
249 1, /* cost of movsx */
250 1, /* cost of movzx */
251 8, /* "large" insn */
252 6, /* MOVE_RATIO */
253 2, /* cost for loading QImode using movzbl */
254 {4, 4, 4}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 2, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
262 2, /* cost of moving MMX register */
263 {2, 2}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {2, 2}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {2, 2, 8}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {2, 2, 8}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 3, /* MMX or SSE register to integer */
273 32, /* size of prefetch block */
274 6, /* number of parallel prefetches */
275 2, /* Branch cost */
276 3, /* cost of FADD and FSUB insns. */
277 5, /* cost of FMUL instruction. */
278 56, /* cost of FDIV instruction. */
279 2, /* cost of FABS instruction. */
280 2, /* cost of FCHS instruction. */
281 56, /* cost of FSQRT instruction. */
284 static const
285 struct processor_costs k6_cost = {
286 1, /* cost of an add instruction */
287 2, /* cost of a lea instruction */
288 1, /* variable shift costs */
289 1, /* constant shift costs */
290 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
291 0, /* cost of multiply per each bit set */
292 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
293 2, /* cost of movsx */
294 2, /* cost of movzx */
295 8, /* "large" insn */
296 4, /* MOVE_RATIO */
297 3, /* cost for loading QImode using movzbl */
298 {4, 5, 4}, /* cost of loading integer registers
299 in QImode, HImode and SImode.
300 Relative to reg-reg move (2). */
301 {2, 3, 2}, /* cost of storing integer registers */
302 4, /* cost of reg,reg fld/fst */
303 {6, 6, 6}, /* cost of loading fp registers
304 in SFmode, DFmode and XFmode */
305 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
306 2, /* cost of moving MMX register */
307 {2, 2}, /* cost of loading MMX registers
308 in SImode and DImode */
309 {2, 2}, /* cost of storing MMX registers
310 in SImode and DImode */
311 2, /* cost of moving SSE register */
312 {2, 2, 8}, /* cost of loading SSE registers
313 in SImode, DImode and TImode */
314 {2, 2, 8}, /* cost of storing SSE registers
315 in SImode, DImode and TImode */
316 6, /* MMX or SSE register to integer */
317 32, /* size of prefetch block */
318 1, /* number of parallel prefetches */
319 1, /* Branch cost */
320 2, /* cost of FADD and FSUB insns. */
321 2, /* cost of FMUL instruction. */
322 56, /* cost of FDIV instruction. */
323 2, /* cost of FABS instruction. */
324 2, /* cost of FCHS instruction. */
325 56, /* cost of FSQRT instruction. */
328 static const
329 struct processor_costs athlon_cost = {
330 1, /* cost of an add instruction */
331 2, /* cost of a lea instruction */
332 1, /* variable shift costs */
333 1, /* constant shift costs */
334 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
335 0, /* cost of multiply per each bit set */
336 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
337 1, /* cost of movsx */
338 1, /* cost of movzx */
339 8, /* "large" insn */
340 9, /* MOVE_RATIO */
341 4, /* cost for loading QImode using movzbl */
342 {3, 4, 3}, /* cost of loading integer registers
343 in QImode, HImode and SImode.
344 Relative to reg-reg move (2). */
345 {3, 4, 3}, /* cost of storing integer registers */
346 4, /* cost of reg,reg fld/fst */
347 {4, 4, 12}, /* cost of loading fp registers
348 in SFmode, DFmode and XFmode */
349 {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
350 2, /* cost of moving MMX register */
351 {4, 4}, /* cost of loading MMX registers
352 in SImode and DImode */
353 {4, 4}, /* cost of storing MMX registers
354 in SImode and DImode */
355 2, /* cost of moving SSE register */
356 {4, 4, 6}, /* cost of loading SSE registers
357 in SImode, DImode and TImode */
358 {4, 4, 5}, /* cost of storing SSE registers
359 in SImode, DImode and TImode */
360 5, /* MMX or SSE register to integer */
361 64, /* size of prefetch block */
362 6, /* number of parallel prefetches */
363 2, /* Branch cost */
364 4, /* cost of FADD and FSUB insns. */
365 4, /* cost of FMUL instruction. */
366 24, /* cost of FDIV instruction. */
367 2, /* cost of FABS instruction. */
368 2, /* cost of FCHS instruction. */
369 35, /* cost of FSQRT instruction. */
372 static const
373 struct processor_costs k8_cost = {
374 1, /* cost of an add instruction */
375 2, /* cost of a lea instruction */
376 1, /* variable shift costs */
377 1, /* constant shift costs */
378 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
379 0, /* cost of multiply per each bit set */
380 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
381 1, /* cost of movsx */
382 1, /* cost of movzx */
383 8, /* "large" insn */
384 9, /* MOVE_RATIO */
385 4, /* cost for loading QImode using movzbl */
386 {3, 4, 3}, /* cost of loading integer registers
387 in QImode, HImode and SImode.
388 Relative to reg-reg move (2). */
389 {3, 4, 3}, /* cost of storing integer registers */
390 4, /* cost of reg,reg fld/fst */
391 {4, 4, 12}, /* cost of loading fp registers
392 in SFmode, DFmode and XFmode */
393 {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
394 2, /* cost of moving MMX register */
395 {3, 3}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {4, 4}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 3, 6}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 4, 5}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 5, /* MMX or SSE register to integer */
405 64, /* size of prefetch block */
406 6, /* number of parallel prefetches */
407 2, /* Branch cost */
408 4, /* cost of FADD and FSUB insns. */
409 4, /* cost of FMUL instruction. */
410 19, /* cost of FDIV instruction. */
411 2, /* cost of FABS instruction. */
412 2, /* cost of FCHS instruction. */
413 35, /* cost of FSQRT instruction. */
416 static const
417 struct processor_costs pentium4_cost = {
418 1, /* cost of an add instruction */
419 1, /* cost of a lea instruction */
420 4, /* variable shift costs */
421 4, /* constant shift costs */
422 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
423 0, /* cost of multiply per each bit set */
424 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
425 1, /* cost of movsx */
426 1, /* cost of movzx */
427 16, /* "large" insn */
428 6, /* MOVE_RATIO */
429 2, /* cost for loading QImode using movzbl */
430 {4, 5, 4}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 3, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
437 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
438 2, /* cost of moving MMX register */
439 {2, 2}, /* cost of loading MMX registers
440 in SImode and DImode */
441 {2, 2}, /* cost of storing MMX registers
442 in SImode and DImode */
443 12, /* cost of moving SSE register */
444 {12, 12, 12}, /* cost of loading SSE registers
445 in SImode, DImode and TImode */
446 {2, 2, 8}, /* cost of storing SSE registers
447 in SImode, DImode and TImode */
448 10, /* MMX or SSE register to integer */
449 64, /* size of prefetch block */
450 6, /* number of parallel prefetches */
451 2, /* Branch cost */
452 5, /* cost of FADD and FSUB insns. */
453 7, /* cost of FMUL instruction. */
454 43, /* cost of FDIV instruction. */
455 2, /* cost of FABS instruction. */
456 2, /* cost of FCHS instruction. */
457 43, /* cost of FSQRT instruction. */
460 const struct processor_costs *ix86_cost = &pentium_cost;
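/* Re-pointed in override_options below: to &size_cost when optimizing for
   size, otherwise to the cost table of the processor selected by -mtune.  */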
462 /* Processor feature/optimization bitmasks. */
463 #define m_386 (1<<PROCESSOR_I386)
464 #define m_486 (1<<PROCESSOR_I486)
465 #define m_PENT (1<<PROCESSOR_PENTIUM)
466 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
467 #define m_K6 (1<<PROCESSOR_K6)
468 #define m_ATHLON (1<<PROCESSOR_ATHLON)
469 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
470 #define m_K8 (1<<PROCESSOR_K8)
471 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
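/* Each x86_* feature mask below is meant to be tested against the active
   tuning, e.g. (x86_accumulate_outgoing_args & TUNEMASK) as done in
   override_options below; TUNEMASK is assumed to be (1 << ix86_tune) and is
   defined in i386.h, not here.  */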
473 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
474 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
475 const int x86_zero_extend_with_and = m_486 | m_PENT;
476 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
477 const int x86_double_with_add = ~m_386;
478 const int x86_use_bit_test = m_386;
479 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
480 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
481 const int x86_3dnow_a = m_ATHLON_K8;
482 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
483 const int x86_branch_hints = m_PENT4;
484 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
485 const int x86_partial_reg_stall = m_PPRO;
486 const int x86_use_loop = m_K6;
487 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
488 const int x86_use_mov0 = m_K6;
489 const int x86_use_cltd = ~(m_PENT | m_K6);
490 const int x86_read_modify_write = ~m_PENT;
491 const int x86_read_modify = ~(m_PENT | m_PPRO);
492 const int x86_split_long_moves = m_PPRO;
493 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
494 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
495 const int x86_single_stringop = m_386 | m_PENT4;
496 const int x86_qimode_math = ~(0);
497 const int x86_promote_qi_regs = 0;
498 const int x86_himode_math = ~(m_PPRO);
499 const int x86_promote_hi_regs = m_PPRO;
500 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
501 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
502 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
503 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
504 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
505 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
506 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
507 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
508 const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
509 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
510 const int x86_decompose_lea = m_PENT4;
511 const int x86_shift1 = ~m_486;
512 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
513 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
514 /* Set for machines where the type and dependencies are resolved on SSE register
515 parts instead of whole registers, so we may maintain just the lower part of
516 scalar values in the proper format, leaving the upper part undefined. */
517 const int x86_sse_partial_regs = m_ATHLON_K8;
518 /* Athlon optimizes partial-register FPS special case, thus avoiding the
519 need for extra instructions beforehand */
520 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
521 const int x86_sse_typeless_stores = m_ATHLON_K8;
522 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
523 const int x86_use_ffreep = m_ATHLON_K8;
524 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
525 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
526 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
528 /* If the average insn count for a single function invocation is
529 lower than this constant, emit fast (but longer) prologue and
530 epilogue code. */
531 #define FAST_PROLOGUE_INSN_COUNT 20
533 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
534 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
535 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
536 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
538 /* Array of the smallest class containing reg number REGNO, indexed by
539 REGNO. Used by REGNO_REG_CLASS in i386.h. */
541 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
543 /* ax, dx, cx, bx */
544 AREG, DREG, CREG, BREG,
545 /* si, di, bp, sp */
546 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
547 /* FP registers */
548 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
549 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
550 /* arg pointer */
551 NON_Q_REGS,
552 /* flags, fpsr, dirflag, frame */
553 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
554 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
555 SSE_REGS, SSE_REGS,
556 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
557 MMX_REGS, MMX_REGS,
558 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
559 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
560 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
561 SSE_REGS, SSE_REGS,
564 /* The "default" register map used in 32bit mode. */
566 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
568 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
569 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
570 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
571 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
572 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
573 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
574 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
577 static int const x86_64_int_parameter_registers[6] =
579 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
580 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
583 static int const x86_64_int_return_registers[4] =
585 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
588 /* The "default" register map used in 64bit mode. */
589 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
591 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
592 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
593 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
594 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
595 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
596 8,9,10,11,12,13,14,15, /* extended integer registers */
597 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
600 /* Define the register numbers to be used in Dwarf debugging information.
601 The SVR4 reference port C compiler uses the following register numbers
602 in its Dwarf output code:
603 0 for %eax (gcc regno = 0)
604 1 for %ecx (gcc regno = 2)
605 2 for %edx (gcc regno = 1)
606 3 for %ebx (gcc regno = 3)
607 4 for %esp (gcc regno = 7)
608 5 for %ebp (gcc regno = 6)
609 6 for %esi (gcc regno = 4)
610 7 for %edi (gcc regno = 5)
611 The following three DWARF register numbers are never generated by
612 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
613 believes these numbers have these meanings.
614 8 for %eip (no gcc equivalent)
615 9 for %eflags (gcc regno = 17)
616 10 for %trapno (no gcc equivalent)
617 It is not at all clear how we should number the FP stack registers
618 for the x86 architecture. If the version of SDB on x86/svr4 were
619 a bit less brain dead with respect to floating-point then we would
620 have a precedent to follow with respect to DWARF register numbers
621 for x86 FP registers, but the SDB on x86/svr4 is so completely
622 broken with respect to FP registers that it is hardly worth thinking
623 of it as something to strive for compatibility with.
624 The version of x86/svr4 SDB I have at the moment does (partially)
625 seem to believe that DWARF register number 11 is associated with
626 the x86 register %st(0), but that's about all. Higher DWARF
627 register numbers don't seem to be associated with anything in
628 particular, and even for DWARF regno 11, SDB only seems to under-
629 stand that it should say that a variable lives in %st(0) (when
630 asked via an `=' command) if we said it was in DWARF regno 11,
631 but SDB still prints garbage when asked for the value of the
632 variable in question (via a `/' command).
633 (Also note that the labels SDB prints for various FP stack regs
634 when doing an `x' command are all wrong.)
635 Note that these problems generally don't affect the native SVR4
636 C compiler because it doesn't allow the use of -O with -g and
637 because when it is *not* optimizing, it allocates a memory
638 location for each floating-point variable, and the memory
639 location is what gets described in the DWARF AT_location
640 attribute for the variable in question.
641 Regardless of the severe mental illness of the x86/svr4 SDB, we
642 do something sensible here and we use the following DWARF
643 register numbers. Note that these are all stack-top-relative
644 numbers.
645 11 for %st(0) (gcc regno = 8)
646 12 for %st(1) (gcc regno = 9)
647 13 for %st(2) (gcc regno = 10)
648 14 for %st(3) (gcc regno = 11)
649 15 for %st(4) (gcc regno = 12)
650 16 for %st(5) (gcc regno = 13)
651 17 for %st(6) (gcc regno = 14)
652 18 for %st(7) (gcc regno = 15)
654 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
656 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
657 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
658 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
659 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
660 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
661 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
662 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
665 /* Test and compare insns in i386.md store the information needed to
666 generate branch and scc insns here. */
668 rtx ix86_compare_op0 = NULL_RTX;
669 rtx ix86_compare_op1 = NULL_RTX;
671 #define MAX_386_STACK_LOCALS 3
672 /* Size of the register save area. */
673 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
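/* A rough size check, assuming the 64-bit values REGPARM_MAX == 6,
   UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8: the register save area is
   6*8 + 8*16 = 176 bytes, matching the x86-64 psABI layout used by va_arg.  */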
675 /* Define the structure for the machine field in struct function. */
677 struct stack_local_entry GTY(())
679 unsigned short mode;
680 unsigned short n;
681 rtx rtl;
682 struct stack_local_entry *next;
685 /* Structure describing stack frame layout.
686 Stack grows downward:
688 [arguments]
689 <- ARG_POINTER
690 saved pc
692 saved frame pointer if frame_pointer_needed
693 <- HARD_FRAME_POINTER
694 [saved regs]
696 [padding1] \
698 [va_arg registers] (
699 > to_allocate <- FRAME_POINTER
700 [frame] (
702 [padding2] /
704 struct ix86_frame
706 int nregs;
707 int padding1;
708 int va_arg_size;
709 HOST_WIDE_INT frame;
710 int padding2;
711 int outgoing_arguments_size;
712 int red_zone_size;
714 HOST_WIDE_INT to_allocate;
715 /* The offsets relative to ARG_POINTER. */
716 HOST_WIDE_INT frame_pointer_offset;
717 HOST_WIDE_INT hard_frame_pointer_offset;
718 HOST_WIDE_INT stack_pointer_offset;
720 /* When save_regs_using_mov is set, emit prologue using
721 move instead of push instructions. */
722 bool save_regs_using_mov;
725 /* Used to enable/disable debugging features. */
726 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
727 /* Code model option as passed by user. */
728 const char *ix86_cmodel_string;
729 /* Parsed value. */
730 enum cmodel ix86_cmodel;
731 /* Asm dialect. */
732 const char *ix86_asm_string;
733 enum asm_dialect ix86_asm_dialect = ASM_ATT;
734 /* TLS dialect. */
735 const char *ix86_tls_dialect_string;
736 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
738 /* Which unit we are generating floating point math for. */
739 enum fpmath_unit ix86_fpmath;
741 /* Which cpu are we scheduling for. */
742 enum processor_type ix86_tune;
743 /* Which instruction set architecture to use. */
744 enum processor_type ix86_arch;
746 /* Strings to hold which cpu and instruction set architecture to use. */
747 const char *ix86_tune_string; /* for -mtune=<xxx> */
748 const char *ix86_arch_string; /* for -march=<xxx> */
749 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
751 /* # of registers to use to pass arguments. */
752 const char *ix86_regparm_string;
754 /* True if the SSE prefetch instruction is not a NOP. */
755 int x86_prefetch_sse;
757 /* ix86_regparm_string as a number */
758 int ix86_regparm;
760 /* Alignment to use for loops and jumps: */
762 /* Power of two alignment for loops. */
763 const char *ix86_align_loops_string;
765 /* Power of two alignment for non-loop jumps. */
766 const char *ix86_align_jumps_string;
768 /* Power of two alignment for stack boundary in bytes. */
769 const char *ix86_preferred_stack_boundary_string;
771 /* Preferred alignment for stack boundary in bits. */
772 int ix86_preferred_stack_boundary;
774 /* Values 1-5: see jump.c */
775 int ix86_branch_cost;
776 const char *ix86_branch_cost_string;
778 /* Power of two alignment for functions. */
779 const char *ix86_align_funcs_string;
781 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
782 static char internal_label_prefix[16];
783 static int internal_label_prefix_len;
785 static int local_symbolic_operand (rtx, enum machine_mode);
786 static int tls_symbolic_operand_1 (rtx, enum tls_model);
787 static void output_pic_addr_const (FILE *, rtx, int);
788 static void put_condition_code (enum rtx_code, enum machine_mode,
789 int, int, FILE *);
790 static const char *get_some_local_dynamic_name (void);
791 static int get_some_local_dynamic_name_1 (rtx *, void *);
792 static rtx maybe_get_pool_constant (rtx);
793 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
794 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
795 rtx *);
796 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
797 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
798 enum machine_mode);
799 static rtx get_thread_pointer (int);
800 static rtx legitimize_tls_address (rtx, enum tls_model, int);
801 static void get_pc_thunk_name (char [32], unsigned int);
802 static rtx gen_push (rtx);
803 static int memory_address_length (rtx addr);
804 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
805 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
806 static enum attr_ppro_uops ix86_safe_ppro_uops (rtx);
807 static void ix86_dump_ppro_packet (FILE *);
808 static void ix86_reorder_insn (rtx *, rtx *);
809 static struct machine_function * ix86_init_machine_status (void);
810 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
811 static int ix86_nsaved_regs (void);
812 static void ix86_emit_save_regs (void);
813 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
814 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
815 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
816 static void ix86_sched_reorder_ppro (rtx *, rtx *);
817 static HOST_WIDE_INT ix86_GOT_alias_set (void);
818 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
819 static rtx ix86_expand_aligntest (rtx, int);
820 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
821 static int ix86_issue_rate (void);
822 static int ix86_adjust_cost (rtx, rtx, rtx, int);
823 static void ix86_sched_init (FILE *, int, int);
824 static int ix86_sched_reorder (FILE *, int, rtx *, int *, int);
825 static int ix86_variable_issue (FILE *, int, rtx, int);
826 static int ia32_use_dfa_pipeline_interface (void);
827 static int ia32_multipass_dfa_lookahead (void);
828 static void ix86_init_mmx_sse_builtins (void);
829 static rtx x86_this_parameter (tree);
830 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
831 HOST_WIDE_INT, tree);
832 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
833 static void x86_file_start (void);
834 static void ix86_reorg (void);
835 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
836 static tree ix86_build_builtin_va_list (void);
838 struct ix86_address
840 rtx base, index, disp;
841 HOST_WIDE_INT scale;
842 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
845 static int ix86_decompose_address (rtx, struct ix86_address *);
846 static int ix86_address_cost (rtx);
847 static bool ix86_cannot_force_const_mem (rtx);
848 static rtx ix86_delegitimize_address (rtx);
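/* Sketch of a decomposition performed by ix86_decompose_address: an address
   like (plus (mult (reg B) (const_int 4)) (plus (reg A) (const_int 8)))
   yields base = A, index = B, scale = 4, disp = 8, seg = SEG_DEFAULT.
   (Illustrative example only; the exact RTL shapes accepted are determined
   by the function itself.)  */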
850 struct builtin_description;
851 static rtx ix86_expand_sse_comi (const struct builtin_description *,
852 tree, rtx);
853 static rtx ix86_expand_sse_compare (const struct builtin_description *,
854 tree, rtx);
855 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
856 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
857 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
858 static rtx ix86_expand_store_builtin (enum insn_code, tree);
859 static rtx safe_vector_operand (rtx, enum machine_mode);
860 static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
861 static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
862 enum rtx_code *, enum rtx_code *);
863 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
864 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
865 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
866 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
867 static int ix86_fp_comparison_cost (enum rtx_code code);
868 static unsigned int ix86_select_alt_pic_regnum (void);
869 static int ix86_save_reg (unsigned int, int);
870 static void ix86_compute_frame_layout (struct ix86_frame *);
871 static int ix86_comp_type_attributes (tree, tree);
872 static int ix86_function_regparm (tree, tree);
873 const struct attribute_spec ix86_attribute_table[];
874 static bool ix86_function_ok_for_sibcall (tree, tree);
875 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
876 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
877 static int ix86_value_regno (enum machine_mode);
878 static bool contains_128bit_aligned_vector_p (tree);
879 static bool ix86_ms_bitfield_layout_p (tree);
880 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
881 static int extended_reg_mentioned_1 (rtx *, void *);
882 static bool ix86_rtx_costs (rtx, int, int, int *);
883 static int min_insn_size (rtx);
884 static void k8_avoid_jump_misspredicts (void);
886 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
887 static void ix86_svr3_asm_out_constructor (rtx, int);
888 #endif
890 /* Register class used for passing a given 64-bit part of an argument.
891 These represent the classes documented by the psABI, except that the
892 SSESF and SSEDF classes are basically the SSE class; gcc just uses an
893 SFmode or DFmode move instead of DImode to avoid reformatting penalties.
895 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
896 whenever possible (the upper half then contains only padding). */
898 enum x86_64_reg_class
900 X86_64_NO_CLASS,
901 X86_64_INTEGER_CLASS,
902 X86_64_INTEGERSI_CLASS,
903 X86_64_SSE_CLASS,
904 X86_64_SSESF_CLASS,
905 X86_64_SSEDF_CLASS,
906 X86_64_SSEUP_CLASS,
907 X86_64_X87_CLASS,
908 X86_64_X87UP_CLASS,
909 X86_64_MEMORY_CLASS
911 static const char * const x86_64_reg_class_name[] =
912 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
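/* Classification sketch (illustrative, not from the original source): under
   the psABI rules implemented by classify_argument, a structure such as
   struct { double d; long l; } splits into two 64-bit parts classified
   X86_64_SSEDF_CLASS and X86_64_INTEGER_CLASS, so it is passed in one SSE
   register and one integer register.  */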
914 #define MAX_CLASSES 4
915 static int classify_argument (enum machine_mode, tree,
916 enum x86_64_reg_class [MAX_CLASSES], int);
917 static int examine_argument (enum machine_mode, tree, int, int *, int *);
918 static rtx construct_container (enum machine_mode, tree, int, int, int,
919 const int *, int);
920 static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
921 enum x86_64_reg_class);
923 /* Table of constants used by fldpi, fldln2, etc.... */
924 static REAL_VALUE_TYPE ext_80387_constants_table [5];
925 static bool ext_80387_constants_init = 0;
926 static void init_ext_80387_constants (void);
928 /* Initialize the GCC target structure. */
929 #undef TARGET_ATTRIBUTE_TABLE
930 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
931 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
932 # undef TARGET_MERGE_DECL_ATTRIBUTES
933 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
934 #endif
936 #undef TARGET_COMP_TYPE_ATTRIBUTES
937 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
939 #undef TARGET_INIT_BUILTINS
940 #define TARGET_INIT_BUILTINS ix86_init_builtins
942 #undef TARGET_EXPAND_BUILTIN
943 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
945 #undef TARGET_ASM_FUNCTION_EPILOGUE
946 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
948 #undef TARGET_ASM_OPEN_PAREN
949 #define TARGET_ASM_OPEN_PAREN ""
950 #undef TARGET_ASM_CLOSE_PAREN
951 #define TARGET_ASM_CLOSE_PAREN ""
953 #undef TARGET_ASM_ALIGNED_HI_OP
954 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
955 #undef TARGET_ASM_ALIGNED_SI_OP
956 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
957 #ifdef ASM_QUAD
958 #undef TARGET_ASM_ALIGNED_DI_OP
959 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
960 #endif
962 #undef TARGET_ASM_UNALIGNED_HI_OP
963 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
964 #undef TARGET_ASM_UNALIGNED_SI_OP
965 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
966 #undef TARGET_ASM_UNALIGNED_DI_OP
967 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
969 #undef TARGET_SCHED_ADJUST_COST
970 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
971 #undef TARGET_SCHED_ISSUE_RATE
972 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
973 #undef TARGET_SCHED_VARIABLE_ISSUE
974 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
975 #undef TARGET_SCHED_INIT
976 #define TARGET_SCHED_INIT ix86_sched_init
977 #undef TARGET_SCHED_REORDER
978 #define TARGET_SCHED_REORDER ix86_sched_reorder
979 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
980 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
981 ia32_use_dfa_pipeline_interface
982 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
983 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
984 ia32_multipass_dfa_lookahead
986 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
987 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
989 #ifdef HAVE_AS_TLS
990 #undef TARGET_HAVE_TLS
991 #define TARGET_HAVE_TLS true
992 #endif
993 #undef TARGET_CANNOT_FORCE_CONST_MEM
994 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
996 #undef TARGET_DELEGITIMIZE_ADDRESS
997 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
999 #undef TARGET_MS_BITFIELD_LAYOUT_P
1000 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1002 #undef TARGET_ASM_OUTPUT_MI_THUNK
1003 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1004 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1005 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1007 #undef TARGET_ASM_FILE_START
1008 #define TARGET_ASM_FILE_START x86_file_start
1010 #undef TARGET_RTX_COSTS
1011 #define TARGET_RTX_COSTS ix86_rtx_costs
1012 #undef TARGET_ADDRESS_COST
1013 #define TARGET_ADDRESS_COST ix86_address_cost
1015 #undef TARGET_FIXED_CONDITION_CODE_REGS
1016 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1017 #undef TARGET_CC_MODES_COMPATIBLE
1018 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1020 #undef TARGET_MACHINE_DEPENDENT_REORG
1021 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1023 #undef TARGET_BUILD_BUILTIN_VA_LIST
1024 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1026 struct gcc_target targetm = TARGET_INITIALIZER;
1028 /* The svr4 ABI for the i386 says that records and unions are returned
1029 in memory. */
1030 #ifndef DEFAULT_PCC_STRUCT_RETURN
1031 #define DEFAULT_PCC_STRUCT_RETURN 1
1032 #endif
1034 /* Sometimes certain combinations of command options do not make
1035 sense on a particular target machine. You can define a macro
1036 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1037 defined, is executed once just after all the command options have
1038 been parsed.
1040 Don't use this macro to turn on various extra optimizations for
1041 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1043 void
1044 override_options (void)
1046 int i;
1047 /* Comes from final.c -- no real reason to change it. */
1048 #define MAX_CODE_ALIGN 16
1050 static struct ptt
1052 const struct processor_costs *cost; /* Processor costs */
1053 const int target_enable; /* Target flags to enable. */
1054 const int target_disable; /* Target flags to disable. */
1055 const int align_loop; /* Default alignments. */
1056 const int align_loop_max_skip;
1057 const int align_jump;
1058 const int align_jump_max_skip;
1059 const int align_func;
1061 const processor_target_table[PROCESSOR_max] =
1063 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1064 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1065 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1066 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1067 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1068 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1069 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1070 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1073 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1074 static struct pta
1076 const char *const name; /* processor name or nickname. */
1077 const enum processor_type processor;
1078 const enum pta_flags
1080 PTA_SSE = 1,
1081 PTA_SSE2 = 2,
1082 PTA_MMX = 4,
1083 PTA_PREFETCH_SSE = 8,
1084 PTA_3DNOW = 16,
1085 PTA_3DNOW_A = 64,
1086 PTA_64BIT = 128
1087 } flags;
1089 const processor_alias_table[] =
1091 {"i386", PROCESSOR_I386, 0},
1092 {"i486", PROCESSOR_I486, 0},
1093 {"i586", PROCESSOR_PENTIUM, 0},
1094 {"pentium", PROCESSOR_PENTIUM, 0},
1095 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1096 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1097 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1098 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1099 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1100 {"i686", PROCESSOR_PENTIUMPRO, 0},
1101 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1102 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1103 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1104 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
1105 PTA_MMX | PTA_PREFETCH_SSE},
1106 {"k6", PROCESSOR_K6, PTA_MMX},
1107 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1108 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1109 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1110 | PTA_3DNOW_A},
1111 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1112 | PTA_3DNOW | PTA_3DNOW_A},
1113 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1114 | PTA_3DNOW_A | PTA_SSE},
1115 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1116 | PTA_3DNOW_A | PTA_SSE},
1117 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1118 | PTA_3DNOW_A | PTA_SSE},
1119 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1120 | PTA_SSE | PTA_SSE2 },
1121 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1122 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1123 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1124 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1125 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1126 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1127 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1128 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1131 int const pta_size = ARRAY_SIZE (processor_alias_table);
1133 /* Set the default values for switches whose default depends on TARGET_64BIT
1134 in case they weren't overwritten by command line options. */
1135 if (TARGET_64BIT)
1137 if (flag_omit_frame_pointer == 2)
1138 flag_omit_frame_pointer = 1;
1139 if (flag_asynchronous_unwind_tables == 2)
1140 flag_asynchronous_unwind_tables = 1;
1141 if (flag_pcc_struct_return == 2)
1142 flag_pcc_struct_return = 0;
1144 else
1146 if (flag_omit_frame_pointer == 2)
1147 flag_omit_frame_pointer = 0;
1148 if (flag_asynchronous_unwind_tables == 2)
1149 flag_asynchronous_unwind_tables = 0;
1150 if (flag_pcc_struct_return == 2)
1151 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1154 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1155 SUBTARGET_OVERRIDE_OPTIONS;
1156 #endif
1158 if (!ix86_tune_string && ix86_arch_string)
1159 ix86_tune_string = ix86_arch_string;
1160 if (!ix86_tune_string)
1161 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1162 if (!ix86_arch_string)
1163 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1165 if (ix86_cmodel_string != 0)
1167 if (!strcmp (ix86_cmodel_string, "small"))
1168 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1169 else if (flag_pic)
1170 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1171 else if (!strcmp (ix86_cmodel_string, "32"))
1172 ix86_cmodel = CM_32;
1173 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1174 ix86_cmodel = CM_KERNEL;
1175 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1176 ix86_cmodel = CM_MEDIUM;
1177 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1178 ix86_cmodel = CM_LARGE;
1179 else
1180 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1182 else
1184 ix86_cmodel = CM_32;
1185 if (TARGET_64BIT)
1186 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1188 if (ix86_asm_string != 0)
1190 if (!strcmp (ix86_asm_string, "intel"))
1191 ix86_asm_dialect = ASM_INTEL;
1192 else if (!strcmp (ix86_asm_string, "att"))
1193 ix86_asm_dialect = ASM_ATT;
1194 else
1195 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1197 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1198 error ("code model `%s' not supported in the %s bit mode",
1199 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1200 if (ix86_cmodel == CM_LARGE)
1201 sorry ("code model `large' not supported yet");
1202 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1203 sorry ("%i-bit mode not compiled in",
1204 (target_flags & MASK_64BIT) ? 64 : 32);
1206 for (i = 0; i < pta_size; i++)
1207 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1209 ix86_arch = processor_alias_table[i].processor;
1210 /* Default cpu tuning to the architecture. */
1211 ix86_tune = ix86_arch;
1212 if (processor_alias_table[i].flags & PTA_MMX
1213 && !(target_flags_explicit & MASK_MMX))
1214 target_flags |= MASK_MMX;
1215 if (processor_alias_table[i].flags & PTA_3DNOW
1216 && !(target_flags_explicit & MASK_3DNOW))
1217 target_flags |= MASK_3DNOW;
1218 if (processor_alias_table[i].flags & PTA_3DNOW_A
1219 && !(target_flags_explicit & MASK_3DNOW_A))
1220 target_flags |= MASK_3DNOW_A;
1221 if (processor_alias_table[i].flags & PTA_SSE
1222 && !(target_flags_explicit & MASK_SSE))
1223 target_flags |= MASK_SSE;
1224 if (processor_alias_table[i].flags & PTA_SSE2
1225 && !(target_flags_explicit & MASK_SSE2))
1226 target_flags |= MASK_SSE2;
1227 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1228 x86_prefetch_sse = true;
1229 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1230 error ("CPU you selected does not support x86-64 instruction set");
1231 break;
1234 if (i == pta_size)
1235 error ("bad value (%s) for -march= switch", ix86_arch_string);
1237 for (i = 0; i < pta_size; i++)
1238 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1240 ix86_tune = processor_alias_table[i].processor;
1241 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1242 error ("CPU you selected does not support x86-64 instruction set");
1243 break;
1245 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1246 x86_prefetch_sse = true;
1247 if (i == pta_size)
1248 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1250 if (optimize_size)
1251 ix86_cost = &size_cost;
1252 else
1253 ix86_cost = processor_target_table[ix86_tune].cost;
1254 target_flags |= processor_target_table[ix86_tune].target_enable;
1255 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1257 /* Arrange to set up i386_stack_locals for all functions. */
1258 init_machine_status = ix86_init_machine_status;
1260 /* Validate -mregparm= value. */
1261 if (ix86_regparm_string)
1263 i = atoi (ix86_regparm_string);
1264 if (i < 0 || i > REGPARM_MAX)
1265 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1266 else
1267 ix86_regparm = i;
1269 else
1270 if (TARGET_64BIT)
1271 ix86_regparm = REGPARM_MAX;
1273 /* If the user has provided any of the -malign-* options,
1274 warn and use that value only if -falign-* is not set.
1275 Remove this code in GCC 3.2 or later. */
1276 if (ix86_align_loops_string)
1278 warning ("-malign-loops is obsolete, use -falign-loops");
1279 if (align_loops == 0)
1281 i = atoi (ix86_align_loops_string);
1282 if (i < 0 || i > MAX_CODE_ALIGN)
1283 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1284 else
1285 align_loops = 1 << i;
1289 if (ix86_align_jumps_string)
1291 warning ("-malign-jumps is obsolete, use -falign-jumps");
1292 if (align_jumps == 0)
1294 i = atoi (ix86_align_jumps_string);
1295 if (i < 0 || i > MAX_CODE_ALIGN)
1296 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1297 else
1298 align_jumps = 1 << i;
1302 if (ix86_align_funcs_string)
1304 warning ("-malign-functions is obsolete, use -falign-functions");
1305 if (align_functions == 0)
1307 i = atoi (ix86_align_funcs_string);
1308 if (i < 0 || i > MAX_CODE_ALIGN)
1309 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1310 else
1311 align_functions = 1 << i;
1315 /* Default align_* from the processor table. */
1316 if (align_loops == 0)
1318 align_loops = processor_target_table[ix86_tune].align_loop;
1319 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1321 if (align_jumps == 0)
1323 align_jumps = processor_target_table[ix86_tune].align_jump;
1324 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1326 if (align_functions == 0)
1328 align_functions = processor_target_table[ix86_tune].align_func;
1331 /* Validate -mpreferred-stack-boundary= value, or provide default.
1332 The default of 128 bits is for Pentium III's SSE __m128, but we
1333 don't want additional code to keep the stack aligned when
1334 optimizing for code size. */
1335 ix86_preferred_stack_boundary = (optimize_size
1336 ? TARGET_64BIT ? 128 : 32
1337 : 128);
1338 if (ix86_preferred_stack_boundary_string)
1340 i = atoi (ix86_preferred_stack_boundary_string);
1341 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1342 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1343 TARGET_64BIT ? 4 : 2);
1344 else
1345 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
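/* E.g. -mpreferred-stack-boundary=4 gives (1 << 4) * BITS_PER_UNIT = 128
   bits, i.e. a 16-byte aligned stack.  */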
1348 /* Validate -mbranch-cost= value, or provide default. */
1349 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1350 if (ix86_branch_cost_string)
1352 i = atoi (ix86_branch_cost_string);
1353 if (i < 0 || i > 5)
1354 error ("-mbranch-cost=%d is not between 0 and 5", i);
1355 else
1356 ix86_branch_cost = i;
1359 if (ix86_tls_dialect_string)
1361 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1362 ix86_tls_dialect = TLS_DIALECT_GNU;
1363 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1364 ix86_tls_dialect = TLS_DIALECT_SUN;
1365 else
1366 error ("bad value (%s) for -mtls-dialect= switch",
1367 ix86_tls_dialect_string);
1370 /* Keep nonleaf frame pointers. */
1371 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1372 flag_omit_frame_pointer = 1;
1374 /* If we're doing fast math, we don't care about comparison order
1375 wrt NaNs. This lets us use a shorter comparison sequence. */
1376 if (flag_unsafe_math_optimizations)
1377 target_flags &= ~MASK_IEEE_FP;
1379 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1380 since the insns won't need emulation. */
1381 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1382 target_flags &= ~MASK_NO_FANCY_MATH_387;
1384 /* Turn on SSE2 builtins for -msse3. */
1385 if (TARGET_SSE3)
1386 target_flags |= MASK_SSE2;
1388 /* Turn on SSE builtins for -msse2. */
1389 if (TARGET_SSE2)
1390 target_flags |= MASK_SSE;
1392 if (TARGET_64BIT)
1394 if (TARGET_ALIGN_DOUBLE)
1395 error ("-malign-double makes no sense in the 64bit mode");
1396 if (TARGET_RTD)
1397 error ("-mrtd calling convention not supported in the 64bit mode");
1398 /* Enable by default the SSE and MMX builtins. */
1399 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1400 ix86_fpmath = FPMATH_SSE;
1402 else
1404 ix86_fpmath = FPMATH_387;
1405 /* The i386 ABI does not specify a red zone. It still makes sense to use
1406 one when the programmer takes care to keep the stack from being clobbered. */
1407 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1408 target_flags |= MASK_NO_RED_ZONE;
1411 if (ix86_fpmath_string != 0)
1413 if (! strcmp (ix86_fpmath_string, "387"))
1414 ix86_fpmath = FPMATH_387;
1415 else if (! strcmp (ix86_fpmath_string, "sse"))
1417 if (!TARGET_SSE)
1419 warning ("SSE instruction set disabled, using 387 arithmetics");
1420 ix86_fpmath = FPMATH_387;
1422 else
1423 ix86_fpmath = FPMATH_SSE;
1425 else if (! strcmp (ix86_fpmath_string, "387,sse")
1426 || ! strcmp (ix86_fpmath_string, "sse,387"))
1428 if (!TARGET_SSE)
1430 warning ("SSE instruction set disabled, using 387 arithmetics");
1431 ix86_fpmath = FPMATH_387;
1433 else if (!TARGET_80387)
1435 warning ("387 instruction set disabled, using SSE arithmetics");
1436 ix86_fpmath = FPMATH_SSE;
1438 else
1439 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1441 else
1442 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1445 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1446 on by -msse. */
1447 if (TARGET_SSE)
1449 target_flags |= MASK_MMX;
1450 x86_prefetch_sse = true;
1453 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1454 if (TARGET_3DNOW)
1456 target_flags |= MASK_MMX;
1457 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1458 extensions it adds. */
1459 if (x86_3dnow_a & (1 << ix86_arch))
1460 target_flags |= MASK_3DNOW_A;
1462 if ((x86_accumulate_outgoing_args & TUNEMASK)
1463 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1464 && !optimize_size)
1465 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1467 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1469 char *p;
1470 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1471 p = strchr (internal_label_prefix, 'X');
1472 internal_label_prefix_len = p - internal_label_prefix;
1473 *p = '\0';
1477 void
1478 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1480 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1481 make the problem with not enough registers even worse. */
1482 #ifdef INSN_SCHEDULING
1483 if (level > 1)
1484 flag_schedule_insns = 0;
1485 #endif
1487 /* The default values of these switches depend on TARGET_64BIT,
1488 which is not known at this moment. Mark these values with 2 and
1489 let the user override them. If there is no command line option
1490 specifying them, we will set the defaults in override_options. */
1491 if (optimize >= 1)
1492 flag_omit_frame_pointer = 2;
1493 flag_pcc_struct_return = 2;
1494 flag_asynchronous_unwind_tables = 2;
1497 /* Table of valid machine attributes. */
1498 const struct attribute_spec ix86_attribute_table[] =
1500 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1501 /* Stdcall attribute says callee is responsible for popping arguments
1502 if they are not variable. */
1503 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1504 /* Fastcall attribute says callee is responsible for popping arguments
1505 if they are not variable. */
1506 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1507 /* Cdecl attribute says the callee is a normal C declaration */
1508 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1509 /* Regparm attribute specifies how many integer arguments are to be
1510 passed in registers. */
1511 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1512 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1513 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1514 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1515 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1516 #endif
1517 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1518 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1519 { NULL, 0, 0, false, false, false, NULL }
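/* Illustrative usage (not part of the table above): user code might write

       int __attribute__((fastcall)) f (int a, int b);
       int __attribute__((regparm (3))) g (int a, int b, int c);

   fastcall passes the first two integer arguments in ECX and EDX, while
   regparm (N), for N up to REGPARM_MAX, passes up to N integer arguments
   in EAX, EDX and ECX.  */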
1522 /* Decide whether we can make a sibling call to a function. DECL is the
1523 declaration of the function being targeted by the call and EXP is the
1524 CALL_EXPR representing the call. */
1526 static bool
1527 ix86_function_ok_for_sibcall (tree decl, tree exp)
1529 /* If we are generating position-independent code, we cannot sibcall
1530 optimize any indirect call, or a direct call to a global function,
1531 as the PLT requires %ebx be live. */
1532 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1533 return false;
1535 /* If we are returning floats on the 80387 register stack, we cannot
1536 make a sibcall from a function that doesn't return a float to a
1537 function that does or, conversely, from a function that does return
1538 a float to a function that doesn't; the necessary stack adjustment
1539 would not be executed. */
1540 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1541 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1542 return false;
1544 /* If this call is indirect, we'll need to be able to use a call-clobbered
1545 register for the address of the target function. Make sure that all
1546 such registers are not used for passing parameters. */
1547 if (!decl && !TARGET_64BIT)
1549 tree type;
1551 /* We're looking at the CALL_EXPR, we need the type of the function. */
1552 type = TREE_OPERAND (exp, 0); /* pointer expression */
1553 type = TREE_TYPE (type); /* pointer type */
1554 type = TREE_TYPE (type); /* function type */
1556 if (ix86_function_regparm (type, NULL) >= 3)
1558 /* ??? Need to count the actual number of registers to be used,
1559 not the possible number of registers. Fix later. */
1560 return false;
1564 /* Otherwise okay. That also includes certain types of indirect calls. */
1565 return true;
1568 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1569 arguments as in struct attribute_spec.handler. */
1570 static tree
1571 ix86_handle_cdecl_attribute (tree *node, tree name,
1572 tree args ATTRIBUTE_UNUSED,
1573 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1575 if (TREE_CODE (*node) != FUNCTION_TYPE
1576 && TREE_CODE (*node) != METHOD_TYPE
1577 && TREE_CODE (*node) != FIELD_DECL
1578 && TREE_CODE (*node) != TYPE_DECL)
1580 warning ("`%s' attribute only applies to functions",
1581 IDENTIFIER_POINTER (name));
1582 *no_add_attrs = true;
1584 else
1586 if (is_attribute_p ("fastcall", name))
1588 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1590 error ("fastcall and stdcall attributes are not compatible");
1592 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1594 error ("fastcall and regparm attributes are not compatible");
1597 else if (is_attribute_p ("stdcall", name))
1599 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1601 error ("fastcall and stdcall attributes are not compatible");
1606 if (TARGET_64BIT)
1608 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1609 *no_add_attrs = true;
1612 return NULL_TREE;
1615 /* Handle a "regparm" attribute;
1616 arguments as in struct attribute_spec.handler. */
1617 static tree
1618 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1619 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1621 if (TREE_CODE (*node) != FUNCTION_TYPE
1622 && TREE_CODE (*node) != METHOD_TYPE
1623 && TREE_CODE (*node) != FIELD_DECL
1624 && TREE_CODE (*node) != TYPE_DECL)
1626 warning ("`%s' attribute only applies to functions",
1627 IDENTIFIER_POINTER (name));
1628 *no_add_attrs = true;
1630 else
1632 tree cst;
1634 cst = TREE_VALUE (args);
1635 if (TREE_CODE (cst) != INTEGER_CST)
1637 warning ("`%s' attribute requires an integer constant argument",
1638 IDENTIFIER_POINTER (name));
1639 *no_add_attrs = true;
1641 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1643 warning ("argument to `%s' attribute larger than %d",
1644 IDENTIFIER_POINTER (name), REGPARM_MAX);
1645 *no_add_attrs = true;
1648 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1650 error ("fastcall and regparm attributes are not compatible");
1654 return NULL_TREE;
1657 /* Return 0 if the attributes for two types are incompatible, 1 if they
1658 are compatible, and 2 if they are nearly compatible (which causes a
1659 warning to be generated). */
1661 static int
1662 ix86_comp_type_attributes (tree type1, tree type2)
1664 /* Check for mismatch of non-default calling convention. */
1665 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1667 if (TREE_CODE (type1) != FUNCTION_TYPE)
1668 return 1;
1670 /* Check for mismatched fastcall types */
1671 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1672 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1673 return 0;
1675 /* Check for mismatched return types (cdecl vs stdcall). */
1676 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1677 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1678 return 0;
1679 if (ix86_function_regparm (type1, NULL)
1680 != ix86_function_regparm (type2, NULL))
1681 return 0;
1682 return 1;
1685 /* Return the regparm value for a function with the indicated TYPE and DECL.
1686 DECL may be NULL when the function is called indirectly
1687 or when considering a libcall. */
1689 static int
1690 ix86_function_regparm (tree type, tree decl)
1692 tree attr;
1693 int regparm = ix86_regparm;
1694 bool user_convention = false;
1696 if (!TARGET_64BIT)
1698 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1699 if (attr)
1701 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1702 user_convention = true;
1705 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1707 regparm = 2;
1708 user_convention = true;
1711 /* Use register calling convention for local functions when possible. */
1712 if (!TARGET_64BIT && !user_convention && decl
1713 && flag_unit_at_a_time && !profile_flag)
1715 struct cgraph_local_info *i = cgraph_local_info (decl);
1716 if (i && i->local)
1718 /* We can't use regparm(3) for nested functions, as these use the
1719 static chain pointer in the third argument. */
1720 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1721 regparm = 2;
1722 else
1723 regparm = 3;
1727 return regparm;
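/* Illustrative example: on ia32 with -funit-at-a-time and no profiling, a
   file-local function such as

       static int sum3 (int a, int b, int c) { return a + b + c; }

   whose address never escapes is marked local by cgraph, so the code above
   promotes it to regparm 3 and its arguments arrive in EAX, EDX and ECX.
   A nested function is limited to regparm 2, since the static chain pointer
   occupies the third register.  */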
1730 /* Return true if EAX is live at the start of the function. Used by
1731 ix86_expand_prologue to determine if we need special help before
1732 calling allocate_stack_worker. */
1734 static bool
1735 ix86_eax_live_at_start_p (void)
1737 /* Cheat. Don't bother working forward from ix86_function_regparm
1738 to the function type to whether an actual argument is located in
1739 eax. Instead just look at cfg info, which is still close enough
1740 to correct at this point. This gives false positives for broken
1741 functions that might use uninitialized data that happens to be
1742 allocated in eax, but who cares? */
1743 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1746 /* Value is the number of bytes of arguments automatically
1747 popped when returning from a subroutine call.
1748 FUNDECL is the declaration node of the function (as a tree),
1749 FUNTYPE is the data type of the function (as a tree),
1750 or for a library call it is an identifier node for the subroutine name.
1751 SIZE is the number of bytes of arguments passed on the stack.
1753 On the 80386, the RTD insn may be used to pop them if the number
1754 of args is fixed, but if the number is variable then the caller
1755 must pop them all. RTD can't be used for library calls now
1756 because the library is compiled with the Unix compiler.
1757 Use of RTD is a selectable option, since it is incompatible with
1758 standard Unix calling sequences. If the option is not selected,
1759 the caller must always pop the args.
1761 The attribute stdcall is equivalent to RTD on a per module basis. */
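/* Illustrative example: a function declared as

       int __attribute__((stdcall)) cb (int x, int y);

   has a fixed argument list, so the code below returns SIZE (8 bytes here)
   and the callee pops its own arguments with "ret $8".  A cdecl or variadic
   function yields 0, leaving the caller to pop the arguments.  */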
1764 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1766 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1768 /* Cdecl functions override -mrtd, and never pop the stack. */
1769 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1771 /* Stdcall and fastcall functions will pop the stack if not
1772 variable args. */
1773 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1774 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1775 rtd = 1;
1777 if (rtd
1778 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1779 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1780 == void_type_node)))
1781 return size;
1784 /* Lose any fake structure return argument if it is passed on the stack. */
1785 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1786 && !TARGET_64BIT)
1788 int nregs = ix86_function_regparm (funtype, fundecl);
1790 if (!nregs)
1791 return GET_MODE_SIZE (Pmode);
1794 return 0;
1797 /* Argument support functions. */
1799 /* Return true when register may be used to pass function parameters. */
1800 bool
1801 ix86_function_arg_regno_p (int regno)
1803 int i;
1804 if (!TARGET_64BIT)
1805 return (regno < REGPARM_MAX
1806 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1807 if (SSE_REGNO_P (regno) && TARGET_SSE)
1808 return true;
1809 /* RAX is used as hidden argument to va_arg functions. */
1810 if (!regno)
1811 return true;
1812 for (i = 0; i < REGPARM_MAX; i++)
1813 if (regno == x86_64_int_parameter_registers[i])
1814 return true;
1815 return false;
1818 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1819 for a call to a function whose data type is FNTYPE.
1820 For a library call, FNTYPE is 0. */
1822 void
1823 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1824 tree fntype, /* tree ptr for function decl */
1825 rtx libname, /* SYMBOL_REF of library name or 0 */
1826 tree fndecl)
1828 static CUMULATIVE_ARGS zero_cum;
1829 tree param, next_param;
1831 if (TARGET_DEBUG_ARG)
1833 fprintf (stderr, "\ninit_cumulative_args (");
1834 if (fntype)
1835 fprintf (stderr, "fntype code = %s, ret code = %s",
1836 tree_code_name[(int) TREE_CODE (fntype)],
1837 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1838 else
1839 fprintf (stderr, "no fntype");
1841 if (libname)
1842 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1845 *cum = zero_cum;
1847 /* Set up the number of registers to use for passing arguments. */
1848 if (fntype)
1849 cum->nregs = ix86_function_regparm (fntype, fndecl);
1850 else
1851 cum->nregs = ix86_regparm;
1852 cum->sse_nregs = SSE_REGPARM_MAX;
1853 cum->mmx_nregs = MMX_REGPARM_MAX;
1854 cum->warn_sse = true;
1855 cum->warn_mmx = true;
1856 cum->maybe_vaarg = false;
1858 /* Use the ecx and edx registers if the function has the fastcall attribute.  */
1859 if (fntype && !TARGET_64BIT)
1861 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1863 cum->nregs = 2;
1864 cum->fastcall = 1;
1869 /* Determine if this function has variable arguments. This is
1870 indicated by the last argument being 'void_type_node' if there
1871 are no variable arguments. If there are variable arguments, then
1872 we won't pass anything in registers.  */
1874 if (cum->nregs || !TARGET_MMX || !TARGET_SSE)
1876 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1877 param != 0; param = next_param)
1879 next_param = TREE_CHAIN (param);
1880 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1882 if (!TARGET_64BIT)
1884 cum->nregs = 0;
1885 cum->sse_nregs = 0;
1886 cum->mmx_nregs = 0;
1887 cum->warn_sse = 0;
1888 cum->warn_mmx = 0;
1889 cum->fastcall = 0;
1891 cum->maybe_vaarg = true;
1895 if ((!fntype && !libname)
1896 || (fntype && !TYPE_ARG_TYPES (fntype)))
1897 cum->maybe_vaarg = 1;
1899 if (TARGET_DEBUG_ARG)
1900 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1902 return;
1905 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
1906 of this code is to classify each eightbyte of an incoming argument by register
1907 class and assign registers accordingly. */
1909 /* Return the union class of CLASS1 and CLASS2.
1910 See the x86-64 PS ABI for details. */
1912 static enum x86_64_reg_class
1913 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1915 /* Rule #1: If both classes are equal, this is the resulting class. */
1916 if (class1 == class2)
1917 return class1;
1919 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1920 the other class. */
1921 if (class1 == X86_64_NO_CLASS)
1922 return class2;
1923 if (class2 == X86_64_NO_CLASS)
1924 return class1;
1926 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1927 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1928 return X86_64_MEMORY_CLASS;
1930 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1931 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1932 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1933 return X86_64_INTEGERSI_CLASS;
1934 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1935 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1936 return X86_64_INTEGER_CLASS;
1938 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1939 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1940 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1941 return X86_64_MEMORY_CLASS;
1943 /* Rule #6: Otherwise class SSE is used. */
1944 return X86_64_SSE_CLASS;
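/* Illustrative applications of the rules above:
     merge_classes (X86_64_NO_CLASS, X86_64_SSESF_CLASS)        -> SSESF     (rule 2)
     merge_classes (X86_64_INTEGERSI_CLASS, X86_64_SSESF_CLASS) -> INTEGERSI (rule 4)
     merge_classes (X86_64_SSE_CLASS, X86_64_X87_CLASS)         -> MEMORY    (rule 5)
     merge_classes (X86_64_SSEDF_CLASS, X86_64_SSESF_CLASS)     -> SSE       (rule 6)  */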
1947 /* Classify the argument of type TYPE and mode MODE.
1948 CLASSES will be filled by the register class used to pass each word
1949 of the operand. The number of words is returned. In case the parameter
1950 should be passed in memory, 0 is returned. As a special case for zero
1951 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1953 BIT_OFFSET is used internally for handling records; it specifies the
1954 offset in bits modulo 256 to avoid overflow cases.
1956 See the x86-64 PS ABI for details.
1959 static int
1960 classify_argument (enum machine_mode mode, tree type,
1961 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
1963 HOST_WIDE_INT bytes =
1964 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1965 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1967 /* Variable sized entities are always passed/returned in memory. */
1968 if (bytes < 0)
1969 return 0;
1971 if (mode != VOIDmode
1972 && MUST_PASS_IN_STACK (mode, type))
1973 return 0;
1975 if (type && AGGREGATE_TYPE_P (type))
1977 int i;
1978 tree field;
1979 enum x86_64_reg_class subclasses[MAX_CLASSES];
1981 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1982 if (bytes > 16)
1983 return 0;
1985 for (i = 0; i < words; i++)
1986 classes[i] = X86_64_NO_CLASS;
1988 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1989 signal the memory class, so handle this as a special case. */
1990 if (!words)
1992 classes[0] = X86_64_NO_CLASS;
1993 return 1;
1996 /* Classify each field of record and merge classes. */
1997 if (TREE_CODE (type) == RECORD_TYPE)
1999 /* For classes first merge in the field of the subclasses. */
2000 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2002 tree bases = TYPE_BINFO_BASETYPES (type);
2003 int n_bases = TREE_VEC_LENGTH (bases);
2004 int i;
2006 for (i = 0; i < n_bases; ++i)
2008 tree binfo = TREE_VEC_ELT (bases, i);
2009 int num;
2010 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2011 tree type = BINFO_TYPE (binfo);
2013 num = classify_argument (TYPE_MODE (type),
2014 type, subclasses,
2015 (offset + bit_offset) % 256);
2016 if (!num)
2017 return 0;
2018 for (i = 0; i < num; i++)
2020 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2021 classes[i + pos] =
2022 merge_classes (subclasses[i], classes[i + pos]);
2027 /* And now merge in the fields of the structure. */
2027 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2029 if (TREE_CODE (field) == FIELD_DECL)
2031 int num;
2033 /* Bitfields are always classified as integer. Handle them
2034 early, since later code would consider them to be
2035 misaligned integers. */
2036 if (DECL_BIT_FIELD (field))
2038 for (i = int_bit_position (field) / 8 / 8;
2039 i < (int_bit_position (field)
2040 + tree_low_cst (DECL_SIZE (field), 0)
2041 + 63) / 8 / 8; i++)
2042 classes[i] =
2043 merge_classes (X86_64_INTEGER_CLASS,
2044 classes[i]);
2046 else
2048 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2049 TREE_TYPE (field), subclasses,
2050 (int_bit_position (field)
2051 + bit_offset) % 256);
2052 if (!num)
2053 return 0;
2054 for (i = 0; i < num; i++)
2056 int pos =
2057 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2058 classes[i + pos] =
2059 merge_classes (subclasses[i], classes[i + pos]);
2065 /* Arrays are handled as small records. */
2066 else if (TREE_CODE (type) == ARRAY_TYPE)
2068 int num;
2069 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2070 TREE_TYPE (type), subclasses, bit_offset);
2071 if (!num)
2072 return 0;
2074 /* The partial classes are now full classes. */
2075 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2076 subclasses[0] = X86_64_SSE_CLASS;
2077 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2078 subclasses[0] = X86_64_INTEGER_CLASS;
2080 for (i = 0; i < words; i++)
2081 classes[i] = subclasses[i % num];
2083 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2084 else if (TREE_CODE (type) == UNION_TYPE
2085 || TREE_CODE (type) == QUAL_UNION_TYPE)
2087 /* For classes first merge in the field of the subclasses. */
2088 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2090 tree bases = TYPE_BINFO_BASETYPES (type);
2091 int n_bases = TREE_VEC_LENGTH (bases);
2092 int i;
2094 for (i = 0; i < n_bases; ++i)
2096 tree binfo = TREE_VEC_ELT (bases, i);
2097 int num;
2098 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2099 tree type = BINFO_TYPE (binfo);
2101 num = classify_argument (TYPE_MODE (type),
2102 type, subclasses,
2103 (offset + (bit_offset % 64)) % 256);
2104 if (!num)
2105 return 0;
2106 for (i = 0; i < num; i++)
2108 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2109 classes[i + pos] =
2110 merge_classes (subclasses[i], classes[i + pos]);
2114 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2116 if (TREE_CODE (field) == FIELD_DECL)
2118 int num;
2119 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2120 TREE_TYPE (field), subclasses,
2121 bit_offset);
2122 if (!num)
2123 return 0;
2124 for (i = 0; i < num; i++)
2125 classes[i] = merge_classes (subclasses[i], classes[i]);
2129 else if (TREE_CODE (type) == SET_TYPE)
2131 if (bytes <= 4)
2133 classes[0] = X86_64_INTEGERSI_CLASS;
2134 return 1;
2136 else if (bytes <= 8)
2138 classes[0] = X86_64_INTEGER_CLASS;
2139 return 1;
2141 else if (bytes <= 12)
2143 classes[0] = X86_64_INTEGER_CLASS;
2144 classes[1] = X86_64_INTEGERSI_CLASS;
2145 return 2;
2147 else
2149 classes[0] = X86_64_INTEGER_CLASS;
2150 classes[1] = X86_64_INTEGER_CLASS;
2151 return 2;
2154 else
2155 abort ();
2157 /* Final merger cleanup. */
2158 for (i = 0; i < words; i++)
2160 /* If one class is MEMORY, everything should be passed in
2161 memory. */
2162 if (classes[i] == X86_64_MEMORY_CLASS)
2163 return 0;
2165 /* The X86_64_SSEUP_CLASS should always be preceded by
2166 X86_64_SSE_CLASS. */
2167 if (classes[i] == X86_64_SSEUP_CLASS
2168 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2169 classes[i] = X86_64_SSE_CLASS;
2171 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2172 if (classes[i] == X86_64_X87UP_CLASS
2173 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2174 classes[i] = X86_64_SSE_CLASS;
2176 return words;
2179 /* Compute the alignment needed. We align all types to their natural boundaries,
2180 with the exception of XFmode, which is aligned to 64 bits. */
2181 if (mode != VOIDmode && mode != BLKmode)
2183 int mode_alignment = GET_MODE_BITSIZE (mode);
2185 if (mode == XFmode)
2186 mode_alignment = 128;
2187 else if (mode == XCmode)
2188 mode_alignment = 256;
2189 /* Misaligned fields are always returned in memory. */
2190 if (bit_offset % mode_alignment)
2191 return 0;
2194 /* Classification of atomic types. */
2195 switch (mode)
2197 case DImode:
2198 case SImode:
2199 case HImode:
2200 case QImode:
2201 case CSImode:
2202 case CHImode:
2203 case CQImode:
2204 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2205 classes[0] = X86_64_INTEGERSI_CLASS;
2206 else
2207 classes[0] = X86_64_INTEGER_CLASS;
2208 return 1;
2209 case CDImode:
2210 case TImode:
2211 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2212 return 2;
2213 case CTImode:
2214 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2215 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2216 return 4;
2217 case SFmode:
2218 if (!(bit_offset % 64))
2219 classes[0] = X86_64_SSESF_CLASS;
2220 else
2221 classes[0] = X86_64_SSE_CLASS;
2222 return 1;
2223 case DFmode:
2224 classes[0] = X86_64_SSEDF_CLASS;
2225 return 1;
2226 case XFmode:
2227 classes[0] = X86_64_X87_CLASS;
2228 classes[1] = X86_64_X87UP_CLASS;
2229 return 2;
2230 case TFmode:
2231 case TCmode:
2232 return 0;
2233 case XCmode:
2234 classes[0] = X86_64_X87_CLASS;
2235 classes[1] = X86_64_X87UP_CLASS;
2236 classes[2] = X86_64_X87_CLASS;
2237 classes[3] = X86_64_X87UP_CLASS;
2238 return 4;
2239 case DCmode:
2240 classes[0] = X86_64_SSEDF_CLASS;
2241 classes[1] = X86_64_SSEDF_CLASS;
2242 return 2;
2243 case SCmode:
2244 classes[0] = X86_64_SSE_CLASS;
2245 return 1;
2246 case V4SFmode:
2247 case V4SImode:
2248 case V16QImode:
2249 case V8HImode:
2250 case V2DFmode:
2251 case V2DImode:
2252 classes[0] = X86_64_SSE_CLASS;
2253 classes[1] = X86_64_SSEUP_CLASS;
2254 return 2;
2255 case V2SFmode:
2256 case V2SImode:
2257 case V4HImode:
2258 case V8QImode:
2259 return 0;
2260 case BLKmode:
2261 case VOIDmode:
2262 return 0;
2263 default:
2264 abort ();
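/* Worked example (illustrative): the 16-byte aggregate

       struct s { double d; long l; };

   spans two eightbytes, so classify_argument returns 2 with
   classes[0] = X86_64_SSEDF_CLASS (the double) and
   classes[1] = X86_64_INTEGER_CLASS (the long); the struct therefore uses
   one SSE register and one integer register.  Any aggregate larger than
   16 bytes returns 0 and is passed in memory.  */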
2268 /* Examine the argument and set the number of registers required in each
2269 class. Return 0 iff the parameter should be passed in memory. */
2270 static int
2271 examine_argument (enum machine_mode mode, tree type, int in_return,
2272 int *int_nregs, int *sse_nregs)
2274 enum x86_64_reg_class class[MAX_CLASSES];
2275 int n = classify_argument (mode, type, class, 0);
2277 *int_nregs = 0;
2278 *sse_nregs = 0;
2279 if (!n)
2280 return 0;
2281 for (n--; n >= 0; n--)
2282 switch (class[n])
2284 case X86_64_INTEGER_CLASS:
2285 case X86_64_INTEGERSI_CLASS:
2286 (*int_nregs)++;
2287 break;
2288 case X86_64_SSE_CLASS:
2289 case X86_64_SSESF_CLASS:
2290 case X86_64_SSEDF_CLASS:
2291 (*sse_nregs)++;
2292 break;
2293 case X86_64_NO_CLASS:
2294 case X86_64_SSEUP_CLASS:
2295 break;
2296 case X86_64_X87_CLASS:
2297 case X86_64_X87UP_CLASS:
2298 if (!in_return)
2299 return 0;
2300 break;
2301 case X86_64_MEMORY_CLASS:
2302 abort ();
2304 return 1;
2306 /* Construct container for the argument used by GCC interface. See
2307 FUNCTION_ARG for the detailed description. */
2308 static rtx
2309 construct_container (enum machine_mode mode, tree type, int in_return,
2310 int nintregs, int nsseregs, const int * intreg,
2311 int sse_regno)
2313 enum machine_mode tmpmode;
2314 int bytes =
2315 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2316 enum x86_64_reg_class class[MAX_CLASSES];
2317 int n;
2318 int i;
2319 int nexps = 0;
2320 int needed_sseregs, needed_intregs;
2321 rtx exp[MAX_CLASSES];
2322 rtx ret;
2324 n = classify_argument (mode, type, class, 0);
2325 if (TARGET_DEBUG_ARG)
2327 if (!n)
2328 fprintf (stderr, "Memory class\n");
2329 else
2331 fprintf (stderr, "Classes:");
2332 for (i = 0; i < n; i++)
2334 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2336 fprintf (stderr, "\n");
2339 if (!n)
2340 return NULL;
2341 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2342 return NULL;
2343 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2344 return NULL;
2346 /* First construct simple cases. Avoid SCmode, since we want to use a
2347 single register to pass this type. */
2348 if (n == 1 && mode != SCmode)
2349 switch (class[0])
2351 case X86_64_INTEGER_CLASS:
2352 case X86_64_INTEGERSI_CLASS:
2353 return gen_rtx_REG (mode, intreg[0]);
2354 case X86_64_SSE_CLASS:
2355 case X86_64_SSESF_CLASS:
2356 case X86_64_SSEDF_CLASS:
2357 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2358 case X86_64_X87_CLASS:
2359 return gen_rtx_REG (mode, FIRST_STACK_REG);
2360 case X86_64_NO_CLASS:
2361 /* Zero sized array, struct or class. */
2362 return NULL;
2363 default:
2364 abort ();
2366 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2367 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2368 if (n == 2
2369 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2370 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2371 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2372 && class[1] == X86_64_INTEGER_CLASS
2373 && (mode == CDImode || mode == TImode || mode == TFmode)
2374 && intreg[0] + 1 == intreg[1])
2375 return gen_rtx_REG (mode, intreg[0]);
2376 if (n == 4
2377 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2378 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2379 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2381 /* Otherwise figure out the entries of the PARALLEL. */
2382 for (i = 0; i < n; i++)
2384 switch (class[i])
2386 case X86_64_NO_CLASS:
2387 break;
2388 case X86_64_INTEGER_CLASS:
2389 case X86_64_INTEGERSI_CLASS:
2390 /* Merge TImodes on aligned occasions here too. */
2391 if (i * 8 + 8 > bytes)
2392 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2393 else if (class[i] == X86_64_INTEGERSI_CLASS)
2394 tmpmode = SImode;
2395 else
2396 tmpmode = DImode;
2397 /* We've requested 24 bytes that we don't have a mode for. Use DImode. */
2398 if (tmpmode == BLKmode)
2399 tmpmode = DImode;
2400 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2401 gen_rtx_REG (tmpmode, *intreg),
2402 GEN_INT (i*8));
2403 intreg++;
2404 break;
2405 case X86_64_SSESF_CLASS:
2406 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2407 gen_rtx_REG (SFmode,
2408 SSE_REGNO (sse_regno)),
2409 GEN_INT (i*8));
2410 sse_regno++;
2411 break;
2412 case X86_64_SSEDF_CLASS:
2413 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2414 gen_rtx_REG (DFmode,
2415 SSE_REGNO (sse_regno)),
2416 GEN_INT (i*8));
2417 sse_regno++;
2418 break;
2419 case X86_64_SSE_CLASS:
2420 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2421 tmpmode = TImode;
2422 else
2423 tmpmode = DImode;
2424 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2425 gen_rtx_REG (tmpmode,
2426 SSE_REGNO (sse_regno)),
2427 GEN_INT (i*8));
2428 if (tmpmode == TImode)
2429 i++;
2430 sse_regno++;
2431 break;
2432 default:
2433 abort ();
2436 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2437 for (i = 0; i < nexps; i++)
2438 XVECEXP (ret, 0, i) = exp [i];
2439 return ret;
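/* Illustrative result: for the struct { double d; long l; } example above,
   used as the first argument, the code above builds

       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                  (expr_list (reg:DI di)   (const_int 8))])

   i.e. the double travels in %xmm0 and the long in %rdi.  */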
2442 /* Update the data in CUM to advance over an argument
2443 of mode MODE and data type TYPE.
2444 (TYPE is null for libcalls where that information may not be available.) */
2446 void
2447 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2448 enum machine_mode mode, /* current arg mode */
2449 tree type, /* type of the argument or 0 if lib support */
2450 int named) /* whether or not the argument was named */
2452 int bytes =
2453 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2454 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2456 if (TARGET_DEBUG_ARG)
2457 fprintf (stderr,
2458 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2459 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
2460 if (TARGET_64BIT)
2462 int int_nregs, sse_nregs;
2463 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2464 cum->words += words;
2465 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2467 cum->nregs -= int_nregs;
2468 cum->sse_nregs -= sse_nregs;
2469 cum->regno += int_nregs;
2470 cum->sse_regno += sse_nregs;
2472 else
2473 cum->words += words;
2475 else
2477 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2478 && (!type || !AGGREGATE_TYPE_P (type)))
2480 cum->sse_words += words;
2481 cum->sse_nregs -= 1;
2482 cum->sse_regno += 1;
2483 if (cum->sse_nregs <= 0)
2485 cum->sse_nregs = 0;
2486 cum->sse_regno = 0;
2489 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2490 && (!type || !AGGREGATE_TYPE_P (type)))
2492 cum->mmx_words += words;
2493 cum->mmx_nregs -= 1;
2494 cum->mmx_regno += 1;
2495 if (cum->mmx_nregs <= 0)
2497 cum->mmx_nregs = 0;
2498 cum->mmx_regno = 0;
2501 else
2503 cum->words += words;
2504 cum->nregs -= words;
2505 cum->regno += words;
2507 if (cum->nregs <= 0)
2509 cum->nregs = 0;
2510 cum->regno = 0;
2514 return;
2517 /* Define where to put the arguments to a function.
2518 Value is zero to push the argument on the stack,
2519 or a hard register in which to store the argument.
2521 MODE is the argument's machine mode.
2522 TYPE is the data type of the argument (as a tree).
2523 This is null for libcalls where that information may
2524 not be available.
2525 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2526 the preceding args and about the function being called.
2527 NAMED is nonzero if this argument is a named parameter
2528 (otherwise it is an extra parameter matching an ellipsis). */
2531 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2532 enum machine_mode mode, /* current arg mode */
2533 tree type, /* type of the argument or 0 if lib support */
2534 int named) /* != 0 for normal args, == 0 for ... args */
2536 rtx ret = NULL_RTX;
2537 int bytes =
2538 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2539 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2540 static bool warnedsse, warnedmmx;
2542 /* Handle a hidden AL argument containing the number of registers for varargs
2543 x86-64 functions. For the i386 ABI just return constm1_rtx to avoid
2544 any AL settings. */
2545 if (mode == VOIDmode)
2547 if (TARGET_64BIT)
2548 return GEN_INT (cum->maybe_vaarg
2549 ? (cum->sse_nregs < 0
2550 ? SSE_REGPARM_MAX
2551 : cum->sse_regno)
2552 : -1);
2553 else
2554 return constm1_rtx;
2556 if (TARGET_64BIT)
2557 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2558 &x86_64_int_parameter_registers [cum->regno],
2559 cum->sse_regno);
2560 else
2561 switch (mode)
2563 /* For now, pass fp/complex values on the stack. */
2564 default:
2565 break;
2567 case BLKmode:
2568 if (bytes < 0)
2569 break;
2570 /* FALLTHRU */
2571 case DImode:
2572 case SImode:
2573 case HImode:
2574 case QImode:
2575 if (words <= cum->nregs)
2577 int regno = cum->regno;
2579 /* Fastcall allocates the first two DWORD (SImode) or
2580 smaller arguments to ECX and EDX. */
2581 if (cum->fastcall)
2583 if (mode == BLKmode || mode == DImode)
2584 break;
2586 /* ECX not EAX is the first allocated register. */
2587 if (regno == 0)
2588 regno = 2;
2590 ret = gen_rtx_REG (mode, regno);
2592 break;
2593 case TImode:
2594 case V16QImode:
2595 case V8HImode:
2596 case V4SImode:
2597 case V2DImode:
2598 case V4SFmode:
2599 case V2DFmode:
2600 if (!type || !AGGREGATE_TYPE_P (type))
2602 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2604 warnedsse = true;
2605 warning ("SSE vector argument without SSE enabled "
2606 "changes the ABI");
2608 if (cum->sse_nregs)
2609 ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
2611 break;
2612 case V8QImode:
2613 case V4HImode:
2614 case V2SImode:
2615 case V2SFmode:
2616 if (!type || !AGGREGATE_TYPE_P (type))
2618 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2620 warnedmmx = true;
2621 warning ("MMX vector argument without MMX enabled "
2622 "changes the ABI");
2624 if (cum->mmx_nregs)
2625 ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
2627 break;
2630 if (TARGET_DEBUG_ARG)
2632 fprintf (stderr,
2633 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2634 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2636 if (ret)
2637 print_simple_rtl (stderr, ret);
2638 else
2639 fprintf (stderr, ", stack");
2641 fprintf (stderr, " )\n");
2644 return ret;
2647 /* A C expression that indicates when an argument must be passed by
2648 reference. If nonzero for an argument, a copy of that argument is
2649 made in memory and a pointer to the argument is passed instead of
2650 the argument itself. The pointer is passed in whatever way is
2651 appropriate for passing a pointer to that type. */
2654 function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2655 enum machine_mode mode ATTRIBUTE_UNUSED,
2656 tree type, int named ATTRIBUTE_UNUSED)
2658 if (!TARGET_64BIT)
2659 return 0;
2661 if (type && int_size_in_bytes (type) == -1)
2663 if (TARGET_DEBUG_ARG)
2664 fprintf (stderr, "function_arg_pass_by_reference\n");
2665 return 1;
2668 return 0;
2671 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
2672 passing ABI. */
2673 static bool
2674 contains_128bit_aligned_vector_p (tree type)
2676 enum machine_mode mode = TYPE_MODE (type);
2677 if (SSE_REG_MODE_P (mode)
2678 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2679 return true;
2680 if (TYPE_ALIGN (type) < 128)
2681 return false;
2683 if (AGGREGATE_TYPE_P (type))
2685 /* Walk the aggregates recursively. */
2686 if (TREE_CODE (type) == RECORD_TYPE
2687 || TREE_CODE (type) == UNION_TYPE
2688 || TREE_CODE (type) == QUAL_UNION_TYPE)
2690 tree field;
2692 if (TYPE_BINFO (type) != NULL
2693 && TYPE_BINFO_BASETYPES (type) != NULL)
2695 tree bases = TYPE_BINFO_BASETYPES (type);
2696 int n_bases = TREE_VEC_LENGTH (bases);
2697 int i;
2699 for (i = 0; i < n_bases; ++i)
2701 tree binfo = TREE_VEC_ELT (bases, i);
2702 tree type = BINFO_TYPE (binfo);
2704 if (contains_128bit_aligned_vector_p (type))
2705 return true;
2709 /* And now check the fields of the structure. */
2709 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2711 if (TREE_CODE (field) == FIELD_DECL
2712 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2713 return true;
2716 /* Just for use if some language passes arrays by value. */
2717 else if (TREE_CODE (type) == ARRAY_TYPE)
2719 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2720 return true;
2722 else
2723 abort ();
2725 return false;
2728 /* Gives the alignment boundary, in bits, of an argument with the
2729 specified mode and type. */
2732 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2734 int align;
2735 if (type)
2736 align = TYPE_ALIGN (type);
2737 else
2738 align = GET_MODE_ALIGNMENT (mode);
2739 if (align < PARM_BOUNDARY)
2740 align = PARM_BOUNDARY;
2741 if (!TARGET_64BIT)
2743 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2744 make an exception for SSE modes since these require 128bit
2745 alignment.
2747 The handling here differs from field_alignment. ICC aligns MMX
2748 arguments to 4 byte boundaries, while structure fields are aligned
2749 to 8 byte boundaries. */
2750 if (!type)
2752 if (!SSE_REG_MODE_P (mode))
2753 align = PARM_BOUNDARY;
2755 else
2757 if (!contains_128bit_aligned_vector_p (type))
2758 align = PARM_BOUNDARY;
2761 if (align > 128)
2762 align = 128;
2763 return align;
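/* Illustrative: on ia32 a plain double argument stays at the 32-bit
   PARM_BOUNDARY, whereas a __m128 (V4SFmode) argument, or an aggregate
   containing such a vector, is aligned to 128 bits; nothing is ever
   aligned beyond 128 bits here.  */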
2766 /* Return true if N is a possible register number of function value. */
2767 bool
2768 ix86_function_value_regno_p (int regno)
2770 if (!TARGET_64BIT)
2772 return ((regno) == 0
2773 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2774 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2776 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2777 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2778 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2781 /* Define how to find the value returned by a function.
2782 VALTYPE is the data type of the value (as a tree).
2783 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2784 otherwise, FUNC is 0. */
2786 ix86_function_value (tree valtype)
2788 if (TARGET_64BIT)
2790 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2791 REGPARM_MAX, SSE_REGPARM_MAX,
2792 x86_64_int_return_registers, 0);
2793 /* For zero sized structures, construct_container returns NULL, but we need
2794 to keep the rest of the compiler happy by returning a meaningful value. */
2795 if (!ret)
2796 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2797 return ret;
2799 else
2800 return gen_rtx_REG (TYPE_MODE (valtype),
2801 ix86_value_regno (TYPE_MODE (valtype)));
2804 /* Return true iff type is returned in memory. */
2806 ix86_return_in_memory (tree type)
2808 int needed_intregs, needed_sseregs, size;
2809 enum machine_mode mode = TYPE_MODE (type);
2811 if (TARGET_64BIT)
2812 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2814 if (mode == BLKmode)
2815 return 1;
2817 size = int_size_in_bytes (type);
2819 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2820 return 0;
2822 if (VECTOR_MODE_P (mode) || mode == TImode)
2824 /* User-created vectors small enough to fit in EAX. */
2825 if (size < 8)
2826 return 0;
2828 /* MMX/3dNow values are returned on the stack, since we've
2829 got to EMMS/FEMMS before returning. */
2830 if (size == 8)
2831 return 1;
2833 /* SSE values are returned in XMM0. */
2834 /* ??? Except when it doesn't exist? We have a choice of
2835 either (1) being abi incompatible with a -march switch,
2836 or (2) generating an error here. Given no good solution,
2837 I think the safest thing is one warning. The user won't
2838 be able to use -Werror, but.... */
2839 if (size == 16)
2841 static bool warned;
2843 if (TARGET_SSE)
2844 return 0;
2846 if (!warned)
2848 warned = true;
2849 warning ("SSE vector return without SSE enabled "
2850 "changes the ABI");
2852 return 1;
2856 if (mode == XFmode)
2857 return 0;
2859 if (size > 12)
2860 return 1;
2861 return 0;
2864 /* Define how to find the value returned by a library function
2865 assuming the value has mode MODE. */
2867 ix86_libcall_value (enum machine_mode mode)
2869 if (TARGET_64BIT)
2871 switch (mode)
2873 case SFmode:
2874 case SCmode:
2875 case DFmode:
2876 case DCmode:
2877 return gen_rtx_REG (mode, FIRST_SSE_REG);
2878 case XFmode:
2879 case XCmode:
2880 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2881 case TFmode:
2882 case TCmode:
2883 return NULL;
2884 default:
2885 return gen_rtx_REG (mode, 0);
2888 else
2889 return gen_rtx_REG (mode, ix86_value_regno (mode));
2892 /* Given a mode, return the register to use for a return value. */
2894 static int
2895 ix86_value_regno (enum machine_mode mode)
2897 /* Floating point return values in %st(0). */
2898 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2899 return FIRST_FLOAT_REG;
2900 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2901 we prevent this case when sse is not available. */
2902 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2903 return FIRST_SSE_REG;
2904 /* Everything else in %eax. */
2905 return 0;
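/* Illustrative: with TARGET_FLOAT_RETURNS_IN_80387 (the default), a float or
   double comes back in %st(0); a TImode or 16-byte vector value comes back
   in %xmm0; everything else (integers, pointers) comes back in %eax.  */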
2908 /* Create the va_list data type. */
2910 static tree
2911 ix86_build_builtin_va_list (void)
2913 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2915 /* For i386 we use plain pointer to argument area. */
2916 if (!TARGET_64BIT)
2917 return build_pointer_type (char_type_node);
2919 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2920 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2922 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2923 unsigned_type_node);
2924 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2925 unsigned_type_node);
2926 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2927 ptr_type_node);
2928 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2929 ptr_type_node);
2931 DECL_FIELD_CONTEXT (f_gpr) = record;
2932 DECL_FIELD_CONTEXT (f_fpr) = record;
2933 DECL_FIELD_CONTEXT (f_ovf) = record;
2934 DECL_FIELD_CONTEXT (f_sav) = record;
2936 TREE_CHAIN (record) = type_decl;
2937 TYPE_NAME (record) = type_decl;
2938 TYPE_FIELDS (record) = f_gpr;
2939 TREE_CHAIN (f_gpr) = f_fpr;
2940 TREE_CHAIN (f_fpr) = f_ovf;
2941 TREE_CHAIN (f_ovf) = f_sav;
2943 layout_type (record);
2945 /* The correct type is an array type of one element. */
2946 return build_array_type (record, build_index_type (size_zero_node));
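/* The record built above matches the x86-64 ABI va_list layout; as a rough
   C sketch (illustrative only) it corresponds to

       typedef struct __va_list_tag {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __builtin_va_list[1];
   */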
2949 /* Perform any needed actions for a function that is receiving a
2950 variable number of arguments.
2952 CUM is as above.
2954 MODE and TYPE are the mode and type of the current parameter.
2956 PRETEND_SIZE is a variable that should be set to the amount of stack
2957 that must be pushed by the prologue to pretend that our caller pushed it.
2960 Normally, this macro will push all remaining incoming registers on the
2961 stack and set PRETEND_SIZE to the length of the registers pushed. */
2963 void
2964 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2965 tree type, int *pretend_size ATTRIBUTE_UNUSED,
2966 int no_rtl)
2968 CUMULATIVE_ARGS next_cum;
2969 rtx save_area = NULL_RTX, mem;
2970 rtx label;
2971 rtx label_ref;
2972 rtx tmp_reg;
2973 rtx nsse_reg;
2974 int set;
2975 tree fntype;
2976 int stdarg_p;
2977 int i;
2979 if (!TARGET_64BIT)
2980 return;
2982 /* Indicate to allocate space on the stack for varargs save area. */
2983 ix86_save_varrargs_registers = 1;
2985 cfun->stack_alignment_needed = 128;
2987 fntype = TREE_TYPE (current_function_decl);
2988 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2989 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2990 != void_type_node));
2992 /* For varargs, we do not want to skip the dummy va_dcl argument.
2993 For stdargs, we do want to skip the last named argument. */
2994 next_cum = *cum;
2995 if (stdarg_p)
2996 function_arg_advance (&next_cum, mode, type, 1);
2998 if (!no_rtl)
2999 save_area = frame_pointer_rtx;
3001 set = get_varargs_alias_set ();
3003 for (i = next_cum.regno; i < ix86_regparm; i++)
3005 mem = gen_rtx_MEM (Pmode,
3006 plus_constant (save_area, i * UNITS_PER_WORD));
3007 set_mem_alias_set (mem, set);
3008 emit_move_insn (mem, gen_rtx_REG (Pmode,
3009 x86_64_int_parameter_registers[i]));
3012 if (next_cum.sse_nregs)
3014 /* Now emit code to save SSE registers. The AX parameter contains the number
3015 of SSE parameter registers used to call this function. We use the
3016 sse_prologue_save insn template that produces a computed jump across
3017 the SSE saves. We need some preparation work to get this working. */
3019 label = gen_label_rtx ();
3020 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3022 /* Compute the address to jump to:
3023 label - 4*eax + nnamed_sse_arguments*4 */
3024 tmp_reg = gen_reg_rtx (Pmode);
3025 nsse_reg = gen_reg_rtx (Pmode);
3026 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3027 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3028 gen_rtx_MULT (Pmode, nsse_reg,
3029 GEN_INT (4))));
3030 if (next_cum.sse_regno)
3031 emit_move_insn
3032 (nsse_reg,
3033 gen_rtx_CONST (DImode,
3034 gen_rtx_PLUS (DImode,
3035 label_ref,
3036 GEN_INT (next_cum.sse_regno * 4))));
3037 else
3038 emit_move_insn (nsse_reg, label_ref);
3039 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3041 /* Compute the address of the memory block we save into. We always use a
3042 pointer pointing 127 bytes after the first byte to store - this is needed
3043 to keep the instruction size limited to 4 bytes. */
3044 tmp_reg = gen_reg_rtx (Pmode);
3045 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3046 plus_constant (save_area,
3047 8 * REGPARM_MAX + 127)));
3048 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3049 set_mem_alias_set (mem, set);
3050 set_mem_align (mem, BITS_PER_WORD);
3052 /* And finally do the dirty job! */
3053 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3054 GEN_INT (next_cum.sse_regno), label));
3059 /* Implement va_start. */
3061 void
3062 ix86_va_start (tree valist, rtx nextarg)
3064 HOST_WIDE_INT words, n_gpr, n_fpr;
3065 tree f_gpr, f_fpr, f_ovf, f_sav;
3066 tree gpr, fpr, ovf, sav, t;
3068 /* Only the 64-bit target needs something special. */
3069 if (!TARGET_64BIT)
3071 std_expand_builtin_va_start (valist, nextarg);
3072 return;
3075 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3076 f_fpr = TREE_CHAIN (f_gpr);
3077 f_ovf = TREE_CHAIN (f_fpr);
3078 f_sav = TREE_CHAIN (f_ovf);
3080 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3081 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3082 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3083 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3084 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3086 /* Count number of gp and fp argument registers used. */
3087 words = current_function_args_info.words;
3088 n_gpr = current_function_args_info.regno;
3089 n_fpr = current_function_args_info.sse_regno;
3091 if (TARGET_DEBUG_ARG)
3092 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3093 (int) words, (int) n_gpr, (int) n_fpr);
3095 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3096 build_int_2 (n_gpr * 8, 0));
3097 TREE_SIDE_EFFECTS (t) = 1;
3098 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3100 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3101 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
3102 TREE_SIDE_EFFECTS (t) = 1;
3103 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3105 /* Find the overflow area. */
3106 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3107 if (words != 0)
3108 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3109 build_int_2 (words * UNITS_PER_WORD, 0));
3110 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3111 TREE_SIDE_EFFECTS (t) = 1;
3112 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3114 /* Find the register save area.
3115 The function prologue saves it right above the stack frame. */
3116 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3117 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3118 TREE_SIDE_EFFECTS (t) = 1;
3119 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
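/* Illustrative: for  int f (int a, ...)  one named integer argument has been
   consumed, so va_start stores gp_offset = 8, fp_offset = 8 * REGPARM_MAX
   (48, the start of the SSE part of the save area), points overflow_arg_area
   at the first stack-passed argument, and points reg_save_area at the block
   the prologue saved right above the stack frame.  */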
3122 /* Implement va_arg. */
3124 ix86_va_arg (tree valist, tree type)
3126 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3127 tree f_gpr, f_fpr, f_ovf, f_sav;
3128 tree gpr, fpr, ovf, sav, t;
3129 int size, rsize;
3130 rtx lab_false, lab_over = NULL_RTX;
3131 rtx addr_rtx, r;
3132 rtx container;
3133 int indirect_p = 0;
3135 /* Only the 64-bit target needs something special. */
3136 if (!TARGET_64BIT)
3138 return std_expand_builtin_va_arg (valist, type);
3141 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3142 f_fpr = TREE_CHAIN (f_gpr);
3143 f_ovf = TREE_CHAIN (f_fpr);
3144 f_sav = TREE_CHAIN (f_ovf);
3146 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3147 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3148 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3149 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3150 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3152 size = int_size_in_bytes (type);
3153 if (size == -1)
3155 /* Passed by reference. */
3156 indirect_p = 1;
3157 type = build_pointer_type (type);
3158 size = int_size_in_bytes (type);
3160 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3162 container = construct_container (TYPE_MODE (type), type, 0,
3163 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3165 * Pull the value out of the saved registers ...
3168 addr_rtx = gen_reg_rtx (Pmode);
3170 if (container)
3172 rtx int_addr_rtx, sse_addr_rtx;
3173 int needed_intregs, needed_sseregs;
3174 int need_temp;
3176 lab_over = gen_label_rtx ();
3177 lab_false = gen_label_rtx ();
3179 examine_argument (TYPE_MODE (type), type, 0,
3180 &needed_intregs, &needed_sseregs);
3183 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3184 || TYPE_ALIGN (type) > 128);
3186 /* In case we are passing a structure, verify that it is a consecutive block
3187 in the register save area. If not, we need to do moves. */
3188 if (!need_temp && !REG_P (container))
3190 /* Verify that all registers are strictly consecutive */
3191 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3193 int i;
3195 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3197 rtx slot = XVECEXP (container, 0, i);
3198 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3199 || INTVAL (XEXP (slot, 1)) != i * 16)
3200 need_temp = 1;
3203 else
3205 int i;
3207 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3209 rtx slot = XVECEXP (container, 0, i);
3210 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3211 || INTVAL (XEXP (slot, 1)) != i * 8)
3212 need_temp = 1;
3216 if (!need_temp)
3218 int_addr_rtx = addr_rtx;
3219 sse_addr_rtx = addr_rtx;
3221 else
3223 int_addr_rtx = gen_reg_rtx (Pmode);
3224 sse_addr_rtx = gen_reg_rtx (Pmode);
3226 /* First ensure that we fit completely in registers. */
3227 if (needed_intregs)
3229 emit_cmp_and_jump_insns (expand_expr
3230 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3231 GEN_INT ((REGPARM_MAX - needed_intregs +
3232 1) * 8), GE, const1_rtx, SImode,
3233 1, lab_false);
3235 if (needed_sseregs)
3237 emit_cmp_and_jump_insns (expand_expr
3238 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3239 GEN_INT ((SSE_REGPARM_MAX -
3240 needed_sseregs + 1) * 16 +
3241 REGPARM_MAX * 8), GE, const1_rtx,
3242 SImode, 1, lab_false);
3245 /* Compute index to start of area used for integer regs. */
3246 if (needed_intregs)
3248 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3249 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3250 if (r != int_addr_rtx)
3251 emit_move_insn (int_addr_rtx, r);
3253 if (needed_sseregs)
3255 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3256 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3257 if (r != sse_addr_rtx)
3258 emit_move_insn (sse_addr_rtx, r);
3260 if (need_temp)
3262 int i;
3263 rtx mem;
3264 rtx x;
3266 /* Never use the memory itself, as it has the alias set. */
3267 x = XEXP (assign_temp (type, 0, 1, 0), 0);
3268 mem = gen_rtx_MEM (BLKmode, x);
3269 force_operand (x, addr_rtx);
3270 set_mem_alias_set (mem, get_varargs_alias_set ());
3271 set_mem_align (mem, BITS_PER_UNIT);
3273 for (i = 0; i < XVECLEN (container, 0); i++)
3275 rtx slot = XVECEXP (container, 0, i);
3276 rtx reg = XEXP (slot, 0);
3277 enum machine_mode mode = GET_MODE (reg);
3278 rtx src_addr;
3279 rtx src_mem;
3280 int src_offset;
3281 rtx dest_mem;
3283 if (SSE_REGNO_P (REGNO (reg)))
3285 src_addr = sse_addr_rtx;
3286 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3288 else
3290 src_addr = int_addr_rtx;
3291 src_offset = REGNO (reg) * 8;
3293 src_mem = gen_rtx_MEM (mode, src_addr);
3294 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3295 src_mem = adjust_address (src_mem, mode, src_offset);
3296 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3297 emit_move_insn (dest_mem, src_mem);
3301 if (needed_intregs)
3304 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3305 build_int_2 (needed_intregs * 8, 0));
3306 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3307 TREE_SIDE_EFFECTS (t) = 1;
3308 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3310 if (needed_sseregs)
3313 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3314 build_int_2 (needed_sseregs * 16, 0));
3315 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3316 TREE_SIDE_EFFECTS (t) = 1;
3317 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3320 emit_jump_insn (gen_jump (lab_over));
3321 emit_barrier ();
3322 emit_label (lab_false);
3325 /* ... otherwise out of the overflow area. */
3327 /* Care for on-stack alignment if needed. */
3328 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3329 t = ovf;
3330 else
3332 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3333 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3334 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3336 t = save_expr (t);
3338 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3339 if (r != addr_rtx)
3340 emit_move_insn (addr_rtx, r);
3343 build (PLUS_EXPR, TREE_TYPE (t), t,
3344 build_int_2 (rsize * UNITS_PER_WORD, 0));
3345 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3346 TREE_SIDE_EFFECTS (t) = 1;
3347 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3349 if (container)
3350 emit_label (lab_over);
3352 if (indirect_p)
3354 r = gen_rtx_MEM (Pmode, addr_rtx);
3355 set_mem_alias_set (r, get_varargs_alias_set ());
3356 emit_move_insn (addr_rtx, r);
3359 return addr_rtx;
3362 /* Return nonzero if OP is either an i387 or SSE fp register. */
3364 any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3366 return ANY_FP_REG_P (op);
3369 /* Return nonzero if OP is an i387 fp register. */
3371 fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3373 return FP_REG_P (op);
3376 /* Return nonzero if OP is a non-fp register_operand. */
3378 register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3380 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3383 /* Return nonzero if OP is a register operand other than an
3384 i387 fp register. */
3386 register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3388 return register_operand (op, mode) && !FP_REG_P (op);
3391 /* Return nonzero if OP is general operand representable on x86_64. */
3394 x86_64_general_operand (rtx op, enum machine_mode mode)
3396 if (!TARGET_64BIT)
3397 return general_operand (op, mode);
3398 if (nonimmediate_operand (op, mode))
3399 return 1;
3400 return x86_64_sign_extended_value (op);
3403 /* Return nonzero if OP is general operand representable on x86_64
3404 as either sign extended or zero extended constant. */
3407 x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3409 if (!TARGET_64BIT)
3410 return general_operand (op, mode);
3411 if (nonimmediate_operand (op, mode))
3412 return 1;
3413 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3416 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3419 x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3421 if (!TARGET_64BIT)
3422 return nonmemory_operand (op, mode);
3423 if (register_operand (op, mode))
3424 return 1;
3425 return x86_64_sign_extended_value (op);
3428 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3431 x86_64_movabs_operand (rtx op, enum machine_mode mode)
3433 if (!TARGET_64BIT || !flag_pic)
3434 return nonmemory_operand (op, mode);
3435 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3436 return 1;
3437 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3438 return 1;
3439 return 0;
3442 /* Return nonzero if OPNUM's MEM should be matched
3443 in movabs* patterns. */
3446 ix86_check_movabs (rtx insn, int opnum)
3448 rtx set, mem;
3450 set = PATTERN (insn);
3451 if (GET_CODE (set) == PARALLEL)
3452 set = XVECEXP (set, 0, 0);
3453 if (GET_CODE (set) != SET)
3454 abort ();
3455 mem = XEXP (set, opnum);
3456 while (GET_CODE (mem) == SUBREG)
3457 mem = SUBREG_REG (mem);
3458 if (GET_CODE (mem) != MEM)
3459 abort ();
3460 return (volatile_ok || !MEM_VOLATILE_P (mem));
3463 /* Return nonzero if OP is a nonmemory operand representable on x86_64
3464 as either a sign-extended or a zero-extended constant. */
3466 x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3468 if (!TARGET_64BIT)
3469 return nonmemory_operand (op, mode);
3470 if (register_operand (op, mode))
3471 return 1;
3472 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3475 /* Return nonzero if OP is immediate operand representable on x86_64. */
3478 x86_64_immediate_operand (rtx op, enum machine_mode mode)
3480 if (!TARGET_64BIT)
3481 return immediate_operand (op, mode);
3482 return x86_64_sign_extended_value (op);
3485 /* Return nonzero if OP is an immediate operand representable on x86_64
3486 as a zero-extended value. */
3488 x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3490 return x86_64_zero_extended_value (op);
3493 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3494 for shift & compare patterns, as shifting by 0 does not change flags),
3495 else return zero. */
3498 const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3500 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3503 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3504 reference and a constant. */
3507 symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3509 switch (GET_CODE (op))
3511 case SYMBOL_REF:
3512 case LABEL_REF:
3513 return 1;
3515 case CONST:
3516 op = XEXP (op, 0);
3517 if (GET_CODE (op) == SYMBOL_REF
3518 || GET_CODE (op) == LABEL_REF
3519 || (GET_CODE (op) == UNSPEC
3520 && (XINT (op, 1) == UNSPEC_GOT
3521 || XINT (op, 1) == UNSPEC_GOTOFF
3522 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3523 return 1;
3524 if (GET_CODE (op) != PLUS
3525 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3526 return 0;
3528 op = XEXP (op, 0);
3529 if (GET_CODE (op) == SYMBOL_REF
3530 || GET_CODE (op) == LABEL_REF)
3531 return 1;
3532 /* Only @GOTOFF gets offsets. */
3533 if (GET_CODE (op) != UNSPEC
3534 || XINT (op, 1) != UNSPEC_GOTOFF)
3535 return 0;
3537 op = XVECEXP (op, 0, 0);
3538 if (GET_CODE (op) == SYMBOL_REF
3539 || GET_CODE (op) == LABEL_REF)
3540 return 1;
3541 return 0;
3543 default:
3544 return 0;
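/* As an illustration of the cases above (symbol names and offsets are
   placeholders, not taken from real code), symbolic_operand accepts:

     (symbol_ref "sym")                                            -> 1
     (label_ref 23)                                                -> 1
     (const (plus (symbol_ref "sym") (const_int 8)))               -> 1
     (const (unspec [(symbol_ref "sym")] UNSPEC_GOT))              -> 1
     (const (plus (unspec [(symbol_ref "sym")] UNSPEC_GOTOFF)
                  (const_int 8)))                                  -> 1

   while an offsetted @GOT reference, e.g.
     (const (plus (unspec [(symbol_ref "sym")] UNSPEC_GOT) (const_int 8)))
   is rejected, since only @GOTOFF gets offsets.  */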
3548 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3551 pic_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3553 if (GET_CODE (op) != CONST)
3554 return 0;
3555 op = XEXP (op, 0);
3556 if (TARGET_64BIT)
3558 if (GET_CODE (op) == UNSPEC
3559 && XINT (op, 1) == UNSPEC_GOTPCREL)
3560 return 1;
3561 if (GET_CODE (op) == PLUS
3562 && GET_CODE (XEXP (op, 0)) == UNSPEC
3563 && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
3564 return 1;
3566 else
3568 if (GET_CODE (op) == UNSPEC)
3569 return 1;
3570 if (GET_CODE (op) != PLUS
3571 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3572 return 0;
3573 op = XEXP (op, 0);
3574 if (GET_CODE (op) == UNSPEC)
3575 return 1;
3577 return 0;
3580 /* Return true if OP is a symbolic operand that resolves locally. */
3582 static int
3583 local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3585 if (GET_CODE (op) == CONST
3586 && GET_CODE (XEXP (op, 0)) == PLUS
3587 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3588 op = XEXP (XEXP (op, 0), 0);
3590 if (GET_CODE (op) == LABEL_REF)
3591 return 1;
3593 if (GET_CODE (op) != SYMBOL_REF)
3594 return 0;
3596 if (SYMBOL_REF_LOCAL_P (op))
3597 return 1;
3599 /* There is, however, a not insubstantial body of code in the rest of
3600 the compiler that assumes it can just stick the results of
3601 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3602 /* ??? This is a hack. Should update the body of the compiler to
3603 always create a DECL and invoke targetm.encode_section_info. */
3604 if (strncmp (XSTR (op, 0), internal_label_prefix,
3605 internal_label_prefix_len) == 0)
3606 return 1;
3608 return 0;
3611 /* Test for various thread-local symbols. */
3614 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3616 if (GET_CODE (op) != SYMBOL_REF)
3617 return 0;
3618 return SYMBOL_REF_TLS_MODEL (op);
3621 static inline int
3622 tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3624 if (GET_CODE (op) != SYMBOL_REF)
3625 return 0;
3626 return SYMBOL_REF_TLS_MODEL (op) == kind;
3630 global_dynamic_symbolic_operand (rtx op,
3631 enum machine_mode mode ATTRIBUTE_UNUSED)
3633 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3637 local_dynamic_symbolic_operand (rtx op,
3638 enum machine_mode mode ATTRIBUTE_UNUSED)
3640 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3644 initial_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3646 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3650 local_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3652 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
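/* Each of the four wrappers above simply tests SYMBOL_REF_TLS_MODEL against
   one enum tls_model value.  As a hedged sketch of their intended use, a
   machine-description pattern would reference them as operand predicates,
   for example

     (match_operand:SI 1 "global_dynamic_symbolic_operand" "")

   (the mode and operand number here are made up for illustration, not taken
   from i386.md).  */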
3655 /* Test for a valid operand for a call instruction. Don't allow the
3656 arg pointer register or virtual regs since they may decay into
3657 reg + const, which the patterns can't handle. */
3660 call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3662 /* Disallow indirect through a virtual register. This leads to
3663 compiler aborts when trying to eliminate them. */
3664 if (GET_CODE (op) == REG
3665 && (op == arg_pointer_rtx
3666 || op == frame_pointer_rtx
3667 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3668 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3669 return 0;
3671 /* Disallow `call 1234'. Due to varying assembler lameness this
3672 gets either rejected or translated to `call .+1234'. */
3673 if (GET_CODE (op) == CONST_INT)
3674 return 0;
3676 /* Explicitly allow SYMBOL_REF even if pic. */
3677 if (GET_CODE (op) == SYMBOL_REF)
3678 return 1;
3680 /* Otherwise we can allow any general_operand in the address. */
3681 return general_operand (op, Pmode);
3684 /* Test for a valid operand for a call instruction. Don't allow the
3685 arg pointer register or virtual regs since they may decay into
3686 reg + const, which the patterns can't handle. */
3689 sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3691 /* Disallow indirect through a virtual register. This leads to
3692 compiler aborts when trying to eliminate them. */
3693 if (GET_CODE (op) == REG
3694 && (op == arg_pointer_rtx
3695 || op == frame_pointer_rtx
3696 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3697 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3698 return 0;
3700 /* Explicitly allow SYMBOL_REF even if pic. */
3701 if (GET_CODE (op) == SYMBOL_REF)
3702 return 1;
3704 /* Otherwise we can only allow register operands. */
3705 return register_operand (op, Pmode);
3709 constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3711 if (GET_CODE (op) == CONST
3712 && GET_CODE (XEXP (op, 0)) == PLUS
3713 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3714 op = XEXP (XEXP (op, 0), 0);
3715 return GET_CODE (op) == SYMBOL_REF;
3718 /* Match exactly zero and one. */
3721 const0_operand (rtx op, enum machine_mode mode)
3723 return op == CONST0_RTX (mode);
3727 const1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3729 return op == const1_rtx;
3732 /* Match 2, 4, or 8. Used for leal multiplicands. */
3735 const248_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3737 return (GET_CODE (op) == CONST_INT
3738 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
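/* Illustrative note: 2, 4 and 8 are exactly the scale factors encodable in
   the SIB byte, so for instance (mult (reg) (const_int 4)) can be emitted as
   "lea 0(,%reg,4), %dst", whereas a multiply by 3 must instead be expressed
   as base + index, e.g. "lea (%reg,%reg,2), %dst".  */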
3742 const_0_to_3_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3744 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3748 const_0_to_7_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3750 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3754 const_0_to_15_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3756 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3760 const_0_to_255_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3762 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3766 /* True if this is a constant appropriate for an increment or decrement. */
3769 incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3771 /* On Pentium 4, the inc and dec operations cause an extra dependency on the
3772 flags register, since the carry flag is not set. */
3773 if (TARGET_PENTIUM4 && !optimize_size)
3774 return 0;
3775 return op == const1_rtx || op == constm1_rtx;
3778 /* Return nonzero if OP is acceptable as operand of DImode shift
3779 expander. */
3782 shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3784 if (TARGET_64BIT)
3785 return nonimmediate_operand (op, mode);
3786 else
3787 return register_operand (op, mode);
3790 /* Return false if this is the stack pointer, or any other fake
3791 register eliminable to the stack pointer. Otherwise, this is
3792 a register operand.
3794 This is used to prevent esp from being used as an index reg,
3795 which would only happen in pathological cases. */
3798 reg_no_sp_operand (rtx op, enum machine_mode mode)
3800 rtx t = op;
3801 if (GET_CODE (t) == SUBREG)
3802 t = SUBREG_REG (t);
3803 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3804 return 0;
3806 return register_operand (op, mode);
3810 mmx_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3812 return MMX_REG_P (op);
3815 /* Return false if this is any eliminable register. Otherwise
3816 general_operand. */
3819 general_no_elim_operand (rtx op, enum machine_mode mode)
3821 rtx t = op;
3822 if (GET_CODE (t) == SUBREG)
3823 t = SUBREG_REG (t);
3824 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3825 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3826 || t == virtual_stack_dynamic_rtx)
3827 return 0;
3828 if (REG_P (t)
3829 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3830 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3831 return 0;
3833 return general_operand (op, mode);
3836 /* Return false if this is any eliminable register. Otherwise
3837 register_operand or const_int. */
3840 nonmemory_no_elim_operand (rtx op, enum machine_mode mode)
3842 rtx t = op;
3843 if (GET_CODE (t) == SUBREG)
3844 t = SUBREG_REG (t);
3845 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3846 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3847 || t == virtual_stack_dynamic_rtx)
3848 return 0;
3850 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3853 /* Return false if this is any eliminable register or stack register,
3854 otherwise work like register_operand. */
3857 index_register_operand (rtx op, enum machine_mode mode)
3859 rtx t = op;
3860 if (GET_CODE (t) == SUBREG)
3861 t = SUBREG_REG (t);
3862 if (!REG_P (t))
3863 return 0;
3864 if (t == arg_pointer_rtx
3865 || t == frame_pointer_rtx
3866 || t == virtual_incoming_args_rtx
3867 || t == virtual_stack_vars_rtx
3868 || t == virtual_stack_dynamic_rtx
3869 || REGNO (t) == STACK_POINTER_REGNUM)
3870 return 0;
3872 return general_operand (op, mode);
3875 /* Return true if op is a Q_REGS class register. */
3878 q_regs_operand (rtx op, enum machine_mode mode)
3880 if (mode != VOIDmode && GET_MODE (op) != mode)
3881 return 0;
3882 if (GET_CODE (op) == SUBREG)
3883 op = SUBREG_REG (op);
3884 return ANY_QI_REG_P (op);
3887 /* Return true if op is a flags register. */
3890 flags_reg_operand (rtx op, enum machine_mode mode)
3892 if (mode != VOIDmode && GET_MODE (op) != mode)
3893 return 0;
3894 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3897 /* Return true if op is a NON_Q_REGS class register. */
3900 non_q_regs_operand (rtx op, enum machine_mode mode)
3902 if (mode != VOIDmode && GET_MODE (op) != mode)
3903 return 0;
3904 if (GET_CODE (op) == SUBREG)
3905 op = SUBREG_REG (op);
3906 return NON_QI_REG_P (op);
3910 zero_extended_scalar_load_operand (rtx op,
3911 enum machine_mode mode ATTRIBUTE_UNUSED)
3913 unsigned n_elts;
3914 if (GET_CODE (op) != MEM)
3915 return 0;
3916 op = maybe_get_pool_constant (op);
3917 if (!op)
3918 return 0;
3919 if (GET_CODE (op) != CONST_VECTOR)
3920 return 0;
3921 n_elts =
3922 (GET_MODE_SIZE (GET_MODE (op)) /
3923 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3924 for (n_elts--; n_elts > 0; n_elts--)
3926 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3927 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3928 return 0;
3930 return 1;
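/* A hedged example of what the loop above accepts: a constant-pool load of a
   V4SImode vector such as { x, 0, 0, 0 } qualifies, because every element
   after the first is zero, while { x, 0, 1, 0 } does not.  (The element
   values are placeholders.)  */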
3933 /* Return 1 when OP is an operand acceptable for a standard SSE move. */
3935 vector_move_operand (rtx op, enum machine_mode mode)
3937 if (nonimmediate_operand (op, mode))
3938 return 1;
3939 if (GET_MODE (op) != mode && mode != VOIDmode)
3940 return 0;
3941 return (op == CONST0_RTX (GET_MODE (op)));
3944 /* Return true if op is a valid address and does not contain
3945 a segment override. */
3948 no_seg_address_operand (rtx op, enum machine_mode mode)
3950 struct ix86_address parts;
3952 if (! address_operand (op, mode))
3953 return 0;
3955 if (! ix86_decompose_address (op, &parts))
3956 abort ();
3958 return parts.seg == SEG_DEFAULT;
3961 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3962 insns. */
3964 sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3966 enum rtx_code code = GET_CODE (op);
3967 switch (code)
3969 /* Operations supported directly. */
3970 case EQ:
3971 case LT:
3972 case LE:
3973 case UNORDERED:
3974 case NE:
3975 case UNGE:
3976 case UNGT:
3977 case ORDERED:
3978 return 1;
3979 /* These are equivalent to ones above in non-IEEE comparisons. */
3980 case UNEQ:
3981 case UNLT:
3982 case UNLE:
3983 case LTGT:
3984 case GE:
3985 case GT:
3986 return !TARGET_IEEE_FP;
3987 default:
3988 return 0;
3991 /* Return 1 if OP is a valid comparison operator in valid mode. */
3993 ix86_comparison_operator (rtx op, enum machine_mode mode)
3995 enum machine_mode inmode;
3996 enum rtx_code code = GET_CODE (op);
3997 if (mode != VOIDmode && GET_MODE (op) != mode)
3998 return 0;
3999 if (GET_RTX_CLASS (code) != '<')
4000 return 0;
4001 inmode = GET_MODE (XEXP (op, 0));
4003 if (inmode == CCFPmode || inmode == CCFPUmode)
4005 enum rtx_code second_code, bypass_code;
4006 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4007 return (bypass_code == NIL && second_code == NIL);
4009 switch (code)
4011 case EQ: case NE:
4012 return 1;
4013 case LT: case GE:
4014 if (inmode == CCmode || inmode == CCGCmode
4015 || inmode == CCGOCmode || inmode == CCNOmode)
4016 return 1;
4017 return 0;
4018 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
4019 if (inmode == CCmode)
4020 return 1;
4021 return 0;
4022 case GT: case LE:
4023 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
4024 return 1;
4025 return 0;
4026 default:
4027 return 0;
4031 /* Return 1 if OP is a valid comparison operator testing that the carry
4032 flag is set. */
4034 ix86_carry_flag_operator (rtx op, enum machine_mode mode)
4036 enum machine_mode inmode;
4037 enum rtx_code code = GET_CODE (op);
4039 if (mode != VOIDmode && GET_MODE (op) != mode)
4040 return 0;
4041 if (GET_RTX_CLASS (code) != '<')
4042 return 0;
4043 inmode = GET_MODE (XEXP (op, 0));
4044 if (GET_CODE (XEXP (op, 0)) != REG
4045 || REGNO (XEXP (op, 0)) != 17
4046 || XEXP (op, 1) != const0_rtx)
4047 return 0;
4049 if (inmode == CCFPmode || inmode == CCFPUmode)
4051 enum rtx_code second_code, bypass_code;
4053 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4054 if (bypass_code != NIL || second_code != NIL)
4055 return 0;
4056 code = ix86_fp_compare_code_to_integer (code);
4058 else if (inmode != CCmode)
4059 return 0;
4060 return code == LTU;
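/* Illustration of the shape accepted above (register 17 is the flags
   register literally checked by the code):

     (ltu (reg:CC 17) (const_int 0))   -> 1, the canonical "carry set" test
     (geu (reg:CC 17) (const_int 0))   -> 0, wrong comparison code
     (ltu (reg:CC 17) (const_int 1))   -> 0, second operand must be const0_rtx  */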
4063 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
4066 fcmov_comparison_operator (rtx op, enum machine_mode mode)
4068 enum machine_mode inmode;
4069 enum rtx_code code = GET_CODE (op);
4071 if (mode != VOIDmode && GET_MODE (op) != mode)
4072 return 0;
4073 if (GET_RTX_CLASS (code) != '<')
4074 return 0;
4075 inmode = GET_MODE (XEXP (op, 0));
4076 if (inmode == CCFPmode || inmode == CCFPUmode)
4078 enum rtx_code second_code, bypass_code;
4080 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4081 if (bypass_code != NIL || second_code != NIL)
4082 return 0;
4083 code = ix86_fp_compare_code_to_integer (code);
4085 /* The i387 supports just a limited set of condition codes. */
4086 switch (code)
4088 case LTU: case GTU: case LEU: case GEU:
4089 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4090 return 1;
4091 return 0;
4092 case ORDERED: case UNORDERED:
4093 case EQ: case NE:
4094 return 1;
4095 default:
4096 return 0;
4100 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4103 promotable_binary_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4105 switch (GET_CODE (op))
4107 case MULT:
4108 /* Modern CPUs have the same latency for HImode and SImode multiplies,
4109 but the 386 and 486 do HImode multiplies faster. */
4110 return ix86_tune > PROCESSOR_I486;
4111 case PLUS:
4112 case AND:
4113 case IOR:
4114 case XOR:
4115 case ASHIFT:
4116 return 1;
4117 default:
4118 return 0;
4122 /* Nearly general operand, but accept any const_double, since we wish
4123 to be able to drop them into memory rather than have them get pulled
4124 into registers. */
4127 cmp_fp_expander_operand (rtx op, enum machine_mode mode)
4129 if (mode != VOIDmode && mode != GET_MODE (op))
4130 return 0;
4131 if (GET_CODE (op) == CONST_DOUBLE)
4132 return 1;
4133 return general_operand (op, mode);
4136 /* Match an SI or HImode register for a zero_extract. */
4139 ext_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4141 int regno;
4142 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4143 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4144 return 0;
4146 if (!register_operand (op, VOIDmode))
4147 return 0;
4149 /* Be careful to accept only registers having upper parts. */
4150 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4151 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4154 /* Return 1 if this is a valid binary floating-point operation.
4155 OP is the expression matched, and MODE is its mode. */
4158 binary_fp_operator (rtx op, enum machine_mode mode)
4160 if (mode != VOIDmode && mode != GET_MODE (op))
4161 return 0;
4163 switch (GET_CODE (op))
4165 case PLUS:
4166 case MINUS:
4167 case MULT:
4168 case DIV:
4169 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4171 default:
4172 return 0;
4177 mult_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4179 return GET_CODE (op) == MULT;
4183 div_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4185 return GET_CODE (op) == DIV;
4189 arith_or_logical_operator (rtx op, enum machine_mode mode)
4191 return ((mode == VOIDmode || GET_MODE (op) == mode)
4192 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4193 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4196 /* Returns 1 if OP is memory operand with a displacement. */
4199 memory_displacement_operand (rtx op, enum machine_mode mode)
4201 struct ix86_address parts;
4203 if (! memory_operand (op, mode))
4204 return 0;
4206 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4207 abort ();
4209 return parts.disp != NULL_RTX;
4212 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4213 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4215 ??? It seems likely that this will only work because cmpsi is an
4216 expander, and no actual insns use this. */
4219 cmpsi_operand (rtx op, enum machine_mode mode)
4221 if (nonimmediate_operand (op, mode))
4222 return 1;
4224 if (GET_CODE (op) == AND
4225 && GET_MODE (op) == SImode
4226 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4227 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4228 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4229 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4230 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4231 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4232 return 1;
4234 return 0;
4237 /* Returns 1 if OP is a memory operand that cannot be represented by the
4238 modRM array. */
4241 long_memory_operand (rtx op, enum machine_mode mode)
4243 if (! memory_operand (op, mode))
4244 return 0;
4246 return memory_address_length (op) != 0;
4249 /* Return nonzero if the rtx is known aligned. */
4252 aligned_operand (rtx op, enum machine_mode mode)
4254 struct ix86_address parts;
4256 if (!general_operand (op, mode))
4257 return 0;
4259 /* Registers and immediate operands are always "aligned". */
4260 if (GET_CODE (op) != MEM)
4261 return 1;
4263 /* Don't even try to do any aligned optimizations with volatiles. */
4264 if (MEM_VOLATILE_P (op))
4265 return 0;
4267 op = XEXP (op, 0);
4269 /* Pushes and pops are only valid on the stack pointer. */
4270 if (GET_CODE (op) == PRE_DEC
4271 || GET_CODE (op) == POST_INC)
4272 return 1;
4274 /* Decode the address. */
4275 if (! ix86_decompose_address (op, &parts))
4276 abort ();
4278 /* Look for some component that isn't known to be aligned. */
4279 if (parts.index)
4281 if (parts.scale < 4
4282 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4283 return 0;
4285 if (parts.base)
4287 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4288 return 0;
4290 if (parts.disp)
4292 if (GET_CODE (parts.disp) != CONST_INT
4293 || (INTVAL (parts.disp) & 3) != 0)
4294 return 0;
4297 /* Didn't find one -- this must be an aligned address. */
4298 return 1;
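/* A worked example of the alignment test above, with hypothetical pointer
   alignments: if REGNO_POINTER_ALIGN reports 32 bits for %ebp and only 8 bits
   for %eax, then
     4(%ebp)        -> aligned   (base aligned, disp % 4 == 0)
     6(%ebp)        -> rejected  (disp & 3 != 0)
     (%ebp,%eax,1)  -> rejected  (scale < 4 and the index is not known aligned)
     (%ebp,%eax,4)  -> aligned   (scale >= 4, so the index cannot disturb the
                                  low two bits)  */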
4301 /* Initialize the table of extra 80387 mathematical constants. */
4303 static void
4304 init_ext_80387_constants (void)
4306 static const char * cst[5] =
4308 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4309 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4310 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4311 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4312 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4314 int i;
4316 for (i = 0; i < 5; i++)
4318 real_from_string (&ext_80387_constants_table[i], cst[i]);
4319 /* Ensure each constant is rounded to XFmode precision. */
4320 real_convert (&ext_80387_constants_table[i],
4321 XFmode, &ext_80387_constants_table[i]);
4324 ext_80387_constants_init = 1;
4327 /* Return true if the constant is something that can be loaded with
4328 a special instruction. */
4331 standard_80387_constant_p (rtx x)
4333 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4334 return -1;
4336 if (x == CONST0_RTX (GET_MODE (x)))
4337 return 1;
4338 if (x == CONST1_RTX (GET_MODE (x)))
4339 return 2;
4341 /* For XFmode constants, try to find a special 80387 instruction on
4342 those CPUs that benefit from them. */
4343 if (GET_MODE (x) == XFmode
4344 && x86_ext_80387_constants & TUNEMASK)
4346 REAL_VALUE_TYPE r;
4347 int i;
4349 if (! ext_80387_constants_init)
4350 init_ext_80387_constants ();
4352 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4353 for (i = 0; i < 5; i++)
4354 if (real_identical (&r, &ext_80387_constants_table[i]))
4355 return i + 3;
4358 return 0;
4361 /* Return the opcode of the special instruction to be used to load
4362 the constant X. */
4364 const char *
4365 standard_80387_constant_opcode (rtx x)
4367 switch (standard_80387_constant_p (x))
4369 case 1:
4370 return "fldz";
4371 case 2:
4372 return "fld1";
4373 case 3:
4374 return "fldlg2";
4375 case 4:
4376 return "fldln2";
4377 case 5:
4378 return "fldl2e";
4379 case 6:
4380 return "fldl2t";
4381 case 7:
4382 return "fldpi";
4384 abort ();
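/* Summary of the mapping implemented by the two functions above
   (return values of standard_80387_constant_p on the left):

     1 -> fldz   (+0.0)        5 -> fldl2e  (log2(e))
     2 -> fld1   (+1.0)        6 -> fldl2t  (log2(10))
     3 -> fldlg2 (log10(2))    7 -> fldpi   (pi)
     4 -> fldln2 (ln(2))

   0 means "no special instruction exists" and -1 means the rtx was not a
   floating-point CONST_DOUBLE at all.  */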
4387 /* Return the CONST_DOUBLE representing the 80387 constant that is
4388 loaded by the specified special instruction. The argument IDX
4389 matches the return value from standard_80387_constant_p. */
4392 standard_80387_constant_rtx (int idx)
4394 int i;
4396 if (! ext_80387_constants_init)
4397 init_ext_80387_constants ();
4399 switch (idx)
4401 case 3:
4402 case 4:
4403 case 5:
4404 case 6:
4405 case 7:
4406 i = idx - 3;
4407 break;
4409 default:
4410 abort ();
4413 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4414 XFmode);
4417 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4420 standard_sse_constant_p (rtx x)
4422 if (x == const0_rtx)
4423 return 1;
4424 return (x == CONST0_RTX (GET_MODE (x)));
4427 /* Returns 1 if OP contains a symbol reference */
4430 symbolic_reference_mentioned_p (rtx op)
4432 const char *fmt;
4433 int i;
4435 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4436 return 1;
4438 fmt = GET_RTX_FORMAT (GET_CODE (op));
4439 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4441 if (fmt[i] == 'E')
4443 int j;
4445 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4446 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4447 return 1;
4450 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4451 return 1;
4454 return 0;
4457 /* Return 1 if it is appropriate to emit `ret' instructions in the
4458 body of a function. Do this only if the epilogue is simple, needing a
4459 couple of insns. Prior to reloading, we can't tell how many registers
4460 must be saved, so return 0 then. Return 0 if there is no frame
4461 marker to de-allocate.
4463 If NON_SAVING_SETJMP is defined and true, then it is not possible
4464 for the epilogue to be simple, so return 0. This is a special case
4465 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4466 until final, but jump_optimize may need to know sooner if a
4467 `return' is OK. */
4470 ix86_can_use_return_insn_p (void)
4472 struct ix86_frame frame;
4474 #ifdef NON_SAVING_SETJMP
4475 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4476 return 0;
4477 #endif
4479 if (! reload_completed || frame_pointer_needed)
4480 return 0;
4482 /* Don't allow more than 32k pop, since that's all we can do
4483 with one instruction. */
4484 if (current_function_pops_args
4485 && current_function_args_size >= 32768)
4486 return 0;
4488 ix86_compute_frame_layout (&frame);
4489 return frame.to_allocate == 0 && frame.nregs == 0;
4492 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4494 x86_64_sign_extended_value (rtx value)
4496 switch (GET_CODE (value))
4498 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
4499 to be at least 32, so all acceptable constants are
4500 represented as CONST_INT. */
4501 case CONST_INT:
4502 if (HOST_BITS_PER_WIDE_INT == 32)
4503 return 1;
4504 else
4506 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4507 return trunc_int_for_mode (val, SImode) == val;
4509 break;
4511 /* For certain code models, the symbolic references are known to fit;
4512 in the CM_SMALL_PIC model we know a reference fits if it is local to the
4513 shared library. Don't count TLS SYMBOL_REFs here, since they should fit
4514 only when inside an UNSPEC handled below. */
4515 case SYMBOL_REF:
4516 /* TLS symbols are not constant. */
4517 if (tls_symbolic_operand (value, Pmode))
4518 return false;
4519 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4521 /* For certain code models, the code is near as well. */
4522 case LABEL_REF:
4523 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4524 || ix86_cmodel == CM_KERNEL);
4526 /* We may also accept offsetted memory references in certain special
4527 cases. */
4528 case CONST:
4529 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4530 switch (XINT (XEXP (value, 0), 1))
4532 case UNSPEC_GOTPCREL:
4533 case UNSPEC_DTPOFF:
4534 case UNSPEC_GOTNTPOFF:
4535 case UNSPEC_NTPOFF:
4536 return 1;
4537 default:
4538 break;
4540 if (GET_CODE (XEXP (value, 0)) == PLUS)
4542 rtx op1 = XEXP (XEXP (value, 0), 0);
4543 rtx op2 = XEXP (XEXP (value, 0), 1);
4544 HOST_WIDE_INT offset;
4546 if (ix86_cmodel == CM_LARGE)
4547 return 0;
4548 if (GET_CODE (op2) != CONST_INT)
4549 return 0;
4550 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4551 switch (GET_CODE (op1))
4553 case SYMBOL_REF:
4554 /* For CM_SMALL assume that the last object is at most 16MB before
4555 the end of the 31-bit boundary. We may also accept pretty
4556 large negative constants, knowing that all objects are
4557 in the positive half of the address space. */
4558 if (ix86_cmodel == CM_SMALL
4559 && offset < 16*1024*1024
4560 && trunc_int_for_mode (offset, SImode) == offset)
4561 return 1;
4562 /* For CM_KERNEL we know that all objects reside in the
4563 negative half of the 32-bit address space. We may not
4564 accept negative offsets, since they may push a reference just off
4565 the edge, but we may accept pretty large positive ones. */
4566 if (ix86_cmodel == CM_KERNEL
4567 && offset > 0
4568 && trunc_int_for_mode (offset, SImode) == offset)
4569 return 1;
4570 break;
4571 case LABEL_REF:
4572 /* These conditions are similar to SYMBOL_REF ones, just the
4573 constraints for code models differ. */
4574 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4575 && offset < 16*1024*1024
4576 && trunc_int_for_mode (offset, SImode) == offset)
4577 return 1;
4578 if (ix86_cmodel == CM_KERNEL
4579 && offset > 0
4580 && trunc_int_for_mode (offset, SImode) == offset)
4581 return 1;
4582 break;
4583 case UNSPEC:
4584 switch (XINT (op1, 1))
4586 case UNSPEC_DTPOFF:
4587 case UNSPEC_NTPOFF:
4588 if (offset > 0
4589 && trunc_int_for_mode (offset, SImode) == offset)
4590 return 1;
4592 break;
4593 default:
4594 return 0;
4597 return 0;
4598 default:
4599 return 0;
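/* A small worked example of the CONST_INT case above, assuming a 64-bit
   HOST_WIDE_INT (the values are illustrative):

     0x000000007fffffff  -> accepted  (survives the SImode round trip)
     0xffffffff80000000  -> accepted  (already the sign extension of
                                       0x80000000)
     0x0000000080000000  -> rejected  (truncating to SImode and sign
                                       extending yields a different value)

   i.e. exactly the values expressible as a sign-extended 32-bit immediate
   pass the trunc_int_for_mode check.  */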
4603 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4605 x86_64_zero_extended_value (rtx value)
4607 switch (GET_CODE (value))
4609 case CONST_DOUBLE:
4610 if (HOST_BITS_PER_WIDE_INT == 32)
4611 return (GET_MODE (value) == VOIDmode
4612 && !CONST_DOUBLE_HIGH (value));
4613 else
4614 return 0;
4615 case CONST_INT:
4616 if (HOST_BITS_PER_WIDE_INT == 32)
4617 return INTVAL (value) >= 0;
4618 else
4619 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4620 break;
4622 /* For certain code models, the symbolic references are known to fit. */
4623 case SYMBOL_REF:
4624 /* TLS symbols are not constant. */
4625 if (tls_symbolic_operand (value, Pmode))
4626 return false;
4627 return ix86_cmodel == CM_SMALL;
4629 /* For certain code models, the code is near as well. */
4630 case LABEL_REF:
4631 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4633 /* We may also accept offsetted memory references in certain special
4634 cases. */
4635 case CONST:
4636 if (GET_CODE (XEXP (value, 0)) == PLUS)
4638 rtx op1 = XEXP (XEXP (value, 0), 0);
4639 rtx op2 = XEXP (XEXP (value, 0), 1);
4641 if (ix86_cmodel == CM_LARGE)
4642 return 0;
4643 switch (GET_CODE (op1))
4645 case SYMBOL_REF:
4646 return 0;
4647 /* For small code model we may accept pretty large positive
4648 offsets, since one bit is available for free. Negative
4649 offsets are limited by the size of NULL pointer area
4650 specified by the ABI. */
4651 if (ix86_cmodel == CM_SMALL
4652 && GET_CODE (op2) == CONST_INT
4653 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4654 && (trunc_int_for_mode (INTVAL (op2), SImode)
4655 == INTVAL (op2)))
4656 return 1;
4657 /* ??? For the kernel, we may accept adjustment of
4658 -0x10000000, since we know that it will just convert
4659 negative address space to positive, but perhaps this
4660 is not worthwhile. */
4661 break;
4662 case LABEL_REF:
4663 /* These conditions are similar to SYMBOL_REF ones, just the
4664 constraints for code models differ. */
4665 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4666 && GET_CODE (op2) == CONST_INT
4667 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4668 && (trunc_int_for_mode (INTVAL (op2), SImode)
4669 == INTVAL (op2)))
4670 return 1;
4671 break;
4672 default:
4673 return 0;
4676 return 0;
4677 default:
4678 return 0;
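/* The matching examples for the zero-extended test above (again assuming a
   64-bit HOST_WIDE_INT): 0x00000000ffffffff is accepted, while
   0x0000000100000000 and any negative CONST_INT are rejected, since only the
   low 32 bits may be nonzero.  */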
4682 /* Value should be nonzero if functions must have frame pointers.
4683 Zero means the frame pointer need not be set up (and parms may
4684 be accessed via the stack pointer) in functions that seem suitable. */
4687 ix86_frame_pointer_required (void)
4689 /* If we accessed previous frames, then the generated code expects
4690 to be able to access the saved ebp value in our frame. */
4691 if (cfun->machine->accesses_prev_frame)
4692 return 1;
4694 /* Several x86 OSes need a frame pointer for other reasons,
4695 usually pertaining to setjmp. */
4696 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4697 return 1;
4699 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4700 the frame pointer by default. Turn it back on now if we've not
4701 got a leaf function. */
4702 if (TARGET_OMIT_LEAF_FRAME_POINTER
4703 && (!current_function_is_leaf))
4704 return 1;
4706 if (current_function_profile)
4707 return 1;
4709 return 0;
4712 /* Record that the current function accesses previous call frames. */
4714 void
4715 ix86_setup_frame_addresses (void)
4717 cfun->machine->accesses_prev_frame = 1;
4720 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4721 # define USE_HIDDEN_LINKONCE 1
4722 #else
4723 # define USE_HIDDEN_LINKONCE 0
4724 #endif
4726 static int pic_labels_used;
4728 /* Fills in the label name that should be used for a pc thunk for
4729 the given register. */
4731 static void
4732 get_pc_thunk_name (char name[32], unsigned int regno)
4734 if (USE_HIDDEN_LINKONCE)
4735 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4736 else
4737 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4741 /* This function emits, for -fpic, the pc thunk helpers used below: each one
4742 loads its register with the return address of the caller and then returns. */
4744 void
4745 ix86_file_end (void)
4747 rtx xops[2];
4748 int regno;
4750 for (regno = 0; regno < 8; ++regno)
4752 char name[32];
4754 if (! ((pic_labels_used >> regno) & 1))
4755 continue;
4757 get_pc_thunk_name (name, regno);
4759 if (USE_HIDDEN_LINKONCE)
4761 tree decl;
4763 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4764 error_mark_node);
4765 TREE_PUBLIC (decl) = 1;
4766 TREE_STATIC (decl) = 1;
4767 DECL_ONE_ONLY (decl) = 1;
4769 (*targetm.asm_out.unique_section) (decl, 0);
4770 named_section (decl, NULL, 0);
4772 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4773 fputs ("\t.hidden\t", asm_out_file);
4774 assemble_name (asm_out_file, name);
4775 fputc ('\n', asm_out_file);
4776 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4778 else
4780 text_section ();
4781 ASM_OUTPUT_LABEL (asm_out_file, name);
4784 xops[0] = gen_rtx_REG (SImode, regno);
4785 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4786 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4787 output_asm_insn ("ret", xops);
4790 if (NEED_INDICATE_EXEC_STACK)
4791 file_end_indicate_exec_stack ();
4794 /* Emit code for the SET_GOT patterns. */
4796 const char *
4797 output_set_got (rtx dest)
4799 rtx xops[3];
4801 xops[0] = dest;
4802 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4804 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4806 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4808 if (!flag_pic)
4809 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4810 else
4811 output_asm_insn ("call\t%a2", xops);
4813 #if TARGET_MACHO
4814 /* Output the "canonical" label name ("Lxx$pb") here too. This
4815 is what will be referred to by the Mach-O PIC subsystem. */
4816 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4817 #endif
4818 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4819 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4821 if (flag_pic)
4822 output_asm_insn ("pop{l}\t%0", xops);
4824 else
4826 char name[32];
4827 get_pc_thunk_name (name, REGNO (dest));
4828 pic_labels_used |= 1 << REGNO (dest);
4830 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4831 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4832 output_asm_insn ("call\t%X2", xops);
4835 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4836 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4837 else if (!TARGET_MACHO)
4838 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4840 return "";
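/* Roughly, the two strategies above produce assembly of the following shape
   for a destination of %ebx (this is only a sketch of the -fpic case; label
   names are illustrative and the exact operand syntax is whatever the
   templates above expand to):

   without deep branch prediction:        with the hidden pc thunk:

       call  .L2                              call  __i686.get_pc_thunk.bx
     .L2:                                     add   $_GLOBAL_OFFSET_TABLE_, %ebx
       pop   %ebx
       add   $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx  */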
4843 /* Generate a "push" pattern for input ARG. */
4845 static rtx
4846 gen_push (rtx arg)
4848 return gen_rtx_SET (VOIDmode,
4849 gen_rtx_MEM (Pmode,
4850 gen_rtx_PRE_DEC (Pmode,
4851 stack_pointer_rtx)),
4852 arg);
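/* For illustration, gen_push (gen_rtx_REG (SImode, 0)) builds RTL of the
   shape (shown for !TARGET_64BIT, where Pmode is SImode):

     (set (mem:SI (pre_dec:SI (reg:SI sp)))
          (reg:SI 0))

   which is the form the push patterns in the machine description recognize.  */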
4855 /* Return >= 0 if there is an unused call-clobbered register available
4856 for the entire function. */
4858 static unsigned int
4859 ix86_select_alt_pic_regnum (void)
4861 if (current_function_is_leaf && !current_function_profile)
4863 int i;
4864 for (i = 2; i >= 0; --i)
4865 if (!regs_ever_live[i])
4866 return i;
4869 return INVALID_REGNUM;
4872 /* Return 1 if we need to save REGNO. */
4873 static int
4874 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4876 if (pic_offset_table_rtx
4877 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4878 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4879 || current_function_profile
4880 || current_function_calls_eh_return
4881 || current_function_uses_const_pool))
4883 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4884 return 0;
4885 return 1;
4888 if (current_function_calls_eh_return && maybe_eh_return)
4890 unsigned i;
4891 for (i = 0; ; i++)
4893 unsigned test = EH_RETURN_DATA_REGNO (i);
4894 if (test == INVALID_REGNUM)
4895 break;
4896 if (test == regno)
4897 return 1;
4901 return (regs_ever_live[regno]
4902 && !call_used_regs[regno]
4903 && !fixed_regs[regno]
4904 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4907 /* Return number of registers to be saved on the stack. */
4909 static int
4910 ix86_nsaved_regs (void)
4912 int nregs = 0;
4913 int regno;
4915 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4916 if (ix86_save_reg (regno, true))
4917 nregs++;
4918 return nregs;
4921 /* Return the offset between two registers, one to be eliminated, and the other
4922 its replacement, at the start of a routine. */
4924 HOST_WIDE_INT
4925 ix86_initial_elimination_offset (int from, int to)
4927 struct ix86_frame frame;
4928 ix86_compute_frame_layout (&frame);
4930 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4931 return frame.hard_frame_pointer_offset;
4932 else if (from == FRAME_POINTER_REGNUM
4933 && to == HARD_FRAME_POINTER_REGNUM)
4934 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4935 else
4937 if (to != STACK_POINTER_REGNUM)
4938 abort ();
4939 else if (from == ARG_POINTER_REGNUM)
4940 return frame.stack_pointer_offset;
4941 else if (from != FRAME_POINTER_REGNUM)
4942 abort ();
4943 else
4944 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4948 /* Fill in the ix86_frame structure FRAME describing the frame of the currently compiled function. */
4950 static void
4951 ix86_compute_frame_layout (struct ix86_frame *frame)
4953 HOST_WIDE_INT total_size;
4954 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4955 HOST_WIDE_INT offset;
4956 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4957 HOST_WIDE_INT size = get_frame_size ();
4959 frame->nregs = ix86_nsaved_regs ();
4960 total_size = size;
4962 /* During reload iteration the number of registers saved can change.
4963 Recompute the value as needed. Do not recompute when the number of registers
4964 didn't change, as reload does multiple calls to the function and does not
4965 expect the decision to change within a single iteration. */
4966 if (!optimize_size
4967 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4969 int count = frame->nregs;
4971 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4972 /* The fast prologue uses move instead of push to save registers. This
4973 is significantly longer, but also executes faster, as modern hardware
4974 can execute the moves in parallel but can't do that for push/pop.
4976 Be careful about choosing which prologue to emit: when the function takes
4977 many instructions to execute we may use the slow version, as we also do
4978 when the function is known to be outside a hot spot (this is known only
4979 with feedback). Weight the size of the function by the number of registers
4980 to save, as it is cheap to use one or two push instructions but very
4981 slow to use many of them. */
4982 if (count)
4983 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4984 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4985 || (flag_branch_probabilities
4986 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4987 cfun->machine->use_fast_prologue_epilogue = false;
4988 else
4989 cfun->machine->use_fast_prologue_epilogue
4990 = !expensive_function_p (count);
4992 if (TARGET_PROLOGUE_USING_MOVE
4993 && cfun->machine->use_fast_prologue_epilogue)
4994 frame->save_regs_using_mov = true;
4995 else
4996 frame->save_regs_using_mov = false;
4999 /* Skip return address and saved base pointer. */
5000 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5002 frame->hard_frame_pointer_offset = offset;
5004 /* Do some sanity checking of stack_alignment_needed and
5005 preferred_alignment, since the i386 port is the only one using these
5006 features, and they may break easily. */
5008 if (size && !stack_alignment_needed)
5009 abort ();
5010 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
5011 abort ();
5012 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5013 abort ();
5014 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5015 abort ();
5017 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5018 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5020 /* Register save area */
5021 offset += frame->nregs * UNITS_PER_WORD;
5023 /* Va-arg area */
5024 if (ix86_save_varrargs_registers)
5026 offset += X86_64_VARARGS_SIZE;
5027 frame->va_arg_size = X86_64_VARARGS_SIZE;
5029 else
5030 frame->va_arg_size = 0;
5032 /* Align start of frame for local function. */
5033 frame->padding1 = ((offset + stack_alignment_needed - 1)
5034 & -stack_alignment_needed) - offset;
5036 offset += frame->padding1;
5038 /* Frame pointer points here. */
5039 frame->frame_pointer_offset = offset;
5041 offset += size;
5043 /* Add the outgoing arguments area. It can be skipped if we eliminated
5044 all the function calls as dead code.
5045 Skipping is, however, impossible when the function calls alloca: the alloca
5046 expander assumes that the last current_function_outgoing_args_size bytes
5047 of the stack frame are unused. */
5048 if (ACCUMULATE_OUTGOING_ARGS
5049 && (!current_function_is_leaf || current_function_calls_alloca))
5051 offset += current_function_outgoing_args_size;
5052 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5054 else
5055 frame->outgoing_arguments_size = 0;
5057 /* Align stack boundary. Only needed if we're calling another function
5058 or using alloca. */
5059 if (!current_function_is_leaf || current_function_calls_alloca)
5060 frame->padding2 = ((offset + preferred_alignment - 1)
5061 & -preferred_alignment) - offset;
5062 else
5063 frame->padding2 = 0;
5065 offset += frame->padding2;
5067 /* We've reached end of stack frame. */
5068 frame->stack_pointer_offset = offset;
5070 /* Size prologue needs to allocate. */
5071 frame->to_allocate =
5072 (size + frame->padding1 + frame->padding2
5073 + frame->outgoing_arguments_size + frame->va_arg_size);
5075 if ((!frame->to_allocate && frame->nregs <= 1)
5076 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5077 frame->save_regs_using_mov = false;
5079 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5080 && current_function_is_leaf)
5082 frame->red_zone_size = frame->to_allocate;
5083 if (frame->save_regs_using_mov)
5084 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5085 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5086 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5088 else
5089 frame->red_zone_size = 0;
5090 frame->to_allocate -= frame->red_zone_size;
5091 frame->stack_pointer_offset -= frame->red_zone_size;
5092 #if 0
5093 fprintf (stderr, "nregs: %i\n", frame->nregs);
5094 fprintf (stderr, "size: %i\n", size);
5095 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5096 fprintf (stderr, "padding1: %i\n", frame->padding1);
5097 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5098 fprintf (stderr, "padding2: %i\n", frame->padding2);
5099 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5100 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5101 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5102 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5103 frame->hard_frame_pointer_offset);
5104 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5105 #endif
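/* A rough picture of the layout computed above, for the frame-pointer case
   (higher addresses at the top; offsets are measured from the incoming
   argument pointer, as used by ix86_initial_elimination_offset):

       return address
       saved %ebp                  <- hard_frame_pointer_offset
       register save area          (frame->nregs * UNITS_PER_WORD)
       va-arg save area            (frame->va_arg_size)
       padding1                    (aligns to stack_alignment_needed)
                                   <- frame_pointer_offset
       local variables             (get_frame_size ())
       outgoing argument area      (frame->outgoing_arguments_size)
       padding2                    (aligns to preferred_alignment)
                                   <- stack_pointer_offset

   The red zone, when usable, is subtracted from to_allocate and from
   stack_pointer_offset at the very end.  */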
5108 /* Emit code to save registers in the prologue. */
5110 static void
5111 ix86_emit_save_regs (void)
5113 int regno;
5114 rtx insn;
5116 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5117 if (ix86_save_reg (regno, true))
5119 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5120 RTX_FRAME_RELATED_P (insn) = 1;
5124 /* Emit code to save registers using MOV insns. The first register
5125 is saved at POINTER + OFFSET. */
5126 static void
5127 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5129 int regno;
5130 rtx insn;
5132 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5133 if (ix86_save_reg (regno, true))
5135 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5136 Pmode, offset),
5137 gen_rtx_REG (Pmode, regno));
5138 RTX_FRAME_RELATED_P (insn) = 1;
5139 offset += UNITS_PER_WORD;
5143 /* Expand a prologue or epilogue stack adjustment.
5144 The pattern exists to put a dependency on all ebp-based memory accesses.
5145 STYLE should be negative if instructions should be marked as frame related,
5146 zero if the %r11 register is live and cannot be freely used, and positive
5147 otherwise. */
5149 static void
5150 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5152 rtx insn;
5154 if (! TARGET_64BIT)
5155 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5156 else if (x86_64_immediate_operand (offset, DImode))
5157 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5158 else
5160 rtx r11;
5161 /* r11 is used by indirect sibcall return as well, set before the
5162 epilogue and used after the epilogue. ATM indirect sibcall
5163 shouldn't be used together with huge frame sizes in one
5164 function because of the frame_size check in sibcall.c. */
5165 if (style == 0)
5166 abort ();
5167 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5168 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5169 if (style < 0)
5170 RTX_FRAME_RELATED_P (insn) = 1;
5171 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5172 offset));
5174 if (style < 0)
5175 RTX_FRAME_RELATED_P (insn) = 1;
5178 /* Expand the prologue into a bunch of separate insns. */
5180 void
5181 ix86_expand_prologue (void)
5183 rtx insn;
5184 bool pic_reg_used;
5185 struct ix86_frame frame;
5186 HOST_WIDE_INT allocate;
5188 ix86_compute_frame_layout (&frame);
5190 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5191 slower on all targets. Also sdb doesn't like it. */
5193 if (frame_pointer_needed)
5195 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5196 RTX_FRAME_RELATED_P (insn) = 1;
5198 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5199 RTX_FRAME_RELATED_P (insn) = 1;
5202 allocate = frame.to_allocate;
5204 if (!frame.save_regs_using_mov)
5205 ix86_emit_save_regs ();
5206 else
5207 allocate += frame.nregs * UNITS_PER_WORD;
5209 /* When using the red zone we may start saving registers before allocating
5210 the stack frame, saving one cycle of the prologue. */
5211 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5212 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5213 : stack_pointer_rtx,
5214 -frame.nregs * UNITS_PER_WORD);
5216 if (allocate == 0)
5218 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5219 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5220 GEN_INT (-allocate), -1);
5221 else
5223 /* Only valid for Win32. */
5224 rtx eax = gen_rtx_REG (SImode, 0);
5225 bool eax_live = ix86_eax_live_at_start_p ();
5227 if (TARGET_64BIT)
5228 abort ();
5230 if (eax_live)
5232 emit_insn (gen_push (eax));
5233 allocate -= 4;
5236 insn = emit_move_insn (eax, GEN_INT (allocate));
5237 RTX_FRAME_RELATED_P (insn) = 1;
5239 insn = emit_insn (gen_allocate_stack_worker (eax));
5240 RTX_FRAME_RELATED_P (insn) = 1;
5242 if (eax_live)
5244 rtx t = plus_constant (stack_pointer_rtx, allocate);
5245 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5249 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5251 if (!frame_pointer_needed || !frame.to_allocate)
5252 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5253 else
5254 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5255 -frame.nregs * UNITS_PER_WORD);
5258 pic_reg_used = false;
5259 if (pic_offset_table_rtx
5260 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5261 || current_function_profile))
5263 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5265 if (alt_pic_reg_used != INVALID_REGNUM)
5266 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5268 pic_reg_used = true;
5271 if (pic_reg_used)
5273 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5275 /* Even with accurate pre-reload life analysis, we can wind up
5276 deleting all references to the pic register after reload.
5277 Consider the case where cross-jumping unifies the two sides of a branch
5278 controlled by a comparison versus the only read from a global.
5279 In that case, allow the set_got to be deleted, though we're
5280 too late to do anything about the ebx save in the prologue. */
5281 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5284 /* Prevent function calls from being scheduled before the call to mcount.
5285 In the pic_reg_used case, make sure that the got load isn't deleted. */
5286 if (current_function_profile)
5287 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5290 /* Emit code to restore saved registers using MOV insns. First register
5291 is restored from POINTER + OFFSET. */
5292 static void
5293 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5294 int maybe_eh_return)
5296 int regno;
5297 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5299 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5300 if (ix86_save_reg (regno, maybe_eh_return))
5302 /* Ensure that adjust_address won't be forced to produce pointer
5303 out of range allowed by x86-64 instruction set. */
5304 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5306 rtx r11;
5308 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5309 emit_move_insn (r11, GEN_INT (offset));
5310 emit_insn (gen_adddi3 (r11, r11, pointer));
5311 base_address = gen_rtx_MEM (Pmode, r11);
5312 offset = 0;
5314 emit_move_insn (gen_rtx_REG (Pmode, regno),
5315 adjust_address (base_address, Pmode, offset));
5316 offset += UNITS_PER_WORD;
5320 /* Restore function stack, frame, and registers. */
5322 void
5323 ix86_expand_epilogue (int style)
5325 int regno;
5326 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5327 struct ix86_frame frame;
5328 HOST_WIDE_INT offset;
5330 ix86_compute_frame_layout (&frame);
5332 /* Calculate start of saved registers relative to ebp. Special care
5333 must be taken for the normal return case of a function using
5334 eh_return: the eax and edx registers are marked as saved, but not
5335 restored along this path. */
5336 offset = frame.nregs;
5337 if (current_function_calls_eh_return && style != 2)
5338 offset -= 2;
5339 offset *= -UNITS_PER_WORD;
5341 /* If we're only restoring one register and sp is not valid, then
5342 use a move instruction to restore the register, since it's
5343 less work than reloading sp and popping the register.
5345 The default code results in a stack adjustment using an add/lea instruction,
5346 while this code results in a LEAVE instruction (or discrete equivalent),
5347 so it is profitable in some other cases as well, especially when there
5348 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5349 is set and there is exactly one register to pop. This heuristic may need
5350 some tuning in the future. */
5351 if ((!sp_valid && frame.nregs <= 1)
5352 || (TARGET_EPILOGUE_USING_MOVE
5353 && cfun->machine->use_fast_prologue_epilogue
5354 && (frame.nregs > 1 || frame.to_allocate))
5355 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5356 || (frame_pointer_needed && TARGET_USE_LEAVE
5357 && cfun->machine->use_fast_prologue_epilogue
5358 && frame.nregs == 1)
5359 || current_function_calls_eh_return)
5361 /* Restore registers. We can use ebp or esp to address the memory
5362 locations. If both are available, default to ebp, since offsets
5363 are known to be small. The only exception is esp pointing directly to
5364 the end of the block of saved registers, where we may simplify the
5365 addressing mode. */
5367 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5368 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5369 frame.to_allocate, style == 2);
5370 else
5371 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5372 offset, style == 2);
5374 /* eh_return epilogues need %ecx added to the stack pointer. */
5375 if (style == 2)
5377 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5379 if (frame_pointer_needed)
5381 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5382 tmp = plus_constant (tmp, UNITS_PER_WORD);
5383 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5385 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5386 emit_move_insn (hard_frame_pointer_rtx, tmp);
5388 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5389 const0_rtx, style);
5391 else
5393 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5394 tmp = plus_constant (tmp, (frame.to_allocate
5395 + frame.nregs * UNITS_PER_WORD));
5396 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5399 else if (!frame_pointer_needed)
5400 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5401 GEN_INT (frame.to_allocate
5402 + frame.nregs * UNITS_PER_WORD),
5403 style);
5404 /* If not an i386, mov & pop is faster than "leave". */
5405 else if (TARGET_USE_LEAVE || optimize_size
5406 || !cfun->machine->use_fast_prologue_epilogue)
5407 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5408 else
5410 pro_epilogue_adjust_stack (stack_pointer_rtx,
5411 hard_frame_pointer_rtx,
5412 const0_rtx, style);
5413 if (TARGET_64BIT)
5414 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5415 else
5416 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5419 else
5421 /* First step is to deallocate the stack frame so that we can
5422 pop the registers. */
5423 if (!sp_valid)
5425 if (!frame_pointer_needed)
5426 abort ();
5427 pro_epilogue_adjust_stack (stack_pointer_rtx,
5428 hard_frame_pointer_rtx,
5429 GEN_INT (offset), style);
5431 else if (frame.to_allocate)
5432 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5433 GEN_INT (frame.to_allocate), style);
5435 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5436 if (ix86_save_reg (regno, false))
5438 if (TARGET_64BIT)
5439 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5440 else
5441 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5443 if (frame_pointer_needed)
5445 /* Leave results in shorter dependency chains on CPUs that are
5446 able to grok it fast. */
5447 if (TARGET_USE_LEAVE)
5448 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5449 else if (TARGET_64BIT)
5450 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5451 else
5452 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5456 /* Sibcall epilogues don't want a return instruction. */
5457 if (style == 0)
5458 return;
5460 if (current_function_pops_args && current_function_args_size)
5462 rtx popc = GEN_INT (current_function_pops_args);
5464 /* i386 can only pop 64K bytes. If asked to pop more, pop the
5465 return address, do an explicit add, and jump indirectly to the
5466 caller. */
5468 if (current_function_pops_args >= 65536)
5470 rtx ecx = gen_rtx_REG (SImode, 2);
5472 /* There is no "pascal" calling convention in the 64-bit ABI. */
5473 if (TARGET_64BIT)
5474 abort ();
5476 emit_insn (gen_popsi1 (ecx));
5477 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5478 emit_jump_insn (gen_return_indirect_internal (ecx));
5480 else
5481 emit_jump_insn (gen_return_pop_internal (popc));
5483 else
5484 emit_jump_insn (gen_return_internal ());
5487 /* Reset from the function's potential modifications. */
5489 static void
5490 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5491 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5493 if (pic_offset_table_rtx)
5494 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5497 /* Extract the parts of an RTL expression that is a valid memory address
5498 for an instruction. Return 0 if the structure of the address is
5499 grossly off. Return -1 if the address contains ASHIFT, so it is not
5500 strictly valid but is still used for computing the length of a lea instruction. */
5502 static int
5503 ix86_decompose_address (rtx addr, struct ix86_address *out)
5505 rtx base = NULL_RTX;
5506 rtx index = NULL_RTX;
5507 rtx disp = NULL_RTX;
5508 HOST_WIDE_INT scale = 1;
5509 rtx scale_rtx = NULL_RTX;
5510 int retval = 1;
5511 enum ix86_address_seg seg = SEG_DEFAULT;
5513 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5514 base = addr;
5515 else if (GET_CODE (addr) == PLUS)
5517 rtx addends[4], op;
5518 int n = 0, i;
5520 op = addr;
5523 if (n >= 4)
5524 return 0;
5525 addends[n++] = XEXP (op, 1);
5526 op = XEXP (op, 0);
5528 while (GET_CODE (op) == PLUS);
5529 if (n >= 4)
5530 return 0;
5531 addends[n] = op;
5533 for (i = n; i >= 0; --i)
5535 op = addends[i];
5536 switch (GET_CODE (op))
5538 case MULT:
5539 if (index)
5540 return 0;
5541 index = XEXP (op, 0);
5542 scale_rtx = XEXP (op, 1);
5543 break;
5545 case UNSPEC:
5546 if (XINT (op, 1) == UNSPEC_TP
5547 && TARGET_TLS_DIRECT_SEG_REFS
5548 && seg == SEG_DEFAULT)
5549 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5550 else
5551 return 0;
5552 break;
5554 case REG:
5555 case SUBREG:
5556 if (!base)
5557 base = op;
5558 else if (!index)
5559 index = op;
5560 else
5561 return 0;
5562 break;
5564 case CONST:
5565 case CONST_INT:
5566 case SYMBOL_REF:
5567 case LABEL_REF:
5568 if (disp)
5569 return 0;
5570 disp = op;
5571 break;
5573 default:
5574 return 0;
5578 else if (GET_CODE (addr) == MULT)
5580 index = XEXP (addr, 0); /* index*scale */
5581 scale_rtx = XEXP (addr, 1);
5583 else if (GET_CODE (addr) == ASHIFT)
5585 rtx tmp;
5587 /* We're called for lea too, which implements ashift on occasion. */
5588 index = XEXP (addr, 0);
5589 tmp = XEXP (addr, 1);
5590 if (GET_CODE (tmp) != CONST_INT)
5591 return 0;
5592 scale = INTVAL (tmp);
5593 if ((unsigned HOST_WIDE_INT) scale > 3)
5594 return 0;
5595 scale = 1 << scale;
5596 retval = -1;
5598 else
5599 disp = addr; /* displacement */
5601 /* Extract the integral value of scale. */
5602 if (scale_rtx)
5604 if (GET_CODE (scale_rtx) != CONST_INT)
5605 return 0;
5606 scale = INTVAL (scale_rtx);
5609 /* Allow arg pointer and stack pointer as index if there is no scaling. */
5610 if (base && index && scale == 1
5611 && (index == arg_pointer_rtx
5612 || index == frame_pointer_rtx
5613 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5615 rtx tmp = base;
5616 base = index;
5617 index = tmp;
5620 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5621 if ((base == hard_frame_pointer_rtx
5622 || base == frame_pointer_rtx
5623 || base == arg_pointer_rtx) && !disp)
5624 disp = const0_rtx;
5626 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
5627 Avoid this by transforming to [%esi+0]. */
5628 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5629 && base && !index && !disp
5630 && REG_P (base)
5631 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5632 disp = const0_rtx;
5634 /* Special case: encode reg+reg instead of reg*2. */
5635 if (!base && index && scale && scale == 2)
5636 base = index, scale = 1;
5638 /* Special case: scaling cannot be encoded without base or displacement. */
5639 if (!base && !disp && index && scale != 1)
5640 disp = const0_rtx;
5642 out->base = base;
5643 out->index = index;
5644 out->disp = disp;
5645 out->scale = scale;
5646 out->seg = seg;
5648 return retval;
5651 /* Return cost of the memory address x.
5652 For i386, it is better to use a complex address than let gcc copy
5653 the address into a reg and make a new pseudo. But not if the address
5654 requires two regs - that would mean more pseudos with longer
5655 lifetimes. */
5656 static int
5657 ix86_address_cost (rtx x)
5659 struct ix86_address parts;
5660 int cost = 1;
5662 if (!ix86_decompose_address (x, &parts))
5663 abort ();
5665 /* More complex memory references are better. */
5666 if (parts.disp && parts.disp != const0_rtx)
5667 cost--;
5668 if (parts.seg != SEG_DEFAULT)
5669 cost--;
5671 /* Attempt to minimize number of registers in the address. */
5672 if ((parts.base
5673 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5674 || (parts.index
5675 && (!REG_P (parts.index)
5676 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5677 cost++;
5679 if (parts.base
5680 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5681 && parts.index
5682 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5683 && parts.base != parts.index)
5684 cost++;
5686 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5687 since its predecode logic can't detect the length of instructions
5688 and it degenerates to vector decoding. Increase the cost of such
5689 addresses here. The penalty is at least 2 cycles. It may be worthwhile
5690 to split such addresses or even to reject them outright.
5692 The following addressing modes are affected:
5693 [base+scale*index]
5694 [scale*index+disp]
5695 [base+index]
5697 The first and last case may be avoidable by explicitly coding the zero in
5698 the memory address, but I don't have an AMD-K6 machine handy to check this
5699 theory. */
5701 if (TARGET_K6
5702 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5703 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5704 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5705 cost += 10;
5707 return cost;
5710 /* If X is a machine specific address (i.e. a symbol or label being
5711 referenced as a displacement from the GOT implemented using an
5712 UNSPEC), then return the base term. Otherwise return X. */
5715 ix86_find_base_term (rtx x)
5717 rtx term;
5719 if (TARGET_64BIT)
5721 if (GET_CODE (x) != CONST)
5722 return x;
5723 term = XEXP (x, 0);
5724 if (GET_CODE (term) == PLUS
5725 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5726 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5727 term = XEXP (term, 0);
5728 if (GET_CODE (term) != UNSPEC
5729 || XINT (term, 1) != UNSPEC_GOTPCREL)
5730 return x;
5732 term = XVECEXP (term, 0, 0);
5734 if (GET_CODE (term) != SYMBOL_REF
5735 && GET_CODE (term) != LABEL_REF)
5736 return x;
5738 return term;
5741 term = ix86_delegitimize_address (x);
5743 if (GET_CODE (term) != SYMBOL_REF
5744 && GET_CODE (term) != LABEL_REF)
5745 return x;
5747 return term;
5750 /* Determine if a given RTX is a valid constant. We already know this
5751 satisfies CONSTANT_P. */
5753 bool
5754 legitimate_constant_p (rtx x)
5756 rtx inner;
5758 switch (GET_CODE (x))
5760 case SYMBOL_REF:
5761 /* TLS symbols are not constant. */
5762 if (tls_symbolic_operand (x, Pmode))
5763 return false;
5764 break;
5766 case CONST:
5767 inner = XEXP (x, 0);
5769 /* Offsets of TLS symbols are never valid.
5770 Discourage CSE from creating them. */
5771 if (GET_CODE (inner) == PLUS
5772 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5773 return false;
5775 if (GET_CODE (inner) == PLUS)
5777 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
5778 return false;
5779 inner = XEXP (inner, 0);
5782 /* Only some unspecs are valid as "constants". */
5783 if (GET_CODE (inner) == UNSPEC)
5784 switch (XINT (inner, 1))
5786 case UNSPEC_TPOFF:
5787 case UNSPEC_NTPOFF:
5788 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5789 case UNSPEC_DTPOFF:
5790 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5791 default:
5792 return false;
5794 break;
5796 default:
5797 break;
5800 /* Otherwise we handle everything else in the move patterns. */
5801 return true;
5804 /* Determine if it's legal to put X into the constant pool. This
5805 is not possible for the address of thread-local symbols, which
5806 is checked above. */
5808 static bool
5809 ix86_cannot_force_const_mem (rtx x)
5811 return !legitimate_constant_p (x);
5814 /* Determine if a given RTX is a valid constant address. */
5816 bool
5817 constant_address_p (rtx x)
5819 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5822 /* Nonzero if the constant value X is a legitimate general operand
5823 when generating PIC code. It is given that flag_pic is on and
5824 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5826 bool
5827 legitimate_pic_operand_p (rtx x)
5829 rtx inner;
5831 switch (GET_CODE (x))
5833 case CONST:
5834 inner = XEXP (x, 0);
5836 /* Only some unspecs are valid as "constants". */
5837 if (GET_CODE (inner) == UNSPEC)
5838 switch (XINT (inner, 1))
5840 case UNSPEC_TPOFF:
5841 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5842 default:
5843 return false;
5845 /* FALLTHRU */
5847 case SYMBOL_REF:
5848 case LABEL_REF:
5849 return legitimate_pic_address_disp_p (x);
5851 default:
5852 return true;
5856 /* Determine if a given CONST RTX is a valid memory displacement
5857 in PIC mode. */
5860 legitimate_pic_address_disp_p (rtx disp)
5862 bool saw_plus;
5864 /* In 64bit mode we can allow direct addresses of symbols and labels
5865 when they are not dynamic symbols. */
5866 if (TARGET_64BIT)
5868 /* TLS references should always be enclosed in UNSPEC. */
5869 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5870 return 0;
5871 if (GET_CODE (disp) == SYMBOL_REF
5872 && ix86_cmodel == CM_SMALL_PIC
5873 && SYMBOL_REF_LOCAL_P (disp))
5874 return 1;
5875 if (GET_CODE (disp) == LABEL_REF)
5876 return 1;
5877 if (GET_CODE (disp) == CONST
5878 && GET_CODE (XEXP (disp, 0)) == PLUS)
5880 rtx op0 = XEXP (XEXP (disp, 0), 0);
5881 rtx op1 = XEXP (XEXP (disp, 0), 1);
5883 /* TLS references should always be enclosed in UNSPEC. */
5884 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5885 return 0;
5886 if (((GET_CODE (op0) == SYMBOL_REF
5887 && ix86_cmodel == CM_SMALL_PIC
5888 && SYMBOL_REF_LOCAL_P (op0))
5889 || GET_CODE (op0) == LABEL_REF)
5890 && GET_CODE (op1) == CONST_INT
5891 && INTVAL (op1) < 16*1024*1024
5892 && INTVAL (op1) >= -16*1024*1024)
5893 return 1;
5896 if (GET_CODE (disp) != CONST)
5897 return 0;
5898 disp = XEXP (disp, 0);
5900 if (TARGET_64BIT)
5902 /* It is unsafe to allow PLUS expressions here; disallowing them limits the
5903 allowed distance into GOT tables. We should not need them anyway. */
5904 if (GET_CODE (disp) != UNSPEC
5905 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5906 return 0;
5908 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5909 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5910 return 0;
5911 return 1;
5914 saw_plus = false;
5915 if (GET_CODE (disp) == PLUS)
5917 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5918 return 0;
5919 disp = XEXP (disp, 0);
5920 saw_plus = true;
5923 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5924 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5926 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5927 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5928 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5930 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5931 if (! strcmp (sym_name, "<pic base>"))
5932 return 1;
5936 if (GET_CODE (disp) != UNSPEC)
5937 return 0;
5939 switch (XINT (disp, 1))
5941 case UNSPEC_GOT:
5942 if (saw_plus)
5943 return false;
5944 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5945 case UNSPEC_GOTOFF:
5946 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5947 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5948 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5949 return false;
5950 case UNSPEC_GOTTPOFF:
5951 case UNSPEC_GOTNTPOFF:
5952 case UNSPEC_INDNTPOFF:
5953 if (saw_plus)
5954 return false;
5955 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5956 case UNSPEC_NTPOFF:
5957 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5958 case UNSPEC_DTPOFF:
5959 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5962 return 0;
5965 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5966 memory address for an instruction. The MODE argument is the machine mode
5967 for the MEM expression that wants to use this address.
5969 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5970 convert common non-canonical forms to canonical form so that they will
5971 be recognized. */
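/* An added illustration (a sketch, not part of the original comment):
   (plus (reg %ebx) (mult (reg %ecx) (const_int 4))) decomposes cleanly and
   is accepted here, while the equivalent
   (plus (reg %ebx) (ashift (reg %ecx) (const_int 2))) is rejected; the
   ASHIFT form is expected to be rewritten into a MULT by
   legitimize_address below.  */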
5974 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5976 struct ix86_address parts;
5977 rtx base, index, disp;
5978 HOST_WIDE_INT scale;
5979 const char *reason = NULL;
5980 rtx reason_rtx = NULL_RTX;
5982 if (TARGET_DEBUG_ADDR)
5984 fprintf (stderr,
5985 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5986 GET_MODE_NAME (mode), strict);
5987 debug_rtx (addr);
5990 if (ix86_decompose_address (addr, &parts) <= 0)
5992 reason = "decomposition failed";
5993 goto report_error;
5996 base = parts.base;
5997 index = parts.index;
5998 disp = parts.disp;
5999 scale = parts.scale;
6001 /* Validate base register.
6003 Don't allow SUBREGs here; they can lead to spill failures when the base
6004 is one word out of a two word structure, which is represented internally
6005 as a DImode int. */
6007 if (base)
6009 reason_rtx = base;
6011 if (GET_CODE (base) != REG)
6013 reason = "base is not a register";
6014 goto report_error;
6017 if (GET_MODE (base) != Pmode)
6019 reason = "base is not in Pmode";
6020 goto report_error;
6023 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
6024 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
6026 reason = "base is not valid";
6027 goto report_error;
6031 /* Validate index register.
6033 Don't allow SUBREGs here; they can lead to spill failures when the index
6034 is one word out of a two word structure, which is represented internally
6035 as a DImode int. */
6037 if (index)
6039 reason_rtx = index;
6041 if (GET_CODE (index) != REG)
6043 reason = "index is not a register";
6044 goto report_error;
6047 if (GET_MODE (index) != Pmode)
6049 reason = "index is not in Pmode";
6050 goto report_error;
6053 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
6054 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
6056 reason = "index is not valid";
6057 goto report_error;
6061 /* Validate scale factor. */
6062 if (scale != 1)
6064 reason_rtx = GEN_INT (scale);
6065 if (!index)
6067 reason = "scale without index";
6068 goto report_error;
6071 if (scale != 2 && scale != 4 && scale != 8)
6073 reason = "scale is not a valid multiplier";
6074 goto report_error;
6078 /* Validate displacement. */
6079 if (disp)
6081 reason_rtx = disp;
6083 if (GET_CODE (disp) == CONST
6084 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6085 switch (XINT (XEXP (disp, 0), 1))
6087 case UNSPEC_GOT:
6088 case UNSPEC_GOTOFF:
6089 case UNSPEC_GOTPCREL:
6090 if (!flag_pic)
6091 abort ();
6092 goto is_legitimate_pic;
6094 case UNSPEC_GOTTPOFF:
6095 case UNSPEC_GOTNTPOFF:
6096 case UNSPEC_INDNTPOFF:
6097 case UNSPEC_NTPOFF:
6098 case UNSPEC_DTPOFF:
6099 break;
6101 default:
6102 reason = "invalid address unspec";
6103 goto report_error;
6106 else if (flag_pic && (SYMBOLIC_CONST (disp)
6107 #if TARGET_MACHO
6108 && !machopic_operand_p (disp)
6109 #endif
6112 is_legitimate_pic:
6113 if (TARGET_64BIT && (index || base))
6115 /* foo@dtpoff(%rX) is ok. */
6116 if (GET_CODE (disp) != CONST
6117 || GET_CODE (XEXP (disp, 0)) != PLUS
6118 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6119 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6120 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6121 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6123 reason = "non-constant pic memory reference";
6124 goto report_error;
6127 else if (! legitimate_pic_address_disp_p (disp))
6129 reason = "displacement is an invalid pic construct";
6130 goto report_error;
6133 /* This code used to verify that a symbolic pic displacement
6134 includes the pic_offset_table_rtx register.
6136 While this is a good idea, unfortunately these constructs may
6137 be created by the "adds using lea" optimization for incorrect
6138 code like:
6140 int a;
6141 int foo(int i)
6143 return *(&a+i);
6146 This code is nonsensical, but it results in addressing the
6147 GOT table with a pic_offset_table_rtx base. We can't
6148 just reject it easily, since it gets matched by the
6149 "addsi3" pattern, which later gets split into a lea when the
6150 output register differs from the input. While this
6151 could be handled by a separate addsi pattern for this case
6152 that never results in a lea, disabling this test seems to be
6153 the simpler and correct fix for the crash. */
6155 else if (GET_CODE (disp) != LABEL_REF
6156 && GET_CODE (disp) != CONST_INT
6157 && (GET_CODE (disp) != CONST
6158 || !legitimate_constant_p (disp))
6159 && (GET_CODE (disp) != SYMBOL_REF
6160 || !legitimate_constant_p (disp)))
6162 reason = "displacement is not constant";
6163 goto report_error;
6165 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6167 reason = "displacement is out of range";
6168 goto report_error;
6172 /* Everything looks valid. */
6173 if (TARGET_DEBUG_ADDR)
6174 fprintf (stderr, "Success.\n");
6175 return TRUE;
6177 report_error:
6178 if (TARGET_DEBUG_ADDR)
6180 fprintf (stderr, "Error: %s\n", reason);
6181 debug_rtx (reason_rtx);
6183 return FALSE;
6186 /* Return a unique alias set for the GOT. */
6188 static HOST_WIDE_INT
6189 ix86_GOT_alias_set (void)
6191 static HOST_WIDE_INT set = -1;
6192 if (set == -1)
6193 set = new_alias_set ();
6194 return set;
6197 /* Return a legitimate reference for ORIG (an address) using the
6198 register REG. If REG is 0, a new pseudo is generated.
6200 There are two types of references that must be handled:
6202 1. Global data references must load the address from the GOT, via
6203 the PIC reg. An insn is emitted to do this load, and the reg is
6204 returned.
6206 2. Static data references, constant pool addresses, and code labels
6207 compute the address as an offset from the GOT, whose base is in
6208 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6209 differentiate them from global data objects. The returned
6210 address is the PIC reg + an unspec constant.
6212 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6213 reg also appears in the address. */
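/* A hedged sketch of the result shapes produced below (added for
   illustration): a local symbol S yields
       (plus pic_offset_table_rtx (const (unspec [S] UNSPEC_GOTOFF)))
   while a global symbol G in 32-bit mode yields a GOT load such as
       (mem (plus pic_offset_table_rtx (const (unspec [G] UNSPEC_GOT)))).  */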
6216 legitimize_pic_address (rtx orig, rtx reg)
6218 rtx addr = orig;
6219 rtx new = orig;
6220 rtx base;
6222 #if TARGET_MACHO
6223 if (reg == 0)
6224 reg = gen_reg_rtx (Pmode);
6225 /* Use the generic Mach-O PIC machinery. */
6226 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6227 #endif
6229 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6230 new = addr;
6231 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6233 /* This symbol may be referenced via a displacement from the PIC
6234 base address (@GOTOFF). */
6236 if (reload_in_progress)
6237 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6238 if (GET_CODE (addr) == CONST)
6239 addr = XEXP (addr, 0);
6240 if (GET_CODE (addr) == PLUS)
6242 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6243 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6245 else
6246 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6247 new = gen_rtx_CONST (Pmode, new);
6248 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6250 if (reg != 0)
6252 emit_move_insn (reg, new);
6253 new = reg;
6256 else if (GET_CODE (addr) == SYMBOL_REF)
6258 if (TARGET_64BIT)
6260 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6261 new = gen_rtx_CONST (Pmode, new);
6262 new = gen_rtx_MEM (Pmode, new);
6263 RTX_UNCHANGING_P (new) = 1;
6264 set_mem_alias_set (new, ix86_GOT_alias_set ());
6266 if (reg == 0)
6267 reg = gen_reg_rtx (Pmode);
6268 /* Use gen_movsi directly; otherwise the address is loaded
6269 into a register for CSE. We don't want to CSE these addresses;
6270 instead we CSE the addresses loaded from the GOT table, so skip this. */
6271 emit_insn (gen_movsi (reg, new));
6272 new = reg;
6274 else
6276 /* This symbol must be referenced via a load from the
6277 Global Offset Table (@GOT). */
6279 if (reload_in_progress)
6280 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6281 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6282 new = gen_rtx_CONST (Pmode, new);
6283 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6284 new = gen_rtx_MEM (Pmode, new);
6285 RTX_UNCHANGING_P (new) = 1;
6286 set_mem_alias_set (new, ix86_GOT_alias_set ());
6288 if (reg == 0)
6289 reg = gen_reg_rtx (Pmode);
6290 emit_move_insn (reg, new);
6291 new = reg;
6294 else
6296 if (GET_CODE (addr) == CONST)
6298 addr = XEXP (addr, 0);
6300 /* We must match what we generated earlier. Assume the only
6301 unspecs that can get here are ours. Not that we could do
6302 anything with them anyway.... */
6303 if (GET_CODE (addr) == UNSPEC
6304 || (GET_CODE (addr) == PLUS
6305 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6306 return orig;
6307 if (GET_CODE (addr) != PLUS)
6308 abort ();
6310 if (GET_CODE (addr) == PLUS)
6312 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6314 /* Check first to see if this is a constant offset from a @GOTOFF
6315 symbol reference. */
6316 if (local_symbolic_operand (op0, Pmode)
6317 && GET_CODE (op1) == CONST_INT)
6319 if (!TARGET_64BIT)
6321 if (reload_in_progress)
6322 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6323 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6324 UNSPEC_GOTOFF);
6325 new = gen_rtx_PLUS (Pmode, new, op1);
6326 new = gen_rtx_CONST (Pmode, new);
6327 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6329 if (reg != 0)
6331 emit_move_insn (reg, new);
6332 new = reg;
6335 else
6337 if (INTVAL (op1) < -16*1024*1024
6338 || INTVAL (op1) >= 16*1024*1024)
6339 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6342 else
6344 base = legitimize_pic_address (XEXP (addr, 0), reg);
6345 new = legitimize_pic_address (XEXP (addr, 1),
6346 base == reg ? NULL_RTX : reg);
6348 if (GET_CODE (new) == CONST_INT)
6349 new = plus_constant (base, INTVAL (new));
6350 else
6352 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6354 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6355 new = XEXP (new, 1);
6357 new = gen_rtx_PLUS (Pmode, base, new);
6362 return new;
6365 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6367 static rtx
6368 get_thread_pointer (int to_reg)
6370 rtx tp, reg, insn;
6372 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6373 if (!to_reg)
6374 return tp;
6376 reg = gen_reg_rtx (Pmode);
6377 insn = gen_rtx_SET (VOIDmode, reg, tp);
6378 insn = emit_insn (insn);
6380 return reg;
6383 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6384 false if we expect this to be used for a memory address and true if
6385 we expect to load the address into a register. */
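/* A rough sketch of the result shapes (added for illustration, not part
   of the original comment): for example, the local-exec model with GNU TLS
   produces
       (plus <thread pointer> (const (unspec [x] UNSPEC_NTPOFF)))
   while the non-GNU local-exec case instead subtracts a @TPOFF offset
   from the thread pointer.  */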
6387 static rtx
6388 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6390 rtx dest, base, off, pic;
6391 int type;
6393 switch (model)
6395 case TLS_MODEL_GLOBAL_DYNAMIC:
6396 dest = gen_reg_rtx (Pmode);
6397 if (TARGET_64BIT)
6399 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6401 start_sequence ();
6402 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6403 insns = get_insns ();
6404 end_sequence ();
6406 emit_libcall_block (insns, dest, rax, x);
6408 else
6409 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6410 break;
6412 case TLS_MODEL_LOCAL_DYNAMIC:
6413 base = gen_reg_rtx (Pmode);
6414 if (TARGET_64BIT)
6416 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6418 start_sequence ();
6419 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6420 insns = get_insns ();
6421 end_sequence ();
6423 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6424 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6425 emit_libcall_block (insns, base, rax, note);
6427 else
6428 emit_insn (gen_tls_local_dynamic_base_32 (base));
6430 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6431 off = gen_rtx_CONST (Pmode, off);
6433 return gen_rtx_PLUS (Pmode, base, off);
6435 case TLS_MODEL_INITIAL_EXEC:
6436 if (TARGET_64BIT)
6438 pic = NULL;
6439 type = UNSPEC_GOTNTPOFF;
6441 else if (flag_pic)
6443 if (reload_in_progress)
6444 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6445 pic = pic_offset_table_rtx;
6446 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6448 else if (!TARGET_GNU_TLS)
6450 pic = gen_reg_rtx (Pmode);
6451 emit_insn (gen_set_got (pic));
6452 type = UNSPEC_GOTTPOFF;
6454 else
6456 pic = NULL;
6457 type = UNSPEC_INDNTPOFF;
6460 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6461 off = gen_rtx_CONST (Pmode, off);
6462 if (pic)
6463 off = gen_rtx_PLUS (Pmode, pic, off);
6464 off = gen_rtx_MEM (Pmode, off);
6465 RTX_UNCHANGING_P (off) = 1;
6466 set_mem_alias_set (off, ix86_GOT_alias_set ());
6468 if (TARGET_64BIT || TARGET_GNU_TLS)
6470 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6471 off = force_reg (Pmode, off);
6472 return gen_rtx_PLUS (Pmode, base, off);
6474 else
6476 base = get_thread_pointer (true);
6477 dest = gen_reg_rtx (Pmode);
6478 emit_insn (gen_subsi3 (dest, base, off));
6480 break;
6482 case TLS_MODEL_LOCAL_EXEC:
6483 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6484 (TARGET_64BIT || TARGET_GNU_TLS)
6485 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6486 off = gen_rtx_CONST (Pmode, off);
6488 if (TARGET_64BIT || TARGET_GNU_TLS)
6490 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6491 return gen_rtx_PLUS (Pmode, base, off);
6493 else
6495 base = get_thread_pointer (true);
6496 dest = gen_reg_rtx (Pmode);
6497 emit_insn (gen_subsi3 (dest, base, off));
6499 break;
6501 default:
6502 abort ();
6505 return dest;
6508 /* Try machine-dependent ways of modifying an illegitimate address
6509 to be legitimate. If we find one, return the new, valid address.
6510 This macro is used in only one place: `memory_address' in explow.c.
6512 OLDX is the address as it was before break_out_memory_refs was called.
6513 In some cases it is useful to look at this to decide what needs to be done.
6515 MODE and WIN are passed so that this macro can use
6516 GO_IF_LEGITIMATE_ADDRESS.
6518 It is always safe for this macro to do nothing. It exists to recognize
6519 opportunities to optimize the output.
6521 For the 80386, we handle X+REG by loading X into a register R and
6522 using R+REG. R will go in a general reg and indexing will be used.
6523 However, if REG is a broken-out memory address or multiplication,
6524 nothing needs to be done because REG can certainly go in a general reg.
6526 When -fpic is used, special handling is needed for symbolic references.
6527 See comments by legitimize_pic_address in i386.c for details. */
6530 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6532 int changed = 0;
6533 unsigned log;
6535 if (TARGET_DEBUG_ADDR)
6537 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6538 GET_MODE_NAME (mode));
6539 debug_rtx (x);
6542 log = tls_symbolic_operand (x, mode);
6543 if (log)
6544 return legitimize_tls_address (x, log, false);
6546 if (flag_pic && SYMBOLIC_CONST (x))
6547 return legitimize_pic_address (x, 0);
6549 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6550 if (GET_CODE (x) == ASHIFT
6551 && GET_CODE (XEXP (x, 1)) == CONST_INT
6552 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6554 changed = 1;
6555 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6556 GEN_INT (1 << log));
6559 if (GET_CODE (x) == PLUS)
6561 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6563 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6564 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6565 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6567 changed = 1;
6568 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6569 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6570 GEN_INT (1 << log));
6573 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6574 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6575 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6577 changed = 1;
6578 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6579 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6580 GEN_INT (1 << log));
6583 /* Put multiply first if it isn't already. */
6584 if (GET_CODE (XEXP (x, 1)) == MULT)
6586 rtx tmp = XEXP (x, 0);
6587 XEXP (x, 0) = XEXP (x, 1);
6588 XEXP (x, 1) = tmp;
6589 changed = 1;
6592 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6593 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6594 created by virtual register instantiation, register elimination, and
6595 similar optimizations. */
6596 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6598 changed = 1;
6599 x = gen_rtx_PLUS (Pmode,
6600 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6601 XEXP (XEXP (x, 1), 0)),
6602 XEXP (XEXP (x, 1), 1));
6605 /* Canonicalize
6606 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6607 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6608 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6609 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6610 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6611 && CONSTANT_P (XEXP (x, 1)))
6613 rtx constant;
6614 rtx other = NULL_RTX;
6616 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6618 constant = XEXP (x, 1);
6619 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6621 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6623 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6624 other = XEXP (x, 1);
6626 else
6627 constant = 0;
6629 if (constant)
6631 changed = 1;
6632 x = gen_rtx_PLUS (Pmode,
6633 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6634 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6635 plus_constant (other, INTVAL (constant)));
6639 if (changed && legitimate_address_p (mode, x, FALSE))
6640 return x;
6642 if (GET_CODE (XEXP (x, 0)) == MULT)
6644 changed = 1;
6645 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6648 if (GET_CODE (XEXP (x, 1)) == MULT)
6650 changed = 1;
6651 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6654 if (changed
6655 && GET_CODE (XEXP (x, 1)) == REG
6656 && GET_CODE (XEXP (x, 0)) == REG)
6657 return x;
6659 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6661 changed = 1;
6662 x = legitimize_pic_address (x, 0);
6665 if (changed && legitimate_address_p (mode, x, FALSE))
6666 return x;
6668 if (GET_CODE (XEXP (x, 0)) == REG)
6670 rtx temp = gen_reg_rtx (Pmode);
6671 rtx val = force_operand (XEXP (x, 1), temp);
6672 if (val != temp)
6673 emit_move_insn (temp, val);
6675 XEXP (x, 1) = temp;
6676 return x;
6679 else if (GET_CODE (XEXP (x, 1)) == REG)
6681 rtx temp = gen_reg_rtx (Pmode);
6682 rtx val = force_operand (XEXP (x, 0), temp);
6683 if (val != temp)
6684 emit_move_insn (temp, val);
6686 XEXP (x, 0) = temp;
6687 return x;
6691 return x;
6694 /* Print an integer constant expression in assembler syntax. Addition
6695 and subtraction are the only arithmetic that may appear in these
6696 expressions. FILE is the stdio stream to write to, X is the rtx, and
6697 CODE is the operand print code from the output string. */
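/* An added example (illustrative only): given
   (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)) this prints
   "foo@GOTOFF", and with code 'P' a non-local SYMBOL_REF gets an "@PLT"
   suffix appended.  */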
6699 static void
6700 output_pic_addr_const (FILE *file, rtx x, int code)
6702 char buf[256];
6704 switch (GET_CODE (x))
6706 case PC:
6707 if (flag_pic)
6708 putc ('.', file);
6709 else
6710 abort ();
6711 break;
6713 case SYMBOL_REF:
6714 assemble_name (file, XSTR (x, 0));
6715 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6716 fputs ("@PLT", file);
6717 break;
6719 case LABEL_REF:
6720 x = XEXP (x, 0);
6721 /* FALLTHRU */
6722 case CODE_LABEL:
6723 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6724 assemble_name (asm_out_file, buf);
6725 break;
6727 case CONST_INT:
6728 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6729 break;
6731 case CONST:
6732 /* This used to output parentheses around the expression,
6733 but that does not work on the 386 (either ATT or BSD assembler). */
6734 output_pic_addr_const (file, XEXP (x, 0), code);
6735 break;
6737 case CONST_DOUBLE:
6738 if (GET_MODE (x) == VOIDmode)
6740 /* We can use %d if the number is <32 bits and positive. */
6741 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6742 fprintf (file, "0x%lx%08lx",
6743 (unsigned long) CONST_DOUBLE_HIGH (x),
6744 (unsigned long) CONST_DOUBLE_LOW (x));
6745 else
6746 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6748 else
6749 /* We can't handle floating point constants;
6750 PRINT_OPERAND must handle them. */
6751 output_operand_lossage ("floating constant misused");
6752 break;
6754 case PLUS:
6755 /* Some assemblers need integer constants to appear first. */
6756 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6758 output_pic_addr_const (file, XEXP (x, 0), code);
6759 putc ('+', file);
6760 output_pic_addr_const (file, XEXP (x, 1), code);
6762 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6764 output_pic_addr_const (file, XEXP (x, 1), code);
6765 putc ('+', file);
6766 output_pic_addr_const (file, XEXP (x, 0), code);
6768 else
6769 abort ();
6770 break;
6772 case MINUS:
6773 if (!TARGET_MACHO)
6774 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6775 output_pic_addr_const (file, XEXP (x, 0), code);
6776 putc ('-', file);
6777 output_pic_addr_const (file, XEXP (x, 1), code);
6778 if (!TARGET_MACHO)
6779 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6780 break;
6782 case UNSPEC:
6783 if (XVECLEN (x, 0) != 1)
6784 abort ();
6785 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6786 switch (XINT (x, 1))
6788 case UNSPEC_GOT:
6789 fputs ("@GOT", file);
6790 break;
6791 case UNSPEC_GOTOFF:
6792 fputs ("@GOTOFF", file);
6793 break;
6794 case UNSPEC_GOTPCREL:
6795 fputs ("@GOTPCREL(%rip)", file);
6796 break;
6797 case UNSPEC_GOTTPOFF:
6798 /* FIXME: This might be @TPOFF in Sun ld too. */
6799 fputs ("@GOTTPOFF", file);
6800 break;
6801 case UNSPEC_TPOFF:
6802 fputs ("@TPOFF", file);
6803 break;
6804 case UNSPEC_NTPOFF:
6805 if (TARGET_64BIT)
6806 fputs ("@TPOFF", file);
6807 else
6808 fputs ("@NTPOFF", file);
6809 break;
6810 case UNSPEC_DTPOFF:
6811 fputs ("@DTPOFF", file);
6812 break;
6813 case UNSPEC_GOTNTPOFF:
6814 if (TARGET_64BIT)
6815 fputs ("@GOTTPOFF(%rip)", file);
6816 else
6817 fputs ("@GOTNTPOFF", file);
6818 break;
6819 case UNSPEC_INDNTPOFF:
6820 fputs ("@INDNTPOFF", file);
6821 break;
6822 default:
6823 output_operand_lossage ("invalid UNSPEC as operand");
6824 break;
6826 break;
6828 default:
6829 output_operand_lossage ("invalid expression as operand");
6833 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6834 We need to handle our special PIC relocations. */
6836 void
6837 i386_dwarf_output_addr_const (FILE *file, rtx x)
6839 #ifdef ASM_QUAD
6840 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6841 #else
6842 if (TARGET_64BIT)
6843 abort ();
6844 fprintf (file, "%s", ASM_LONG);
6845 #endif
6846 if (flag_pic)
6847 output_pic_addr_const (file, x, '\0');
6848 else
6849 output_addr_const (file, x);
6850 fputc ('\n', file);
6853 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6854 We need to emit DTP-relative relocations. */
6856 void
6857 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6859 fputs (ASM_LONG, file);
6860 output_addr_const (file, x);
6861 fputs ("@DTPOFF", file);
6862 switch (size)
6864 case 4:
6865 break;
6866 case 8:
6867 fputs (", 0", file);
6868 break;
6869 default:
6870 abort ();
6874 /* In the name of slightly smaller debug output, and to cater to
6875 general assembler lossage, recognize PIC+GOTOFF and turn it back
6876 into a direct symbol reference. */
6878 static rtx
6879 ix86_delegitimize_address (rtx orig_x)
6881 rtx x = orig_x, y;
6883 if (GET_CODE (x) == MEM)
6884 x = XEXP (x, 0);
6886 if (TARGET_64BIT)
6888 if (GET_CODE (x) != CONST
6889 || GET_CODE (XEXP (x, 0)) != UNSPEC
6890 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6891 || GET_CODE (orig_x) != MEM)
6892 return orig_x;
6893 return XVECEXP (XEXP (x, 0), 0, 0);
6896 if (GET_CODE (x) != PLUS
6897 || GET_CODE (XEXP (x, 1)) != CONST)
6898 return orig_x;
6900 if (GET_CODE (XEXP (x, 0)) == REG
6901 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6902 /* %ebx + GOT/GOTOFF */
6903 y = NULL;
6904 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6906 /* %ebx + %reg * scale + GOT/GOTOFF */
6907 y = XEXP (x, 0);
6908 if (GET_CODE (XEXP (y, 0)) == REG
6909 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6910 y = XEXP (y, 1);
6911 else if (GET_CODE (XEXP (y, 1)) == REG
6912 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6913 y = XEXP (y, 0);
6914 else
6915 return orig_x;
6916 if (GET_CODE (y) != REG
6917 && GET_CODE (y) != MULT
6918 && GET_CODE (y) != ASHIFT)
6919 return orig_x;
6921 else
6922 return orig_x;
6924 x = XEXP (XEXP (x, 1), 0);
6925 if (GET_CODE (x) == UNSPEC
6926 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6927 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6929 if (y)
6930 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6931 return XVECEXP (x, 0, 0);
6934 if (GET_CODE (x) == PLUS
6935 && GET_CODE (XEXP (x, 0)) == UNSPEC
6936 && GET_CODE (XEXP (x, 1)) == CONST_INT
6937 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6938 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6939 && GET_CODE (orig_x) != MEM)))
6941 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6942 if (y)
6943 return gen_rtx_PLUS (Pmode, y, x);
6944 return x;
6947 return orig_x;
6950 static void
6951 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6952 int fp, FILE *file)
6954 const char *suffix;
6956 if (mode == CCFPmode || mode == CCFPUmode)
6958 enum rtx_code second_code, bypass_code;
6959 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6960 if (bypass_code != NIL || second_code != NIL)
6961 abort ();
6962 code = ix86_fp_compare_code_to_integer (code);
6963 mode = CCmode;
6965 if (reverse)
6966 code = reverse_condition (code);
6968 switch (code)
6970 case EQ:
6971 suffix = "e";
6972 break;
6973 case NE:
6974 suffix = "ne";
6975 break;
6976 case GT:
6977 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6978 abort ();
6979 suffix = "g";
6980 break;
6981 case GTU:
6982 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6983 Those same assemblers have the same but opposite lossage on cmov. */
6984 if (mode != CCmode)
6985 abort ();
6986 suffix = fp ? "nbe" : "a";
6987 break;
6988 case LT:
6989 if (mode == CCNOmode || mode == CCGOCmode)
6990 suffix = "s";
6991 else if (mode == CCmode || mode == CCGCmode)
6992 suffix = "l";
6993 else
6994 abort ();
6995 break;
6996 case LTU:
6997 if (mode != CCmode)
6998 abort ();
6999 suffix = "b";
7000 break;
7001 case GE:
7002 if (mode == CCNOmode || mode == CCGOCmode)
7003 suffix = "ns";
7004 else if (mode == CCmode || mode == CCGCmode)
7005 suffix = "ge";
7006 else
7007 abort ();
7008 break;
7009 case GEU:
7010 /* ??? As above. */
7011 if (mode != CCmode)
7012 abort ();
7013 suffix = fp ? "nb" : "ae";
7014 break;
7015 case LE:
7016 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
7017 abort ();
7018 suffix = "le";
7019 break;
7020 case LEU:
7021 if (mode != CCmode)
7022 abort ();
7023 suffix = "be";
7024 break;
7025 case UNORDERED:
7026 suffix = fp ? "u" : "p";
7027 break;
7028 case ORDERED:
7029 suffix = fp ? "nu" : "np";
7030 break;
7031 default:
7032 abort ();
7034 fputs (suffix, file);
7037 /* Print the name of register X to FILE based on its machine mode and number.
7038 If CODE is 'w', pretend the mode is HImode.
7039 If CODE is 'b', pretend the mode is QImode.
7040 If CODE is 'k', pretend the mode is SImode.
7041 If CODE is 'q', pretend the mode is DImode.
7042 If CODE is 'h', pretend the reg is the `high' byte register.
7043 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
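/* Added example (illustrative only): for hard register 0 in AT&T syntax,
   code 'b' prints "%al", 'w' prints "%ax", 'k' prints "%eax" and, in
   64-bit mode, 'q' prints "%rax".  */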
7045 void
7046 print_reg (rtx x, int code, FILE *file)
7048 if (REGNO (x) == ARG_POINTER_REGNUM
7049 || REGNO (x) == FRAME_POINTER_REGNUM
7050 || REGNO (x) == FLAGS_REG
7051 || REGNO (x) == FPSR_REG)
7052 abort ();
7054 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7055 putc ('%', file);
7057 if (code == 'w' || MMX_REG_P (x))
7058 code = 2;
7059 else if (code == 'b')
7060 code = 1;
7061 else if (code == 'k')
7062 code = 4;
7063 else if (code == 'q')
7064 code = 8;
7065 else if (code == 'y')
7066 code = 3;
7067 else if (code == 'h')
7068 code = 0;
7069 else
7070 code = GET_MODE_SIZE (GET_MODE (x));
7072 /* Irritatingly, the AMD extended registers use a different naming convention
7073 from the normal registers. */
7074 if (REX_INT_REG_P (x))
7076 if (!TARGET_64BIT)
7077 abort ();
7078 switch (code)
7080 case 0:
7081 error ("extended registers have no high halves");
7082 break;
7083 case 1:
7084 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7085 break;
7086 case 2:
7087 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7088 break;
7089 case 4:
7090 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7091 break;
7092 case 8:
7093 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7094 break;
7095 default:
7096 error ("unsupported operand size for extended register");
7097 break;
7099 return;
7101 switch (code)
7103 case 3:
7104 if (STACK_TOP_P (x))
7106 fputs ("st(0)", file);
7107 break;
7109 /* FALLTHRU */
7110 case 8:
7111 case 4:
7112 case 12:
7113 if (! ANY_FP_REG_P (x))
7114 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7115 /* FALLTHRU */
7116 case 16:
7117 case 2:
7118 normal:
7119 fputs (hi_reg_name[REGNO (x)], file);
7120 break;
7121 case 1:
7122 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7123 goto normal;
7124 fputs (qi_reg_name[REGNO (x)], file);
7125 break;
7126 case 0:
7127 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7128 goto normal;
7129 fputs (qi_high_reg_name[REGNO (x)], file);
7130 break;
7131 default:
7132 abort ();
7136 /* Locate some local-dynamic symbol still in use by this function
7137 so that we can print its name in some tls_local_dynamic_base
7138 pattern. */
7140 static const char *
7141 get_some_local_dynamic_name (void)
7143 rtx insn;
7145 if (cfun->machine->some_ld_name)
7146 return cfun->machine->some_ld_name;
7148 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7149 if (INSN_P (insn)
7150 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7151 return cfun->machine->some_ld_name;
7153 abort ();
7156 static int
7157 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7159 rtx x = *px;
7161 if (GET_CODE (x) == SYMBOL_REF
7162 && local_dynamic_symbolic_operand (x, Pmode))
7164 cfun->machine->some_ld_name = XSTR (x, 0);
7165 return 1;
7168 return 0;
7171 /* Meaning of CODE:
7172 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7173 C -- print opcode suffix for set/cmov insn.
7174 c -- like C, but print reversed condition
7175 F,f -- likewise, but for floating-point.
7176 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7177 otherwise nothing
7178 R -- print the prefix for register names.
7179 z -- print the opcode suffix for the size of the current operand.
7180 * -- print a star (in certain assembler syntax)
7181 A -- print an absolute memory reference.
7182 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7183 s -- print a shift double count, followed by the assembler's argument
7184 delimiter.
7185 b -- print the QImode name of the register for the indicated operand.
7186 %b0 would print %al if operands[0] is reg 0.
7187 w -- likewise, print the HImode name of the register.
7188 k -- likewise, print the SImode name of the register.
7189 q -- likewise, print the DImode name of the register.
7190 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7191 y -- print "st(0)" instead of "st" as a register.
7192 D -- print condition for SSE cmp instruction.
7193 P -- if PIC, print an @PLT suffix.
7194 X -- don't print any sort of PIC '@' suffix for a symbol.
7195 & -- print some in-use local-dynamic symbol name.
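   As an added illustration (not part of the original list): if operands[0]
   is the SImode register %eax, then "%k0" prints "%eax", "%w0" prints
   "%ax" and "%b0" prints "%al" in AT&T syntax.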
7198 void
7199 print_operand (FILE *file, rtx x, int code)
7201 if (code)
7203 switch (code)
7205 case '*':
7206 if (ASSEMBLER_DIALECT == ASM_ATT)
7207 putc ('*', file);
7208 return;
7210 case '&':
7211 assemble_name (file, get_some_local_dynamic_name ());
7212 return;
7214 case 'A':
7215 if (ASSEMBLER_DIALECT == ASM_ATT)
7216 putc ('*', file);
7217 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7219 /* Intel syntax. For absolute addresses, registers should not
7220 be surrounded by brackets. */
7221 if (GET_CODE (x) != REG)
7223 putc ('[', file);
7224 PRINT_OPERAND (file, x, 0);
7225 putc (']', file);
7226 return;
7229 else
7230 abort ();
7232 PRINT_OPERAND (file, x, 0);
7233 return;
7236 case 'L':
7237 if (ASSEMBLER_DIALECT == ASM_ATT)
7238 putc ('l', file);
7239 return;
7241 case 'W':
7242 if (ASSEMBLER_DIALECT == ASM_ATT)
7243 putc ('w', file);
7244 return;
7246 case 'B':
7247 if (ASSEMBLER_DIALECT == ASM_ATT)
7248 putc ('b', file);
7249 return;
7251 case 'Q':
7252 if (ASSEMBLER_DIALECT == ASM_ATT)
7253 putc ('l', file);
7254 return;
7256 case 'S':
7257 if (ASSEMBLER_DIALECT == ASM_ATT)
7258 putc ('s', file);
7259 return;
7261 case 'T':
7262 if (ASSEMBLER_DIALECT == ASM_ATT)
7263 putc ('t', file);
7264 return;
7266 case 'z':
7267 /* 387 opcodes don't get size suffixes if the operands are
7268 registers. */
7269 if (STACK_REG_P (x))
7270 return;
7272 /* Likewise if using Intel opcodes. */
7273 if (ASSEMBLER_DIALECT == ASM_INTEL)
7274 return;
7276 /* Derive the opcode size suffix from the size of the operand. */
7277 switch (GET_MODE_SIZE (GET_MODE (x)))
7279 case 2:
7280 #ifdef HAVE_GAS_FILDS_FISTS
7281 putc ('s', file);
7282 #endif
7283 return;
7285 case 4:
7286 if (GET_MODE (x) == SFmode)
7288 putc ('s', file);
7289 return;
7291 else
7292 putc ('l', file);
7293 return;
7295 case 12:
7296 case 16:
7297 putc ('t', file);
7298 return;
7300 case 8:
7301 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7303 #ifdef GAS_MNEMONICS
7304 putc ('q', file);
7305 #else
7306 putc ('l', file);
7307 putc ('l', file);
7308 #endif
7310 else
7311 putc ('l', file);
7312 return;
7314 default:
7315 abort ();
7318 case 'b':
7319 case 'w':
7320 case 'k':
7321 case 'q':
7322 case 'h':
7323 case 'y':
7324 case 'X':
7325 case 'P':
7326 break;
7328 case 's':
7329 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7331 PRINT_OPERAND (file, x, 0);
7332 putc (',', file);
7334 return;
7336 case 'D':
7337 /* A little bit of braindamage here. The SSE compare instructions
7338 use completely different names for the comparisons than the
7339 fp conditional moves do. */
7340 switch (GET_CODE (x))
7342 case EQ:
7343 case UNEQ:
7344 fputs ("eq", file);
7345 break;
7346 case LT:
7347 case UNLT:
7348 fputs ("lt", file);
7349 break;
7350 case LE:
7351 case UNLE:
7352 fputs ("le", file);
7353 break;
7354 case UNORDERED:
7355 fputs ("unord", file);
7356 break;
7357 case NE:
7358 case LTGT:
7359 fputs ("neq", file);
7360 break;
7361 case UNGE:
7362 case GE:
7363 fputs ("nlt", file);
7364 break;
7365 case UNGT:
7366 case GT:
7367 fputs ("nle", file);
7368 break;
7369 case ORDERED:
7370 fputs ("ord", file);
7371 break;
7372 default:
7373 abort ();
7374 break;
7376 return;
7377 case 'O':
7378 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7379 if (ASSEMBLER_DIALECT == ASM_ATT)
7381 switch (GET_MODE (x))
7383 case HImode: putc ('w', file); break;
7384 case SImode:
7385 case SFmode: putc ('l', file); break;
7386 case DImode:
7387 case DFmode: putc ('q', file); break;
7388 default: abort ();
7390 putc ('.', file);
7392 #endif
7393 return;
7394 case 'C':
7395 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7396 return;
7397 case 'F':
7398 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7399 if (ASSEMBLER_DIALECT == ASM_ATT)
7400 putc ('.', file);
7401 #endif
7402 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7403 return;
7405 /* Like above, but reverse condition */
7406 case 'c':
7407 /* Check to see if argument to %c is really a constant
7408 and not a condition code which needs to be reversed. */
7409 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7411 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7412 return;
7414 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7415 return;
7416 case 'f':
7417 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7418 if (ASSEMBLER_DIALECT == ASM_ATT)
7419 putc ('.', file);
7420 #endif
7421 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7422 return;
7423 case '+':
7425 rtx x;
7427 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7428 return;
7430 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7431 if (x)
7433 int pred_val = INTVAL (XEXP (x, 0));
7435 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7436 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7438 int taken = pred_val > REG_BR_PROB_BASE / 2;
7439 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7441 /* Emit hints only in case the default branch prediction
7442 heuristics would fail. */
7443 if (taken != cputaken)
7445 /* We use 3e (DS) prefix for taken branches and
7446 2e (CS) prefix for not taken branches. */
7447 if (taken)
7448 fputs ("ds ; ", file);
7449 else
7450 fputs ("cs ; ", file);
7454 return;
7456 default:
7457 output_operand_lossage ("invalid operand code `%c'", code);
7461 if (GET_CODE (x) == REG)
7462 print_reg (x, code, file);
7464 else if (GET_CODE (x) == MEM)
7466 /* No `byte ptr' prefix for call instructions. */
7467 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7469 const char * size;
7470 switch (GET_MODE_SIZE (GET_MODE (x)))
7472 case 1: size = "BYTE"; break;
7473 case 2: size = "WORD"; break;
7474 case 4: size = "DWORD"; break;
7475 case 8: size = "QWORD"; break;
7476 case 12: size = "XWORD"; break;
7477 case 16: size = "XMMWORD"; break;
7478 default:
7479 abort ();
7482 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7483 if (code == 'b')
7484 size = "BYTE";
7485 else if (code == 'w')
7486 size = "WORD";
7487 else if (code == 'k')
7488 size = "DWORD";
7490 fputs (size, file);
7491 fputs (" PTR ", file);
7494 x = XEXP (x, 0);
7495 /* Avoid (%rip) for call operands. */
7496 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7497 && GET_CODE (x) != CONST_INT)
7498 output_addr_const (file, x);
7499 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7500 output_operand_lossage ("invalid constraints for operand");
7501 else
7502 output_address (x);
7505 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7507 REAL_VALUE_TYPE r;
7508 long l;
7510 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7511 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7513 if (ASSEMBLER_DIALECT == ASM_ATT)
7514 putc ('$', file);
7515 fprintf (file, "0x%08lx", l);
7518 /* These float cases don't actually occur as immediate operands. */
7519 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7521 char dstr[30];
7523 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7524 fprintf (file, "%s", dstr);
7527 else if (GET_CODE (x) == CONST_DOUBLE
7528 && GET_MODE (x) == XFmode)
7530 char dstr[30];
7532 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7533 fprintf (file, "%s", dstr);
7536 else
7538 if (code != 'P')
7540 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7542 if (ASSEMBLER_DIALECT == ASM_ATT)
7543 putc ('$', file);
7545 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7546 || GET_CODE (x) == LABEL_REF)
7548 if (ASSEMBLER_DIALECT == ASM_ATT)
7549 putc ('$', file);
7550 else
7551 fputs ("OFFSET FLAT:", file);
7554 if (GET_CODE (x) == CONST_INT)
7555 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7556 else if (flag_pic)
7557 output_pic_addr_const (file, x, code);
7558 else
7559 output_addr_const (file, x);
7563 /* Print a memory operand whose address is ADDR. */
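/* An added example (a sketch): with base %ebx, index %ecx, scale 4 and
   displacement 8, the AT&T output is "8(%ebx,%ecx,4)" and the Intel
   output is roughly "[ebx+8+ecx*4]".  */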
7565 void
7566 print_operand_address (FILE *file, rtx addr)
7568 struct ix86_address parts;
7569 rtx base, index, disp;
7570 int scale;
7572 if (! ix86_decompose_address (addr, &parts))
7573 abort ();
7575 base = parts.base;
7576 index = parts.index;
7577 disp = parts.disp;
7578 scale = parts.scale;
7580 switch (parts.seg)
7582 case SEG_DEFAULT:
7583 break;
7584 case SEG_FS:
7585 case SEG_GS:
7586 if (USER_LABEL_PREFIX[0] == 0)
7587 putc ('%', file);
7588 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7589 break;
7590 default:
7591 abort ();
7594 if (!base && !index)
7596 /* A displacement-only address requires special attention. */
7598 if (GET_CODE (disp) == CONST_INT)
7600 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7602 if (USER_LABEL_PREFIX[0] == 0)
7603 putc ('%', file);
7604 fputs ("ds:", file);
7606 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7608 else if (flag_pic)
7609 output_pic_addr_const (file, disp, 0);
7610 else
7611 output_addr_const (file, disp);
7613 /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode. */
7614 if (TARGET_64BIT
7615 && ((GET_CODE (disp) == SYMBOL_REF
7616 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7617 || GET_CODE (disp) == LABEL_REF
7618 || (GET_CODE (disp) == CONST
7619 && GET_CODE (XEXP (disp, 0)) == PLUS
7620 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7621 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7622 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7623 fputs ("(%rip)", file);
7625 else
7627 if (ASSEMBLER_DIALECT == ASM_ATT)
7629 if (disp)
7631 if (flag_pic)
7632 output_pic_addr_const (file, disp, 0);
7633 else if (GET_CODE (disp) == LABEL_REF)
7634 output_asm_label (disp);
7635 else
7636 output_addr_const (file, disp);
7639 putc ('(', file);
7640 if (base)
7641 print_reg (base, 0, file);
7642 if (index)
7644 putc (',', file);
7645 print_reg (index, 0, file);
7646 if (scale != 1)
7647 fprintf (file, ",%d", scale);
7649 putc (')', file);
7651 else
7653 rtx offset = NULL_RTX;
7655 if (disp)
7657 /* Pull out the offset of a symbol; print any symbol itself. */
7658 if (GET_CODE (disp) == CONST
7659 && GET_CODE (XEXP (disp, 0)) == PLUS
7660 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7662 offset = XEXP (XEXP (disp, 0), 1);
7663 disp = gen_rtx_CONST (VOIDmode,
7664 XEXP (XEXP (disp, 0), 0));
7667 if (flag_pic)
7668 output_pic_addr_const (file, disp, 0);
7669 else if (GET_CODE (disp) == LABEL_REF)
7670 output_asm_label (disp);
7671 else if (GET_CODE (disp) == CONST_INT)
7672 offset = disp;
7673 else
7674 output_addr_const (file, disp);
7677 putc ('[', file);
7678 if (base)
7680 print_reg (base, 0, file);
7681 if (offset)
7683 if (INTVAL (offset) >= 0)
7684 putc ('+', file);
7685 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7688 else if (offset)
7689 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7690 else
7691 putc ('0', file);
7693 if (index)
7695 putc ('+', file);
7696 print_reg (index, 0, file);
7697 if (scale != 1)
7698 fprintf (file, "*%d", scale);
7700 putc (']', file);
7705 bool
7706 output_addr_const_extra (FILE *file, rtx x)
7708 rtx op;
7710 if (GET_CODE (x) != UNSPEC)
7711 return false;
7713 op = XVECEXP (x, 0, 0);
7714 switch (XINT (x, 1))
7716 case UNSPEC_GOTTPOFF:
7717 output_addr_const (file, op);
7718 /* FIXME: This might be @TPOFF in Sun ld. */
7719 fputs ("@GOTTPOFF", file);
7720 break;
7721 case UNSPEC_TPOFF:
7722 output_addr_const (file, op);
7723 fputs ("@TPOFF", file);
7724 break;
7725 case UNSPEC_NTPOFF:
7726 output_addr_const (file, op);
7727 if (TARGET_64BIT)
7728 fputs ("@TPOFF", file);
7729 else
7730 fputs ("@NTPOFF", file);
7731 break;
7732 case UNSPEC_DTPOFF:
7733 output_addr_const (file, op);
7734 fputs ("@DTPOFF", file);
7735 break;
7736 case UNSPEC_GOTNTPOFF:
7737 output_addr_const (file, op);
7738 if (TARGET_64BIT)
7739 fputs ("@GOTTPOFF(%rip)", file);
7740 else
7741 fputs ("@GOTNTPOFF", file);
7742 break;
7743 case UNSPEC_INDNTPOFF:
7744 output_addr_const (file, op);
7745 fputs ("@INDNTPOFF", file);
7746 break;
7748 default:
7749 return false;
7752 return true;
7755 /* Split one or more DImode RTL references into pairs of SImode
7756 references. The RTL can be REG, offsettable MEM, integer constant, or
7757 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7758 split and "num" is its length. lo_half and hi_half are output arrays
7759 that parallel "operands". */
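/* For instance (added illustration): a DImode REG is split into two SImode
   subregs at byte offsets 0 and 4, and an offsettable DImode MEM into two
   SImode memory references at offsets 0 and 4.  */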
7761 void
7762 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7764 while (num--)
7766 rtx op = operands[num];
7768 /* simplify_subreg refuses to split volatile memory references,
7769 but we still have to handle them. */
7770 if (GET_CODE (op) == MEM)
7772 lo_half[num] = adjust_address (op, SImode, 0);
7773 hi_half[num] = adjust_address (op, SImode, 4);
7775 else
7777 lo_half[num] = simplify_gen_subreg (SImode, op,
7778 GET_MODE (op) == VOIDmode
7779 ? DImode : GET_MODE (op), 0);
7780 hi_half[num] = simplify_gen_subreg (SImode, op,
7781 GET_MODE (op) == VOIDmode
7782 ? DImode : GET_MODE (op), 4);
7786 /* Split one or more TImode RTL references into pairs of DImode
7787 references. The RTL can be REG, offsettable MEM, integer constant, or
7788 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
7789 split and "num" is its length. lo_half and hi_half are output arrays
7790 that parallel "operands". */
7792 void
7793 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7795 while (num--)
7797 rtx op = operands[num];
7799 /* simplify_subreg refuses to split volatile memory references, but we
7800 still have to handle them. */
7801 if (GET_CODE (op) == MEM)
7803 lo_half[num] = adjust_address (op, DImode, 0);
7804 hi_half[num] = adjust_address (op, DImode, 8);
7806 else
7808 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7809 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7814 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7815 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7816 is the expression of the binary operation. The output may either be
7817 emitted here, or returned to the caller, like all output_* functions.
7819 There is no guarantee that the operands are the same mode, as they
7820 might be within FLOAT or FLOAT_EXTEND expressions. */
7822 #ifndef SYSV386_COMPAT
7823 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7824 wants to fix the assemblers because that causes incompatibility
7825 with gcc. No-one wants to fix gcc because that causes
7826 incompatibility with assemblers... You can use the option of
7827 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7828 #define SYSV386_COMPAT 1
7829 #endif
7831 const char *
7832 output_387_binary_op (rtx insn, rtx *operands)
7834 static char buf[30];
7835 const char *p;
7836 const char *ssep;
7837 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7839 #ifdef ENABLE_CHECKING
7840 /* Even if we do not want to check the inputs, this documents input
7841 constraints, which helps in understanding the following code. */
7842 if (STACK_REG_P (operands[0])
7843 && ((REG_P (operands[1])
7844 && REGNO (operands[0]) == REGNO (operands[1])
7845 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7846 || (REG_P (operands[2])
7847 && REGNO (operands[0]) == REGNO (operands[2])
7848 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7849 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7850 ; /* ok */
7851 else if (!is_sse)
7852 abort ();
7853 #endif
7855 switch (GET_CODE (operands[3]))
7857 case PLUS:
7858 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7859 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7860 p = "fiadd";
7861 else
7862 p = "fadd";
7863 ssep = "add";
7864 break;
7866 case MINUS:
7867 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7868 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7869 p = "fisub";
7870 else
7871 p = "fsub";
7872 ssep = "sub";
7873 break;
7875 case MULT:
7876 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7877 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7878 p = "fimul";
7879 else
7880 p = "fmul";
7881 ssep = "mul";
7882 break;
7884 case DIV:
7885 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7886 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7887 p = "fidiv";
7888 else
7889 p = "fdiv";
7890 ssep = "div";
7891 break;
7893 default:
7894 abort ();
7897 if (is_sse)
7899 strcpy (buf, ssep);
7900 if (GET_MODE (operands[0]) == SFmode)
7901 strcat (buf, "ss\t{%2, %0|%0, %2}");
7902 else
7903 strcat (buf, "sd\t{%2, %0|%0, %2}");
7904 return buf;
7906 strcpy (buf, p);
7908 switch (GET_CODE (operands[3]))
7910 case MULT:
7911 case PLUS:
7912 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7914 rtx temp = operands[2];
7915 operands[2] = operands[1];
7916 operands[1] = temp;
7919 /* We now know operands[0] == operands[1]. */
7921 if (GET_CODE (operands[2]) == MEM)
7923 p = "%z2\t%2";
7924 break;
7927 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7929 if (STACK_TOP_P (operands[0]))
7930 /* How is it that we are storing to a dead operand[2]?
7931 Well, presumably operands[1] is dead too. We can't
7932 store the result to st(0) as st(0) gets popped on this
7933 instruction. Instead store to operands[2] (which I
7934 think has to be st(1)). st(1) will be popped later.
7935 gcc <= 2.8.1 didn't have this check and generated
7936 assembly code that the Unixware assembler rejected. */
7937 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7938 else
7939 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7940 break;
7943 if (STACK_TOP_P (operands[0]))
7944 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7945 else
7946 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7947 break;
7949 case MINUS:
7950 case DIV:
7951 if (GET_CODE (operands[1]) == MEM)
7953 p = "r%z1\t%1";
7954 break;
7957 if (GET_CODE (operands[2]) == MEM)
7959 p = "%z2\t%2";
7960 break;
7963 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7965 #if SYSV386_COMPAT
7966 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7967 derived assemblers, confusingly reverse the direction of
7968 the operation for fsub{r} and fdiv{r} when the
7969 destination register is not st(0). The Intel assembler
7970 doesn't have this brain damage. Read !SYSV386_COMPAT to
7971 figure out what the hardware really does. */
7972 if (STACK_TOP_P (operands[0]))
7973 p = "{p\t%0, %2|rp\t%2, %0}";
7974 else
7975 p = "{rp\t%2, %0|p\t%0, %2}";
7976 #else
7977 if (STACK_TOP_P (operands[0]))
7978 /* As above for fmul/fadd, we can't store to st(0). */
7979 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7980 else
7981 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7982 #endif
7983 break;
7986 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7988 #if SYSV386_COMPAT
7989 if (STACK_TOP_P (operands[0]))
7990 p = "{rp\t%0, %1|p\t%1, %0}";
7991 else
7992 p = "{p\t%1, %0|rp\t%0, %1}";
7993 #else
7994 if (STACK_TOP_P (operands[0]))
7995 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7996 else
7997 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7998 #endif
7999 break;
8002 if (STACK_TOP_P (operands[0]))
8004 if (STACK_TOP_P (operands[1]))
8005 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8006 else
8007 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8008 break;
8010 else if (STACK_TOP_P (operands[1]))
8012 #if SYSV386_COMPAT
8013 p = "{\t%1, %0|r\t%0, %1}";
8014 #else
8015 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8016 #endif
8018 else
8020 #if SYSV386_COMPAT
8021 p = "{r\t%2, %0|\t%0, %2}";
8022 #else
8023 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8024 #endif
8026 break;
8028 default:
8029 abort ();
8032 strcat (buf, p);
8033 return buf;
8036 /* Output code to initialize control word copies used by
8037 trunc?f?i patterns. NORMAL is set to the current control word, while ROUND_DOWN
8038 is set to a control word that forces truncation (rounding toward zero). */
8039 void
8040 emit_i387_cw_initialization (rtx normal, rtx round_down)
8042 rtx reg = gen_reg_rtx (HImode);
8044 emit_insn (gen_x86_fnstcw_1 (normal));
8045 emit_move_insn (reg, normal);
8046 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
8047 && !TARGET_64BIT)
8048 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8049 else
8050 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8051 emit_move_insn (round_down, reg);
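/* Note on the constants above: bits 10 and 11 of the i387 control word
   form the rounding-control field, and or-ing in 0xc00 sets both of them,
   selecting round-toward-zero, which is what the trunc?f?i patterns rely
   on (the movsi_insv_1 form stores 0xc into the high byte, setting the
   same two bits).  */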
8054 /* Output code for INSN to convert a float to a signed int. OPERANDS
8055 are the insn operands. The output may be [HSD]Imode and the input
8056 operand may be [SDX]Fmode. */
8058 const char *
8059 output_fix_trunc (rtx insn, rtx *operands)
8061 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8062 int dimode_p = GET_MODE (operands[0]) == DImode;
8064 /* Jump through a hoop or two for DImode, since the hardware has no
8065 non-popping instruction. We used to do this a different way, but
8066 that was somewhat fragile and broke with post-reload splitters. */
8067 if (dimode_p && !stack_top_dies)
8068 output_asm_insn ("fld\t%y1", operands);
8070 if (!STACK_TOP_P (operands[1]))
8071 abort ();
8073 if (GET_CODE (operands[0]) != MEM)
8074 abort ();
8076 output_asm_insn ("fldcw\t%3", operands);
8077 if (stack_top_dies || dimode_p)
8078 output_asm_insn ("fistp%z0\t%0", operands);
8079 else
8080 output_asm_insn ("fist%z0\t%0", operands);
8081 output_asm_insn ("fldcw\t%2", operands);
8083 return "";
8086 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8087 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8088 when fucom should be used. */
8090 const char *
8091 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8093 int stack_top_dies;
8094 rtx cmp_op0 = operands[0];
8095 rtx cmp_op1 = operands[1];
8096 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8098 if (eflags_p == 2)
8100 cmp_op0 = cmp_op1;
8101 cmp_op1 = operands[2];
8103 if (is_sse)
8105 if (GET_MODE (operands[0]) == SFmode)
8106 if (unordered_p)
8107 return "ucomiss\t{%1, %0|%0, %1}";
8108 else
8109 return "comiss\t{%1, %0|%0, %1}";
8110 else
8111 if (unordered_p)
8112 return "ucomisd\t{%1, %0|%0, %1}";
8113 else
8114 return "comisd\t{%1, %0|%0, %1}";
8117 if (! STACK_TOP_P (cmp_op0))
8118 abort ();
8120 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8122 if (STACK_REG_P (cmp_op1)
8123 && stack_top_dies
8124 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8125 && REGNO (cmp_op1) != FIRST_STACK_REG)
8127 /* If the top of the 387 stack dies, and the other operand
8128 is also a stack register that dies, then this must be an
8129 `fcompp' float compare. */
8131 if (eflags_p == 1)
8133 /* There is no double popping fcomi variant. Fortunately,
8134 eflags is immune from the fstp's cc clobbering. */
8135 if (unordered_p)
8136 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8137 else
8138 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8139 return "fstp\t%y0";
8141 else
8143 if (eflags_p == 2)
8145 if (unordered_p)
8146 return "fucompp\n\tfnstsw\t%0";
8147 else
8148 return "fcompp\n\tfnstsw\t%0";
8150 else
8152 if (unordered_p)
8153 return "fucompp";
8154 else
8155 return "fcompp";
8159 else
8161 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
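/* For example, mask == 3 (eflags_p = 0, integer operand = 0,
   unordered_p = 1, stack_top_dies = 1) selects "fucomp%z1\t%y1" from the
   table below.  */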
8163 static const char * const alt[24] =
8165 "fcom%z1\t%y1",
8166 "fcomp%z1\t%y1",
8167 "fucom%z1\t%y1",
8168 "fucomp%z1\t%y1",
8170 "ficom%z1\t%y1",
8171 "ficomp%z1\t%y1",
8172 NULL,
8173 NULL,
8175 "fcomi\t{%y1, %0|%0, %y1}",
8176 "fcomip\t{%y1, %0|%0, %y1}",
8177 "fucomi\t{%y1, %0|%0, %y1}",
8178 "fucomip\t{%y1, %0|%0, %y1}",
8180 NULL,
8181 NULL,
8182 NULL,
8183 NULL,
8185 "fcom%z2\t%y2\n\tfnstsw\t%0",
8186 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8187 "fucom%z2\t%y2\n\tfnstsw\t%0",
8188 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8190 "ficom%z2\t%y2\n\tfnstsw\t%0",
8191 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8192 NULL,
8193 NULL
8196 int mask;
8197 const char *ret;
8199 mask = eflags_p << 3;
8200 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8201 mask |= unordered_p << 1;
8202 mask |= stack_top_dies;
8204 if (mask >= 24)
8205 abort ();
8206 ret = alt[mask];
8207 if (ret == NULL)
8208 abort ();
8210 return ret;
8214 void
8215 ix86_output_addr_vec_elt (FILE *file, int value)
8217 const char *directive = ASM_LONG;
8219 if (TARGET_64BIT)
8221 #ifdef ASM_QUAD
8222 directive = ASM_QUAD;
8223 #else
8224 abort ();
8225 #endif
8228 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8231 void
8232 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8234 if (TARGET_64BIT)
8235 fprintf (file, "%s%s%d-%s%d\n",
8236 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8237 else if (HAVE_AS_GOTOFF_IN_DATA)
8238 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8239 #if TARGET_MACHO
8240 else if (TARGET_MACHO)
8242 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8243 machopic_output_function_base_name (file);
8244 fprintf(file, "\n");
8246 #endif
8247 else
8248 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8249 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
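/* As a concrete illustration (assuming a typical ELF/GAS configuration
   where ASM_LONG expands to "\t.long\t" and the local label prefix is
   ".L"): the HAVE_AS_GOTOFF_IN_DATA branch above emits a line such as
   "\t.long\t.L3@GOTOFF", while the 64-bit branch emits a plain label
   difference such as "\t.long\t.L3-.L2".  */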
8252 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8253 for the target. */
8255 void
8256 ix86_expand_clear (rtx dest)
8258 rtx tmp;
8260 /* We play register width games, which are only valid after reload. */
8261 if (!reload_completed)
8262 abort ();
8264 /* Avoid HImode and its attendant prefix byte. */
8265 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8266 dest = gen_rtx_REG (SImode, REGNO (dest));
8268 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8270 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8271 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8273 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8274 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8277 emit_insn (tmp);
8280 /* X is an unchanging MEM. If it is a constant pool reference, return
8281 the constant pool rtx, else NULL. */
8283 static rtx
8284 maybe_get_pool_constant (rtx x)
8286 x = ix86_delegitimize_address (XEXP (x, 0));
8288 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8289 return get_pool_constant (x);
8291 return NULL_RTX;
8294 void
8295 ix86_expand_move (enum machine_mode mode, rtx operands[])
8297 int strict = (reload_in_progress || reload_completed);
8298 rtx op0, op1;
8299 enum tls_model model;
8301 op0 = operands[0];
8302 op1 = operands[1];
8304 model = tls_symbolic_operand (op1, Pmode);
8305 if (model)
8307 op1 = legitimize_tls_address (op1, model, true);
8308 op1 = force_operand (op1, op0);
8309 if (op1 == op0)
8310 return;
8313 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8315 #if TARGET_MACHO
8316 if (MACHOPIC_PURE)
8318 rtx temp = ((reload_in_progress
8319 || ((op0 && GET_CODE (op0) == REG)
8320 && mode == Pmode))
8321 ? op0 : gen_reg_rtx (Pmode));
8322 op1 = machopic_indirect_data_reference (op1, temp);
8323 op1 = machopic_legitimize_pic_address (op1, mode,
8324 temp == op1 ? 0 : temp);
8326 else if (MACHOPIC_INDIRECT)
8327 op1 = machopic_indirect_data_reference (op1, 0);
8328 if (op0 == op1)
8329 return;
8330 #else
8331 if (GET_CODE (op0) == MEM)
8332 op1 = force_reg (Pmode, op1);
8333 else
8335 rtx temp = op0;
8336 if (GET_CODE (temp) != REG)
8337 temp = gen_reg_rtx (Pmode);
8338 temp = legitimize_pic_address (op1, temp);
8339 if (temp == op0)
8340 return;
8341 op1 = temp;
8343 #endif /* TARGET_MACHO */
8345 else
8347 if (GET_CODE (op0) == MEM
8348 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8349 || !push_operand (op0, mode))
8350 && GET_CODE (op1) == MEM)
8351 op1 = force_reg (mode, op1);
8353 if (push_operand (op0, mode)
8354 && ! general_no_elim_operand (op1, mode))
8355 op1 = copy_to_mode_reg (mode, op1);
8357 /* Force large constants in 64-bit compilation into a register
8358 so that they get CSEd. */
8359 if (TARGET_64BIT && mode == DImode
8360 && immediate_operand (op1, mode)
8361 && !x86_64_zero_extended_value (op1)
8362 && !register_operand (op0, mode)
8363 && optimize && !reload_completed && !reload_in_progress)
8364 op1 = copy_to_mode_reg (mode, op1);
8366 if (FLOAT_MODE_P (mode))
8368 /* If we are loading a floating point constant to a register,
8369 force the value to memory now, since we'll get better code
8370 out the back end. */
8372 if (strict)
8374 else if (GET_CODE (op1) == CONST_DOUBLE)
8376 op1 = validize_mem (force_const_mem (mode, op1));
8377 if (!register_operand (op0, mode))
8379 rtx temp = gen_reg_rtx (mode);
8380 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8381 emit_move_insn (op0, temp);
8382 return;
8388 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8391 void
8392 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8394 /* Force constants other than zero into memory. We do not know how
8395 the instructions used to build constants modify the upper 64 bits
8396 of the register; once we have that information we may be able
8397 to handle some of them more efficiently. */
8398 if ((reload_in_progress | reload_completed) == 0
8399 && register_operand (operands[0], mode)
8400 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8401 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8403 /* Make operand1 a register if it isn't already. */
8404 if (!no_new_pseudos
8405 && !register_operand (operands[0], mode)
8406 && !register_operand (operands[1], mode))
8408 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8409 emit_move_insn (operands[0], temp);
8410 return;
8413 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8416 /* Attempt to expand a binary operator. Make the expansion closer to the
8417 actual machine than just general_operand, which would allow 3 separate
8418 memory references (one output, two input) in a single insn. */
8420 void
8421 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8422 rtx operands[])
8424 int matching_memory;
8425 rtx src1, src2, dst, op, clob;
8427 dst = operands[0];
8428 src1 = operands[1];
8429 src2 = operands[2];
8431 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8432 if (GET_RTX_CLASS (code) == 'c'
8433 && (rtx_equal_p (dst, src2)
8434 || immediate_operand (src1, mode)))
8436 rtx temp = src1;
8437 src1 = src2;
8438 src2 = temp;
8441 /* If the destination is memory, and we do not have matching source
8442 operands, do things in registers. */
8443 matching_memory = 0;
8444 if (GET_CODE (dst) == MEM)
8446 if (rtx_equal_p (dst, src1))
8447 matching_memory = 1;
8448 else if (GET_RTX_CLASS (code) == 'c'
8449 && rtx_equal_p (dst, src2))
8450 matching_memory = 2;
8451 else
8452 dst = gen_reg_rtx (mode);
8455 /* Both source operands cannot be in memory. */
8456 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8458 if (matching_memory != 2)
8459 src2 = force_reg (mode, src2);
8460 else
8461 src1 = force_reg (mode, src1);
8464 /* If the operation is not commutative, source 1 cannot be a constant
8465 or non-matching memory. */
8466 if ((CONSTANT_P (src1)
8467 || (!matching_memory && GET_CODE (src1) == MEM))
8468 && GET_RTX_CLASS (code) != 'c')
8469 src1 = force_reg (mode, src1);
8471 /* If optimizing, copy to regs to improve CSE */
8472 if (optimize && ! no_new_pseudos)
8474 if (GET_CODE (dst) == MEM)
8475 dst = gen_reg_rtx (mode);
8476 if (GET_CODE (src1) == MEM)
8477 src1 = force_reg (mode, src1);
8478 if (GET_CODE (src2) == MEM)
8479 src2 = force_reg (mode, src2);
8482 /* Emit the instruction. */
8484 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8485 if (reload_in_progress)
8487 /* Reload doesn't know about the flags register, and doesn't know that
8488 it doesn't want to clobber it. We can only do this with PLUS. */
8489 if (code != PLUS)
8490 abort ();
8491 emit_insn (op);
8493 else
8495 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8496 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8499 /* Fix up the destination if needed. */
8500 if (dst != operands[0])
8501 emit_move_insn (operands[0], dst);
8504 /* Return TRUE or FALSE depending on whether the binary operator meets the
8505 appropriate constraints. */
8508 ix86_binary_operator_ok (enum rtx_code code,
8509 enum machine_mode mode ATTRIBUTE_UNUSED,
8510 rtx operands[3])
8512 /* Both source operands cannot be in memory. */
8513 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8514 return 0;
8515 /* If the operation is not commutative, source 1 cannot be a constant. */
8516 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8517 return 0;
8518 /* If the destination is memory, we must have a matching source operand. */
8519 if (GET_CODE (operands[0]) == MEM
8520 && ! (rtx_equal_p (operands[0], operands[1])
8521 || (GET_RTX_CLASS (code) == 'c'
8522 && rtx_equal_p (operands[0], operands[2]))))
8523 return 0;
8524 /* If the operation is not commutative and source 1 is memory, we must
8525 have a matching destination. */
8526 if (GET_CODE (operands[1]) == MEM
8527 && GET_RTX_CLASS (code) != 'c'
8528 && ! rtx_equal_p (operands[0], operands[1]))
8529 return 0;
8530 return 1;
8533 /* Attempt to expand a unary operator. Make the expansion closer to the
8534 actual machine than just general_operand, which would allow 2 separate
8535 memory references (one output, one input) in a single insn. */
8537 void
8538 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8539 rtx operands[])
8541 int matching_memory;
8542 rtx src, dst, op, clob;
8544 dst = operands[0];
8545 src = operands[1];
8547 /* If the destination is memory, and we do not have matching source
8548 operands, do things in registers. */
8549 matching_memory = 0;
8550 if (GET_CODE (dst) == MEM)
8552 if (rtx_equal_p (dst, src))
8553 matching_memory = 1;
8554 else
8555 dst = gen_reg_rtx (mode);
8558 /* When source operand is memory, destination must match. */
8559 if (!matching_memory && GET_CODE (src) == MEM)
8560 src = force_reg (mode, src);
8562 /* If optimizing, copy to regs to improve CSE */
8563 if (optimize && ! no_new_pseudos)
8565 if (GET_CODE (dst) == MEM)
8566 dst = gen_reg_rtx (mode);
8567 if (GET_CODE (src) == MEM)
8568 src = force_reg (mode, src);
8571 /* Emit the instruction. */
8573 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8574 if (reload_in_progress || code == NOT)
8576 /* Reload doesn't know about the flags register, and doesn't know that
8577 it doesn't want to clobber it. */
8578 if (code != NOT)
8579 abort ();
8580 emit_insn (op);
8582 else
8584 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8585 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8588 /* Fix up the destination if needed. */
8589 if (dst != operands[0])
8590 emit_move_insn (operands[0], dst);
8593 /* Return TRUE or FALSE depending on whether the unary operator meets the
8594 appropriate constraints. */
8597 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8598 enum machine_mode mode ATTRIBUTE_UNUSED,
8599 rtx operands[2] ATTRIBUTE_UNUSED)
8601 /* If one of operands is memory, source and destination must match. */
8602 if ((GET_CODE (operands[0]) == MEM
8603 || GET_CODE (operands[1]) == MEM)
8604 && ! rtx_equal_p (operands[0], operands[1]))
8605 return FALSE;
8606 return TRUE;
8609 /* Return TRUE or FALSE depending on whether the first SET in INSN
8610 has source and destination with matching CC modes, and that the
8611 CC mode is at least as constrained as REQ_MODE. */
8614 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8616 rtx set;
8617 enum machine_mode set_mode;
8619 set = PATTERN (insn);
8620 if (GET_CODE (set) == PARALLEL)
8621 set = XVECEXP (set, 0, 0);
8622 if (GET_CODE (set) != SET)
8623 abort ();
8624 if (GET_CODE (SET_SRC (set)) != COMPARE)
8625 abort ();
8627 set_mode = GET_MODE (SET_DEST (set));
8628 switch (set_mode)
8630 case CCNOmode:
8631 if (req_mode != CCNOmode
8632 && (req_mode != CCmode
8633 || XEXP (SET_SRC (set), 1) != const0_rtx))
8634 return 0;
8635 break;
8636 case CCmode:
8637 if (req_mode == CCGCmode)
8638 return 0;
8639 /* FALLTHRU */
8640 case CCGCmode:
8641 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8642 return 0;
8643 /* FALLTHRU */
8644 case CCGOCmode:
8645 if (req_mode == CCZmode)
8646 return 0;
8647 /* FALLTHRU */
8648 case CCZmode:
8649 break;
8651 default:
8652 abort ();
8655 return (GET_MODE (SET_SRC (set)) == set_mode);
8658 /* Generate insn patterns to do an integer compare of OPERANDS. */
8660 static rtx
8661 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8663 enum machine_mode cmpmode;
8664 rtx tmp, flags;
8666 cmpmode = SELECT_CC_MODE (code, op0, op1);
8667 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8669 /* This is very simple, but making the interface the same as in the
8670 FP case makes the rest of the code easier. */
8671 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8672 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8674 /* Return the test that should be put into the flags user, i.e.
8675 the bcc, scc, or cmov instruction. */
8676 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
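/* For instance, for a signed SImode greater-than test against a register
   operand, SELECT_CC_MODE picks CCGCmode (see ix86_cc_mode below), so this
   emits roughly (set (reg:CCGC 17) (compare:CCGC op0 op1)) -- hard register
   17 being the flags register -- and hands back
   (gt (reg:CCGC 17) (const_int 0)) for the caller to drop into a bcc, scc
   or cmov pattern.  */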
8679 /* Figure out whether to use ordered or unordered fp comparisons.
8680 Return the appropriate mode to use. */
8682 enum machine_mode
8683 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8685 /* ??? In order to make all comparisons reversible, we do all comparisons
8686 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8687 between trapping and nontrapping forms of all comparisons, we can make inequality
8688 comparisons trapping again, since it results in better code when using
8689 FCOM based compares. */
8690 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8693 enum machine_mode
8694 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8696 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8697 return ix86_fp_compare_mode (code);
8698 switch (code)
8700 /* Only zero flag is needed. */
8701 case EQ: /* ZF=0 */
8702 case NE: /* ZF!=0 */
8703 return CCZmode;
8704 /* Codes needing carry flag. */
8705 case GEU: /* CF=0 */
8706 case GTU: /* CF=0 & ZF=0 */
8707 case LTU: /* CF=1 */
8708 case LEU: /* CF=1 | ZF=1 */
8709 return CCmode;
8710 /* Codes possibly doable only with sign flag when
8711 comparing against zero. */
8712 case GE: /* SF=OF or SF=0 */
8713 case LT: /* SF<>OF or SF=1 */
8714 if (op1 == const0_rtx)
8715 return CCGOCmode;
8716 else
8717 /* For the other cases the carry flag is not required. */
8718 return CCGCmode;
8719 /* Codes doable only with sign flag when comparing
8720 against zero, but we have no jump instruction for them,
8721 so we need to use relational tests that rely on the
8722 overflow flag being zero. */
8723 case GT: /* ZF=0 & SF=OF */
8724 case LE: /* ZF=1 | SF<>OF */
8725 if (op1 == const0_rtx)
8726 return CCNOmode;
8727 else
8728 return CCGCmode;
8729 /* The strcmp pattern does (use flags) and combine may ask us for a proper
8730 mode. */
8731 case USE:
8732 return CCmode;
8733 default:
8734 abort ();
8738 /* Return the fixed registers used for condition codes. */
8740 static bool
8741 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8743 *p1 = FLAGS_REG;
8744 *p2 = FPSR_REG;
8745 return true;
8748 /* If two condition code modes are compatible, return a condition code
8749 mode which is compatible with both. Otherwise, return
8750 VOIDmode. */
8752 static enum machine_mode
8753 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8755 if (m1 == m2)
8756 return m1;
8758 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8759 return VOIDmode;
8761 if ((m1 == CCGCmode && m2 == CCGOCmode)
8762 || (m1 == CCGOCmode && m2 == CCGCmode))
8763 return CCGCmode;
8765 switch (m1)
8767 default:
8768 abort ();
8770 case CCmode:
8771 case CCGCmode:
8772 case CCGOCmode:
8773 case CCNOmode:
8774 case CCZmode:
8775 switch (m2)
8777 default:
8778 return VOIDmode;
8780 case CCmode:
8781 case CCGCmode:
8782 case CCGOCmode:
8783 case CCNOmode:
8784 case CCZmode:
8785 return CCmode;
8788 case CCFPmode:
8789 case CCFPUmode:
8790 /* These are only compatible with themselves, which we already
8791 checked above. */
8792 return VOIDmode;
8796 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8799 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8801 enum rtx_code swapped_code = swap_condition (code);
8802 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8803 || (ix86_fp_comparison_cost (swapped_code)
8804 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8807 /* Swap, force into registers, or otherwise massage the two operands
8808 to a fp comparison. The operands are updated in place; the new
8809 comparison code is returned. */
8811 static enum rtx_code
8812 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8814 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8815 rtx op0 = *pop0, op1 = *pop1;
8816 enum machine_mode op_mode = GET_MODE (op0);
8817 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8819 /* All of the unordered compare instructions only work on registers.
8820 The same is true of the XFmode compare instructions. The same is
8821 true of the fcomi compare instructions. */
8823 if (!is_sse
8824 && (fpcmp_mode == CCFPUmode
8825 || op_mode == XFmode
8826 || ix86_use_fcomi_compare (code)))
8828 op0 = force_reg (op_mode, op0);
8829 op1 = force_reg (op_mode, op1);
8831 else
8833 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8834 things around if they appear profitable, otherwise force op0
8835 into a register. */
8837 if (standard_80387_constant_p (op0) == 0
8838 || (GET_CODE (op0) == MEM
8839 && ! (standard_80387_constant_p (op1) == 0
8840 || GET_CODE (op1) == MEM)))
8842 rtx tmp;
8843 tmp = op0, op0 = op1, op1 = tmp;
8844 code = swap_condition (code);
8847 if (GET_CODE (op0) != REG)
8848 op0 = force_reg (op_mode, op0);
8850 if (CONSTANT_P (op1))
8852 if (standard_80387_constant_p (op1))
8853 op1 = force_reg (op_mode, op1);
8854 else
8855 op1 = validize_mem (force_const_mem (op_mode, op1));
8859 /* Try to rearrange the comparison to make it cheaper. */
8860 if (ix86_fp_comparison_cost (code)
8861 > ix86_fp_comparison_cost (swap_condition (code))
8862 && (GET_CODE (op1) == REG || !no_new_pseudos))
8864 rtx tmp;
8865 tmp = op0, op0 = op1, op1 = tmp;
8866 code = swap_condition (code);
8867 if (GET_CODE (op0) != REG)
8868 op0 = force_reg (op_mode, op0);
8871 *pop0 = op0;
8872 *pop1 = op1;
8873 return code;
8876 /* Convert the comparison codes we use to represent FP comparisons to the integer
8877 code that will result in a proper branch. Return UNKNOWN if no such code
8878 is available. */
8879 static enum rtx_code
8880 ix86_fp_compare_code_to_integer (enum rtx_code code)
8882 switch (code)
8884 case GT:
8885 return GTU;
8886 case GE:
8887 return GEU;
8888 case ORDERED:
8889 case UNORDERED:
8890 return code;
8891 break;
8892 case UNEQ:
8893 return EQ;
8894 break;
8895 case UNLT:
8896 return LTU;
8897 break;
8898 case UNLE:
8899 return LEU;
8900 break;
8901 case LTGT:
8902 return NE;
8903 break;
8904 default:
8905 return UNKNOWN;
8909 /* Split comparison code CODE into comparisons we can do using branch
8910 instructions. BYPASS_CODE is the comparison code for a branch that will
8911 branch around FIRST_CODE and SECOND_CODE. If one of the branches
8912 is not required, its code is set to NIL.
8913 We never require more than two branches. */
8914 static void
8915 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8916 enum rtx_code *first_code,
8917 enum rtx_code *second_code)
8919 *first_code = code;
8920 *bypass_code = NIL;
8921 *second_code = NIL;
8923 /* The fcomi comparison sets flags as follows:
8925 cmp ZF PF CF
8926 > 0 0 0
8927 < 0 0 1
8928 = 1 0 0
8929 un 1 1 1 */
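/* For example, a plain LT cannot be tested directly: per the table above
   CF is also 1 for the unordered result, so LT is rewritten below as UNLT
   guarded by an UNORDERED bypass branch around it.  */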
8931 switch (code)
8933 case GT: /* GTU - CF=0 & ZF=0 */
8934 case GE: /* GEU - CF=0 */
8935 case ORDERED: /* PF=0 */
8936 case UNORDERED: /* PF=1 */
8937 case UNEQ: /* EQ - ZF=1 */
8938 case UNLT: /* LTU - CF=1 */
8939 case UNLE: /* LEU - CF=1 | ZF=1 */
8940 case LTGT: /* EQ - ZF=0 */
8941 break;
8942 case LT: /* LTU - CF=1 - fails on unordered */
8943 *first_code = UNLT;
8944 *bypass_code = UNORDERED;
8945 break;
8946 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8947 *first_code = UNLE;
8948 *bypass_code = UNORDERED;
8949 break;
8950 case EQ: /* EQ - ZF=1 - fails on unordered */
8951 *first_code = UNEQ;
8952 *bypass_code = UNORDERED;
8953 break;
8954 case NE: /* NE - ZF=0 - fails on unordered */
8955 *first_code = LTGT;
8956 *second_code = UNORDERED;
8957 break;
8958 case UNGE: /* GEU - CF=0 - fails on unordered */
8959 *first_code = GE;
8960 *second_code = UNORDERED;
8961 break;
8962 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8963 *first_code = GT;
8964 *second_code = UNORDERED;
8965 break;
8966 default:
8967 abort ();
8969 if (!TARGET_IEEE_FP)
8971 *second_code = NIL;
8972 *bypass_code = NIL;
8976 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
8977 All the following functions use the number of instructions as a cost metric.
8978 In the future this should be tweaked to compute bytes for optimize_size and
8979 to take into account the performance of various instructions on various CPUs. */
8980 static int
8981 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8983 if (!TARGET_IEEE_FP)
8984 return 4;
8985 /* The cost of code output by ix86_expand_fp_compare. */
8986 switch (code)
8988 case UNLE:
8989 case UNLT:
8990 case LTGT:
8991 case GT:
8992 case GE:
8993 case UNORDERED:
8994 case ORDERED:
8995 case UNEQ:
8996 return 4;
8997 break;
8998 case LT:
8999 case NE:
9000 case EQ:
9001 case UNGE:
9002 return 5;
9003 break;
9004 case LE:
9005 case UNGT:
9006 return 6;
9007 break;
9008 default:
9009 abort ();
9013 /* Return cost of comparison done using fcomi operation.
9014 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9015 static int
9016 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9018 enum rtx_code bypass_code, first_code, second_code;
9019 /* Return an arbitrarily high cost when the instruction is not supported; this
9020 prevents gcc from using it. */
9021 if (!TARGET_CMOVE)
9022 return 1024;
9023 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9024 return (bypass_code != NIL || second_code != NIL) + 2;
9027 /* Return cost of comparison done using sahf operation.
9028 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9029 static int
9030 ix86_fp_comparison_sahf_cost (enum rtx_code code)
9032 enum rtx_code bypass_code, first_code, second_code;
9033 /* Return an arbitrarily high cost when the instruction is not preferred; this
9034 keeps gcc from using it. */
9035 if (!TARGET_USE_SAHF && !optimize_size)
9036 return 1024;
9037 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9038 return (bypass_code != NIL || second_code != NIL) + 3;
9041 /* Compute cost of the comparison done using any method.
9042 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9043 static int
9044 ix86_fp_comparison_cost (enum rtx_code code)
9046 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9047 int min;
9049 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9050 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9052 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9053 if (min > sahf_cost)
9054 min = sahf_cost;
9055 if (min > fcomi_cost)
9056 min = fcomi_cost;
9057 return min;
9060 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9062 static rtx
9063 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9064 rtx *second_test, rtx *bypass_test)
9066 enum machine_mode fpcmp_mode, intcmp_mode;
9067 rtx tmp, tmp2;
9068 int cost = ix86_fp_comparison_cost (code);
9069 enum rtx_code bypass_code, first_code, second_code;
9071 fpcmp_mode = ix86_fp_compare_mode (code);
9072 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9074 if (second_test)
9075 *second_test = NULL_RTX;
9076 if (bypass_test)
9077 *bypass_test = NULL_RTX;
9079 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9081 /* Do fcomi/sahf based test when profitable. */
9082 if ((bypass_code == NIL || bypass_test)
9083 && (second_code == NIL || second_test)
9084 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9086 if (TARGET_CMOVE)
9088 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9089 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9090 tmp);
9091 emit_insn (tmp);
9093 else
9095 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9096 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9097 if (!scratch)
9098 scratch = gen_reg_rtx (HImode);
9099 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9100 emit_insn (gen_x86_sahf_1 (scratch));
9103 /* The FP codes work out to act like unsigned. */
9104 intcmp_mode = fpcmp_mode;
9105 code = first_code;
9106 if (bypass_code != NIL)
9107 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9108 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9109 const0_rtx);
9110 if (second_code != NIL)
9111 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9112 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9113 const0_rtx);
9115 else
9117 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9118 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9119 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9120 if (!scratch)
9121 scratch = gen_reg_rtx (HImode);
9122 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9124 /* In the unordered case, we have to check C2 for NaN's, which
9125 doesn't happen to work out to anything nice combination-wise.
9126 So do some bit twiddling on the value we've got in AH to come
9127 up with an appropriate set of condition codes. */
9129 intcmp_mode = CCNOmode;
9130 switch (code)
9132 case GT:
9133 case UNGT:
9134 if (code == GT || !TARGET_IEEE_FP)
9136 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9137 code = EQ;
9139 else
9141 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9142 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9143 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9144 intcmp_mode = CCmode;
9145 code = GEU;
9147 break;
9148 case LT:
9149 case UNLT:
9150 if (code == LT && TARGET_IEEE_FP)
9152 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9153 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9154 intcmp_mode = CCmode;
9155 code = EQ;
9157 else
9159 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9160 code = NE;
9162 break;
9163 case GE:
9164 case UNGE:
9165 if (code == GE || !TARGET_IEEE_FP)
9167 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9168 code = EQ;
9170 else
9172 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9173 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9174 GEN_INT (0x01)));
9175 code = NE;
9177 break;
9178 case LE:
9179 case UNLE:
9180 if (code == LE && TARGET_IEEE_FP)
9182 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9183 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9184 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9185 intcmp_mode = CCmode;
9186 code = LTU;
9188 else
9190 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9191 code = NE;
9193 break;
9194 case EQ:
9195 case UNEQ:
9196 if (code == EQ && TARGET_IEEE_FP)
9198 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9199 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9200 intcmp_mode = CCmode;
9201 code = EQ;
9203 else
9205 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9206 code = NE;
9207 break;
9209 break;
9210 case NE:
9211 case LTGT:
9212 if (code == NE && TARGET_IEEE_FP)
9214 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9215 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9216 GEN_INT (0x40)));
9217 code = NE;
9219 else
9221 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9222 code = EQ;
9224 break;
9226 case UNORDERED:
9227 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9228 code = NE;
9229 break;
9230 case ORDERED:
9231 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9232 code = EQ;
9233 break;
9235 default:
9236 abort ();
9240 /* Return the test that should be put into the flags user, i.e.
9241 the bcc, scc, or cmov instruction. */
9242 return gen_rtx_fmt_ee (code, VOIDmode,
9243 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9244 const0_rtx);
9248 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9250 rtx op0, op1, ret;
9251 op0 = ix86_compare_op0;
9252 op1 = ix86_compare_op1;
9254 if (second_test)
9255 *second_test = NULL_RTX;
9256 if (bypass_test)
9257 *bypass_test = NULL_RTX;
9259 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9260 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9261 second_test, bypass_test);
9262 else
9263 ret = ix86_expand_int_compare (code, op0, op1);
9265 return ret;
9268 /* Return true if CODE will result in a nontrivial jump sequence. */
9269 bool
9270 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9272 enum rtx_code bypass_code, first_code, second_code;
9273 if (!TARGET_CMOVE)
9274 return true;
9275 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9276 return bypass_code != NIL || second_code != NIL;
9279 void
9280 ix86_expand_branch (enum rtx_code code, rtx label)
9282 rtx tmp;
9284 switch (GET_MODE (ix86_compare_op0))
9286 case QImode:
9287 case HImode:
9288 case SImode:
9289 simple:
9290 tmp = ix86_expand_compare (code, NULL, NULL);
9291 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9292 gen_rtx_LABEL_REF (VOIDmode, label),
9293 pc_rtx);
9294 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9295 return;
9297 case SFmode:
9298 case DFmode:
9299 case XFmode:
9301 rtvec vec;
9302 int use_fcomi;
9303 enum rtx_code bypass_code, first_code, second_code;
9305 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9306 &ix86_compare_op1);
9308 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9310 /* Check whether we will use the natural sequence with one jump. If
9311 so, we can expand the jump early. Otherwise delay expansion by
9312 creating a compound insn so as not to confuse the optimizers. */
9313 if (bypass_code == NIL && second_code == NIL
9314 && TARGET_CMOVE)
9316 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9317 gen_rtx_LABEL_REF (VOIDmode, label),
9318 pc_rtx, NULL_RTX);
9320 else
9322 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9323 ix86_compare_op0, ix86_compare_op1);
9324 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9325 gen_rtx_LABEL_REF (VOIDmode, label),
9326 pc_rtx);
9327 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9329 use_fcomi = ix86_use_fcomi_compare (code);
9330 vec = rtvec_alloc (3 + !use_fcomi);
9331 RTVEC_ELT (vec, 0) = tmp;
9332 RTVEC_ELT (vec, 1)
9333 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9334 RTVEC_ELT (vec, 2)
9335 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9336 if (! use_fcomi)
9337 RTVEC_ELT (vec, 3)
9338 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9340 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9342 return;
9345 case DImode:
9346 if (TARGET_64BIT)
9347 goto simple;
9348 /* Expand DImode branch into multiple compare+branch. */
9350 rtx lo[2], hi[2], label2;
9351 enum rtx_code code1, code2, code3;
9353 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9355 tmp = ix86_compare_op0;
9356 ix86_compare_op0 = ix86_compare_op1;
9357 ix86_compare_op1 = tmp;
9358 code = swap_condition (code);
9360 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9361 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9363 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9364 avoid two branches. This costs one extra insn, so disable when
9365 optimizing for size. */
9367 if ((code == EQ || code == NE)
9368 && (!optimize_size
9369 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9371 rtx xor0, xor1;
9373 xor1 = hi[0];
9374 if (hi[1] != const0_rtx)
9375 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9376 NULL_RTX, 0, OPTAB_WIDEN);
9378 xor0 = lo[0];
9379 if (lo[1] != const0_rtx)
9380 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9381 NULL_RTX, 0, OPTAB_WIDEN);
9383 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9384 NULL_RTX, 0, OPTAB_WIDEN);
9386 ix86_compare_op0 = tmp;
9387 ix86_compare_op1 = const0_rtx;
9388 ix86_expand_branch (code, label);
9389 return;
9392 /* Otherwise, if we are doing a less-than or greater-or-equal-than comparison,
9393 op1 is a constant, and the low word is zero, then we can just
9394 examine the high word. */
9396 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9397 switch (code)
9399 case LT: case LTU: case GE: case GEU:
9400 ix86_compare_op0 = hi[0];
9401 ix86_compare_op1 = hi[1];
9402 ix86_expand_branch (code, label);
9403 return;
9404 default:
9405 break;
9408 /* Otherwise, we need two or three jumps. */
9410 label2 = gen_label_rtx ();
9412 code1 = code;
9413 code2 = swap_condition (code);
9414 code3 = unsigned_condition (code);
9416 switch (code)
9418 case LT: case GT: case LTU: case GTU:
9419 break;
9421 case LE: code1 = LT; code2 = GT; break;
9422 case GE: code1 = GT; code2 = LT; break;
9423 case LEU: code1 = LTU; code2 = GTU; break;
9424 case GEU: code1 = GTU; code2 = LTU; break;
9426 case EQ: code1 = NIL; code2 = NE; break;
9427 case NE: code2 = NIL; break;
9429 default:
9430 abort ();
9434 * a < b =>
9435 * if (hi(a) < hi(b)) goto true;
9436 * if (hi(a) > hi(b)) goto false;
9437 * if (lo(a) < lo(b)) goto true;
9438 * false:
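* For example, a signed LE compare uses code1 = LT, code2 = GT, code3 = LEU:
* if (hi(a) < hi(b)) goto true;
* if (hi(a) > hi(b)) goto false;
* if (lo(a) <= lo(b)) goto true;   (unsigned compare)
* false: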
9441 ix86_compare_op0 = hi[0];
9442 ix86_compare_op1 = hi[1];
9444 if (code1 != NIL)
9445 ix86_expand_branch (code1, label);
9446 if (code2 != NIL)
9447 ix86_expand_branch (code2, label2);
9449 ix86_compare_op0 = lo[0];
9450 ix86_compare_op1 = lo[1];
9451 ix86_expand_branch (code3, label);
9453 if (code2 != NIL)
9454 emit_label (label2);
9455 return;
9458 default:
9459 abort ();
9463 /* Split branch based on floating point condition. */
9464 void
9465 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9466 rtx target1, rtx target2, rtx tmp)
9468 rtx second, bypass;
9469 rtx label = NULL_RTX;
9470 rtx condition;
9471 int bypass_probability = -1, second_probability = -1, probability = -1;
9472 rtx i;
9474 if (target2 != pc_rtx)
9476 rtx tmp = target2;
9477 code = reverse_condition_maybe_unordered (code);
9478 target2 = target1;
9479 target1 = tmp;
9482 condition = ix86_expand_fp_compare (code, op1, op2,
9483 tmp, &second, &bypass);
9485 if (split_branch_probability >= 0)
9487 /* Distribute the probabilities across the jumps.
9488 Assume that BYPASS and SECOND always test
9489 for UNORDERED. */
9490 probability = split_branch_probability;
9492 /* A value of 1 is low enough that the probability does not need
9493 to be updated. Later we may run some experiments and see
9494 if unordered values are more frequent in practice. */
9495 if (bypass)
9496 bypass_probability = 1;
9497 if (second)
9498 second_probability = 1;
9500 if (bypass != NULL_RTX)
9502 label = gen_label_rtx ();
9503 i = emit_jump_insn (gen_rtx_SET
9504 (VOIDmode, pc_rtx,
9505 gen_rtx_IF_THEN_ELSE (VOIDmode,
9506 bypass,
9507 gen_rtx_LABEL_REF (VOIDmode,
9508 label),
9509 pc_rtx)));
9510 if (bypass_probability >= 0)
9511 REG_NOTES (i)
9512 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9513 GEN_INT (bypass_probability),
9514 REG_NOTES (i));
9516 i = emit_jump_insn (gen_rtx_SET
9517 (VOIDmode, pc_rtx,
9518 gen_rtx_IF_THEN_ELSE (VOIDmode,
9519 condition, target1, target2)));
9520 if (probability >= 0)
9521 REG_NOTES (i)
9522 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9523 GEN_INT (probability),
9524 REG_NOTES (i));
9525 if (second != NULL_RTX)
9527 i = emit_jump_insn (gen_rtx_SET
9528 (VOIDmode, pc_rtx,
9529 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9530 target2)));
9531 if (second_probability >= 0)
9532 REG_NOTES (i)
9533 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9534 GEN_INT (second_probability),
9535 REG_NOTES (i));
9537 if (label != NULL_RTX)
9538 emit_label (label);
9542 ix86_expand_setcc (enum rtx_code code, rtx dest)
9544 rtx ret, tmp, tmpreg, equiv;
9545 rtx second_test, bypass_test;
9547 if (GET_MODE (ix86_compare_op0) == DImode
9548 && !TARGET_64BIT)
9549 return 0; /* FAIL */
9551 if (GET_MODE (dest) != QImode)
9552 abort ();
9554 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9555 PUT_MODE (ret, QImode);
9557 tmp = dest;
9558 tmpreg = dest;
9560 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9561 if (bypass_test || second_test)
9563 rtx test = second_test;
9564 int bypass = 0;
9565 rtx tmp2 = gen_reg_rtx (QImode);
9566 if (bypass_test)
9568 if (second_test)
9569 abort ();
9570 test = bypass_test;
9571 bypass = 1;
9572 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9574 PUT_MODE (test, QImode);
9575 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9577 if (bypass)
9578 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9579 else
9580 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9583 /* Attach a REG_EQUAL note describing the comparison result. */
9584 equiv = simplify_gen_relational (code, QImode,
9585 GET_MODE (ix86_compare_op0),
9586 ix86_compare_op0, ix86_compare_op1);
9587 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9589 return 1; /* DONE */
9592 /* Expand a comparison setting or clearing the carry flag. Return true when
9593 successful, and set *pop to the resulting comparison operation. */
9594 static bool
9595 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9597 enum machine_mode mode =
9598 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9600 /* Do not handle DImode compares that go through the special path. Also we can't
9601 deal with FP compares yet; this would be possible to add. */
9602 if ((mode == DImode && !TARGET_64BIT))
9603 return false;
9604 if (FLOAT_MODE_P (mode))
9606 rtx second_test = NULL, bypass_test = NULL;
9607 rtx compare_op, compare_seq;
9609 /* Shortcut: the following common codes never translate into carry flag compares. */
9610 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9611 || code == ORDERED || code == UNORDERED)
9612 return false;
9614 /* These comparisons require the zero flag; swap operands so they won't. */
9615 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9616 && !TARGET_IEEE_FP)
9618 rtx tmp = op0;
9619 op0 = op1;
9620 op1 = tmp;
9621 code = swap_condition (code);
9624 /* Try to expand the comparison and verify that we end up with a carry flag
9625 based comparison. This fails to be true only when we decide to expand the
9626 comparison using arithmetic, which is not a common scenario. */
9627 start_sequence ();
9628 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9629 &second_test, &bypass_test);
9630 compare_seq = get_insns ();
9631 end_sequence ();
9633 if (second_test || bypass_test)
9634 return false;
9635 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9636 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9637 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9638 else
9639 code = GET_CODE (compare_op);
9640 if (code != LTU && code != GEU)
9641 return false;
9642 emit_insn (compare_seq);
9643 *pop = compare_op;
9644 return true;
9646 if (!INTEGRAL_MODE_P (mode))
9647 return false;
9648 switch (code)
9650 case LTU:
9651 case GEU:
9652 break;
9654 /* Convert a==0 into (unsigned)a<1. */
9655 case EQ:
9656 case NE:
9657 if (op1 != const0_rtx)
9658 return false;
9659 op1 = const1_rtx;
9660 code = (code == EQ ? LTU : GEU);
9661 break;
9663 /* Convert a>b into b<a or a>=b+1. */
9664 case GTU:
9665 case LEU:
9666 if (GET_CODE (op1) == CONST_INT)
9668 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9669 /* Bail out on overflow. We could still swap the operands, but that
9670 would force the constant to be loaded into a register. */
9671 if (op1 == const0_rtx
9672 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9673 return false;
9674 code = (code == GTU ? GEU : LTU);
9676 else
9678 rtx tmp = op1;
9679 op1 = op0;
9680 op0 = tmp;
9681 code = (code == GTU ? LTU : GEU);
9683 break;
9685 /* Convert a>=0 into (unsigned)a<0x80000000. */
9686 case LT:
9687 case GE:
9688 if (mode == DImode || op1 != const0_rtx)
9689 return false;
9690 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9691 code = (code == LT ? GEU : LTU);
9692 break;
9693 case LE:
9694 case GT:
9695 if (mode == DImode || op1 != constm1_rtx)
9696 return false;
9697 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9698 code = (code == LE ? GEU : LTU);
9699 break;
9701 default:
9702 return false;
9704 /* Swapping operands may cause a constant to appear as the first operand. */
9705 if (!nonimmediate_operand (op0, VOIDmode))
9707 if (no_new_pseudos)
9708 return false;
9709 op0 = force_reg (mode, op0);
9711 ix86_compare_op0 = op0;
9712 ix86_compare_op1 = op1;
9713 *pop = ix86_expand_compare (code, NULL, NULL);
9714 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9715 abort ();
9716 return true;
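/* Example of the transformation above: (eq a 0) is rewritten as the
   unsigned test a < 1; the resulting "cmp $1, a" sets the carry flag
   exactly when a is zero, so the caller can consume it with sbb.  */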
9720 ix86_expand_int_movcc (rtx operands[])
9722 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9723 rtx compare_seq, compare_op;
9724 rtx second_test, bypass_test;
9725 enum machine_mode mode = GET_MODE (operands[0]);
9726 bool sign_bit_compare_p = false;
9728 start_sequence ();
9729 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9730 compare_seq = get_insns ();
9731 end_sequence ();
9733 compare_code = GET_CODE (compare_op);
9735 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9736 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9737 sign_bit_compare_p = true;
9739 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9740 HImode insns, we'd be swallowed in word prefix ops. */
9742 if ((mode != HImode || TARGET_FAST_PREFIX)
9743 && (mode != DImode || TARGET_64BIT)
9744 && GET_CODE (operands[2]) == CONST_INT
9745 && GET_CODE (operands[3]) == CONST_INT)
9747 rtx out = operands[0];
9748 HOST_WIDE_INT ct = INTVAL (operands[2]);
9749 HOST_WIDE_INT cf = INTVAL (operands[3]);
9750 HOST_WIDE_INT diff;
9752 diff = ct - cf;
9753 /* Sign bit compares are better done using shifts than using
9754 sbb. */
9755 if (sign_bit_compare_p
9756 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9757 ix86_compare_op1, &compare_op))
9759 /* Detect overlap between destination and compare sources. */
9760 rtx tmp = out;
9762 if (!sign_bit_compare_p)
9764 bool fpcmp = false;
9766 compare_code = GET_CODE (compare_op);
9768 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9769 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9771 fpcmp = true;
9772 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9775 /* To simplify rest of code, restrict to the GEU case. */
9776 if (compare_code == LTU)
9778 HOST_WIDE_INT tmp = ct;
9779 ct = cf;
9780 cf = tmp;
9781 compare_code = reverse_condition (compare_code);
9782 code = reverse_condition (code);
9784 else
9786 if (fpcmp)
9787 PUT_CODE (compare_op,
9788 reverse_condition_maybe_unordered
9789 (GET_CODE (compare_op)));
9790 else
9791 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9793 diff = ct - cf;
9795 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9796 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9797 tmp = gen_reg_rtx (mode);
9799 if (mode == DImode)
9800 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9801 else
9802 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9804 else
9806 if (code == GT || code == GE)
9807 code = reverse_condition (code);
9808 else
9810 HOST_WIDE_INT tmp = ct;
9811 ct = cf;
9812 cf = tmp;
9813 diff = ct - cf;
9815 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9816 ix86_compare_op1, VOIDmode, 0, -1);
9819 if (diff == 1)
9822 * cmpl op0,op1
9823 * sbbl dest,dest
9824 * [addl dest, ct]
9826 * Size 5 - 8.
9828 if (ct)
9829 tmp = expand_simple_binop (mode, PLUS,
9830 tmp, GEN_INT (ct),
9831 copy_rtx (tmp), 1, OPTAB_DIRECT);
9833 else if (cf == -1)
9836 * cmpl op0,op1
9837 * sbbl dest,dest
9838 * orl $ct, dest
9840 * Size 8.
9842 tmp = expand_simple_binop (mode, IOR,
9843 tmp, GEN_INT (ct),
9844 copy_rtx (tmp), 1, OPTAB_DIRECT);
9846 else if (diff == -1 && ct)
9849 * cmpl op0,op1
9850 * sbbl dest,dest
9851 * notl dest
9852 * [addl dest, cf]
9854 * Size 8 - 11.
9856 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9857 if (cf)
9858 tmp = expand_simple_binop (mode, PLUS,
9859 copy_rtx (tmp), GEN_INT (cf),
9860 copy_rtx (tmp), 1, OPTAB_DIRECT);
9862 else
9865 * cmpl op0,op1
9866 * sbbl dest,dest
9867 * [notl dest]
9868 * andl cf - ct, dest
9869 * [addl dest, ct]
9871 * Size 8 - 11.
9874 if (cf == 0)
9876 cf = ct;
9877 ct = 0;
9878 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9881 tmp = expand_simple_binop (mode, AND,
9882 copy_rtx (tmp),
9883 gen_int_mode (cf - ct, mode),
9884 copy_rtx (tmp), 1, OPTAB_DIRECT);
9885 if (ct)
9886 tmp = expand_simple_binop (mode, PLUS,
9887 copy_rtx (tmp), GEN_INT (ct),
9888 copy_rtx (tmp), 1, OPTAB_DIRECT);
9891 if (!rtx_equal_p (tmp, out))
9892 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9894 return 1; /* DONE */
9897 if (diff < 0)
9899 HOST_WIDE_INT tmp;
9900 tmp = ct, ct = cf, cf = tmp;
9901 diff = -diff;
9902 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9904 /* We may be reversing an unordered compare to a normal compare, which
9905 is not valid in general (we may convert a non-trapping condition
9906 to a trapping one); however, on i386 we currently emit all
9907 comparisons unordered. */
9908 compare_code = reverse_condition_maybe_unordered (compare_code);
9909 code = reverse_condition_maybe_unordered (code);
9911 else
9913 compare_code = reverse_condition (compare_code);
9914 code = reverse_condition (code);
9918 compare_code = NIL;
9919 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9920 && GET_CODE (ix86_compare_op1) == CONST_INT)
9922 if (ix86_compare_op1 == const0_rtx
9923 && (code == LT || code == GE))
9924 compare_code = code;
9925 else if (ix86_compare_op1 == constm1_rtx)
9927 if (code == LE)
9928 compare_code = LT;
9929 else if (code == GT)
9930 compare_code = GE;
9934 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9935 if (compare_code != NIL
9936 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9937 && (cf == -1 || ct == -1))
9939 /* If lea code below could be used, only optimize
9940 if it results in a 2 insn sequence. */
9942 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9943 || diff == 3 || diff == 5 || diff == 9)
9944 || (compare_code == LT && ct == -1)
9945 || (compare_code == GE && cf == -1))
9948 * notl op1 (if necessary)
9949 * sarl $31, op1
9950 * orl cf, op1
9952 if (ct != -1)
9954 cf = ct;
9955 ct = -1;
9956 code = reverse_condition (code);
9959 out = emit_store_flag (out, code, ix86_compare_op0,
9960 ix86_compare_op1, VOIDmode, 0, -1);
9962 out = expand_simple_binop (mode, IOR,
9963 out, GEN_INT (cf),
9964 out, 1, OPTAB_DIRECT);
9965 if (out != operands[0])
9966 emit_move_insn (operands[0], out);
9968 return 1; /* DONE */
9973 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9974 || diff == 3 || diff == 5 || diff == 9)
9975 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9976 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9979 * xorl dest,dest
9980 * cmpl op1,op2
9981 * setcc dest
9982 * lea cf(dest*(ct-cf)),dest
9984 * Size 14.
9986 * This also catches the degenerate setcc-only case.
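*
* (For illustration, register chosen arbitrarily: setcc leaves 0 or 1 in
* dest, and one lea maps that to cf or ct whenever diff is 1, 2, 3, 4, 5,
* 8 or 9; e.g. with cf == 7 and diff == 9, leal 7(%eax,%eax,8), %eax
* produces 7 or 16.)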
9989 rtx tmp;
9990 int nops;
9992 out = emit_store_flag (out, code, ix86_compare_op0,
9993 ix86_compare_op1, VOIDmode, 0, 1);
9995 nops = 0;
9996 /* On x86_64 the lea instruction operates on Pmode, so we need
9997 to do the arithmetic in the proper mode to match. */
9998 if (diff == 1)
9999 tmp = copy_rtx (out);
10000 else
10002 rtx out1;
10003 out1 = copy_rtx (out);
10004 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10005 nops++;
10006 if (diff & 1)
10008 tmp = gen_rtx_PLUS (mode, tmp, out1);
10009 nops++;
10012 if (cf != 0)
10014 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10015 nops++;
10017 if (!rtx_equal_p (tmp, out))
10019 if (nops == 1)
10020 out = force_operand (tmp, copy_rtx (out));
10021 else
10022 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10024 if (!rtx_equal_p (out, operands[0]))
10025 emit_move_insn (operands[0], copy_rtx (out));
10027 return 1; /* DONE */
10031 * General case: Jumpful:
10032 * xorl dest,dest cmpl op1, op2
10033 * cmpl op1, op2 movl ct, dest
10034 * setcc dest jcc 1f
10035 * decl dest movl cf, dest
10036 * andl (cf-ct),dest 1:
10037 * addl ct,dest
10039 * Size 20. Size 14.
10041 * This is reasonably steep, but branch mispredict costs are
10042 * high on modern cpus, so consider failing only if optimizing
10043 * for space.
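*
* (Worked example for the branch-free column on the left, with ct == 10 and
* cf == 20: setcc/decl leaves 0 or -1, andl $10 turns that into 0 or 10,
* and addl $10 then gives 10 (ct) or 20 (cf) with no branch.)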
10046 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10047 && BRANCH_COST >= 2)
10049 if (cf == 0)
10051 cf = ct;
10052 ct = 0;
10053 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10054 /* We may be reversing an unordered compare to a normal compare,
10055 which is not valid in general (we may convert a non-trapping
10056 condition to a trapping one); however, on i386 we currently
10057 emit all comparisons unordered. */
10058 code = reverse_condition_maybe_unordered (code);
10059 else
10061 code = reverse_condition (code);
10062 if (compare_code != NIL)
10063 compare_code = reverse_condition (compare_code);
10067 if (compare_code != NIL)
10069 /* notl op1 (if needed)
10070 sarl $31, op1
10071 andl (cf-ct), op1
10072 addl ct, op1
10074 For x < 0 (resp. x <= -1) there will be no notl,
10075 so if possible swap the constants to get rid of the
10076 complement.
10077 True/false will be -1/0 while code below (store flag
10078 followed by decrement) is 0/-1, so the constants need
10079 to be exchanged once more. */
10081 if (compare_code == GE || !cf)
10083 code = reverse_condition (code);
10084 compare_code = LT;
10086 else
10088 HOST_WIDE_INT tmp = cf;
10089 cf = ct;
10090 ct = tmp;
10093 out = emit_store_flag (out, code, ix86_compare_op0,
10094 ix86_compare_op1, VOIDmode, 0, -1);
10096 else
10098 out = emit_store_flag (out, code, ix86_compare_op0,
10099 ix86_compare_op1, VOIDmode, 0, 1);
10101 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10102 copy_rtx (out), 1, OPTAB_DIRECT);
10105 out = expand_simple_binop (mode, AND, copy_rtx (out),
10106 gen_int_mode (cf - ct, mode),
10107 copy_rtx (out), 1, OPTAB_DIRECT);
10108 if (ct)
10109 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10110 copy_rtx (out), 1, OPTAB_DIRECT);
10111 if (!rtx_equal_p (out, operands[0]))
10112 emit_move_insn (operands[0], copy_rtx (out));
10114 return 1; /* DONE */
10118 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10120 /* Try a few things more with specific constants and a variable. */
10122 optab op;
10123 rtx var, orig_out, out, tmp;
10125 if (BRANCH_COST <= 2)
10126 return 0; /* FAIL */
10128 /* If one of the two operands is an interesting constant, load a
10129 constant with the above and mask it in with a logical operation. */
10131 if (GET_CODE (operands[2]) == CONST_INT)
10133 var = operands[3];
10134 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10135 operands[3] = constm1_rtx, op = and_optab;
10136 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10137 operands[3] = const0_rtx, op = ior_optab;
10138 else
10139 return 0; /* FAIL */
10141 else if (GET_CODE (operands[3]) == CONST_INT)
10143 var = operands[2];
10144 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10145 operands[2] = constm1_rtx, op = and_optab;
10146 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
10147 operands[2] = const0_rtx, op = ior_optab;
10148 else
10149 return 0; /* FAIL */
10151 else
10152 return 0; /* FAIL */
10154 orig_out = operands[0];
10155 tmp = gen_reg_rtx (mode);
10156 operands[0] = tmp;
10158 /* Recurse to get the constant loaded. */
10159 if (ix86_expand_int_movcc (operands) == 0)
10160 return 0; /* FAIL */
10162 /* Mask in the interesting variable. */
10163 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10164 OPTAB_WIDEN);
10165 if (!rtx_equal_p (out, orig_out))
10166 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10168 return 1; /* DONE */
10172 * For comparison with above,
10174 * movl cf,dest
10175 * movl ct,tmp
10176 * cmpl op1,op2
10177 * cmovcc tmp,dest
10179 * Size 15.
10182 if (! nonimmediate_operand (operands[2], mode))
10183 operands[2] = force_reg (mode, operands[2]);
10184 if (! nonimmediate_operand (operands[3], mode))
10185 operands[3] = force_reg (mode, operands[3]);
10187 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10189 rtx tmp = gen_reg_rtx (mode);
10190 emit_move_insn (tmp, operands[3]);
10191 operands[3] = tmp;
10193 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10195 rtx tmp = gen_reg_rtx (mode);
10196 emit_move_insn (tmp, operands[2]);
10197 operands[2] = tmp;
10200 if (! register_operand (operands[2], VOIDmode)
10201 && (mode == QImode
10202 || ! register_operand (operands[3], VOIDmode)))
10203 operands[2] = force_reg (mode, operands[2]);
10205 if (mode == QImode
10206 && ! register_operand (operands[3], VOIDmode))
10207 operands[3] = force_reg (mode, operands[3]);
10209 emit_insn (compare_seq);
10210 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10211 gen_rtx_IF_THEN_ELSE (mode,
10212 compare_op, operands[2],
10213 operands[3])));
10214 if (bypass_test)
10215 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10216 gen_rtx_IF_THEN_ELSE (mode,
10217 bypass_test,
10218 copy_rtx (operands[3]),
10219 copy_rtx (operands[0]))));
10220 if (second_test)
10221 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10222 gen_rtx_IF_THEN_ELSE (mode,
10223 second_test,
10224 copy_rtx (operands[2]),
10225 copy_rtx (operands[0]))));
10227 return 1; /* DONE */
10231 ix86_expand_fp_movcc (rtx operands[])
10233 enum rtx_code code;
10234 rtx tmp;
10235 rtx compare_op, second_test, bypass_test;
10237 /* For SF/DFmode conditional moves based on comparisons
10238 in the same mode, we may want to use SSE min/max instructions. */
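/* A rough illustration (hypothetical source, not from this file): for
SFmode operands, t = (a < b) ? a : b can collapse to a single minss when
the comparison operands and the conditional move operands (cross-)match
as handled below. */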
10239 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10240 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10241 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10242 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
10243 && (!TARGET_IEEE_FP
10244 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10245 /* We may be called from the post-reload splitter. */
10246 && (!REG_P (operands[0])
10247 || SSE_REG_P (operands[0])
10248 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10250 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10251 code = GET_CODE (operands[1]);
10253 /* See if we have (cross) match between comparison operands and
10254 conditional move operands. */
10255 if (rtx_equal_p (operands[2], op1))
10257 rtx tmp = op0;
10258 op0 = op1;
10259 op1 = tmp;
10260 code = reverse_condition_maybe_unordered (code);
10262 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10264 /* Check for min operation. */
10265 if (code == LT || code == UNLE)
10267 if (code == UNLE)
10269 rtx tmp = op0;
10270 op0 = op1;
10271 op1 = tmp;
10273 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10274 if (memory_operand (op0, VOIDmode))
10275 op0 = force_reg (GET_MODE (operands[0]), op0);
10276 if (GET_MODE (operands[0]) == SFmode)
10277 emit_insn (gen_minsf3 (operands[0], op0, op1));
10278 else
10279 emit_insn (gen_mindf3 (operands[0], op0, op1));
10280 return 1;
10282 /* Check for max operation. */
10283 if (code == GT || code == UNGE)
10285 if (code == UNGE)
10287 rtx tmp = op0;
10288 op0 = op1;
10289 op1 = tmp;
10291 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10292 if (memory_operand (op0, VOIDmode))
10293 op0 = force_reg (GET_MODE (operands[0]), op0);
10294 if (GET_MODE (operands[0]) == SFmode)
10295 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10296 else
10297 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10298 return 1;
10301 /* Massage the condition to be an sse_comparison_operator. When we are
10302 in non-IEEE mode, try to canonicalize the destination operand
10303 to be first in the comparison - this helps reload avoid extra
10304 moves. */
10305 if (!sse_comparison_operator (operands[1], VOIDmode)
10306 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10308 rtx tmp = ix86_compare_op0;
10309 ix86_compare_op0 = ix86_compare_op1;
10310 ix86_compare_op1 = tmp;
10311 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10312 VOIDmode, ix86_compare_op0,
10313 ix86_compare_op1);
10315 /* Similarly, try to arrange for the result to be the first operand of the
10316 conditional move. We also don't support the NE comparison on SSE, so try
10317 to avoid it. */
10318 if ((rtx_equal_p (operands[0], operands[3])
10319 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10320 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10322 rtx tmp = operands[2];
10323 operands[2] = operands[3];
10324 operands[3] = tmp;
10325 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10326 (GET_CODE (operands[1])),
10327 VOIDmode, ix86_compare_op0,
10328 ix86_compare_op1);
10330 if (GET_MODE (operands[0]) == SFmode)
10331 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10332 operands[2], operands[3],
10333 ix86_compare_op0, ix86_compare_op1));
10334 else
10335 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10336 operands[2], operands[3],
10337 ix86_compare_op0, ix86_compare_op1));
10338 return 1;
10341 /* The floating point conditional move instructions don't directly
10342 support conditions resulting from a signed integer comparison. */
10344 code = GET_CODE (operands[1]);
10345 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10347 /* The floating point conditional move instructions don't directly
10348 support signed integer comparisons. */
10350 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10352 if (second_test != NULL || bypass_test != NULL)
10353 abort ();
10354 tmp = gen_reg_rtx (QImode);
10355 ix86_expand_setcc (code, tmp);
10356 code = NE;
10357 ix86_compare_op0 = tmp;
10358 ix86_compare_op1 = const0_rtx;
10359 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10361 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10363 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10364 emit_move_insn (tmp, operands[3]);
10365 operands[3] = tmp;
10367 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10369 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10370 emit_move_insn (tmp, operands[2]);
10371 operands[2] = tmp;
10374 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10375 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10376 compare_op,
10377 operands[2],
10378 operands[3])));
10379 if (bypass_test)
10380 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10381 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10382 bypass_test,
10383 operands[3],
10384 operands[0])));
10385 if (second_test)
10386 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10387 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10388 second_test,
10389 operands[2],
10390 operands[0])));
10392 return 1;
10395 /* Expand conditional increment or decrement using adc/sbb instructions.
10396 The default case using setcc followed by the conditional move can be
10397 done by generic code. */
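/* A rough illustration (hypothetical source and register choices, not from
this file): for unsigned operands, if (a < b) x++; can be emitted as
cmpl %ebx, %eax ; adcl $0, %ecx
since the compare sets the carry flag exactly when a < b, and adc then
adds that flag into x. */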
10399 ix86_expand_int_addcc (rtx operands[])
10401 enum rtx_code code = GET_CODE (operands[1]);
10402 rtx compare_op;
10403 rtx val = const0_rtx;
10404 bool fpcmp = false;
10405 enum machine_mode mode = GET_MODE (operands[0]);
10407 if (operands[3] != const1_rtx
10408 && operands[3] != constm1_rtx)
10409 return 0;
10410 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10411 ix86_compare_op1, &compare_op))
10412 return 0;
10413 code = GET_CODE (compare_op);
10415 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10416 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10418 fpcmp = true;
10419 code = ix86_fp_compare_code_to_integer (code);
10422 if (code != LTU)
10424 val = constm1_rtx;
10425 if (fpcmp)
10426 PUT_CODE (compare_op,
10427 reverse_condition_maybe_unordered
10428 (GET_CODE (compare_op)));
10429 else
10430 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10432 PUT_MODE (compare_op, mode);
10434 /* Construct either adc or sbb insn. */
10435 if ((code == LTU) == (operands[3] == constm1_rtx))
10437 switch (GET_MODE (operands[0]))
10439 case QImode:
10440 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10441 break;
10442 case HImode:
10443 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10444 break;
10445 case SImode:
10446 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10447 break;
10448 case DImode:
10449 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10450 break;
10451 default:
10452 abort ();
10455 else
10457 switch (GET_MODE (operands[0]))
10459 case QImode:
10460 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10461 break;
10462 case HImode:
10463 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10464 break;
10465 case SImode:
10466 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10467 break;
10468 case DImode:
10469 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10470 break;
10471 default:
10472 abort ();
10475 return 1; /* DONE */
10479 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10480 works for floating point parameters and non-offsettable memories.
10481 For pushes, it returns just stack offsets; the values will be saved
10482 in the right order. At most three parts are generated. */
10484 static int
10485 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10487 int size;
10489 if (!TARGET_64BIT)
10490 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10491 else
10492 size = (GET_MODE_SIZE (mode) + 4) / 8;
10494 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10495 abort ();
10496 if (size < 2 || size > 3)
10497 abort ();
10499 /* Optimize constant pool references to immediates. This is used by fp
10500 moves, which force all constants to memory to allow combining. */
10501 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10503 rtx tmp = maybe_get_pool_constant (operand);
10504 if (tmp)
10505 operand = tmp;
10508 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10511 /* The only non-offsettable memories we handle are pushes. */
10511 if (! push_operand (operand, VOIDmode))
10512 abort ();
10514 operand = copy_rtx (operand);
10515 PUT_MODE (operand, Pmode);
10516 parts[0] = parts[1] = parts[2] = operand;
10518 else if (!TARGET_64BIT)
10520 if (mode == DImode)
10521 split_di (&operand, 1, &parts[0], &parts[1]);
10522 else
10524 if (REG_P (operand))
10526 if (!reload_completed)
10527 abort ();
10528 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10529 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10530 if (size == 3)
10531 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10533 else if (offsettable_memref_p (operand))
10535 operand = adjust_address (operand, SImode, 0);
10536 parts[0] = operand;
10537 parts[1] = adjust_address (operand, SImode, 4);
10538 if (size == 3)
10539 parts[2] = adjust_address (operand, SImode, 8);
10541 else if (GET_CODE (operand) == CONST_DOUBLE)
10543 REAL_VALUE_TYPE r;
10544 long l[4];
10546 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10547 switch (mode)
10549 case XFmode:
10550 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10551 parts[2] = gen_int_mode (l[2], SImode);
10552 break;
10553 case DFmode:
10554 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10555 break;
10556 default:
10557 abort ();
10559 parts[1] = gen_int_mode (l[1], SImode);
10560 parts[0] = gen_int_mode (l[0], SImode);
10562 else
10563 abort ();
10566 else
10568 if (mode == TImode)
10569 split_ti (&operand, 1, &parts[0], &parts[1]);
10570 if (mode == XFmode || mode == TFmode)
10572 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
10573 if (REG_P (operand))
10575 if (!reload_completed)
10576 abort ();
10577 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10578 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10580 else if (offsettable_memref_p (operand))
10582 operand = adjust_address (operand, DImode, 0);
10583 parts[0] = operand;
10584 parts[1] = adjust_address (operand, upper_mode, 8);
10586 else if (GET_CODE (operand) == CONST_DOUBLE)
10588 REAL_VALUE_TYPE r;
10589 long l[3];
10591 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10592 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10593 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10594 if (HOST_BITS_PER_WIDE_INT >= 64)
10595 parts[0]
10596 = gen_int_mode
10597 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10598 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10599 DImode);
10600 else
10601 parts[0] = immed_double_const (l[0], l[1], DImode);
10602 if (upper_mode == SImode)
10603 parts[1] = gen_int_mode (l[2], SImode);
10604 else if (HOST_BITS_PER_WIDE_INT >= 64)
10605 parts[1]
10606 = gen_int_mode
10607 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10608 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10609 DImode);
10610 else
10611 parts[1] = immed_double_const (l[2], l[3], DImode);
10613 else
10614 abort ();
10618 return size;
10621 /* Emit insns to perform a move or push of DI, DF, and XF values.
10622 All required insns are emitted here. Operands 2-4 contain the
10623 input values in the correct order; operands 5-7 contain the
10624 output values. */
10626 void
10627 ix86_split_long_move (rtx operands[])
10629 rtx part[2][3];
10630 int nparts;
10631 int push = 0;
10632 int collisions = 0;
10633 enum machine_mode mode = GET_MODE (operands[0]);
10635 /* The DFmode expanders may ask us to move a double.
10636 For a 64-bit target this is a single move. By hiding that fact
10637 here we simplify the i386.md splitters. */
10638 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10640 /* Optimize constant pool references to immediates. This is used by
10641 fp moves, which force all constants to memory to allow combining. */
10643 if (GET_CODE (operands[1]) == MEM
10644 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10645 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10646 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10647 if (push_operand (operands[0], VOIDmode))
10649 operands[0] = copy_rtx (operands[0]);
10650 PUT_MODE (operands[0], Pmode);
10652 else
10653 operands[0] = gen_lowpart (DImode, operands[0]);
10654 operands[1] = gen_lowpart (DImode, operands[1]);
10655 emit_move_insn (operands[0], operands[1]);
10656 return;
10659 /* The only non-offsettable memory we handle is push. */
10660 if (push_operand (operands[0], VOIDmode))
10661 push = 1;
10662 else if (GET_CODE (operands[0]) == MEM
10663 && ! offsettable_memref_p (operands[0]))
10664 abort ();
10666 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10667 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10669 /* When emitting push, take care for source operands on the stack. */
10670 if (push && GET_CODE (operands[1]) == MEM
10671 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10673 if (nparts == 3)
10674 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10675 XEXP (part[1][2], 0));
10676 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10677 XEXP (part[1][1], 0));
10680 /* We need to do the copy in the right order in case an address register
10681 of the source overlaps the destination. */
10682 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10684 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10685 collisions++;
10686 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10687 collisions++;
10688 if (nparts == 3
10689 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10690 collisions++;
10692 /* Collision in the middle part can be handled by reordering. */
10693 if (collisions == 1 && nparts == 3
10694 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10696 rtx tmp;
10697 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10698 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10701 /* If there are more collisions, we can't handle them by reordering.
10702 Do an lea to the last part and use only one colliding move. */
10703 else if (collisions > 1)
10705 rtx base;
10707 collisions = 1;
10709 base = part[0][nparts - 1];
10711 /* Handle the case when the last part isn't valid for lea.
10712 Happens in 64-bit mode storing the 12-byte XFmode. */
10713 if (GET_MODE (base) != Pmode)
10714 base = gen_rtx_REG (Pmode, REGNO (base));
10716 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10717 part[1][0] = replace_equiv_address (part[1][0], base);
10718 part[1][1] = replace_equiv_address (part[1][1],
10719 plus_constant (base, UNITS_PER_WORD));
10720 if (nparts == 3)
10721 part[1][2] = replace_equiv_address (part[1][2],
10722 plus_constant (base, 8));
10726 if (push)
10728 if (!TARGET_64BIT)
10730 if (nparts == 3)
10732 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10733 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10734 emit_move_insn (part[0][2], part[1][2]);
10737 else
10739 /* In 64-bit mode we don't have a 32-bit push available. If this is a
10740 register, that is OK - we will just use the larger counterpart. We also
10741 retype memories - these come from an attempt to avoid a REX prefix on
10742 the move of the second half of a TFmode value. */
10743 if (GET_MODE (part[1][1]) == SImode)
10745 if (GET_CODE (part[1][1]) == MEM)
10746 part[1][1] = adjust_address (part[1][1], DImode, 0);
10747 else if (REG_P (part[1][1]))
10748 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10749 else
10750 abort ();
10751 if (GET_MODE (part[1][0]) == SImode)
10752 part[1][0] = part[1][1];
10755 emit_move_insn (part[0][1], part[1][1]);
10756 emit_move_insn (part[0][0], part[1][0]);
10757 return;
10760 /* Choose the correct order so as not to overwrite the source before it is copied. */
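/* For illustration (hypothetical registers): when loading a DImode value
from memory addressed by %eax into the %eax/%edx pair, the word that
overwrites %eax must be moved last, e.g.
movl 4(%eax), %edx ; movl (%eax), %eax. */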
10761 if ((REG_P (part[0][0])
10762 && REG_P (part[1][1])
10763 && (REGNO (part[0][0]) == REGNO (part[1][1])
10764 || (nparts == 3
10765 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10766 || (collisions > 0
10767 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10769 if (nparts == 3)
10771 operands[2] = part[0][2];
10772 operands[3] = part[0][1];
10773 operands[4] = part[0][0];
10774 operands[5] = part[1][2];
10775 operands[6] = part[1][1];
10776 operands[7] = part[1][0];
10778 else
10780 operands[2] = part[0][1];
10781 operands[3] = part[0][0];
10782 operands[5] = part[1][1];
10783 operands[6] = part[1][0];
10786 else
10788 if (nparts == 3)
10790 operands[2] = part[0][0];
10791 operands[3] = part[0][1];
10792 operands[4] = part[0][2];
10793 operands[5] = part[1][0];
10794 operands[6] = part[1][1];
10795 operands[7] = part[1][2];
10797 else
10799 operands[2] = part[0][0];
10800 operands[3] = part[0][1];
10801 operands[5] = part[1][0];
10802 operands[6] = part[1][1];
10805 emit_move_insn (operands[2], operands[5]);
10806 emit_move_insn (operands[3], operands[6]);
10807 if (nparts == 3)
10808 emit_move_insn (operands[4], operands[7]);
10810 return;
10813 void
10814 ix86_split_ashldi (rtx *operands, rtx scratch)
10816 rtx low[2], high[2];
10817 int count;
10819 if (GET_CODE (operands[2]) == CONST_INT)
10821 split_di (operands, 2, low, high);
10822 count = INTVAL (operands[2]) & 63;
10824 if (count >= 32)
10826 emit_move_insn (high[0], low[1]);
10827 emit_move_insn (low[0], const0_rtx);
10829 if (count > 32)
10830 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10832 else
10834 if (!rtx_equal_p (operands[0], operands[1]))
10835 emit_move_insn (operands[0], operands[1]);
10836 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10837 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10840 else
10842 if (!rtx_equal_p (operands[0], operands[1]))
10843 emit_move_insn (operands[0], operands[1]);
10845 split_di (operands, 1, low, high);
10847 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10848 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10850 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10852 if (! no_new_pseudos)
10853 scratch = force_reg (SImode, const0_rtx);
10854 else
10855 emit_move_insn (scratch, const0_rtx);
10857 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10858 scratch));
10860 else
10861 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10865 void
10866 ix86_split_ashrdi (rtx *operands, rtx scratch)
10868 rtx low[2], high[2];
10869 int count;
10871 if (GET_CODE (operands[2]) == CONST_INT)
10873 split_di (operands, 2, low, high);
10874 count = INTVAL (operands[2]) & 63;
10876 if (count >= 32)
10878 emit_move_insn (low[0], high[1]);
10880 if (! reload_completed)
10881 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10882 else
10884 emit_move_insn (high[0], low[0]);
10885 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10888 if (count > 32)
10889 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10891 else
10893 if (!rtx_equal_p (operands[0], operands[1]))
10894 emit_move_insn (operands[0], operands[1]);
10895 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10896 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10899 else
10901 if (!rtx_equal_p (operands[0], operands[1]))
10902 emit_move_insn (operands[0], operands[1]);
10904 split_di (operands, 1, low, high);
10906 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10907 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10909 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10911 if (! no_new_pseudos)
10912 scratch = gen_reg_rtx (SImode);
10913 emit_move_insn (scratch, high[0]);
10914 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10915 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10916 scratch));
10918 else
10919 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10923 void
10924 ix86_split_lshrdi (rtx *operands, rtx scratch)
10926 rtx low[2], high[2];
10927 int count;
10929 if (GET_CODE (operands[2]) == CONST_INT)
10931 split_di (operands, 2, low, high);
10932 count = INTVAL (operands[2]) & 63;
10934 if (count >= 32)
10936 emit_move_insn (low[0], high[1]);
10937 emit_move_insn (high[0], const0_rtx);
10939 if (count > 32)
10940 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10942 else
10944 if (!rtx_equal_p (operands[0], operands[1]))
10945 emit_move_insn (operands[0], operands[1]);
10946 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10947 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10950 else
10952 if (!rtx_equal_p (operands[0], operands[1]))
10953 emit_move_insn (operands[0], operands[1]);
10955 split_di (operands, 1, low, high);
10957 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10958 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10960 /* Heh. By reversing the arguments, we can reuse this pattern. */
10961 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10963 if (! no_new_pseudos)
10964 scratch = force_reg (SImode, const0_rtx);
10965 else
10966 emit_move_insn (scratch, const0_rtx);
10968 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10969 scratch));
10971 else
10972 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10976 /* Helper function for the string operations below. Test whether the bits
10977 of VARIABLE selected by VALUE are zero, and if so jump to the returned label. */
10978 static rtx
10979 ix86_expand_aligntest (rtx variable, int value)
10981 rtx label = gen_label_rtx ();
10982 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10983 if (GET_MODE (variable) == DImode)
10984 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10985 else
10986 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10987 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10988 1, label);
10989 return label;
10992 /* Adjust COUNTER by the VALUE. */
10993 static void
10994 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
10996 if (GET_MODE (countreg) == DImode)
10997 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10998 else
10999 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
11002 /* Zero extend possibly SImode EXP to Pmode register. */
11004 ix86_zero_extend_to_Pmode (rtx exp)
11006 rtx r;
11007 if (GET_MODE (exp) == VOIDmode)
11008 return force_reg (Pmode, exp);
11009 if (GET_MODE (exp) == Pmode)
11010 return copy_to_mode_reg (Pmode, exp);
11011 r = gen_reg_rtx (Pmode);
11012 emit_insn (gen_zero_extendsidi2 (r, exp));
11013 return r;
11016 /* Expand string move (memcpy) operation. Use i386 string operations when
11017 profitable. expand_clrstr contains similar code. */
11019 ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
11021 rtx srcreg, destreg, countreg, srcexp, destexp;
11022 enum machine_mode counter_mode;
11023 HOST_WIDE_INT align = 0;
11024 unsigned HOST_WIDE_INT count = 0;
11026 if (GET_CODE (align_exp) == CONST_INT)
11027 align = INTVAL (align_exp);
11029 /* Can't use any of this if the user has appropriated esi or edi. */
11030 if (global_regs[4] || global_regs[5])
11031 return 0;
11033 /* This simple hack avoids all inlining code and simplifies code below. */
11034 if (!TARGET_ALIGN_STRINGOPS)
11035 align = 64;
11037 if (GET_CODE (count_exp) == CONST_INT)
11039 count = INTVAL (count_exp);
11040 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11041 return 0;
11044 /* Figure out the proper mode for the counter. For 32 bits it is always SImode;
11045 for 64 bits use SImode when possible, otherwise DImode.
11046 Set count to the number of bytes copied when known at compile time. */
11047 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11048 || x86_64_zero_extended_value (count_exp))
11049 counter_mode = SImode;
11050 else
11051 counter_mode = DImode;
11053 if (counter_mode != SImode && counter_mode != DImode)
11054 abort ();
11056 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11057 if (destreg != XEXP (dst, 0))
11058 dst = replace_equiv_address_nv (dst, destreg);
11059 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11060 if (srcreg != XEXP (src, 0))
11061 src = replace_equiv_address_nv (src, srcreg);
11063 /* When optimizing for size emit simple rep ; movsb instruction for
11064 counts not divisible by 4. */
11066 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11068 emit_insn (gen_cld ());
11069 countreg = ix86_zero_extend_to_Pmode (count_exp);
11070 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11071 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11072 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11073 destexp, srcexp));
11076 /* For constant-size aligned (or small unaligned) copies, use rep movsl
11077 followed by code copying the rest. For PentiumPro, ensure 8-byte
11078 alignment to allow rep movsl acceleration. */
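/* For illustration, on a 32-bit target: a constant 23-byte copy with 4-byte
alignment becomes rep movsl with a count of 5 (20 bytes), followed by one
2-byte and one 1-byte move for the remaining 3 bytes. */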
11080 else if (count != 0
11081 && (align >= 8
11082 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11083 || optimize_size || count < (unsigned int) 64))
11085 unsigned HOST_WIDE_INT offset = 0;
11086 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11087 rtx srcmem, dstmem;
11089 emit_insn (gen_cld ());
11090 if (count & ~(size - 1))
11092 countreg = copy_to_mode_reg (counter_mode,
11093 GEN_INT ((count >> (size == 4 ? 2 : 3))
11094 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11095 countreg = ix86_zero_extend_to_Pmode (countreg);
11097 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11098 GEN_INT (size == 4 ? 2 : 3));
11099 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11100 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11102 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11103 countreg, destexp, srcexp));
11104 offset = count & ~(size - 1);
11106 if (size == 8 && (count & 0x04))
11108 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11109 offset);
11110 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11111 offset);
11112 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11113 offset += 4;
11115 if (count & 0x02)
11117 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11118 offset);
11119 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11120 offset);
11121 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11122 offset += 2;
11124 if (count & 0x01)
11126 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11127 offset);
11128 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11129 offset);
11130 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11133 /* The generic code based on the glibc implementation:
11134 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11135 allowing accelerated copying there)
11136 - copy the data using rep movsl
11137 - copy the rest. */
11138 else
11140 rtx countreg2;
11141 rtx label = NULL;
11142 rtx srcmem, dstmem;
11143 int desired_alignment = (TARGET_PENTIUMPRO
11144 && (count == 0 || count >= (unsigned int) 260)
11145 ? 8 : UNITS_PER_WORD);
11146 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11147 dst = change_address (dst, BLKmode, destreg);
11148 src = change_address (src, BLKmode, srcreg);
11150 /* In case we don't know anything about the alignment, default to the
11151 library version, since it is usually equally fast and results in
11152 shorter code.
11154 Also emit a call when we know that the count is large and the call
11155 overhead will not be important. */
11156 if (!TARGET_INLINE_ALL_STRINGOPS
11157 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11158 return 0;
11160 if (TARGET_SINGLE_STRINGOP)
11161 emit_insn (gen_cld ());
11163 countreg2 = gen_reg_rtx (Pmode);
11164 countreg = copy_to_mode_reg (counter_mode, count_exp);
11166 /* We don't use loops to align destination and to copy parts smaller
11167 than 4 bytes, because gcc is able to optimize such code better (in
11168 the case the destination or the count really is aligned, gcc is often
11169 able to predict the branches) and also it is friendlier to the
11170 hardware branch prediction.
11172 Using loops is beneficial for the generic case, because we can
11173 handle small counts using the loops. Many CPUs (such as Athlon)
11174 have large REP prefix setup costs.
11176 This is quite costly. Maybe we can revisit this decision later or
11177 add some customizability to this code. */
11179 if (count == 0 && align < desired_alignment)
11181 label = gen_label_rtx ();
11182 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11183 LEU, 0, counter_mode, 1, label);
11185 if (align <= 1)
11187 rtx label = ix86_expand_aligntest (destreg, 1);
11188 srcmem = change_address (src, QImode, srcreg);
11189 dstmem = change_address (dst, QImode, destreg);
11190 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11191 ix86_adjust_counter (countreg, 1);
11192 emit_label (label);
11193 LABEL_NUSES (label) = 1;
11195 if (align <= 2)
11197 rtx label = ix86_expand_aligntest (destreg, 2);
11198 srcmem = change_address (src, HImode, srcreg);
11199 dstmem = change_address (dst, HImode, destreg);
11200 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11201 ix86_adjust_counter (countreg, 2);
11202 emit_label (label);
11203 LABEL_NUSES (label) = 1;
11205 if (align <= 4 && desired_alignment > 4)
11207 rtx label = ix86_expand_aligntest (destreg, 4);
11208 srcmem = change_address (src, SImode, srcreg);
11209 dstmem = change_address (dst, SImode, destreg);
11210 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11211 ix86_adjust_counter (countreg, 4);
11212 emit_label (label);
11213 LABEL_NUSES (label) = 1;
11216 if (label && desired_alignment > 4 && !TARGET_64BIT)
11218 emit_label (label);
11219 LABEL_NUSES (label) = 1;
11220 label = NULL_RTX;
11222 if (!TARGET_SINGLE_STRINGOP)
11223 emit_insn (gen_cld ());
11224 if (TARGET_64BIT)
11226 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11227 GEN_INT (3)));
11228 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11230 else
11232 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11233 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11235 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11236 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11237 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11238 countreg2, destexp, srcexp));
11240 if (label)
11242 emit_label (label);
11243 LABEL_NUSES (label) = 1;
11245 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11247 srcmem = change_address (src, SImode, srcreg);
11248 dstmem = change_address (dst, SImode, destreg);
11249 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11251 if ((align <= 4 || count == 0) && TARGET_64BIT)
11253 rtx label = ix86_expand_aligntest (countreg, 4);
11254 srcmem = change_address (src, SImode, srcreg);
11255 dstmem = change_address (dst, SImode, destreg);
11256 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11257 emit_label (label);
11258 LABEL_NUSES (label) = 1;
11260 if (align > 2 && count != 0 && (count & 2))
11262 srcmem = change_address (src, HImode, srcreg);
11263 dstmem = change_address (dst, HImode, destreg);
11264 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11266 if (align <= 2 || count == 0)
11268 rtx label = ix86_expand_aligntest (countreg, 2);
11269 srcmem = change_address (src, HImode, srcreg);
11270 dstmem = change_address (dst, HImode, destreg);
11271 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11272 emit_label (label);
11273 LABEL_NUSES (label) = 1;
11275 if (align > 1 && count != 0 && (count & 1))
11277 srcmem = change_address (src, QImode, srcreg);
11278 dstmem = change_address (dst, QImode, destreg);
11279 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11281 if (align <= 1 || count == 0)
11283 rtx label = ix86_expand_aligntest (countreg, 1);
11284 srcmem = change_address (src, QImode, srcreg);
11285 dstmem = change_address (dst, QImode, destreg);
11286 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11287 emit_label (label);
11288 LABEL_NUSES (label) = 1;
11292 return 1;
11295 /* Expand string clear operation (bzero). Use i386 string operations when
11296 profitable. expand_movstr contains similar code. */
11298 ix86_expand_clrstr (rtx dst, rtx count_exp, rtx align_exp)
11300 rtx destreg, zeroreg, countreg, destexp;
11301 enum machine_mode counter_mode;
11302 HOST_WIDE_INT align = 0;
11303 unsigned HOST_WIDE_INT count = 0;
11305 if (GET_CODE (align_exp) == CONST_INT)
11306 align = INTVAL (align_exp);
11308 /* Can't use any of this if the user has appropriated esi. */
11309 if (global_regs[4])
11310 return 0;
11312 /* This simple hack avoids all inlining code and simplifies code below. */
11313 if (!TARGET_ALIGN_STRINGOPS)
11314 align = 32;
11316 if (GET_CODE (count_exp) == CONST_INT)
11318 count = INTVAL (count_exp);
11319 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11320 return 0;
11322 /* Figure out the proper mode for the counter. For 32 bits it is always SImode;
11323 for 64 bits use SImode when possible, otherwise DImode.
11324 Set count to the number of bytes copied when known at compile time. */
11325 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11326 || x86_64_zero_extended_value (count_exp))
11327 counter_mode = SImode;
11328 else
11329 counter_mode = DImode;
11331 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11332 if (destreg != XEXP (dst, 0))
11333 dst = replace_equiv_address_nv (dst, destreg);
11335 emit_insn (gen_cld ());
11337 /* When optimizing for size emit simple rep ; movsb instruction for
11338 counts not divisible by 4. */
11340 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11342 countreg = ix86_zero_extend_to_Pmode (count_exp);
11343 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11344 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11345 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11347 else if (count != 0
11348 && (align >= 8
11349 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11350 || optimize_size || count < (unsigned int) 64))
11352 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11353 unsigned HOST_WIDE_INT offset = 0;
11355 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11356 if (count & ~(size - 1))
11358 countreg = copy_to_mode_reg (counter_mode,
11359 GEN_INT ((count >> (size == 4 ? 2 : 3))
11360 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11361 countreg = ix86_zero_extend_to_Pmode (countreg);
11362 destexp = gen_rtx_ASHIFT (Pmode, countreg, GEN_INT (size == 4 ? 2 : 3));
11363 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11364 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11365 offset = count & ~(size - 1);
11367 if (size == 8 && (count & 0x04))
11369 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11370 offset);
11371 emit_insn (gen_strset (destreg, mem,
11372 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11373 offset += 4;
11375 if (count & 0x02)
11377 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11378 offset);
11379 emit_insn (gen_strset (destreg, mem,
11380 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11381 offset += 2;
11383 if (count & 0x01)
11385 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11386 offset);
11387 emit_insn (gen_strset (destreg, mem,
11388 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11391 else
11393 rtx countreg2;
11394 rtx label = NULL;
11395 /* Compute desired alignment of the string operation. */
11396 int desired_alignment = (TARGET_PENTIUMPRO
11397 && (count == 0 || count >= (unsigned int) 260)
11398 ? 8 : UNITS_PER_WORD);
11400 /* In case we don't know anything about the alignment, default to the
11401 library version, since it is usually equally fast and results in
11402 shorter code.
11404 Also emit a call when we know that the count is large and the call
11405 overhead will not be important. */
11406 if (!TARGET_INLINE_ALL_STRINGOPS
11407 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11408 return 0;
11410 if (TARGET_SINGLE_STRINGOP)
11411 emit_insn (gen_cld ());
11413 countreg2 = gen_reg_rtx (Pmode);
11414 countreg = copy_to_mode_reg (counter_mode, count_exp);
11415 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11416 /* Get rid of MEM_OFFSET, it won't be accurate. */
11417 dst = change_address (dst, BLKmode, destreg);
11419 if (count == 0 && align < desired_alignment)
11421 label = gen_label_rtx ();
11422 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11423 LEU, 0, counter_mode, 1, label);
11425 if (align <= 1)
11427 rtx label = ix86_expand_aligntest (destreg, 1);
11428 emit_insn (gen_strset (destreg, dst,
11429 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11430 ix86_adjust_counter (countreg, 1);
11431 emit_label (label);
11432 LABEL_NUSES (label) = 1;
11434 if (align <= 2)
11436 rtx label = ix86_expand_aligntest (destreg, 2);
11437 emit_insn (gen_strset (destreg, dst,
11438 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11439 ix86_adjust_counter (countreg, 2);
11440 emit_label (label);
11441 LABEL_NUSES (label) = 1;
11443 if (align <= 4 && desired_alignment > 4)
11445 rtx label = ix86_expand_aligntest (destreg, 4);
11446 emit_insn (gen_strset (destreg, dst,
11447 (TARGET_64BIT
11448 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11449 : zeroreg)));
11450 ix86_adjust_counter (countreg, 4);
11451 emit_label (label);
11452 LABEL_NUSES (label) = 1;
11455 if (label && desired_alignment > 4 && !TARGET_64BIT)
11457 emit_label (label);
11458 LABEL_NUSES (label) = 1;
11459 label = NULL_RTX;
11462 if (!TARGET_SINGLE_STRINGOP)
11463 emit_insn (gen_cld ());
11464 if (TARGET_64BIT)
11466 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11467 GEN_INT (3)));
11468 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11470 else
11472 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11473 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11475 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11476 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11478 if (label)
11480 emit_label (label);
11481 LABEL_NUSES (label) = 1;
11484 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11485 emit_insn (gen_strset (destreg, dst,
11486 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11487 if (TARGET_64BIT && (align <= 4 || count == 0))
11489 rtx label = ix86_expand_aligntest (countreg, 4);
11490 emit_insn (gen_strset (destreg, dst,
11491 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11492 emit_label (label);
11493 LABEL_NUSES (label) = 1;
11495 if (align > 2 && count != 0 && (count & 2))
11496 emit_insn (gen_strset (destreg, dst,
11497 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11498 if (align <= 2 || count == 0)
11500 rtx label = ix86_expand_aligntest (countreg, 2);
11501 emit_insn (gen_strset (destreg, dst,
11502 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11503 emit_label (label);
11504 LABEL_NUSES (label) = 1;
11506 if (align > 1 && count != 0 && (count & 1))
11507 emit_insn (gen_strset (destreg, dst,
11508 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11509 if (align <= 1 || count == 0)
11511 rtx label = ix86_expand_aligntest (countreg, 1);
11512 emit_insn (gen_strset (destreg, dst,
11513 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11514 emit_label (label);
11515 LABEL_NUSES (label) = 1;
11518 return 1;
11521 /* Expand strlen. */
11523 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11525 rtx addr, scratch1, scratch2, scratch3, scratch4;
11527 /* The generic case of the strlen expander is long. Avoid expanding it
11528 unless TARGET_INLINE_ALL_STRINGOPS. */
11530 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11531 && !TARGET_INLINE_ALL_STRINGOPS
11532 && !optimize_size
11533 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11534 return 0;
11536 addr = force_reg (Pmode, XEXP (src, 0));
11537 scratch1 = gen_reg_rtx (Pmode);
11539 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11540 && !optimize_size)
11542 /* Well it seems that some optimizer does not combine a call like
11543 foo(strlen(bar), strlen(bar));
11544 when the move and the subtraction are done here. It does calculate
11545 the length just once when these instructions are done inside of
11546 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11547 often used and I use one fewer register for the lifetime of
11548 output_strlen_unroll() this is better. */
11550 emit_move_insn (out, addr);
11552 ix86_expand_strlensi_unroll_1 (out, src, align);
11554 /* strlensi_unroll_1 returns the address of the zero at the end of
11555 the string, like memchr(), so compute the length by subtracting
11556 the start address. */
11557 if (TARGET_64BIT)
11558 emit_insn (gen_subdi3 (out, out, addr));
11559 else
11560 emit_insn (gen_subsi3 (out, out, addr));
11562 else
11564 rtx unspec;
11565 scratch2 = gen_reg_rtx (Pmode);
11566 scratch3 = gen_reg_rtx (Pmode);
11567 scratch4 = force_reg (Pmode, constm1_rtx);
11569 emit_move_insn (scratch3, addr);
11570 eoschar = force_reg (QImode, eoschar);
11572 emit_insn (gen_cld ());
11573 src = replace_equiv_address_nv (src, scratch3);
11575 /* If .md starts supporting :P, this can be done in .md. */
11576 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11577 scratch4), UNSPEC_SCAS);
11578 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
11579 if (TARGET_64BIT)
11581 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11582 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11584 else
11586 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11587 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11590 return 1;
11593 /* Expand the appropriate insns for doing strlen if not just doing
11594 repnz; scasb
11596 out = result, initialized with the start address
11597 align_rtx = alignment of the address.
11598 scratch = scratch register, initialized with the start address when
11599 not aligned, otherwise undefined
11601 This is just the body. It needs the initializations mentioned above and
11602 some address computing at the end. These things are done in i386.md. */
11604 static void
11605 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11607 int align;
11608 rtx tmp;
11609 rtx align_2_label = NULL_RTX;
11610 rtx align_3_label = NULL_RTX;
11611 rtx align_4_label = gen_label_rtx ();
11612 rtx end_0_label = gen_label_rtx ();
11613 rtx mem;
11614 rtx tmpreg = gen_reg_rtx (SImode);
11615 rtx scratch = gen_reg_rtx (SImode);
11616 rtx cmp;
11618 align = 0;
11619 if (GET_CODE (align_rtx) == CONST_INT)
11620 align = INTVAL (align_rtx);
11622 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11624 /* Is there a known alignment and is it less than 4? */
11625 if (align < 4)
11627 rtx scratch1 = gen_reg_rtx (Pmode);
11628 emit_move_insn (scratch1, out);
11629 /* Is there a known alignment and is it not 2? */
11630 if (align != 2)
11632 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11633 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11635 /* Leave just the 3 lower bits. */
11636 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11637 NULL_RTX, 0, OPTAB_WIDEN);
11639 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11640 Pmode, 1, align_4_label);
11641 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11642 Pmode, 1, align_2_label);
11643 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11644 Pmode, 1, align_3_label);
11646 else
11648 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11649 check whether it is aligned to 4 bytes. */
11651 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11652 NULL_RTX, 0, OPTAB_WIDEN);
11654 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11655 Pmode, 1, align_4_label);
11658 mem = change_address (src, QImode, out);
11660 /* Now compare the bytes. */
11662 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
11663 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11664 QImode, 1, end_0_label);
11666 /* Increment the address. */
11667 if (TARGET_64BIT)
11668 emit_insn (gen_adddi3 (out, out, const1_rtx));
11669 else
11670 emit_insn (gen_addsi3 (out, out, const1_rtx));
11672 /* Not needed with an alignment of 2 */
11673 if (align != 2)
11675 emit_label (align_2_label);
11677 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11678 end_0_label);
11680 if (TARGET_64BIT)
11681 emit_insn (gen_adddi3 (out, out, const1_rtx));
11682 else
11683 emit_insn (gen_addsi3 (out, out, const1_rtx));
11685 emit_label (align_3_label);
11688 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11689 end_0_label);
11691 if (TARGET_64BIT)
11692 emit_insn (gen_adddi3 (out, out, const1_rtx));
11693 else
11694 emit_insn (gen_addsi3 (out, out, const1_rtx));
11697 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
11698 align this loop; doing so only makes the program bigger and does not
11699 speed it up. */
11700 emit_label (align_4_label);
11702 mem = change_address (src, SImode, out);
11703 emit_move_insn (scratch, mem);
11704 if (TARGET_64BIT)
11705 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11706 else
11707 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11709 /* This formula yields a nonzero result iff one of the bytes is zero.
11710 This saves three branches inside the loop and many cycles. */
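/* For illustration: the value computed below is
(x - 0x01010101) & ~x & 0x80808080
for x == scratch; e.g. x == 0x12003456 yields 0x00800000, flagging the
zero byte, while any word with no zero byte yields 0. */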
11712 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11713 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11714 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11715 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11716 gen_int_mode (0x80808080, SImode)));
11717 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11718 align_4_label);
11720 if (TARGET_CMOVE)
11722 rtx reg = gen_reg_rtx (SImode);
11723 rtx reg2 = gen_reg_rtx (Pmode);
11724 emit_move_insn (reg, tmpreg);
11725 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11727 /* If zero is not in the first two bytes, move two bytes forward. */
11728 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11729 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11730 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11731 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11732 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11733 reg,
11734 tmpreg)));
11735 /* Emit lea manually to avoid clobbering the flags. */
11736 emit_insn (gen_rtx_SET (SImode, reg2,
11737 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11739 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11740 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11741 emit_insn (gen_rtx_SET (VOIDmode, out,
11742 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11743 reg2,
11744 out)));
11747 else
11749 rtx end_2_label = gen_label_rtx ();
11750 /* Is zero in the first two bytes? */
11752 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11753 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11754 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11755 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11756 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11757 pc_rtx);
11758 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11759 JUMP_LABEL (tmp) = end_2_label;
11761 /* Not in the first two. Move two bytes forward. */
11762 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11763 if (TARGET_64BIT)
11764 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11765 else
11766 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11768 emit_label (end_2_label);
11772 /* Avoid branch in fixing the byte. */
11773 tmpreg = gen_lowpart (QImode, tmpreg);
11774 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11775 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11776 if (TARGET_64BIT)
11777 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11778 else
11779 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11781 emit_label (end_0_label);
11784 void
11785 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11786 rtx callarg2 ATTRIBUTE_UNUSED,
11787 rtx pop, int sibcall)
11789 rtx use = NULL, call;
11791 if (pop == const0_rtx)
11792 pop = NULL;
11793 if (TARGET_64BIT && pop)
11794 abort ();
11796 #if TARGET_MACHO
11797 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11798 fnaddr = machopic_indirect_call_target (fnaddr);
11799 #else
11800 /* Static functions and indirect calls don't need the pic register. */
11801 if (! TARGET_64BIT && flag_pic
11802 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11803 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11804 use_reg (&use, pic_offset_table_rtx);
11806 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11808 rtx al = gen_rtx_REG (QImode, 0);
11809 emit_move_insn (al, callarg2);
11810 use_reg (&use, al);
11812 #endif /* TARGET_MACHO */
11814 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11816 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11817 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11819 if (sibcall && TARGET_64BIT
11820 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11822 rtx addr;
11823 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11824 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11825 emit_move_insn (fnaddr, addr);
11826 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11829 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11830 if (retval)
11831 call = gen_rtx_SET (VOIDmode, retval, call);
11832 if (pop)
11834 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11835 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11836 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11839 call = emit_call_insn (call);
11840 if (use)
11841 CALL_INSN_FUNCTION_USAGE (call) = use;
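/* Illustration (a sketch, not RTL dumped from the compiler): for a 32-bit
   call to a function that pops its own 8 bytes of arguments and returns a
   value in %eax, the code above builds roughly

     (parallel [(set (reg:SI ax)
		     (call (mem:QI (symbol_ref "f")) (const_int 8)))
		(set (reg:SI sp)
		     (plus:SI (reg:SI sp) (const_int 8)))])

   i.e. the CALL itself plus the callee-pop stack adjustment, wrapped in a
   single PARALLEL so they stay together.  */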
11845 /* Clear stack slot assignments remembered from previous functions.
11846 This is called from INIT_EXPANDERS once before RTL is emitted for each
11847 function. */
11849 static struct machine_function *
11850 ix86_init_machine_status (void)
11852 struct machine_function *f;
11854 f = ggc_alloc_cleared (sizeof (struct machine_function));
11855 f->use_fast_prologue_epilogue_nregs = -1;
11857 return f;
11860 /* Return a MEM corresponding to a stack slot with mode MODE.
11861 Allocate a new slot if necessary.
11863 The RTL for a function can have several slots available: N is
11864 which slot to use. */
11867 assign_386_stack_local (enum machine_mode mode, int n)
11869 struct stack_local_entry *s;
11871 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11872 abort ();
11874 for (s = ix86_stack_locals; s; s = s->next)
11875 if (s->mode == mode && s->n == n)
11876 return s->rtl;
11878 s = (struct stack_local_entry *)
11879 ggc_alloc (sizeof (struct stack_local_entry));
11880 s->n = n;
11881 s->mode = mode;
11882 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11884 s->next = ix86_stack_locals;
11885 ix86_stack_locals = s;
11886 return s->rtl;
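/* Usage sketch: callers name a slot by (mode, index) and get the same MEM
   back for the life of the function; for example the FP control-word
   save/restore sequences elsewhere in this file ask for small HImode slots
   roughly like

     rtx cw = assign_386_stack_local (HImode, 0);

   (the index 0 is only illustrative), so repeated expansions share one
   stack location instead of growing the frame.  */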
11889 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11891 static GTY(()) rtx ix86_tls_symbol;
11893 ix86_tls_get_addr (void)
11896 if (!ix86_tls_symbol)
11898 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11899 (TARGET_GNU_TLS && !TARGET_64BIT)
11900 ? "___tls_get_addr"
11901 : "__tls_get_addr");
11904 return ix86_tls_symbol;
11907 /* Calculate the length of the memory address in the instruction
11908 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11910 static int
11911 memory_address_length (rtx addr)
11913 struct ix86_address parts;
11914 rtx base, index, disp;
11915 int len;
11917 if (GET_CODE (addr) == PRE_DEC
11918 || GET_CODE (addr) == POST_INC
11919 || GET_CODE (addr) == PRE_MODIFY
11920 || GET_CODE (addr) == POST_MODIFY)
11921 return 0;
11923 if (! ix86_decompose_address (addr, &parts))
11924 abort ();
11926 base = parts.base;
11927 index = parts.index;
11928 disp = parts.disp;
11929 len = 0;
11931 /* Rule of thumb:
11932 - esp as the base always wants an index,
11933 - ebp as the base always wants a displacement. */
11935 /* Register Indirect. */
11936 if (base && !index && !disp)
11938 /* esp (for its index) and ebp (for its displacement) need
11939 the two-byte modrm form. */
11940 if (addr == stack_pointer_rtx
11941 || addr == arg_pointer_rtx
11942 || addr == frame_pointer_rtx
11943 || addr == hard_frame_pointer_rtx)
11944 len = 1;
11947 /* Direct Addressing. */
11948 else if (disp && !base && !index)
11949 len = 4;
11951 else
11953 /* Find the length of the displacement constant. */
11954 if (disp)
11956 if (GET_CODE (disp) == CONST_INT
11957 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11958 && base)
11959 len = 1;
11960 else
11961 len = 4;
11963 /* ebp always wants a displacement. */
11964 else if (base == hard_frame_pointer_rtx)
11965 len = 1;
11967 /* An index requires the two-byte modrm form.... */
11968 if (index
11969 /* ...like esp, which always wants an index. */
11970 || base == stack_pointer_rtx
11971 || base == arg_pointer_rtx
11972 || base == frame_pointer_rtx)
11973 len += 1;
11976 return len;
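/* Worked examples of the rules above (illustrative; as the comment before
   the function says, the mandatory modrm byte is not counted):

     (%eax)           -> 0   plain base register
     (%esp)           -> 1   needs a SIB byte
     (%ebp)           -> 1   needs a zero disp8
     12(%eax)         -> 1   disp8
     4096(%eax)       -> 4   disp32
     symbol           -> 4   absolute disp32, no base or index
     12(%eax,%ecx,4)  -> 2   SIB byte plus disp8  */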
11979 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11980 is set, expect that the insn has an 8-bit immediate alternative. */
11982 ix86_attr_length_immediate_default (rtx insn, int shortform)
11984 int len = 0;
11985 int i;
11986 extract_insn_cached (insn);
11987 for (i = recog_data.n_operands - 1; i >= 0; --i)
11988 if (CONSTANT_P (recog_data.operand[i]))
11990 if (len)
11991 abort ();
11992 if (shortform
11993 && GET_CODE (recog_data.operand[i]) == CONST_INT
11994 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11995 len = 1;
11996 else
11998 switch (get_attr_mode (insn))
12000 case MODE_QI:
12001 len+=1;
12002 break;
12003 case MODE_HI:
12004 len+=2;
12005 break;
12006 case MODE_SI:
12007 len+=4;
12008 break;
12009 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
12010 case MODE_DI:
12011 len+=4;
12012 break;
12013 default:
12014 fatal_insn ("unknown insn mode", insn);
12018 return len;
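/* Examples (illustrative): with SHORTFORM set, "addl $4, %eax" is credited
   1 byte of immediate because 4 fits the signed 8-bit 'K' range, while
   "addl $300, %eax" is credited 4.  DImode immediates also count as 4,
   since they are encoded as 32-bit sign-extended values.  */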
12020 /* Compute default value for "length_address" attribute. */
12022 ix86_attr_length_address_default (rtx insn)
12024 int i;
12026 if (get_attr_type (insn) == TYPE_LEA)
12028 rtx set = PATTERN (insn);
12029 if (GET_CODE (set) == SET)
12031 else if (GET_CODE (set) == PARALLEL
12032 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
12033 set = XVECEXP (set, 0, 0);
12034 else
12036 #ifdef ENABLE_CHECKING
12037 abort ();
12038 #endif
12039 return 0;
12042 return memory_address_length (SET_SRC (set));
12045 extract_insn_cached (insn);
12046 for (i = recog_data.n_operands - 1; i >= 0; --i)
12047 if (GET_CODE (recog_data.operand[i]) == MEM)
12049 return memory_address_length (XEXP (recog_data.operand[i], 0));
12050 break;
12052 return 0;
12055 /* Return the maximum number of instructions a cpu can issue. */
12057 static int
12058 ix86_issue_rate (void)
12060 switch (ix86_tune)
12062 case PROCESSOR_PENTIUM:
12063 case PROCESSOR_K6:
12064 return 2;
12066 case PROCESSOR_PENTIUMPRO:
12067 case PROCESSOR_PENTIUM4:
12068 case PROCESSOR_ATHLON:
12069 case PROCESSOR_K8:
12070 return 3;
12072 default:
12073 return 1;
12077 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags set
12078 by DEP_INSN and reads nothing else that DEP_INSN sets. */
12080 static int
12081 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12083 rtx set, set2;
12085 /* Simplify the test for uninteresting insns. */
12086 if (insn_type != TYPE_SETCC
12087 && insn_type != TYPE_ICMOV
12088 && insn_type != TYPE_FCMOV
12089 && insn_type != TYPE_IBR)
12090 return 0;
12092 if ((set = single_set (dep_insn)) != 0)
12094 set = SET_DEST (set);
12095 set2 = NULL_RTX;
12097 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12098 && XVECLEN (PATTERN (dep_insn), 0) == 2
12099 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12100 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12102 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12103 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
12105 else
12106 return 0;
12108 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12109 return 0;
12111 /* This test is true if the dependent insn reads the flags but
12112 not any other potentially set register. */
12113 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12114 return 0;
12116 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12117 return 0;
12119 return 1;
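/* Example of the pairing this models (a sketch): a Pentium sequence such as

	cmpl	%ebx, %eax	# DEP_INSN sets only the flags
	jne	.Llabel		# INSN reads only the flags

   qualifies, so ix86_adjust_cost below treats the pair as free.  An
   "addl %ebx, %eax" followed by the same jump does not, because the add's
   single_set destination is %eax rather than the flags register.  */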
12122 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12123 address with operands set by DEP_INSN. */
12125 static int
12126 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12128 rtx addr;
12130 if (insn_type == TYPE_LEA
12131 && TARGET_PENTIUM)
12133 addr = PATTERN (insn);
12134 if (GET_CODE (addr) == SET)
12136 else if (GET_CODE (addr) == PARALLEL
12137 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12138 addr = XVECEXP (addr, 0, 0);
12139 else
12140 abort ();
12141 addr = SET_SRC (addr);
12143 else
12145 int i;
12146 extract_insn_cached (insn);
12147 for (i = recog_data.n_operands - 1; i >= 0; --i)
12148 if (GET_CODE (recog_data.operand[i]) == MEM)
12150 addr = XEXP (recog_data.operand[i], 0);
12151 goto found;
12153 return 0;
12154 found:;
12157 return modified_in_p (addr, dep_insn);
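/* Example of the address-generation interlock this detects (a sketch):

	addl	$4, %ebx	# DEP_INSN modifies %ebx
	movl	(%ebx), %eax	# INSN uses %ebx in its address

   On Pentium the load's address generation stalls, so ix86_adjust_cost
   below charges one extra cycle for this dependence.  */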
12160 static int
12161 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12163 enum attr_type insn_type, dep_insn_type;
12164 enum attr_memory memory, dep_memory;
12165 rtx set, set2;
12166 int dep_insn_code_number;
12168 /* Anti and output dependencies have zero cost on all CPUs. */
12169 if (REG_NOTE_KIND (link) != 0)
12170 return 0;
12172 dep_insn_code_number = recog_memoized (dep_insn);
12174 /* If we can't recognize the insns, we can't really do anything. */
12175 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12176 return cost;
12178 insn_type = get_attr_type (insn);
12179 dep_insn_type = get_attr_type (dep_insn);
12181 switch (ix86_tune)
12183 case PROCESSOR_PENTIUM:
12184 /* Address Generation Interlock adds a cycle of latency. */
12185 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12186 cost += 1;
12188 /* ??? Compares pair with jump/setcc. */
12189 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12190 cost = 0;
12192 /* Floating point stores require the value to be ready one cycle earlier. */
12193 if (insn_type == TYPE_FMOV
12194 && get_attr_memory (insn) == MEMORY_STORE
12195 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12196 cost += 1;
12197 break;
12199 case PROCESSOR_PENTIUMPRO:
12200 memory = get_attr_memory (insn);
12201 dep_memory = get_attr_memory (dep_insn);
12203 /* Since we can't represent delayed latencies of load+operation,
12204 increase the cost here for non-imov insns. */
12205 if (dep_insn_type != TYPE_IMOV
12206 && dep_insn_type != TYPE_FMOV
12207 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
12208 cost += 1;
12210 /* INT->FP conversion is expensive. */
12211 if (get_attr_fp_int_src (dep_insn))
12212 cost += 5;
12214 /* There is one cycle extra latency between an FP op and a store. */
12215 if (insn_type == TYPE_FMOV
12216 && (set = single_set (dep_insn)) != NULL_RTX
12217 && (set2 = single_set (insn)) != NULL_RTX
12218 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12219 && GET_CODE (SET_DEST (set2)) == MEM)
12220 cost += 1;
12222 /* Show the ability of the reorder buffer to hide the latency of a load by
12223 executing it in parallel with the previous instruction, provided the
12224 previous instruction is not needed to compute the address. */
12225 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12226 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12228 /* Claim moves to take one cycle, as the core can issue one load
12229 at a time and the next load can start a cycle later. */
12230 if (dep_insn_type == TYPE_IMOV
12231 || dep_insn_type == TYPE_FMOV)
12232 cost = 1;
12233 else if (cost > 1)
12234 cost--;
12236 break;
12238 case PROCESSOR_K6:
12239 memory = get_attr_memory (insn);
12240 dep_memory = get_attr_memory (dep_insn);
12241 /* The esp dependency is resolved before the instruction is really
12242 finished. */
12243 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12244 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12245 return 1;
12247 /* Since we can't represent delayed latencies of load+operation,
12248 increase the cost here for non-imov insns. */
12249 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
12250 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
12252 /* INT->FP conversion is expensive. */
12253 if (get_attr_fp_int_src (dep_insn))
12254 cost += 5;
12256 /* Show the ability of the reorder buffer to hide the latency of a load by
12257 executing it in parallel with the previous instruction, provided the
12258 previous instruction is not needed to compute the address. */
12259 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12260 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12262 /* Claim moves to take one cycle, as the core can issue one load
12263 at a time and the next load can start a cycle later. */
12264 if (dep_insn_type == TYPE_IMOV
12265 || dep_insn_type == TYPE_FMOV)
12266 cost = 1;
12267 else if (cost > 2)
12268 cost -= 2;
12269 else
12270 cost = 1;
12272 break;
12274 case PROCESSOR_ATHLON:
12275 case PROCESSOR_K8:
12276 memory = get_attr_memory (insn);
12277 dep_memory = get_attr_memory (dep_insn);
12279 /* Show the ability of the reorder buffer to hide the latency of a load by
12280 executing it in parallel with the previous instruction, provided the
12281 previous instruction is not needed to compute the address. */
12282 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12283 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12285 enum attr_unit unit = get_attr_unit (insn);
12286 int loadcost = 3;
12288 /* Because of the difference between the length of integer and
12289 floating unit pipeline preparation stages, the memory operands
12290 for floating point are cheaper.
12292 ??? For Athlon the difference is most probably 2. */
12293 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12294 loadcost = 3;
12295 else
12296 loadcost = TARGET_ATHLON ? 2 : 0;
12298 if (cost >= loadcost)
12299 cost -= loadcost;
12300 else
12301 cost = 0;
12304 default:
12305 break;
12308 return cost;
12311 static union
12313 struct ppro_sched_data
12315 rtx decode[3];
12316 int issued_this_cycle;
12317 } ppro;
12318 } ix86_sched_data;
12320 static enum attr_ppro_uops
12321 ix86_safe_ppro_uops (rtx insn)
12323 if (recog_memoized (insn) >= 0)
12324 return get_attr_ppro_uops (insn);
12325 else
12326 return PPRO_UOPS_MANY;
12329 static void
12330 ix86_dump_ppro_packet (FILE *dump)
12332 if (ix86_sched_data.ppro.decode[0])
12334 fprintf (dump, "PPRO packet: %d",
12335 INSN_UID (ix86_sched_data.ppro.decode[0]));
12336 if (ix86_sched_data.ppro.decode[1])
12337 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
12338 if (ix86_sched_data.ppro.decode[2])
12339 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
12340 fputc ('\n', dump);
12344 /* We're beginning a new block. Initialize data structures as necessary. */
12346 static void
12347 ix86_sched_init (FILE *dump ATTRIBUTE_UNUSED,
12348 int sched_verbose ATTRIBUTE_UNUSED,
12349 int veclen ATTRIBUTE_UNUSED)
12351 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12354 /* Shift INSN to SLOT, and shift everything else down. */
12356 static void
12357 ix86_reorder_insn (rtx *insnp, rtx *slot)
12359 if (insnp != slot)
12361 rtx insn = *insnp;
12363 insnp[0] = insnp[1];
12364 while (++insnp != slot);
12365 *insnp = insn;
12369 static void
12370 ix86_sched_reorder_ppro (rtx *ready, rtx *e_ready)
12372 rtx decode[3];
12373 enum attr_ppro_uops cur_uops;
12374 int issued_this_cycle;
12375 rtx *insnp;
12376 int i;
12378 /* At this point .ppro.decode contains the state of the three
12379 decoders from last "cycle". That is, those insns that were
12380 actually independent. But here we're scheduling for the
12381 decoder, and we may find things that are decodable in the
12382 same cycle. */
12384 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12385 issued_this_cycle = 0;
12387 insnp = e_ready;
12388 cur_uops = ix86_safe_ppro_uops (*insnp);
12390 /* If the decoders are empty, and we've a complex insn at the
12391 head of the priority queue, let it issue without complaint. */
12392 if (decode[0] == NULL)
12394 if (cur_uops == PPRO_UOPS_MANY)
12396 decode[0] = *insnp;
12397 goto ppro_done;
12400 /* Otherwise, search for a 2-4 uop insn to issue. */
12401 while (cur_uops != PPRO_UOPS_FEW)
12403 if (insnp == ready)
12404 break;
12405 cur_uops = ix86_safe_ppro_uops (*--insnp);
12408 /* If so, move it to the head of the line. */
12409 if (cur_uops == PPRO_UOPS_FEW)
12410 ix86_reorder_insn (insnp, e_ready);
12412 /* Issue the head of the queue. */
12413 issued_this_cycle = 1;
12414 decode[0] = *e_ready--;
12417 /* Look for simple insns to fill in the other two slots. */
12418 for (i = 1; i < 3; ++i)
12419 if (decode[i] == NULL)
12421 if (ready > e_ready)
12422 goto ppro_done;
12424 insnp = e_ready;
12425 cur_uops = ix86_safe_ppro_uops (*insnp);
12426 while (cur_uops != PPRO_UOPS_ONE)
12428 if (insnp == ready)
12429 break;
12430 cur_uops = ix86_safe_ppro_uops (*--insnp);
12433 /* Found one. Move it to the head of the queue and issue it. */
12434 if (cur_uops == PPRO_UOPS_ONE)
12436 ix86_reorder_insn (insnp, e_ready);
12437 decode[i] = *e_ready--;
12438 issued_this_cycle++;
12439 continue;
12442 /* ??? Didn't find one. Ideally, here we would do a lazy split
12443 of 2-uop insns, issue one and queue the other. */
12446 ppro_done:
12447 if (issued_this_cycle == 0)
12448 issued_this_cycle = 1;
12449 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
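/* Sketch of the decoder model behind the reordering above: the PPro front
   end has one complex decoder and two simple ones per cycle (the familiar
   4-1-1 template), so a good cycle looks like

     slot 0:  one PPRO_UOPS_FEW insn (or a PPRO_UOPS_MANY insn alone)
     slot 1:  one PPRO_UOPS_ONE insn
     slot 2:  one PPRO_UOPS_ONE insn

   which is the shape the code above tries to pull out of the ready queue.  */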
12452 /* We are about to begin issuing insns for this clock cycle.
12453 Override the default sort algorithm to better slot instructions. */
12454 static int
12455 ix86_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
12456 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
12457 int *n_readyp, int clock_var ATTRIBUTE_UNUSED)
12459 int n_ready = *n_readyp;
12460 rtx *e_ready = ready + n_ready - 1;
12462 /* Make sure to go ahead and initialize key items in
12463 ix86_sched_data if we are not going to bother trying to
12464 reorder the ready queue. */
12465 if (n_ready < 2)
12467 ix86_sched_data.ppro.issued_this_cycle = 1;
12468 goto out;
12471 switch (ix86_tune)
12473 default:
12474 break;
12476 case PROCESSOR_PENTIUMPRO:
12477 ix86_sched_reorder_ppro (ready, e_ready);
12478 break;
12481 out:
12482 return ix86_issue_rate ();
12485 /* We are about to issue INSN. Return the number of insns left on the
12486 ready queue that can be issued this cycle. */
12488 static int
12489 ix86_variable_issue (FILE *dump, int sched_verbose, rtx insn,
12490 int can_issue_more)
12492 int i;
12493 switch (ix86_tune)
12495 default:
12496 return can_issue_more - 1;
12498 case PROCESSOR_PENTIUMPRO:
12500 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12502 if (uops == PPRO_UOPS_MANY)
12504 if (sched_verbose)
12505 ix86_dump_ppro_packet (dump);
12506 ix86_sched_data.ppro.decode[0] = insn;
12507 ix86_sched_data.ppro.decode[1] = NULL;
12508 ix86_sched_data.ppro.decode[2] = NULL;
12509 if (sched_verbose)
12510 ix86_dump_ppro_packet (dump);
12511 ix86_sched_data.ppro.decode[0] = NULL;
12513 else if (uops == PPRO_UOPS_FEW)
12515 if (sched_verbose)
12516 ix86_dump_ppro_packet (dump);
12517 ix86_sched_data.ppro.decode[0] = insn;
12518 ix86_sched_data.ppro.decode[1] = NULL;
12519 ix86_sched_data.ppro.decode[2] = NULL;
12521 else
12523 for (i = 0; i < 3; ++i)
12524 if (ix86_sched_data.ppro.decode[i] == NULL)
12526 ix86_sched_data.ppro.decode[i] = insn;
12527 break;
12529 if (i == 3)
12530 abort ();
12531 if (i == 2)
12533 if (sched_verbose)
12534 ix86_dump_ppro_packet (dump);
12535 ix86_sched_data.ppro.decode[0] = NULL;
12536 ix86_sched_data.ppro.decode[1] = NULL;
12537 ix86_sched_data.ppro.decode[2] = NULL;
12541 return --ix86_sched_data.ppro.issued_this_cycle;
12545 static int
12546 ia32_use_dfa_pipeline_interface (void)
12548 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12549 return 1;
12550 return 0;
12553 /* How many alternative schedules to try. This should be as wide as the
12554 scheduling freedom in the DFA, but no wider. Making this value too
12555 large results in extra work for the scheduler. */
12557 static int
12558 ia32_multipass_dfa_lookahead (void)
12560 if (ix86_tune == PROCESSOR_PENTIUM)
12561 return 2;
12562 else
12563 return 0;
12567 /* Compute the alignment given to a constant that is being placed in memory.
12568 EXP is the constant and ALIGN is the alignment that the object would
12569 ordinarily have.
12570 The value of this function is used instead of that alignment to align
12571 the object. */
12574 ix86_constant_alignment (tree exp, int align)
12576 if (TREE_CODE (exp) == REAL_CST)
12578 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12579 return 64;
12580 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12581 return 128;
12583 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12584 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12585 return BITS_PER_WORD;
12587 return align;
12590 /* Compute the alignment for a static variable.
12591 TYPE is the data type, and ALIGN is the alignment that
12592 the object would ordinarily have. The value of this function is used
12593 instead of that alignment to align the object. */
12596 ix86_data_alignment (tree type, int align)
12598 if (AGGREGATE_TYPE_P (type)
12599 && TYPE_SIZE (type)
12600 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12601 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12602 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12603 return 256;
12605 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12606 to a 16-byte boundary. */
12607 if (TARGET_64BIT)
12609 if (AGGREGATE_TYPE_P (type)
12610 && TYPE_SIZE (type)
12611 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12612 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12613 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12614 return 128;
12617 if (TREE_CODE (type) == ARRAY_TYPE)
12619 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12620 return 64;
12621 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12622 return 128;
12624 else if (TREE_CODE (type) == COMPLEX_TYPE)
12627 if (TYPE_MODE (type) == DCmode && align < 64)
12628 return 64;
12629 if (TYPE_MODE (type) == XCmode && align < 128)
12630 return 128;
12632 else if ((TREE_CODE (type) == RECORD_TYPE
12633 || TREE_CODE (type) == UNION_TYPE
12634 || TREE_CODE (type) == QUAL_UNION_TYPE)
12635 && TYPE_FIELDS (type))
12637 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12638 return 64;
12639 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12640 return 128;
12642 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12643 || TREE_CODE (type) == INTEGER_TYPE)
12645 if (TYPE_MODE (type) == DFmode && align < 64)
12646 return 64;
12647 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12648 return 128;
12651 return align;
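/* Examples (illustrative): a file-scope "double x[4]" is 256 bits, so the
   first test above raises its alignment to 256 bits; a lone "double" or a
   struct whose first field is a double gets 64; and on x86-64 any
   aggregate of 16 bytes or more gets at least 128.  */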
12654 /* Compute the alignment for a local variable.
12655 TYPE is the data type, and ALIGN is the alignment that
12656 the object would ordinarily have. The value of this macro is used
12657 instead of that alignment to align the object. */
12660 ix86_local_alignment (tree type, int align)
12662 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12663 to a 16-byte boundary. */
12664 if (TARGET_64BIT)
12666 if (AGGREGATE_TYPE_P (type)
12667 && TYPE_SIZE (type)
12668 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12669 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12670 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12671 return 128;
12673 if (TREE_CODE (type) == ARRAY_TYPE)
12675 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12676 return 64;
12677 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12678 return 128;
12680 else if (TREE_CODE (type) == COMPLEX_TYPE)
12682 if (TYPE_MODE (type) == DCmode && align < 64)
12683 return 64;
12684 if (TYPE_MODE (type) == XCmode && align < 128)
12685 return 128;
12687 else if ((TREE_CODE (type) == RECORD_TYPE
12688 || TREE_CODE (type) == UNION_TYPE
12689 || TREE_CODE (type) == QUAL_UNION_TYPE)
12690 && TYPE_FIELDS (type))
12692 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12693 return 64;
12694 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12695 return 128;
12697 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12698 || TREE_CODE (type) == INTEGER_TYPE)
12701 if (TYPE_MODE (type) == DFmode && align < 64)
12702 return 64;
12703 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12704 return 128;
12706 return align;
12709 /* Emit RTL insns to initialize the variable parts of a trampoline.
12710 FNADDR is an RTX for the address of the function's pure code.
12711 CXT is an RTX for the static chain value for the function. */
12712 void
12713 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12715 if (!TARGET_64BIT)
12717 /* Compute offset from the end of the jmp to the target function. */
12718 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12719 plus_constant (tramp, 10),
12720 NULL_RTX, 1, OPTAB_DIRECT);
12721 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12722 gen_int_mode (0xb9, QImode));
12723 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12724 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12725 gen_int_mode (0xe9, QImode));
12726 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12728 else
12730 int offset = 0;
12731 /* Try to load the address using the shorter movl instead of movabs.
12732 We may want to support movq for kernel mode, but the kernel does not use
12733 trampolines at the moment. */
12734 if (x86_64_zero_extended_value (fnaddr))
12736 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12737 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12738 gen_int_mode (0xbb41, HImode));
12739 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12740 gen_lowpart (SImode, fnaddr));
12741 offset += 6;
12743 else
12745 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12746 gen_int_mode (0xbb49, HImode));
12747 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12748 fnaddr);
12749 offset += 10;
12751 /* Load static chain using movabs to r10. */
12752 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12753 gen_int_mode (0xba49, HImode));
12754 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12755 cxt);
12756 offset += 10;
12757 /* Jump to r11. */
12758 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12759 gen_int_mode (0xff49, HImode));
12760 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12761 gen_int_mode (0xe3, QImode));
12762 offset += 3;
12763 if (offset > TRAMPOLINE_SIZE)
12764 abort ();
12767 #ifdef TRANSFER_FROM_TRAMPOLINE
12768 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12769 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12770 #endif
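/* For reference, the trampolines laid out above (a sketch of the byte
   layout, little endian):

   32-bit:
     b9 <cxt:4>			movl    $cxt, %ecx
     e9 <fnaddr-(tramp+10):4>	jmp     fnaddr

   64-bit, when the address does not fit the movl shortcut:
     49 bb <fnaddr:8>		movabsq $fnaddr, %r11
     49 ba <cxt:8>		movabsq $cxt, %r10
     49 ff e3			jmpq    *%r11  */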
12773 #define def_builtin(MASK, NAME, TYPE, CODE) \
12774 do { \
12775 if ((MASK) & target_flags \
12776 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12777 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12778 NULL, NULL_TREE); \
12779 } while (0)
12781 struct builtin_description
12783 const unsigned int mask;
12784 const enum insn_code icode;
12785 const char *const name;
12786 const enum ix86_builtins code;
12787 const enum rtx_code comparison;
12788 const unsigned int flag;
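/* Illustration of how these descriptor tables are consumed (a sketch):
   ix86_init_mmx_sse_builtins below walks each table and, whenever the
   entry's mask is enabled in target_flags, registers the builtin, so the
   first bdesc_2arg entry effectively amounts to

     def_builtin (MASK_SSE, "__builtin_ia32_addps",
		  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   which is the builtin that <xmmintrin.h>'s _mm_add_ps expands to.  */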
12791 static const struct builtin_description bdesc_comi[] =
12793 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12794 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12795 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12796 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12797 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12798 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12799 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12800 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12801 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12802 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12803 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12804 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12805 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12806 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12807 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12808 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12809 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12810 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12811 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12812 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12813 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12814 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12815 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12816 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12819 static const struct builtin_description bdesc_2arg[] =
12821 /* SSE */
12822 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12823 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12824 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12825 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12826 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12827 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12828 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12829 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12831 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12832 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12833 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12834 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12835 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12836 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12837 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12838 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12839 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12840 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12841 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12842 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12843 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12844 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12845 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12846 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12847 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12848 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12849 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12850 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12852 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12853 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12854 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12855 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12857 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12858 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12859 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12860 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12862 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12863 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12864 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12865 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12866 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12868 /* MMX */
12869 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12870 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12871 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12872 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12873 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12874 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12875 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12876 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12878 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12879 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12880 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12881 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12882 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12883 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12884 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12885 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12887 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12888 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12889 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12891 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12892 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12893 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12894 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12896 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12897 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12899 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12900 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12901 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12902 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12903 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12904 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12906 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12907 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12908 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12909 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12911 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12912 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12913 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12914 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12915 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12916 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12918 /* Special. */
12919 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12920 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12921 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12923 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12924 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12925 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12927 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12928 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12929 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12930 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12931 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12932 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12934 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12935 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12936 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12937 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12938 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12939 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12941 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12942 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12943 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12944 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12946 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12947 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12949 /* SSE2 */
12950 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12951 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12952 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12953 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12954 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12955 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12956 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12957 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12959 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12960 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12961 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12962 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12963 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12964 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12965 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12966 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12967 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12968 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12969 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12970 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12971 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12972 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12973 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12974 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12975 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12976 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12977 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12978 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12980 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12981 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12982 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12983 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12985 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12986 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12987 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12988 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12990 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12991 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12992 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12994 /* SSE2 MMX */
12995 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12996 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12997 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12998 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12999 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
13000 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
13001 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
13002 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
13004 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
13005 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
13006 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
13007 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
13008 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
13009 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
13010 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
13011 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
13013 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
13014 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
13015 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
13016 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
13018 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
13019 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
13020 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
13021 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
13023 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
13024 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
13026 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
13027 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
13028 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
13029 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
13030 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
13031 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
13033 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
13034 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
13035 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
13036 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
13038 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
13039 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
13040 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
13041 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
13042 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
13043 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
13044 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
13045 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
13047 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
13048 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
13049 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
13051 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
13052 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
13054 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
13055 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
13056 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
13057 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
13058 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
13059 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
13061 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
13062 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
13063 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
13064 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
13065 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
13066 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
13068 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
13069 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
13070 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
13071 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
13073 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
13075 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
13076 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
13077 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
13078 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
13080 /* SSE3 MMX */
13081 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
13082 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
13083 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
13084 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
13085 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
13086 { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
13089 static const struct builtin_description bdesc_1arg[] =
13091 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
13092 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
13094 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
13095 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
13096 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
13098 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
13099 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
13100 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
13101 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
13102 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
13103 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
13105 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13106 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13107 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
13108 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
13110 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13112 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13113 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
13115 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13116 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13117 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13118 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13119 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
13121 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
13123 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13124 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
13125 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13126 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
13128 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13129 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13130 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13132 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
13134 /* SSE3 */
13135 { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
13136 { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
13137 { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
13140 void
13141 ix86_init_builtins (void)
13143 if (TARGET_MMX)
13144 ix86_init_mmx_sse_builtins ();
13147 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13148 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
13149 builtins. */
13150 static void
13151 ix86_init_mmx_sse_builtins (void)
13153 const struct builtin_description * d;
13154 size_t i;
13156 tree pchar_type_node = build_pointer_type (char_type_node);
13157 tree pcchar_type_node = build_pointer_type (
13158 build_type_variant (char_type_node, 1, 0));
13159 tree pfloat_type_node = build_pointer_type (float_type_node);
13160 tree pcfloat_type_node = build_pointer_type (
13161 build_type_variant (float_type_node, 1, 0));
13162 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13163 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13164 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13166 /* Comparisons. */
13167 tree int_ftype_v4sf_v4sf
13168 = build_function_type_list (integer_type_node,
13169 V4SF_type_node, V4SF_type_node, NULL_TREE);
13170 tree v4si_ftype_v4sf_v4sf
13171 = build_function_type_list (V4SI_type_node,
13172 V4SF_type_node, V4SF_type_node, NULL_TREE);
13173 /* MMX/SSE/integer conversions. */
13174 tree int_ftype_v4sf
13175 = build_function_type_list (integer_type_node,
13176 V4SF_type_node, NULL_TREE);
13177 tree int64_ftype_v4sf
13178 = build_function_type_list (long_long_integer_type_node,
13179 V4SF_type_node, NULL_TREE);
13180 tree int_ftype_v8qi
13181 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13182 tree v4sf_ftype_v4sf_int
13183 = build_function_type_list (V4SF_type_node,
13184 V4SF_type_node, integer_type_node, NULL_TREE);
13185 tree v4sf_ftype_v4sf_int64
13186 = build_function_type_list (V4SF_type_node,
13187 V4SF_type_node, long_long_integer_type_node,
13188 NULL_TREE);
13189 tree v4sf_ftype_v4sf_v2si
13190 = build_function_type_list (V4SF_type_node,
13191 V4SF_type_node, V2SI_type_node, NULL_TREE);
13192 tree int_ftype_v4hi_int
13193 = build_function_type_list (integer_type_node,
13194 V4HI_type_node, integer_type_node, NULL_TREE);
13195 tree v4hi_ftype_v4hi_int_int
13196 = build_function_type_list (V4HI_type_node, V4HI_type_node,
13197 integer_type_node, integer_type_node,
13198 NULL_TREE);
13199 /* Miscellaneous. */
13200 tree v8qi_ftype_v4hi_v4hi
13201 = build_function_type_list (V8QI_type_node,
13202 V4HI_type_node, V4HI_type_node, NULL_TREE);
13203 tree v4hi_ftype_v2si_v2si
13204 = build_function_type_list (V4HI_type_node,
13205 V2SI_type_node, V2SI_type_node, NULL_TREE);
13206 tree v4sf_ftype_v4sf_v4sf_int
13207 = build_function_type_list (V4SF_type_node,
13208 V4SF_type_node, V4SF_type_node,
13209 integer_type_node, NULL_TREE);
13210 tree v2si_ftype_v4hi_v4hi
13211 = build_function_type_list (V2SI_type_node,
13212 V4HI_type_node, V4HI_type_node, NULL_TREE);
13213 tree v4hi_ftype_v4hi_int
13214 = build_function_type_list (V4HI_type_node,
13215 V4HI_type_node, integer_type_node, NULL_TREE);
13216 tree v4hi_ftype_v4hi_di
13217 = build_function_type_list (V4HI_type_node,
13218 V4HI_type_node, long_long_unsigned_type_node,
13219 NULL_TREE);
13220 tree v2si_ftype_v2si_di
13221 = build_function_type_list (V2SI_type_node,
13222 V2SI_type_node, long_long_unsigned_type_node,
13223 NULL_TREE);
13224 tree void_ftype_void
13225 = build_function_type (void_type_node, void_list_node);
13226 tree void_ftype_unsigned
13227 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13228 tree void_ftype_unsigned_unsigned
13229 = build_function_type_list (void_type_node, unsigned_type_node,
13230 unsigned_type_node, NULL_TREE);
13231 tree void_ftype_pcvoid_unsigned_unsigned
13232 = build_function_type_list (void_type_node, const_ptr_type_node,
13233 unsigned_type_node, unsigned_type_node,
13234 NULL_TREE);
13235 tree unsigned_ftype_void
13236 = build_function_type (unsigned_type_node, void_list_node);
13237 tree di_ftype_void
13238 = build_function_type (long_long_unsigned_type_node, void_list_node);
13239 tree v4sf_ftype_void
13240 = build_function_type (V4SF_type_node, void_list_node);
13241 tree v2si_ftype_v4sf
13242 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13243 /* Loads/stores. */
13244 tree void_ftype_v8qi_v8qi_pchar
13245 = build_function_type_list (void_type_node,
13246 V8QI_type_node, V8QI_type_node,
13247 pchar_type_node, NULL_TREE);
13248 tree v4sf_ftype_pcfloat
13249 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13250 /* @@@ the type is bogus */
13251 tree v4sf_ftype_v4sf_pv2si
13252 = build_function_type_list (V4SF_type_node,
13253 V4SF_type_node, pv2si_type_node, NULL_TREE);
13254 tree void_ftype_pv2si_v4sf
13255 = build_function_type_list (void_type_node,
13256 pv2si_type_node, V4SF_type_node, NULL_TREE);
13257 tree void_ftype_pfloat_v4sf
13258 = build_function_type_list (void_type_node,
13259 pfloat_type_node, V4SF_type_node, NULL_TREE);
13260 tree void_ftype_pdi_di
13261 = build_function_type_list (void_type_node,
13262 pdi_type_node, long_long_unsigned_type_node,
13263 NULL_TREE);
13264 tree void_ftype_pv2di_v2di
13265 = build_function_type_list (void_type_node,
13266 pv2di_type_node, V2DI_type_node, NULL_TREE);
13267 /* Normal vector unops. */
13268 tree v4sf_ftype_v4sf
13269 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13271 /* Normal vector binops. */
13272 tree v4sf_ftype_v4sf_v4sf
13273 = build_function_type_list (V4SF_type_node,
13274 V4SF_type_node, V4SF_type_node, NULL_TREE);
13275 tree v8qi_ftype_v8qi_v8qi
13276 = build_function_type_list (V8QI_type_node,
13277 V8QI_type_node, V8QI_type_node, NULL_TREE);
13278 tree v4hi_ftype_v4hi_v4hi
13279 = build_function_type_list (V4HI_type_node,
13280 V4HI_type_node, V4HI_type_node, NULL_TREE);
13281 tree v2si_ftype_v2si_v2si
13282 = build_function_type_list (V2SI_type_node,
13283 V2SI_type_node, V2SI_type_node, NULL_TREE);
13284 tree di_ftype_di_di
13285 = build_function_type_list (long_long_unsigned_type_node,
13286 long_long_unsigned_type_node,
13287 long_long_unsigned_type_node, NULL_TREE);
13289 tree v2si_ftype_v2sf
13290 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13291 tree v2sf_ftype_v2si
13292 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13293 tree v2si_ftype_v2si
13294 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13295 tree v2sf_ftype_v2sf
13296 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13297 tree v2sf_ftype_v2sf_v2sf
13298 = build_function_type_list (V2SF_type_node,
13299 V2SF_type_node, V2SF_type_node, NULL_TREE);
13300 tree v2si_ftype_v2sf_v2sf
13301 = build_function_type_list (V2SI_type_node,
13302 V2SF_type_node, V2SF_type_node, NULL_TREE);
13303 tree pint_type_node = build_pointer_type (integer_type_node);
13304 tree pcint_type_node = build_pointer_type (
13305 build_type_variant (integer_type_node, 1, 0));
13306 tree pdouble_type_node = build_pointer_type (double_type_node);
13307 tree pcdouble_type_node = build_pointer_type (
13308 build_type_variant (double_type_node, 1, 0));
13309 tree int_ftype_v2df_v2df
13310 = build_function_type_list (integer_type_node,
13311 V2DF_type_node, V2DF_type_node, NULL_TREE);
13313 tree ti_ftype_void
13314 = build_function_type (intTI_type_node, void_list_node);
13315 tree v2di_ftype_void
13316 = build_function_type (V2DI_type_node, void_list_node);
13317 tree ti_ftype_ti_ti
13318 = build_function_type_list (intTI_type_node,
13319 intTI_type_node, intTI_type_node, NULL_TREE);
13320 tree void_ftype_pcvoid
13321 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13322 tree v2di_ftype_di
13323 = build_function_type_list (V2DI_type_node,
13324 long_long_unsigned_type_node, NULL_TREE);
13325 tree di_ftype_v2di
13326 = build_function_type_list (long_long_unsigned_type_node,
13327 V2DI_type_node, NULL_TREE);
13328 tree v4sf_ftype_v4si
13329 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13330 tree v4si_ftype_v4sf
13331 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13332 tree v2df_ftype_v4si
13333 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13334 tree v4si_ftype_v2df
13335 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13336 tree v2si_ftype_v2df
13337 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13338 tree v4sf_ftype_v2df
13339 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13340 tree v2df_ftype_v2si
13341 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13342 tree v2df_ftype_v4sf
13343 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13344 tree int_ftype_v2df
13345 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13346 tree int64_ftype_v2df
13347 = build_function_type_list (long_long_integer_type_node,
13348 V2DF_type_node, NULL_TREE);
13349 tree v2df_ftype_v2df_int
13350 = build_function_type_list (V2DF_type_node,
13351 V2DF_type_node, integer_type_node, NULL_TREE);
13352 tree v2df_ftype_v2df_int64
13353 = build_function_type_list (V2DF_type_node,
13354 V2DF_type_node, long_long_integer_type_node,
13355 NULL_TREE);
13356 tree v4sf_ftype_v4sf_v2df
13357 = build_function_type_list (V4SF_type_node,
13358 V4SF_type_node, V2DF_type_node, NULL_TREE);
13359 tree v2df_ftype_v2df_v4sf
13360 = build_function_type_list (V2DF_type_node,
13361 V2DF_type_node, V4SF_type_node, NULL_TREE);
13362 tree v2df_ftype_v2df_v2df_int
13363 = build_function_type_list (V2DF_type_node,
13364 V2DF_type_node, V2DF_type_node,
13365 integer_type_node,
13366 NULL_TREE);
13367 tree v2df_ftype_v2df_pv2si
13368 = build_function_type_list (V2DF_type_node,
13369 V2DF_type_node, pv2si_type_node, NULL_TREE);
13370 tree void_ftype_pv2si_v2df
13371 = build_function_type_list (void_type_node,
13372 pv2si_type_node, V2DF_type_node, NULL_TREE);
13373 tree void_ftype_pdouble_v2df
13374 = build_function_type_list (void_type_node,
13375 pdouble_type_node, V2DF_type_node, NULL_TREE);
13376 tree void_ftype_pint_int
13377 = build_function_type_list (void_type_node,
13378 pint_type_node, integer_type_node, NULL_TREE);
13379 tree void_ftype_v16qi_v16qi_pchar
13380 = build_function_type_list (void_type_node,
13381 V16QI_type_node, V16QI_type_node,
13382 pchar_type_node, NULL_TREE);
13383 tree v2df_ftype_pcdouble
13384 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13385 tree v2df_ftype_v2df_v2df
13386 = build_function_type_list (V2DF_type_node,
13387 V2DF_type_node, V2DF_type_node, NULL_TREE);
13388 tree v16qi_ftype_v16qi_v16qi
13389 = build_function_type_list (V16QI_type_node,
13390 V16QI_type_node, V16QI_type_node, NULL_TREE);
13391 tree v8hi_ftype_v8hi_v8hi
13392 = build_function_type_list (V8HI_type_node,
13393 V8HI_type_node, V8HI_type_node, NULL_TREE);
13394 tree v4si_ftype_v4si_v4si
13395 = build_function_type_list (V4SI_type_node,
13396 V4SI_type_node, V4SI_type_node, NULL_TREE);
13397 tree v2di_ftype_v2di_v2di
13398 = build_function_type_list (V2DI_type_node,
13399 V2DI_type_node, V2DI_type_node, NULL_TREE);
13400 tree v2di_ftype_v2df_v2df
13401 = build_function_type_list (V2DI_type_node,
13402 V2DF_type_node, V2DF_type_node, NULL_TREE);
13403 tree v2df_ftype_v2df
13404 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13405 tree v2df_ftype_double
13406 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13407 tree v2df_ftype_double_double
13408 = build_function_type_list (V2DF_type_node,
13409 double_type_node, double_type_node, NULL_TREE);
13410 tree int_ftype_v8hi_int
13411 = build_function_type_list (integer_type_node,
13412 V8HI_type_node, integer_type_node, NULL_TREE);
13413 tree v8hi_ftype_v8hi_int_int
13414 = build_function_type_list (V8HI_type_node,
13415 V8HI_type_node, integer_type_node,
13416 integer_type_node, NULL_TREE);
13417 tree v2di_ftype_v2di_int
13418 = build_function_type_list (V2DI_type_node,
13419 V2DI_type_node, integer_type_node, NULL_TREE);
13420 tree v4si_ftype_v4si_int
13421 = build_function_type_list (V4SI_type_node,
13422 V4SI_type_node, integer_type_node, NULL_TREE);
13423 tree v8hi_ftype_v8hi_int
13424 = build_function_type_list (V8HI_type_node,
13425 V8HI_type_node, integer_type_node, NULL_TREE);
13426 tree v8hi_ftype_v8hi_v2di
13427 = build_function_type_list (V8HI_type_node,
13428 V8HI_type_node, V2DI_type_node, NULL_TREE);
13429 tree v4si_ftype_v4si_v2di
13430 = build_function_type_list (V4SI_type_node,
13431 V4SI_type_node, V2DI_type_node, NULL_TREE);
13432 tree v4si_ftype_v8hi_v8hi
13433 = build_function_type_list (V4SI_type_node,
13434 V8HI_type_node, V8HI_type_node, NULL_TREE);
13435 tree di_ftype_v8qi_v8qi
13436 = build_function_type_list (long_long_unsigned_type_node,
13437 V8QI_type_node, V8QI_type_node, NULL_TREE);
13438 tree v2di_ftype_v16qi_v16qi
13439 = build_function_type_list (V2DI_type_node,
13440 V16QI_type_node, V16QI_type_node, NULL_TREE);
13441 tree int_ftype_v16qi
13442 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13443 tree v16qi_ftype_pcchar
13444 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13445 tree void_ftype_pchar_v16qi
13446 = build_function_type_list (void_type_node,
13447 pchar_type_node, V16QI_type_node, NULL_TREE);
13448 tree v4si_ftype_pcint
13449 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13450 tree void_ftype_pcint_v4si
13451 = build_function_type_list (void_type_node,
13452 pcint_type_node, V4SI_type_node, NULL_TREE);
13453 tree v2di_ftype_v2di
13454 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13456 tree float80_type;
13457 tree float128_type;
13459 /* The __float80 type. */
13460 if (TYPE_MODE (long_double_type_node) == XFmode)
13461 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13462 "__float80");
13463 else
13464 {
13465 /* The __float80 type. */
13466 float80_type = make_node (REAL_TYPE);
13467 TYPE_PRECISION (float80_type) = 96;
13468 layout_type (float80_type);
13469 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13470 }
13472 float128_type = make_node (REAL_TYPE);
13473 TYPE_PRECISION (float128_type) = 128;
13474 layout_type (float128_type);
13475 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
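/* Illustrative note: register_builtin_type makes these names visible to the
   front end, so (in a C-family translation unit) declarations along the
   lines of

       __float80  ld;
       __float128 qd;

   resolve to the REAL_TYPE nodes laid out above.  Sketch only; the exact
   behavior depends on the front end's lang_hooks.  */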
13477 /* Add all builtins that are more or less simple operations on two
13478 operands. */
13479 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13480 {
13481 /* Use one of the operands; the target can have a different mode for
13482 mask-generating compares. */
13483 enum machine_mode mode;
13484 tree type;
13486 if (d->name == 0)
13487 continue;
13488 mode = insn_data[d->icode].operand[1].mode;
13490 switch (mode)
13491 {
13492 case V16QImode:
13493 type = v16qi_ftype_v16qi_v16qi;
13494 break;
13495 case V8HImode:
13496 type = v8hi_ftype_v8hi_v8hi;
13497 break;
13498 case V4SImode:
13499 type = v4si_ftype_v4si_v4si;
13500 break;
13501 case V2DImode:
13502 type = v2di_ftype_v2di_v2di;
13503 break;
13504 case V2DFmode:
13505 type = v2df_ftype_v2df_v2df;
13506 break;
13507 case TImode:
13508 type = ti_ftype_ti_ti;
13509 break;
13510 case V4SFmode:
13511 type = v4sf_ftype_v4sf_v4sf;
13512 break;
13513 case V8QImode:
13514 type = v8qi_ftype_v8qi_v8qi;
13515 break;
13516 case V4HImode:
13517 type = v4hi_ftype_v4hi_v4hi;
13518 break;
13519 case V2SImode:
13520 type = v2si_ftype_v2si_v2si;
13521 break;
13522 case DImode:
13523 type = di_ftype_di_di;
13524 break;
13526 default:
13527 abort ();
13528 }
13530 /* Override for comparisons. */
13531 if (d->icode == CODE_FOR_maskcmpv4sf3
13532 || d->icode == CODE_FOR_maskncmpv4sf3
13533 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13534 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13535 type = v4si_ftype_v4sf_v4sf;
13537 if (d->icode == CODE_FOR_maskcmpv2df3
13538 || d->icode == CODE_FOR_maskncmpv2df3
13539 || d->icode == CODE_FOR_vmmaskcmpv2df3
13540 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13541 type = v2di_ftype_v2df_v2df;
13543 def_builtin (d->mask, d->name, type, d->code);
13544 }
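/* For example, a bdesc_2arg entry whose insn pattern takes V4SFmode inputs
   (an SSE packed-single arithmetic pattern, say) falls into the V4SFmode arm
   of the switch above and is registered with v4sf_ftype_v4sf_v4sf, unless it
   is one of the listed compare icodes, which instead get the mask-returning
   v4si_ftype_v4sf_v4sf signature.  The builtin names and icodes themselves
   come from the bdesc_2arg table, not from this loop.  */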
13546 /* Add the remaining MMX insns with somewhat more complicated types. */
13547 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13548 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13549 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13550 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13551 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13553 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13554 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13555 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13557 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13558 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13560 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13561 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13563 /* comi/ucomi insns. */
13564 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13565 if (d->mask == MASK_SSE2)
13566 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13567 else
13568 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13570 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13571 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13572 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13574 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13575 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13576 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13577 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13578 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13579 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13580 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13581 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13582 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13583 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13584 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
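/* Rough usage of the conversion builtins registered above (user code normally
   reaches them through <xmmintrin.h> wrappers such as _mm_cvtss_si32; those
   wrapper names are an assumption of this note, not something defined here):

       int i  = __builtin_ia32_cvtss2si (v);     where v is a __v4sf value
       int ti = __builtin_ia32_cvttss2si (v);    truncating form

   The ...si64 variants are only registered when MASK_64BIT is set.  */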
13586 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13587 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13589 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13591 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13592 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13593 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13594 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13595 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13596 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13598 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13599 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13600 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13601 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13603 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13604 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13605 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13606 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13608 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13610 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13612 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13613 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13614 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13615 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13616 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13617 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13619 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
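/* Sketch of the shuffle builtin registered just above; the selector must be
   a compile-time constant (the SHUFPS case in ix86_expand_builtin below
   rejects anything else with "mask must be an immediate"):

       __v4sf r = __builtin_ia32_shufps (a, b, 0x1b);

   The 0x1b selector is only an example value.  */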
13621 /* Original 3DNow! */
13622 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13623 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13624 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13625 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13626 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13627 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13628 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13629 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13630 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13631 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13632 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13633 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13634 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13635 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13636 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13637 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13638 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13639 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13640 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13641 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13643 /* 3DNow! extension as used in the Athlon CPU. */
13644 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13645 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13646 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13647 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13648 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13649 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13651 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13653 /* SSE2 */
13654 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13655 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13657 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13658 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13659 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13661 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13662 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13663 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13664 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13665 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13666 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13668 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13669 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13670 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13671 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13673 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13674 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13675 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13676 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13677 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13679 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13680 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13681 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13682 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13684 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13685 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13687 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13689 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13690 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13692 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13693 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13694 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13695 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13696 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13698 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13700 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13701 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13702 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13703 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13705 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13706 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13707 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13709 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13710 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13711 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13712 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13714 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13715 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13716 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13717 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13718 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13719 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13720 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13722 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13723 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13724 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13726 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13727 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13728 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13729 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13730 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13731 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13732 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13734 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13736 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13737 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13738 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13740 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13741 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13742 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13744 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13745 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13747 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13748 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13749 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13750 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13752 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13753 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13754 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13755 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13757 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13758 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13760 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13762 /* Prescott New Instructions. */
13763 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
13764 void_ftype_pcvoid_unsigned_unsigned,
13765 IX86_BUILTIN_MONITOR);
13766 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
13767 void_ftype_unsigned_unsigned,
13768 IX86_BUILTIN_MWAIT);
13769 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
13770 v4sf_ftype_v4sf,
13771 IX86_BUILTIN_MOVSHDUP);
13772 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
13773 v4sf_ftype_v4sf,
13774 IX86_BUILTIN_MOVSLDUP);
13775 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
13776 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13777 def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
13778 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13779 def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
13780 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13781 }
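/* Rough usage of the SSE3 builtins registered above (normally wrapped by
   <pmmintrin.h> as _mm_monitor and _mm_mwait; those wrapper names are an
   assumption of this note):

       __builtin_ia32_monitor (addr, 0, 0);     addr is a const void *
       __builtin_ia32_mwait (0, 0);
*/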
13783 /* Errors in the source file can cause expand_expr to return const0_rtx
13784 where we expect a vector. To avoid crashing, use one of the vector
13785 clear instructions. */
13786 static rtx
13787 safe_vector_operand (rtx x, enum machine_mode mode)
13788 {
13789 if (x != const0_rtx)
13790 return x;
13791 x = gen_reg_rtx (mode);
13793 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13794 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13795 : gen_rtx_SUBREG (DImode, x, 0)));
13796 else
13797 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13798 : gen_rtx_SUBREG (V4SFmode, x, 0),
13799 CONST0_RTX (V4SFmode)));
13800 return x;
13801 }
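/* The expanders below (binop, store, unop, compare, comi) route each vector
   operand through safe_vector_operand before applying the insn predicates,
   so a const0_rtx produced for an erroneous source file is turned into a
   cleared register instead of crashing pattern generation.  */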
13803 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13805 static rtx
13806 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
13807 {
13808 rtx pat;
13809 tree arg0 = TREE_VALUE (arglist);
13810 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13811 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13812 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13813 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13814 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13815 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13817 if (VECTOR_MODE_P (mode0))
13818 op0 = safe_vector_operand (op0, mode0);
13819 if (VECTOR_MODE_P (mode1))
13820 op1 = safe_vector_operand (op1, mode1);
13822 if (! target
13823 || GET_MODE (target) != tmode
13824 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13825 target = gen_reg_rtx (tmode);
13827 if (GET_MODE (op1) == SImode && mode1 == TImode)
13828 {
13829 rtx x = gen_reg_rtx (V4SImode);
13830 emit_insn (gen_sse2_loadd (x, op1));
13831 op1 = gen_lowpart (TImode, x);
13832 }
13834 /* In case the insn wants input operands in modes different from
13835 the result, abort. */
13836 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13837 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
13838 abort ();
13840 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13841 op0 = copy_to_mode_reg (mode0, op0);
13842 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13843 op1 = copy_to_mode_reg (mode1, op1);
13845 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13846 yet one of the two must not be a memory. This is normally enforced
13847 by expanders, but we didn't bother to create one here. */
13848 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13849 op0 = copy_to_mode_reg (mode0, op0);
13851 pat = GEN_FCN (icode) (target, op0, op1);
13852 if (! pat)
13853 return 0;
13854 emit_insn (pat);
13855 return target;
13856 }
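/* Sketch of the flow for a typical two-operand builtin (names illustrative):

       __v8hi r = __builtin_ia32_paddw128 (a, b);

   reaches this helper with the icode recorded in bdesc_2arg; both operands
   are coerced into the modes the named pattern expects and a single insn is
   emitted into TARGET.  */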
13858 /* Subroutine of ix86_expand_builtin to take care of stores. */
13860 static rtx
13861 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
13862 {
13863 rtx pat;
13864 tree arg0 = TREE_VALUE (arglist);
13865 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13866 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13867 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13868 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13869 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13871 if (VECTOR_MODE_P (mode1))
13872 op1 = safe_vector_operand (op1, mode1);
13874 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13875 op1 = copy_to_mode_reg (mode1, op1);
13877 pat = GEN_FCN (icode) (op0, op1);
13878 if (pat)
13879 emit_insn (pat);
13880 return 0;
13881 }
13883 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13885 static rtx
13886 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13887 rtx target, int do_load)
13888 {
13889 rtx pat;
13890 tree arg0 = TREE_VALUE (arglist);
13891 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13892 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13893 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13895 if (! target
13896 || GET_MODE (target) != tmode
13897 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13898 target = gen_reg_rtx (tmode);
13899 if (do_load)
13900 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13901 else
13902 {
13903 if (VECTOR_MODE_P (mode0))
13904 op0 = safe_vector_operand (op0, mode0);
13906 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13907 op0 = copy_to_mode_reg (mode0, op0);
13908 }
13910 pat = GEN_FCN (icode) (target, op0);
13911 if (! pat)
13912 return 0;
13913 emit_insn (pat);
13914 return target;
13915 }
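/* DO_LOAD distinguishes plain unary operations from the load-style builtins
   (for example __builtin_ia32_loadaps above), whose single argument is a
   pointer that must be wrapped in a MEM here rather than copied into a
   register.  */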
13917 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13918 sqrtss, rsqrtss, rcpss. */
13920 static rtx
13921 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13922 {
13923 rtx pat;
13924 tree arg0 = TREE_VALUE (arglist);
13925 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13926 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13927 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13929 if (! target
13930 || GET_MODE (target) != tmode
13931 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13932 target = gen_reg_rtx (tmode);
13934 if (VECTOR_MODE_P (mode0))
13935 op0 = safe_vector_operand (op0, mode0);
13937 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13938 op0 = copy_to_mode_reg (mode0, op0);
13940 op1 = op0;
13941 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13942 op1 = copy_to_mode_reg (mode0, op1);
13944 pat = GEN_FCN (icode) (target, op0, op1);
13945 if (! pat)
13946 return 0;
13947 emit_insn (pat);
13948 return target;
13949 }
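/* The vm* patterns behind sqrtss, rsqrtss and rcpss take two vector inputs
   even though the builtin has only one argument: operand 2 supplies the
   untouched upper elements of the result, so the single user operand is
   simply duplicated into both positions above.  */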
13951 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13953 static rtx
13954 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13955 rtx target)
13956 {
13957 rtx pat;
13958 tree arg0 = TREE_VALUE (arglist);
13959 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13960 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13961 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13962 rtx op2;
13963 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13964 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13965 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13966 enum rtx_code comparison = d->comparison;
13968 if (VECTOR_MODE_P (mode0))
13969 op0 = safe_vector_operand (op0, mode0);
13970 if (VECTOR_MODE_P (mode1))
13971 op1 = safe_vector_operand (op1, mode1);
13973 /* Swap operands if we have a comparison that isn't available in
13974 hardware. */
13975 if (d->flag)
13976 {
13977 rtx tmp = gen_reg_rtx (mode1);
13978 emit_move_insn (tmp, op1);
13979 op1 = op0;
13980 op0 = tmp;
13981 }
13983 if (! target
13984 || GET_MODE (target) != tmode
13985 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13986 target = gen_reg_rtx (tmode);
13988 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13989 op0 = copy_to_mode_reg (mode0, op0);
13990 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13991 op1 = copy_to_mode_reg (mode1, op1);
13993 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13994 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13995 if (! pat)
13996 return 0;
13997 emit_insn (pat);
13998 return target;
13999 }
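/* For example (the concrete entries live in bdesc_2arg, so the name here is
   illustrative), __builtin_ia32_cmpltps arrives with a LT comparison and
   yields a v4si element mask, which is why the registration loop above
   overrides the compare icodes to the v4si_ftype_v4sf_v4sf and
   v2di_ftype_v2df_v2df signatures.  */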
14001 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
14003 static rtx
14004 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
14005 rtx target)
14006 {
14007 rtx pat;
14008 tree arg0 = TREE_VALUE (arglist);
14009 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14010 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14011 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14012 rtx op2;
14013 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
14014 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
14015 enum rtx_code comparison = d->comparison;
14017 if (VECTOR_MODE_P (mode0))
14018 op0 = safe_vector_operand (op0, mode0);
14019 if (VECTOR_MODE_P (mode1))
14020 op1 = safe_vector_operand (op1, mode1);
14022 /* Swap operands if we have a comparison that isn't available in
14023 hardware. */
14024 if (d->flag)
14025 {
14026 rtx tmp = op1;
14027 op1 = op0;
14028 op0 = tmp;
14029 }
14031 target = gen_reg_rtx (SImode);
14032 emit_move_insn (target, const0_rtx);
14033 target = gen_rtx_SUBREG (QImode, target, 0);
14035 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14036 op0 = copy_to_mode_reg (mode0, op0);
14037 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14038 op1 = copy_to_mode_reg (mode1, op1);
14040 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
14041 pat = GEN_FCN (d->icode) (op0, op1);
14042 if (! pat)
14043 return 0;
14044 emit_insn (pat);
14045 emit_insn (gen_rtx_SET (VOIDmode,
14046 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
14047 gen_rtx_fmt_ee (comparison, QImode,
14048 SET_DEST (pat),
14049 const0_rtx)));
14051 return SUBREG_REG (target);
14052 }
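/* For example, __builtin_ia32_comieq (a, b) (an entry in bdesc_comi) emits
   the comparison insn and then materializes the flag result through the
   QImode STRICT_LOW_PART of the SImode pseudo built above, returning that
   pseudo as the 0/1 result.  */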
14054 /* Expand an expression EXP that calls a built-in function,
14055 with result going to TARGET if that's convenient
14056 (and in mode MODE if that's convenient).
14057 SUBTARGET may be used as the target for computing one of EXP's operands.
14058 IGNORE is nonzero if the value is to be ignored. */
14060 rtx
14061 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14062 enum machine_mode mode ATTRIBUTE_UNUSED,
14063 int ignore ATTRIBUTE_UNUSED)
14064 {
14065 const struct builtin_description *d;
14066 size_t i;
14067 enum insn_code icode;
14068 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
14069 tree arglist = TREE_OPERAND (exp, 1);
14070 tree arg0, arg1, arg2;
14071 rtx op0, op1, op2, pat;
14072 enum machine_mode tmode, mode0, mode1, mode2;
14073 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
14075 switch (fcode)
14076 {
14077 case IX86_BUILTIN_EMMS:
14078 emit_insn (gen_emms ());
14079 return 0;
14081 case IX86_BUILTIN_SFENCE:
14082 emit_insn (gen_sfence ());
14083 return 0;
14085 case IX86_BUILTIN_PEXTRW:
14086 case IX86_BUILTIN_PEXTRW128:
14087 icode = (fcode == IX86_BUILTIN_PEXTRW
14088 ? CODE_FOR_mmx_pextrw
14089 : CODE_FOR_sse2_pextrw);
14090 arg0 = TREE_VALUE (arglist);
14091 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14092 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14093 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14094 tmode = insn_data[icode].operand[0].mode;
14095 mode0 = insn_data[icode].operand[1].mode;
14096 mode1 = insn_data[icode].operand[2].mode;
14098 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14099 op0 = copy_to_mode_reg (mode0, op0);
14100 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14101 {
14102 error ("selector must be an integer constant in the range 0..%i",
14103 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
14104 return gen_reg_rtx (tmode);
14105 }
14106 if (target == 0
14107 || GET_MODE (target) != tmode
14108 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14109 target = gen_reg_rtx (tmode);
14110 pat = GEN_FCN (icode) (target, op0, op1);
14111 if (! pat)
14112 return 0;
14113 emit_insn (pat);
14114 return target;
14116 case IX86_BUILTIN_PINSRW:
14117 case IX86_BUILTIN_PINSRW128:
14118 icode = (fcode == IX86_BUILTIN_PINSRW
14119 ? CODE_FOR_mmx_pinsrw
14120 : CODE_FOR_sse2_pinsrw);
14121 arg0 = TREE_VALUE (arglist);
14122 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14123 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14124 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14125 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14126 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14127 tmode = insn_data[icode].operand[0].mode;
14128 mode0 = insn_data[icode].operand[1].mode;
14129 mode1 = insn_data[icode].operand[2].mode;
14130 mode2 = insn_data[icode].operand[3].mode;
14132 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14133 op0 = copy_to_mode_reg (mode0, op0);
14134 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14135 op1 = copy_to_mode_reg (mode1, op1);
14136 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14137 {
14138 error ("selector must be an integer constant in the range 0..%i",
14139 fcode == IX86_BUILTIN_PINSRW ? 15:255);
14140 return const0_rtx;
14141 }
14142 if (target == 0
14143 || GET_MODE (target) != tmode
14144 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14145 target = gen_reg_rtx (tmode);
14146 pat = GEN_FCN (icode) (target, op0, op1, op2);
14147 if (! pat)
14148 return 0;
14149 emit_insn (pat);
14150 return target;
14152 case IX86_BUILTIN_MASKMOVQ:
14153 case IX86_BUILTIN_MASKMOVDQU:
14154 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14155 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
14156 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14157 : CODE_FOR_sse2_maskmovdqu));
14158 /* Note the arg order is different from the operand order. */
14159 arg1 = TREE_VALUE (arglist);
14160 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14161 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14162 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14163 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14164 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14165 mode0 = insn_data[icode].operand[0].mode;
14166 mode1 = insn_data[icode].operand[1].mode;
14167 mode2 = insn_data[icode].operand[2].mode;
14169 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14170 op0 = copy_to_mode_reg (mode0, op0);
14171 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14172 op1 = copy_to_mode_reg (mode1, op1);
14173 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14174 op2 = copy_to_mode_reg (mode2, op2);
14175 pat = GEN_FCN (icode) (op0, op1, op2);
14176 if (! pat)
14177 return 0;
14178 emit_insn (pat);
14179 return 0;
14181 case IX86_BUILTIN_SQRTSS:
14182 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14183 case IX86_BUILTIN_RSQRTSS:
14184 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14185 case IX86_BUILTIN_RCPSS:
14186 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14188 case IX86_BUILTIN_LOADAPS:
14189 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14191 case IX86_BUILTIN_LOADUPS:
14192 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14194 case IX86_BUILTIN_STOREAPS:
14195 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14197 case IX86_BUILTIN_STOREUPS:
14198 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14200 case IX86_BUILTIN_LOADSS:
14201 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14203 case IX86_BUILTIN_STORESS:
14204 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
14206 case IX86_BUILTIN_LOADHPS:
14207 case IX86_BUILTIN_LOADLPS:
14208 case IX86_BUILTIN_LOADHPD:
14209 case IX86_BUILTIN_LOADLPD:
14210 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14211 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14212 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14213 : CODE_FOR_sse2_movsd);
14214 arg0 = TREE_VALUE (arglist);
14215 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14216 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14217 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14218 tmode = insn_data[icode].operand[0].mode;
14219 mode0 = insn_data[icode].operand[1].mode;
14220 mode1 = insn_data[icode].operand[2].mode;
14222 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14223 op0 = copy_to_mode_reg (mode0, op0);
14224 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14225 if (target == 0
14226 || GET_MODE (target) != tmode
14227 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14228 target = gen_reg_rtx (tmode);
14229 pat = GEN_FCN (icode) (target, op0, op1);
14230 if (! pat)
14231 return 0;
14232 emit_insn (pat);
14233 return target;
14235 case IX86_BUILTIN_STOREHPS:
14236 case IX86_BUILTIN_STORELPS:
14237 case IX86_BUILTIN_STOREHPD:
14238 case IX86_BUILTIN_STORELPD:
14239 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14240 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14241 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14242 : CODE_FOR_sse2_movsd);
14243 arg0 = TREE_VALUE (arglist);
14244 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14245 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14246 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14247 mode0 = insn_data[icode].operand[1].mode;
14248 mode1 = insn_data[icode].operand[2].mode;
14250 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14251 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14252 op1 = copy_to_mode_reg (mode1, op1);
14254 pat = GEN_FCN (icode) (op0, op0, op1);
14255 if (! pat)
14256 return 0;
14257 emit_insn (pat);
14258 return 0;
14260 case IX86_BUILTIN_MOVNTPS:
14261 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14262 case IX86_BUILTIN_MOVNTQ:
14263 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14265 case IX86_BUILTIN_LDMXCSR:
14266 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14267 target = assign_386_stack_local (SImode, 0);
14268 emit_move_insn (target, op0);
14269 emit_insn (gen_ldmxcsr (target));
14270 return 0;
14272 case IX86_BUILTIN_STMXCSR:
14273 target = assign_386_stack_local (SImode, 0);
14274 emit_insn (gen_stmxcsr (target));
14275 return copy_to_mode_reg (SImode, target);
14277 case IX86_BUILTIN_SHUFPS:
14278 case IX86_BUILTIN_SHUFPD:
14279 icode = (fcode == IX86_BUILTIN_SHUFPS
14280 ? CODE_FOR_sse_shufps
14281 : CODE_FOR_sse2_shufpd);
14282 arg0 = TREE_VALUE (arglist);
14283 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14284 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14285 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14286 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14287 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14288 tmode = insn_data[icode].operand[0].mode;
14289 mode0 = insn_data[icode].operand[1].mode;
14290 mode1 = insn_data[icode].operand[2].mode;
14291 mode2 = insn_data[icode].operand[3].mode;
14293 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14294 op0 = copy_to_mode_reg (mode0, op0);
14295 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14296 op1 = copy_to_mode_reg (mode1, op1);
14297 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14298 {
14299 /* @@@ better error message */
14300 error ("mask must be an immediate");
14301 return gen_reg_rtx (tmode);
14302 }
14303 if (target == 0
14304 || GET_MODE (target) != tmode
14305 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14306 target = gen_reg_rtx (tmode);
14307 pat = GEN_FCN (icode) (target, op0, op1, op2);
14308 if (! pat)
14309 return 0;
14310 emit_insn (pat);
14311 return target;
14313 case IX86_BUILTIN_PSHUFW:
14314 case IX86_BUILTIN_PSHUFD:
14315 case IX86_BUILTIN_PSHUFHW:
14316 case IX86_BUILTIN_PSHUFLW:
14317 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14318 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14319 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14320 : CODE_FOR_mmx_pshufw);
14321 arg0 = TREE_VALUE (arglist);
14322 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14323 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14324 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14325 tmode = insn_data[icode].operand[0].mode;
14326 mode1 = insn_data[icode].operand[1].mode;
14327 mode2 = insn_data[icode].operand[2].mode;
14329 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14330 op0 = copy_to_mode_reg (mode1, op0);
14331 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14332 {
14333 /* @@@ better error message */
14334 error ("mask must be an immediate");
14335 return const0_rtx;
14336 }
14337 if (target == 0
14338 || GET_MODE (target) != tmode
14339 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14340 target = gen_reg_rtx (tmode);
14341 pat = GEN_FCN (icode) (target, op0, op1);
14342 if (! pat)
14343 return 0;
14344 emit_insn (pat);
14345 return target;
14347 case IX86_BUILTIN_PSLLDQI128:
14348 case IX86_BUILTIN_PSRLDQI128:
14349 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14350 : CODE_FOR_sse2_lshrti3);
14351 arg0 = TREE_VALUE (arglist);
14352 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14353 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14354 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14355 tmode = insn_data[icode].operand[0].mode;
14356 mode1 = insn_data[icode].operand[1].mode;
14357 mode2 = insn_data[icode].operand[2].mode;
14359 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14360 {
14361 op0 = copy_to_reg (op0);
14362 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14363 }
14364 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14365 {
14366 error ("shift must be an immediate");
14367 return const0_rtx;
14368 }
14369 target = gen_reg_rtx (V2DImode);
14370 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14371 if (! pat)
14372 return 0;
14373 emit_insn (pat);
14374 return target;
14376 case IX86_BUILTIN_FEMMS:
14377 emit_insn (gen_femms ());
14378 return NULL_RTX;
14380 case IX86_BUILTIN_PAVGUSB:
14381 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14383 case IX86_BUILTIN_PF2ID:
14384 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14386 case IX86_BUILTIN_PFACC:
14387 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14389 case IX86_BUILTIN_PFADD:
14390 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14392 case IX86_BUILTIN_PFCMPEQ:
14393 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14395 case IX86_BUILTIN_PFCMPGE:
14396 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14398 case IX86_BUILTIN_PFCMPGT:
14399 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14401 case IX86_BUILTIN_PFMAX:
14402 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14404 case IX86_BUILTIN_PFMIN:
14405 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14407 case IX86_BUILTIN_PFMUL:
14408 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14410 case IX86_BUILTIN_PFRCP:
14411 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14413 case IX86_BUILTIN_PFRCPIT1:
14414 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14416 case IX86_BUILTIN_PFRCPIT2:
14417 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14419 case IX86_BUILTIN_PFRSQIT1:
14420 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14422 case IX86_BUILTIN_PFRSQRT:
14423 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14425 case IX86_BUILTIN_PFSUB:
14426 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14428 case IX86_BUILTIN_PFSUBR:
14429 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14431 case IX86_BUILTIN_PI2FD:
14432 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14434 case IX86_BUILTIN_PMULHRW:
14435 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14437 case IX86_BUILTIN_PF2IW:
14438 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14440 case IX86_BUILTIN_PFNACC:
14441 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14443 case IX86_BUILTIN_PFPNACC:
14444 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14446 case IX86_BUILTIN_PI2FW:
14447 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14449 case IX86_BUILTIN_PSWAPDSI:
14450 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14452 case IX86_BUILTIN_PSWAPDSF:
14453 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14455 case IX86_BUILTIN_SSE_ZERO:
14456 target = gen_reg_rtx (V4SFmode);
14457 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14458 return target;
14460 case IX86_BUILTIN_MMX_ZERO:
14461 target = gen_reg_rtx (DImode);
14462 emit_insn (gen_mmx_clrdi (target));
14463 return target;
14465 case IX86_BUILTIN_CLRTI:
14466 target = gen_reg_rtx (V2DImode);
14467 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14468 return target;
14471 case IX86_BUILTIN_SQRTSD:
14472 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14473 case IX86_BUILTIN_LOADAPD:
14474 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14475 case IX86_BUILTIN_LOADUPD:
14476 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14478 case IX86_BUILTIN_STOREAPD:
14479 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14480 case IX86_BUILTIN_STOREUPD:
14481 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14483 case IX86_BUILTIN_LOADSD:
14484 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14486 case IX86_BUILTIN_STORESD:
14487 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14489 case IX86_BUILTIN_SETPD1:
14490 target = assign_386_stack_local (DFmode, 0);
14491 arg0 = TREE_VALUE (arglist);
14492 emit_move_insn (adjust_address (target, DFmode, 0),
14493 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14494 op0 = gen_reg_rtx (V2DFmode);
14495 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14496 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14497 return op0;
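/* For SETPD1 the scalar argument is spilled to a stack slot, loaded into the
   low half of an XMM register, and shufpd with selector 0 then copies that
   low double into the high half as well, i.e. the single argument is
   splatted across both elements.  */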
14499 case IX86_BUILTIN_SETPD:
14500 target = assign_386_stack_local (V2DFmode, 0);
14501 arg0 = TREE_VALUE (arglist);
14502 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14503 emit_move_insn (adjust_address (target, DFmode, 0),
14504 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14505 emit_move_insn (adjust_address (target, DFmode, 8),
14506 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14507 op0 = gen_reg_rtx (V2DFmode);
14508 emit_insn (gen_sse2_movapd (op0, target));
14509 return op0;
14511 case IX86_BUILTIN_LOADRPD:
14512 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14513 gen_reg_rtx (V2DFmode), 1);
14514 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14515 return target;
14517 case IX86_BUILTIN_LOADPD1:
14518 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14519 gen_reg_rtx (V2DFmode), 1);
14520 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14521 return target;
14523 case IX86_BUILTIN_STOREPD1:
14524 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14525 case IX86_BUILTIN_STORERPD:
14526 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14528 case IX86_BUILTIN_CLRPD:
14529 target = gen_reg_rtx (V2DFmode);
14530 emit_insn (gen_sse_clrv2df (target));
14531 return target;
14533 case IX86_BUILTIN_MFENCE:
14534 emit_insn (gen_sse2_mfence ());
14535 return 0;
14536 case IX86_BUILTIN_LFENCE:
14537 emit_insn (gen_sse2_lfence ());
14538 return 0;
14540 case IX86_BUILTIN_CLFLUSH:
14541 arg0 = TREE_VALUE (arglist);
14542 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14543 icode = CODE_FOR_sse2_clflush;
14544 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14545 op0 = copy_to_mode_reg (Pmode, op0);
14547 emit_insn (gen_sse2_clflush (op0));
14548 return 0;
14550 case IX86_BUILTIN_MOVNTPD:
14551 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14552 case IX86_BUILTIN_MOVNTDQ:
14553 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14554 case IX86_BUILTIN_MOVNTI:
14555 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14557 case IX86_BUILTIN_LOADDQA:
14558 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14559 case IX86_BUILTIN_LOADDQU:
14560 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14561 case IX86_BUILTIN_LOADD:
14562 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14564 case IX86_BUILTIN_STOREDQA:
14565 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14566 case IX86_BUILTIN_STOREDQU:
14567 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14568 case IX86_BUILTIN_STORED:
14569 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14571 case IX86_BUILTIN_MONITOR:
14572 arg0 = TREE_VALUE (arglist);
14573 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14574 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14575 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14576 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14577 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14578 if (!REG_P (op0))
14579 op0 = copy_to_mode_reg (SImode, op0);
14580 if (!REG_P (op1))
14581 op1 = copy_to_mode_reg (SImode, op1);
14582 if (!REG_P (op2))
14583 op2 = copy_to_mode_reg (SImode, op2);
14584 emit_insn (gen_monitor (op0, op1, op2));
14585 return 0;
14587 case IX86_BUILTIN_MWAIT:
14588 arg0 = TREE_VALUE (arglist);
14589 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14590 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14591 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14592 if (!REG_P (op0))
14593 op0 = copy_to_mode_reg (SImode, op0);
14594 if (!REG_P (op1))
14595 op1 = copy_to_mode_reg (SImode, op1);
14596 emit_insn (gen_mwait (op0, op1));
14597 return 0;
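/* Usage sketch for the two builtins above, assuming the usual <pmmintrin.h>
   wrappers _mm_monitor (addr, extensions, hints) and
   _mm_mwait (extensions, hints): all operands are forced into registers
   here, and the monitor/mwait instructions themselves take their arguments
   in fixed registers (EAX/ECX/EDX and EAX/ECX respectively).  */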
14599 case IX86_BUILTIN_LOADDDUP:
14600 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14602 case IX86_BUILTIN_LDDQU:
14603 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
14606 default:
14607 break;
14610 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14611 if (d->code == fcode)
14613 /* Compares are treated specially. */
14614 if (d->icode == CODE_FOR_maskcmpv4sf3
14615 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14616 || d->icode == CODE_FOR_maskncmpv4sf3
14617 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14618 || d->icode == CODE_FOR_maskcmpv2df3
14619 || d->icode == CODE_FOR_vmmaskcmpv2df3
14620 || d->icode == CODE_FOR_maskncmpv2df3
14621 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14622 return ix86_expand_sse_compare (d, arglist, target);
14624 return ix86_expand_binop_builtin (d->icode, arglist, target);
14627 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14628 if (d->code == fcode)
14629 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14631 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14632 if (d->code == fcode)
14633 return ix86_expand_sse_comi (d, arglist, target);
14635 /* @@@ Should really do something sensible here. */
14636 return 0;
14639 /* Store OPERAND to memory after reload has completed.  This means
14640 that we can't easily use assign_stack_local.  */
14642 ix86_force_to_memory (enum machine_mode mode, rtx operand)
14644 rtx result;
14645 if (!reload_completed)
14646 abort ();
14647 if (TARGET_RED_ZONE)
14649 result = gen_rtx_MEM (mode,
14650 gen_rtx_PLUS (Pmode,
14651 stack_pointer_rtx,
14652 GEN_INT (-RED_ZONE_SIZE)));
14653 emit_move_insn (result, operand);
14655 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14657 switch (mode)
14659 case HImode:
14660 case SImode:
14661 operand = gen_lowpart (DImode, operand);
14662 /* FALLTHRU */
14663 case DImode:
14664 emit_insn (
14665 gen_rtx_SET (VOIDmode,
14666 gen_rtx_MEM (DImode,
14667 gen_rtx_PRE_DEC (DImode,
14668 stack_pointer_rtx)),
14669 operand));
14670 break;
14671 default:
14672 abort ();
14674 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14676 else
14678 switch (mode)
14680 case DImode:
14682 rtx operands[2];
14683 split_di (&operand, 1, operands, operands + 1);
14684 emit_insn (
14685 gen_rtx_SET (VOIDmode,
14686 gen_rtx_MEM (SImode,
14687 gen_rtx_PRE_DEC (Pmode,
14688 stack_pointer_rtx)),
14689 operands[1]));
14690 emit_insn (
14691 gen_rtx_SET (VOIDmode,
14692 gen_rtx_MEM (SImode,
14693 gen_rtx_PRE_DEC (Pmode,
14694 stack_pointer_rtx)),
14695 operands[0]));
14697 break;
14698 case HImode:
14699 /* It is better to store HImodes as SImodes. */
14700 if (!TARGET_PARTIAL_REG_STALL)
14701 operand = gen_lowpart (SImode, operand);
14702 /* FALLTHRU */
14703 case SImode:
14704 emit_insn (
14705 gen_rtx_SET (VOIDmode,
14706 gen_rtx_MEM (GET_MODE (operand),
14707 gen_rtx_PRE_DEC (SImode,
14708 stack_pointer_rtx)),
14709 operand));
14710 break;
14711 default:
14712 abort ();
14714 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14716 return result;
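/* Illustrative summary of the three paths above: with a red zone the
   operand is simply stored at sp - RED_ZONE_SIZE without touching the
   stack pointer; on 64-bit targets without a red zone it is pushed as a
   DImode value; and on 32-bit targets it is pushed directly, with DImode
   split into two SImode pushes.  The returned MEM always describes the
   resulting stack location.  */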
14719 /* Free operand from the memory. */
14720 void
14721 ix86_free_from_memory (enum machine_mode mode)
14723 if (!TARGET_RED_ZONE)
14725 int size;
14727 if (mode == DImode || TARGET_64BIT)
14728 size = 8;
14729 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14730 size = 2;
14731 else
14732 size = 4;
14733 /* Use LEA to deallocate stack space.  In peephole2 it will be converted
14734 to a pop or add instruction if registers are available.  */
14735 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14736 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14737 GEN_INT (size))));
14741 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14742 QImode must go into class Q_REGS.
14743 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14744 movdf to do mem-to-mem moves through integer regs. */
14745 enum reg_class
14746 ix86_preferred_reload_class (rtx x, enum reg_class class)
14748 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14749 return NO_REGS;
14750 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14752 /* SSE can't load any constant directly yet. */
14753 if (SSE_CLASS_P (class))
14754 return NO_REGS;
14755 /* Floats can load 0 and 1. */
14756 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14758 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14759 if (MAYBE_SSE_CLASS_P (class))
14760 return (reg_class_subset_p (class, GENERAL_REGS)
14761 ? GENERAL_REGS : FLOAT_REGS);
14762 else
14763 return class;
14765 /* General regs can load everything. */
14766 if (reg_class_subset_p (class, GENERAL_REGS))
14767 return GENERAL_REGS;
14768 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14769 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14770 return NO_REGS;
14772 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14773 return NO_REGS;
14774 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14775 return Q_REGS;
14776 return class;
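/* Example of the effect (sketch): an arbitrary CONST_DOUBLE destined for an
   SSE class yields NO_REGS, forcing the constant into memory, while 0.0 or
   1.0 headed for an x87-capable class stays in FLOAT_REGS (or is narrowed
   to GENERAL_REGS when the class allows it).  A QImode value in a class
   that is not a subset of Q_REGS is narrowed to Q_REGS.  */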
14779 /* If we are copying between general and FP registers, we need a memory
14780 location. The same is true for SSE and MMX registers.
14782 The macro can't work reliably when one of the CLASSES is a class containing
14783 registers from multiple units (SSE, MMX, integer).  We avoid this by never
14784 combining those units in a single alternative in the machine description.
14785 Ensure that this constraint holds to avoid unexpected surprises.
14787 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14788 enforce these sanity checks. */
14790 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14791 enum machine_mode mode, int strict)
14793 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14794 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14795 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14796 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14797 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14798 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14800 if (strict)
14801 abort ();
14802 else
14803 return 1;
14805 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14806 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14807 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14808 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14809 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
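/* Example (sketch): on a 32-bit target, moving a DImode value between a
   general register pair and an SSE register reports that secondary memory
   is needed, so the move goes through a stack slot; a plain SImode move
   between the same classes may go directly, subject to
   TARGET_INTER_UNIT_MOVES.  */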
14811 /* Return the cost of moving data from a register in class CLASS1 to
14812 one in class CLASS2.
14814 It is not required that the cost always equal 2 when FROM is the same as TO;
14815 on some machines it is expensive to move between registers if they are not
14816 general registers. */
14818 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14819 enum reg_class class2)
14821 /* If secondary memory is required, compute the cost of the store followed
14822 by the load.  In order to avoid bad register allocation choices, we need
14823 this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
14825 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14827 int cost = 1;
14829 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14830 MEMORY_MOVE_COST (mode, class1, 1));
14831 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14832 MEMORY_MOVE_COST (mode, class2, 1));
14834 /* When copying from a general purpose register we may emit multiple
14835 stores followed by a single load, causing a memory size mismatch stall.
14836 Count this as an arbitrarily high cost of 20.
14837 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14838 cost += 20;
14840 /* In the case of FP/MMX moves, the registers actually overlap, and we
14841 have to switch modes in order to treat them differently. */
14842 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14843 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14844 cost += 20;
14846 return cost;
14849 /* Moves between SSE/MMX and integer unit are expensive. */
14850 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14851 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14852 return ix86_cost->mmxsse_to_integer;
14853 if (MAYBE_FLOAT_CLASS_P (class1))
14854 return ix86_cost->fp_move;
14855 if (MAYBE_SSE_CLASS_P (class1))
14856 return ix86_cost->sse_move;
14857 if (MAYBE_MMX_CLASS_P (class1))
14858 return ix86_cost->mmx_move;
14859 return 2;
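/* Worked example (sketch; exact numbers depend on the active cost table):
   for a move that needs secondary memory, the cost is 1 plus, for each
   class, the larger of its load and store cost, plus 20 when the source
   class needs more hard registers than the destination (several stores
   feeding one load), and another 20 for FP<->MMX moves, whose registers
   overlap and require a mode switch.  */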
14862 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14864 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14866 /* Only the flags registers can hold CCmode values, and they can hold nothing else.  */
14867 if (CC_REGNO_P (regno))
14868 return GET_MODE_CLASS (mode) == MODE_CC;
14869 if (GET_MODE_CLASS (mode) == MODE_CC
14870 || GET_MODE_CLASS (mode) == MODE_RANDOM
14871 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14872 return 0;
14873 if (FP_REGNO_P (regno))
14874 return VALID_FP_MODE_P (mode);
14875 if (SSE_REGNO_P (regno))
14876 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14877 if (MMX_REGNO_P (regno))
14878 return (TARGET_MMX
14879 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14880 /* We handle both integers and floats in the general purpose registers.
14881 In the future we should be able to handle vector modes as well.  */
14882 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14883 return 0;
14884 /* Take care with QImode values: they can be in non-QI regs, but then
14885 they cause partial register stalls.  */
14886 if (regno < 4 || mode != QImode || TARGET_64BIT)
14887 return 1;
14888 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14891 /* Return the cost of moving data of mode M between a
14892 register and memory. A value of 2 is the default; this cost is
14893 relative to those in `REGISTER_MOVE_COST'.
14895 If moving between registers and memory is more expensive than
14896 between two registers, you should define this macro to express the
14897 relative cost.
14899 Also model the increased cost of moving QImode registers in non
14900 Q_REGS classes.
14903 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14905 if (FLOAT_CLASS_P (class))
14907 int index;
14908 switch (mode)
14910 case SFmode:
14911 index = 0;
14912 break;
14913 case DFmode:
14914 index = 1;
14915 break;
14916 case XFmode:
14917 index = 2;
14918 break;
14919 default:
14920 return 100;
14922 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14924 if (SSE_CLASS_P (class))
14926 int index;
14927 switch (GET_MODE_SIZE (mode))
14929 case 4:
14930 index = 0;
14931 break;
14932 case 8:
14933 index = 1;
14934 break;
14935 case 16:
14936 index = 2;
14937 break;
14938 default:
14939 return 100;
14941 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14943 if (MMX_CLASS_P (class))
14945 int index;
14946 switch (GET_MODE_SIZE (mode))
14948 case 4:
14949 index = 0;
14950 break;
14951 case 8:
14952 index = 1;
14953 break;
14954 default:
14955 return 100;
14957 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14959 switch (GET_MODE_SIZE (mode))
14961 case 1:
14962 if (in)
14963 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14964 : ix86_cost->movzbl_load);
14965 else
14966 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14967 : ix86_cost->int_store[0] + 4);
14968 break;
14969 case 2:
14970 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14971 default:
14972 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14973 if (mode == TFmode)
14974 mode = XFmode;
14975 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14976 * (((int) GET_MODE_SIZE (mode)
14977 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
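/* Worked example (sketch): a DImode value in GENERAL_REGS on a 32-bit
   target falls through to the default case, so its cost is int_load[2]
   (or int_store[2]) multiplied by the two 32-bit words it occupies; a
   QImode value in a non-Q class pays the movzbl_load cost on loads and an
   extra penalty of 4 on stores.  */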
14981 /* Compute a (partial) cost for rtx X. Return true if the complete
14982 cost has been computed, and false if subexpressions should be
14983 scanned. In either case, *TOTAL contains the cost result. */
14985 static bool
14986 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
14988 enum machine_mode mode = GET_MODE (x);
14990 switch (code)
14992 case CONST_INT:
14993 case CONST:
14994 case LABEL_REF:
14995 case SYMBOL_REF:
14996 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14997 *total = 3;
14998 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14999 *total = 2;
15000 else if (flag_pic && SYMBOLIC_CONST (x)
15001 && (!TARGET_64BIT
15002 || (GET_CODE (x) != LABEL_REF
15003 && (GET_CODE (x) != SYMBOL_REF
15004 || !SYMBOL_REF_LOCAL_P (x)))))
15005 *total = 1;
15006 else
15007 *total = 0;
15008 return true;
15010 case CONST_DOUBLE:
15011 if (mode == VOIDmode)
15012 *total = 0;
15013 else
15014 switch (standard_80387_constant_p (x))
15016 case 1: /* 0.0 */
15017 *total = 1;
15018 break;
15019 default: /* Other constants */
15020 *total = 2;
15021 break;
15022 case 0:
15023 case -1:
15024 /* Start with (MEM (SYMBOL_REF)), since that's where
15025 it'll probably end up. Add a penalty for size. */
15026 *total = (COSTS_N_INSNS (1)
15027 + (flag_pic != 0 && !TARGET_64BIT)
15028 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
15029 break;
15031 return true;
15033 case ZERO_EXTEND:
15034 /* The zero extension is often completely free on x86_64, so make
15035 it as cheap as possible.  */
15036 if (TARGET_64BIT && mode == DImode
15037 && GET_MODE (XEXP (x, 0)) == SImode)
15038 *total = 1;
15039 else if (TARGET_ZERO_EXTEND_WITH_AND)
15040 *total = COSTS_N_INSNS (ix86_cost->add);
15041 else
15042 *total = COSTS_N_INSNS (ix86_cost->movzx);
15043 return false;
15045 case SIGN_EXTEND:
15046 *total = COSTS_N_INSNS (ix86_cost->movsx);
15047 return false;
15049 case ASHIFT:
15050 if (GET_CODE (XEXP (x, 1)) == CONST_INT
15051 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
15053 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15054 if (value == 1)
15056 *total = COSTS_N_INSNS (ix86_cost->add);
15057 return false;
15059 if ((value == 2 || value == 3)
15060 && !TARGET_DECOMPOSE_LEA
15061 && ix86_cost->lea <= ix86_cost->shift_const)
15063 *total = COSTS_N_INSNS (ix86_cost->lea);
15064 return false;
15067 /* FALLTHRU */
15069 case ROTATE:
15070 case ASHIFTRT:
15071 case LSHIFTRT:
15072 case ROTATERT:
15073 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
15075 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15077 if (INTVAL (XEXP (x, 1)) > 32)
15078 *total = COSTS_N_INSNS (ix86_cost->shift_const + 2);
15079 else
15080 *total = COSTS_N_INSNS (ix86_cost->shift_const * 2);
15082 else
15084 if (GET_CODE (XEXP (x, 1)) == AND)
15085 *total = COSTS_N_INSNS (ix86_cost->shift_var * 2);
15086 else
15087 *total = COSTS_N_INSNS (ix86_cost->shift_var * 6 + 2);
15090 else
15092 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15093 *total = COSTS_N_INSNS (ix86_cost->shift_const);
15094 else
15095 *total = COSTS_N_INSNS (ix86_cost->shift_var);
15097 return false;
15099 case MULT:
15100 if (FLOAT_MODE_P (mode))
15101 *total = COSTS_N_INSNS (ix86_cost->fmul);
15102 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15104 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15105 int nbits;
15107 for (nbits = 0; value != 0; value >>= 1)
15108 nbits++;
15110 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15111 + nbits * ix86_cost->mult_bit);
15113 else
15115 /* This is arbitrary */
15116 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15117 + 7 * ix86_cost->mult_bit);
15119 return false;
15121 case DIV:
15122 case UDIV:
15123 case MOD:
15124 case UMOD:
15125 if (FLOAT_MODE_P (mode))
15126 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15127 else
15128 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15129 return false;
15131 case PLUS:
15132 if (FLOAT_MODE_P (mode))
15133 *total = COSTS_N_INSNS (ix86_cost->fadd);
15134 else if (!TARGET_DECOMPOSE_LEA
15135 && GET_MODE_CLASS (mode) == MODE_INT
15136 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15138 if (GET_CODE (XEXP (x, 0)) == PLUS
15139 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15140 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15141 && CONSTANT_P (XEXP (x, 1)))
15143 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15144 if (val == 2 || val == 4 || val == 8)
15146 *total = COSTS_N_INSNS (ix86_cost->lea);
15147 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15148 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15149 outer_code);
15150 *total += rtx_cost (XEXP (x, 1), outer_code);
15151 return true;
15154 else if (GET_CODE (XEXP (x, 0)) == MULT
15155 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15157 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15158 if (val == 2 || val == 4 || val == 8)
15160 *total = COSTS_N_INSNS (ix86_cost->lea);
15161 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15162 *total += rtx_cost (XEXP (x, 1), outer_code);
15163 return true;
15166 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15168 *total = COSTS_N_INSNS (ix86_cost->lea);
15169 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15170 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15171 *total += rtx_cost (XEXP (x, 1), outer_code);
15172 return true;
15175 /* FALLTHRU */
15177 case MINUS:
15178 if (FLOAT_MODE_P (mode))
15180 *total = COSTS_N_INSNS (ix86_cost->fadd);
15181 return false;
15183 /* FALLTHRU */
15185 case AND:
15186 case IOR:
15187 case XOR:
15188 if (!TARGET_64BIT && mode == DImode)
15190 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15191 + (rtx_cost (XEXP (x, 0), outer_code)
15192 << (GET_MODE (XEXP (x, 0)) != DImode))
15193 + (rtx_cost (XEXP (x, 1), outer_code)
15194 << (GET_MODE (XEXP (x, 1)) != DImode)));
15195 return true;
15197 /* FALLTHRU */
15199 case NEG:
15200 if (FLOAT_MODE_P (mode))
15202 *total = COSTS_N_INSNS (ix86_cost->fchs);
15203 return false;
15205 /* FALLTHRU */
15207 case NOT:
15208 if (!TARGET_64BIT && mode == DImode)
15209 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15210 else
15211 *total = COSTS_N_INSNS (ix86_cost->add);
15212 return false;
15214 case FLOAT_EXTEND:
15215 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
15216 *total = 0;
15217 return false;
15219 case ABS:
15220 if (FLOAT_MODE_P (mode))
15221 *total = COSTS_N_INSNS (ix86_cost->fabs);
15222 return false;
15224 case SQRT:
15225 if (FLOAT_MODE_P (mode))
15226 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15227 return false;
15229 case UNSPEC:
15230 if (XINT (x, 1) == UNSPEC_TP)
15231 *total = 0;
15232 return false;
15234 default:
15235 return false;
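/* Worked example (sketch) for the MULT case above: for
   (mult:SI r (const_int 5)) the loop shifts the constant right one bit at
   a time, so nbits is 3 (the position of the highest set bit plus one) and
   the reported cost is COSTS_N_INSNS (mult_init[SImode index]
   + 3 * mult_bit); a multiply by a non-constant uses the fixed estimate of
   7 bits instead.  */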
15239 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
15240 static void
15241 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
15243 init_section ();
15244 fputs ("\tpushl $", asm_out_file);
15245 assemble_name (asm_out_file, XSTR (symbol, 0));
15246 fputc ('\n', asm_out_file);
15248 #endif
15250 #if TARGET_MACHO
15252 static int current_machopic_label_num;
15254 /* Given a symbol name and its associated stub, write out the
15255 definition of the stub. */
15257 void
15258 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15260 unsigned int length;
15261 char *binder_name, *symbol_name, lazy_ptr_name[32];
15262 int label = ++current_machopic_label_num;
15264 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15265 symb = (*targetm.strip_name_encoding) (symb);
15267 length = strlen (stub);
15268 binder_name = alloca (length + 32);
15269 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15271 length = strlen (symb);
15272 symbol_name = alloca (length + 32);
15273 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15275 sprintf (lazy_ptr_name, "L%d$lz", label);
15277 if (MACHOPIC_PURE)
15278 machopic_picsymbol_stub_section ();
15279 else
15280 machopic_symbol_stub_section ();
15282 fprintf (file, "%s:\n", stub);
15283 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15285 if (MACHOPIC_PURE)
15287 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15288 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15289 fprintf (file, "\tjmp %%edx\n");
15291 else
15292 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15294 fprintf (file, "%s:\n", binder_name);
15296 if (MACHOPIC_PURE)
15298 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15299 fprintf (file, "\tpushl %%eax\n");
15301 else
15302 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15304 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15306 machopic_lazy_symbol_ptr_section ();
15307 fprintf (file, "%s:\n", lazy_ptr_name);
15308 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15309 fprintf (file, "\t.long %s\n", binder_name);
15311 #endif /* TARGET_MACHO */
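/* Sketch of the output produced above for the MACHOPIC_PURE case
   (symbol names illustrative): the stub label and an .indirect_symbol
   directive, a call/pop pair to obtain the current PC in %eax, a
   PC-relative load of the lazy pointer into %edx and a jump through it;
   then the binder entry, which pushes the lazy pointer's address and jumps
   to dyld_stub_binding_helper; and finally the lazy pointer itself,
   initialized to the binder's address.  The non-pure case simply jumps
   through the lazy pointer by absolute address.  */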
15313 /* Order the registers for the register allocator.  */
15315 void
15316 x86_order_regs_for_local_alloc (void)
15318 int pos = 0;
15319 int i;
15321 /* First allocate the local general purpose registers. */
15322 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15323 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15324 reg_alloc_order [pos++] = i;
15326 /* Global general purpose registers. */
15327 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15328 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15329 reg_alloc_order [pos++] = i;
15331 /* x87 registers come first in case we are doing FP math
15332 using them. */
15333 if (!TARGET_SSE_MATH)
15334 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15335 reg_alloc_order [pos++] = i;
15337 /* SSE registers. */
15338 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15339 reg_alloc_order [pos++] = i;
15340 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15341 reg_alloc_order [pos++] = i;
15343 /* x87 registers. */
15344 if (TARGET_SSE_MATH)
15345 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15346 reg_alloc_order [pos++] = i;
15348 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15349 reg_alloc_order [pos++] = i;
15351 /* Initialize the rest of the array, as some registers are never
15352 allocated at all.  */
15353 while (pos < FIRST_PSEUDO_REGISTER)
15354 reg_alloc_order [pos++] = 0;
15357 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15358 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15359 #endif
15361 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15362 struct attribute_spec.handler. */
15363 static tree
15364 ix86_handle_struct_attribute (tree *node, tree name,
15365 tree args ATTRIBUTE_UNUSED,
15366 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15368 tree *type = NULL;
15369 if (DECL_P (*node))
15371 if (TREE_CODE (*node) == TYPE_DECL)
15372 type = &TREE_TYPE (*node);
15374 else
15375 type = node;
15377 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15378 || TREE_CODE (*type) == UNION_TYPE)))
15380 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15381 *no_add_attrs = true;
15384 else if ((is_attribute_p ("ms_struct", name)
15385 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15386 || ((is_attribute_p ("gcc_struct", name)
15387 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15389 warning ("`%s' incompatible attribute ignored",
15390 IDENTIFIER_POINTER (name));
15391 *no_add_attrs = true;
15394 return NULL_TREE;
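/* Usage example (sketch):

       struct __attribute__ ((ms_struct)) S { char c; int i : 4; };
       struct __attribute__ ((gcc_struct)) T { char c; int i : 4; };

   The handler above accepts either attribute on a struct or union type and
   warns (dropping the attribute) when it is applied to anything else or
   combined with its opposite.  */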
15397 static bool
15398 ix86_ms_bitfield_layout_p (tree record_type)
15400 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15401 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15402 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15405 /* Returns an expression indicating where the this parameter is
15406 located on entry to the FUNCTION. */
15408 static rtx
15409 x86_this_parameter (tree function)
15411 tree type = TREE_TYPE (function);
15413 if (TARGET_64BIT)
15415 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15416 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15419 if (ix86_function_regparm (type, function) > 0)
15421 tree parm;
15423 parm = TYPE_ARG_TYPES (type);
15424 /* Figure out whether or not the function has a variable number of
15425 arguments. */
15426 for (; parm; parm = TREE_CHAIN (parm))
15427 if (TREE_VALUE (parm) == void_type_node)
15428 break;
15429 /* If the function is not varargs, the `this' parameter is passed in a register as the first argument.  */
15430 if (parm)
15432 int regno = 0;
15433 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15434 regno = 2;
15435 return gen_rtx_REG (SImode, regno);
15439 if (aggregate_value_p (TREE_TYPE (type), type))
15440 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15441 else
15442 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
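/* Examples (sketch): on a 64-bit target `this' arrives in %rdi, or in %rsi
   when the function returns an aggregate in memory; for a 32-bit regparm
   or fastcall function it arrives in %eax or %ecx respectively; otherwise
   it lives on the stack at 4(%esp), or at 8(%esp) when there is a hidden
   aggregate-return pointer.  */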
15445 /* Determine whether x86_output_mi_thunk can succeed. */
15447 static bool
15448 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15449 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15450 HOST_WIDE_INT vcall_offset, tree function)
15452 /* 64-bit can handle anything. */
15453 if (TARGET_64BIT)
15454 return true;
15456 /* For 32-bit, everything's fine if we have one free register. */
15457 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15458 return true;
15460 /* Need a free register for vcall_offset. */
15461 if (vcall_offset)
15462 return false;
15464 /* Need a free register for GOT references. */
15465 if (flag_pic && !(*targetm.binds_local_p) (function))
15466 return false;
15468 /* Otherwise ok. */
15469 return true;
15472 /* Output the assembler code for a thunk function. THUNK_DECL is the
15473 declaration for the thunk function itself, FUNCTION is the decl for
15474 the target function. DELTA is an immediate constant offset to be
15475 added to THIS. If VCALL_OFFSET is nonzero, the word at
15476 *(*this + vcall_offset) should be added to THIS. */
15478 static void
15479 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15480 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15481 HOST_WIDE_INT vcall_offset, tree function)
15483 rtx xops[3];
15484 rtx this = x86_this_parameter (function);
15485 rtx this_reg, tmp;
15487 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15488 pull it in now and let DELTA benefit. */
15489 if (REG_P (this))
15490 this_reg = this;
15491 else if (vcall_offset)
15493 /* Put the this parameter into %eax. */
15494 xops[0] = this;
15495 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15496 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15498 else
15499 this_reg = NULL_RTX;
15501 /* Adjust the this parameter by a fixed constant. */
15502 if (delta)
15504 xops[0] = GEN_INT (delta);
15505 xops[1] = this_reg ? this_reg : this;
15506 if (TARGET_64BIT)
15508 if (!x86_64_general_operand (xops[0], DImode))
15510 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15511 xops[1] = tmp;
15512 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15513 xops[0] = tmp;
15514 xops[1] = this;
15516 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15518 else
15519 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15522 /* Adjust the this parameter by a value stored in the vtable. */
15523 if (vcall_offset)
15525 if (TARGET_64BIT)
15526 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15527 else
15529 int tmp_regno = 2 /* ECX */;
15530 if (lookup_attribute ("fastcall",
15531 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15532 tmp_regno = 0 /* EAX */;
15533 tmp = gen_rtx_REG (SImode, tmp_regno);
15536 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15537 xops[1] = tmp;
15538 if (TARGET_64BIT)
15539 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15540 else
15541 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15543 /* Adjust the this parameter. */
15544 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15545 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15547 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15548 xops[0] = GEN_INT (vcall_offset);
15549 xops[1] = tmp2;
15550 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15551 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15553 xops[1] = this_reg;
15554 if (TARGET_64BIT)
15555 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15556 else
15557 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15560 /* If necessary, drop THIS back to its stack slot. */
15561 if (this_reg && this_reg != this)
15563 xops[0] = this_reg;
15564 xops[1] = this;
15565 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15568 xops[0] = XEXP (DECL_RTL (function), 0);
15569 if (TARGET_64BIT)
15571 if (!flag_pic || (*targetm.binds_local_p) (function))
15572 output_asm_insn ("jmp\t%P0", xops);
15573 else
15575 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15576 tmp = gen_rtx_CONST (Pmode, tmp);
15577 tmp = gen_rtx_MEM (QImode, tmp);
15578 xops[0] = tmp;
15579 output_asm_insn ("jmp\t%A0", xops);
15582 else
15584 if (!flag_pic || (*targetm.binds_local_p) (function))
15585 output_asm_insn ("jmp\t%P0", xops);
15586 else
15587 #if TARGET_MACHO
15588 if (TARGET_MACHO)
15590 const char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15591 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15592 tmp = gen_rtx_MEM (QImode, tmp);
15593 xops[0] = tmp;
15594 output_asm_insn ("jmp\t%0", xops);
15596 else
15597 #endif /* TARGET_MACHO */
15599 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15600 output_set_got (tmp);
15602 xops[1] = tmp;
15603 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15604 output_asm_insn ("jmp\t{*}%1", xops);
15609 static void
15610 x86_file_start (void)
15612 default_file_start ();
15613 if (X86_FILE_START_VERSION_DIRECTIVE)
15614 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15615 if (X86_FILE_START_FLTUSED)
15616 fputs ("\t.global\t__fltused\n", asm_out_file);
15617 if (ix86_asm_dialect == ASM_INTEL)
15618 fputs ("\t.intel_syntax\n", asm_out_file);
15622 x86_field_alignment (tree field, int computed)
15624 enum machine_mode mode;
15625 tree type = TREE_TYPE (field);
15627 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15628 return computed;
15629 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15630 ? get_inner_array_type (type) : type);
15631 if (mode == DFmode || mode == DCmode
15632 || GET_MODE_CLASS (mode) == MODE_INT
15633 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15634 return MIN (32, computed);
15635 return computed;
15638 /* Output assembler code to FILE to increment profiler label # LABELNO
15639 for profiling a function entry. */
15640 void
15641 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15643 if (TARGET_64BIT)
15644 if (flag_pic)
15646 #ifndef NO_PROFILE_COUNTERS
15647 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15648 #endif
15649 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15651 else
15653 #ifndef NO_PROFILE_COUNTERS
15654 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15655 #endif
15656 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15658 else if (flag_pic)
15660 #ifndef NO_PROFILE_COUNTERS
15661 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15662 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15663 #endif
15664 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15666 else
15668 #ifndef NO_PROFILE_COUNTERS
15669 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15670 PROFILE_COUNT_REGISTER);
15671 #endif
15672 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15676 /* We don't have exact information about the insn sizes, but we may assume
15677 quite safely that we are informed about all 1 byte insns and memory
15678 address sizes. This is enough to eliminate unnecessary padding in
15679 99% of cases. */
15681 static int
15682 min_insn_size (rtx insn)
15684 int l = 0;
15686 if (!INSN_P (insn) || !active_insn_p (insn))
15687 return 0;
15689 /* Discard the alignments we've emitted and jump table data.  */
15690 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15691 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15692 return 0;
15693 if (GET_CODE (insn) == JUMP_INSN
15694 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15695 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15696 return 0;
15698 /* Important case: direct calls are always 5 bytes.
15699 It is common to have many calls in a row.  */
15700 if (GET_CODE (insn) == CALL_INSN
15701 && symbolic_reference_mentioned_p (PATTERN (insn))
15702 && !SIBLING_CALL_P (insn))
15703 return 5;
15704 if (get_attr_length (insn) <= 1)
15705 return 1;
15707 /* For normal instructions we may rely on the sizes of addresses
15708 and the presence of a symbol to require 4 bytes of encoding.
15709 This is not the case for jumps, where references are PC relative.  */
15710 if (GET_CODE (insn) != JUMP_INSN)
15712 l = get_attr_length_address (insn);
15713 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15714 l = 4;
15716 if (l)
15717 return 1+l;
15718 else
15719 return 2;
15722 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
15723 16 byte window.  */
15725 static void
15726 k8_avoid_jump_misspredicts (void)
15728 rtx insn, start = get_insns ();
15729 int nbytes = 0, njumps = 0;
15730 int isjump = 0;
15732 /* Look for all minimal intervals of instructions containing 4 jumps.
15733 The intervals are bounded by START and INSN.  NBYTES is the total
15734 size of the instructions in the interval, including INSN and not including
15735 START.  When NBYTES is smaller than 16 bytes, it is possible
15736 that the ends of START and INSN fall in the same 16 byte window.
15738 The smallest offset in the window at which INSN can start is the case where
15739 START ends at offset 0.  The offset of INSN is then NBYTES - sizeof (INSN).
15740 We add a p2align to the 16 byte window with maxskip 17 - NBYTES + sizeof (INSN).
15742 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15745 nbytes += min_insn_size (insn);
15746 if (rtl_dump_file)
15747 fprintf(rtl_dump_file, "Insn %i estimated to %i bytes\n",
15748 INSN_UID (insn), min_insn_size (insn));
15749 if ((GET_CODE (insn) == JUMP_INSN
15750 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15751 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15752 || GET_CODE (insn) == CALL_INSN)
15753 njumps++;
15754 else
15755 continue;
15757 while (njumps > 3)
15759 start = NEXT_INSN (start);
15760 if ((GET_CODE (start) == JUMP_INSN
15761 && GET_CODE (PATTERN (start)) != ADDR_VEC
15762 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15763 || GET_CODE (start) == CALL_INSN)
15764 njumps--, isjump = 1;
15765 else
15766 isjump = 0;
15767 nbytes -= min_insn_size (start);
15769 if (njumps < 0)
15770 abort ();
15771 if (rtl_dump_file)
15772 fprintf(rtl_dump_file, "Interval %i to %i has %i bytes\n",
15773 INSN_UID (start), INSN_UID (insn), nbytes);
15775 if (njumps == 3 && isjump && nbytes < 16)
15777 int padsize = 15 - nbytes + min_insn_size (insn);
15779 if (rtl_dump_file)
15780 fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n", INSN_UID (insn), padsize);
15781 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
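/* Worked example (sketch): if the interval from START to this jump
   (INSN included) measures nbytes = 12 and INSN itself is 2 bytes, then
   padsize = 15 - 12 + 2 = 5, and the gen_align emitted above asks for
   enough padding that this fourth jump can no longer share a 16 byte
   window with the previous three.  */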
15786 /* Implement machine specific optimizations.
15787 At the moment we implement a single transformation: AMD Athlon works faster
15788 when RET is not the destination of a conditional jump or directly preceded
15789 by another jump instruction.  We avoid the penalty by replacing such a RET
15790 with the longer return_internal_long form.  */
15791 static void
15792 ix86_reorg (void)
15794 edge e;
15796 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
15797 return;
15798 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15800 basic_block bb = e->src;
15801 rtx ret = BB_END (bb);
15802 rtx prev;
15803 bool replace = false;
15805 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15806 || !maybe_hot_bb_p (bb))
15807 continue;
15808 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15809 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15810 break;
15811 if (prev && GET_CODE (prev) == CODE_LABEL)
15813 edge e;
15814 for (e = bb->pred; e; e = e->pred_next)
15815 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15816 && !(e->flags & EDGE_FALLTHRU))
15817 replace = true;
15819 if (!replace)
15821 prev = prev_active_insn (ret);
15822 if (prev
15823 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15824 || GET_CODE (prev) == CALL_INSN))
15825 replace = true;
15826 /* Empty functions get a branch mispredict even when the jump destination
15827 is not visible to us.  */
15828 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15829 replace = true;
15831 if (replace)
15833 emit_insn_before (gen_return_internal_long (), ret);
15834 delete_insn (ret);
15837 k8_avoid_jump_misspredicts ();
15840 /* Return nonzero when a QImode register that must be represented via a REX
15841 prefix is used.  */
15842 bool
15843 x86_extended_QIreg_mentioned_p (rtx insn)
15845 int i;
15846 extract_insn_cached (insn);
15847 for (i = 0; i < recog_data.n_operands; i++)
15848 if (REG_P (recog_data.operand[i])
15849 && REGNO (recog_data.operand[i]) >= 4)
15850 return true;
15851 return false;
15854 /* Return nonzero when P points to a register encoded via a REX prefix.
15855 Called via for_each_rtx.  */
15856 static int
15857 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15859 unsigned int regno;
15860 if (!REG_P (*p))
15861 return 0;
15862 regno = REGNO (*p);
15863 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15866 /* Return true when INSN mentions register that must be encoded using REX
15867 prefix. */
15868 bool
15869 x86_extended_reg_mentioned_p (rtx insn)
15871 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15874 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15875 optabs would emit if we didn't have TFmode patterns. */
15877 void
15878 x86_emit_floatuns (rtx operands[2])
15880 rtx neglab, donelab, i0, i1, f0, in, out;
15881 enum machine_mode mode, inmode;
15883 inmode = GET_MODE (operands[1]);
15884 if (inmode != SImode
15885 && inmode != DImode)
15886 abort ();
15888 out = operands[0];
15889 in = force_reg (inmode, operands[1]);
15890 mode = GET_MODE (out);
15891 neglab = gen_label_rtx ();
15892 donelab = gen_label_rtx ();
15893 i1 = gen_reg_rtx (Pmode);
15894 f0 = gen_reg_rtx (mode);
15896 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15898 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15899 emit_jump_insn (gen_jump (donelab));
15900 emit_barrier ();
15902 emit_label (neglab);
15904 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15905 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15906 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15907 expand_float (f0, i0, 0);
15908 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15910 emit_label (donelab);
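/* C-level sketch of the sequence emitted above, assuming a 64-bit unsigned
   input X converted to double: when the top bit of X is clear, a plain
   signed conversion is used; otherwise X is halved with its low bit kept
   sticky, converted, and doubled:

       uint64_t half = (x >> 1) | (x & 1);
       result = (double) half;
       result = result + result;
*/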
15913 /* Return true if we do not know how to pass TYPE solely in registers.  */
15914 bool
15915 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
15917 if (default_must_pass_in_stack (mode, type))
15918 return true;
15919 return (!TARGET_64BIT && type && mode == TImode);
15922 /* Initialize vector TARGET via VALS. */
15923 void
15924 ix86_expand_vector_init (rtx target, rtx vals)
15926 enum machine_mode mode = GET_MODE (target);
15927 int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15928 int n_elts = (GET_MODE_SIZE (mode) / elt_size);
15929 int i;
15931 for (i = n_elts - 1; i >= 0; i--)
15932 if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
15933 && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
15934 break;
15936 /* A few special cases first...
15937 ... constants are best loaded from the constant pool.  */
15938 if (i < 0)
15940 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15941 return;
15944 /* ... values where only the first field is non-constant are best loaded
15945 from the pool and overwritten via a move later.  */
15946 if (!i)
15948 rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
15949 GET_MODE_INNER (mode), 0);
15951 op = force_reg (mode, op);
15952 XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
15953 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15954 switch (GET_MODE (target))
15956 case V2DFmode:
15957 emit_insn (gen_sse2_movsd (target, target, op));
15958 break;
15959 case V4SFmode:
15960 emit_insn (gen_sse_movss (target, target, op));
15961 break;
15962 default:
15963 break;
15965 return;
15968 /* And the general case, built up with interleave (unpack) instructions.  */
15969 switch (GET_MODE (target))
15971 case V2DFmode:
15973 rtx vecop0 =
15974 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
15975 rtx vecop1 =
15976 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
15978 vecop0 = force_reg (V2DFmode, vecop0);
15979 vecop1 = force_reg (V2DFmode, vecop1);
15980 emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
15982 break;
15983 case V4SFmode:
15985 rtx vecop0 =
15986 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
15987 rtx vecop1 =
15988 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
15989 rtx vecop2 =
15990 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
15991 rtx vecop3 =
15992 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
15993 rtx tmp1 = gen_reg_rtx (V4SFmode);
15994 rtx tmp2 = gen_reg_rtx (V4SFmode);
15996 vecop0 = force_reg (V4SFmode, vecop0);
15997 vecop1 = force_reg (V4SFmode, vecop1);
15998 vecop2 = force_reg (V4SFmode, vecop2);
15999 vecop3 = force_reg (V4SFmode, vecop3);
16000 emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
16001 emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
16002 emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
16004 break;
16005 default:
16006 abort ();
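/* Trace of the V4SF path above for elements {a, b, c, d} (illustrative):

       tmp1   = unpcklps (b, d)        -> { b, d, ?, ? }
       tmp2   = unpcklps (a, c)        -> { a, c, ?, ? }
       target = unpcklps (tmp2, tmp1)  -> { a, b, c, d }

   The V2DF path needs only a single unpcklpd of the two elements.  */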
16010 #include "gt-i386.h"