1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
53 #ifndef CHECK_STACK_LIMIT
54 #define CHECK_STACK_LIMIT (-1)
55 #endif
57 /* Return index of given mode in mult and division cost tables. */
58 #define MODE_INDEX(mode) \
59 ((mode) == QImode ? 0 \
60 : (mode) == HImode ? 1 \
61 : (mode) == SImode ? 2 \
62 : (mode) == DImode ? 3 \
63 : 4)
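/* Note added for exposition (not in the original source): MODE_INDEX maps
   QImode/HImode/SImode/DImode to 0-3 and anything wider (e.g. TImode) to 4,
   so it can index the five-entry multiply and divide cost arrays below,
   roughly as in

     cost = ix86_cost->mult_init[MODE_INDEX (mode)]
            + nbits * ix86_cost->mult_bit;

   assuming the field names mult_init/mult_bit from the processor_costs
   definition in i386.h.  */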
65 /* Processor costs (relative to an add) */
66 static const
67 struct processor_costs size_cost = { /* costs for tuning for size */
68 2, /* cost of an add instruction */
69 3, /* cost of a lea instruction */
70 2, /* variable shift costs */
71 3, /* constant shift costs */
72 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
73 0, /* cost of multiply per each bit set */
74 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
75 3, /* cost of movsx */
76 3, /* cost of movzx */
77 0, /* "large" insn */
78 2, /* MOVE_RATIO */
79 2, /* cost for loading QImode using movzbl */
80 {2, 2, 2}, /* cost of loading integer registers
81 in QImode, HImode and SImode.
82 Relative to reg-reg move (2). */
83 {2, 2, 2}, /* cost of storing integer registers */
84 2, /* cost of reg,reg fld/fst */
85 {2, 2, 2}, /* cost of loading fp registers
86 in SFmode, DFmode and XFmode */
87 {2, 2, 2}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
88 3, /* cost of moving MMX register */
89 {3, 3}, /* cost of loading MMX registers
90 in SImode and DImode */
91 {3, 3}, /* cost of storing MMX registers
92 in SImode and DImode */
93 3, /* cost of moving SSE register */
94 {3, 3, 3}, /* cost of loading SSE registers
95 in SImode, DImode and TImode */
96 {3, 3, 3}, /* cost of storing SSE registers
97 in SImode, DImode and TImode */
98 3, /* MMX or SSE register to integer */
99 0, /* size of prefetch block */
100 0, /* number of parallel prefetches */
101 1, /* Branch cost */
102 2, /* cost of FADD and FSUB insns. */
103 2, /* cost of FMUL instruction. */
104 2, /* cost of FDIV instruction. */
105 2, /* cost of FABS instruction. */
106 2, /* cost of FCHS instruction. */
107 2, /* cost of FSQRT instruction. */
110 /* Processor costs (relative to an add) */
111 static const
112 struct processor_costs i386_cost = { /* 386 specific costs */
113 1, /* cost of an add instruction */
114 1, /* cost of a lea instruction */
115 3, /* variable shift costs */
116 2, /* constant shift costs */
117 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
118 1, /* cost of multiply per each bit set */
119 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
120 3, /* cost of movsx */
121 2, /* cost of movzx */
122 15, /* "large" insn */
123 3, /* MOVE_RATIO */
124 4, /* cost for loading QImode using movzbl */
125 {2, 4, 2}, /* cost of loading integer registers
126 in QImode, HImode and SImode.
127 Relative to reg-reg move (2). */
128 {2, 4, 2}, /* cost of storing integer registers */
129 2, /* cost of reg,reg fld/fst */
130 {8, 8, 8}, /* cost of loading fp registers
131 in SFmode, DFmode and XFmode */
132 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
133 2, /* cost of moving MMX register */
134 {4, 8}, /* cost of loading MMX registers
135 in SImode and DImode */
136 {4, 8}, /* cost of storing MMX registers
137 in SImode and DImode */
138 2, /* cost of moving SSE register */
139 {4, 8, 16}, /* cost of loading SSE registers
140 in SImode, DImode and TImode */
141 {4, 8, 16}, /* cost of storing SSE registers
142 in SImode, DImode and TImode */
143 3, /* MMX or SSE register to integer */
144 0, /* size of prefetch block */
145 0, /* number of parallel prefetches */
146 1, /* Branch cost */
147 23, /* cost of FADD and FSUB insns. */
148 27, /* cost of FMUL instruction. */
149 88, /* cost of FDIV instruction. */
150 22, /* cost of FABS instruction. */
151 24, /* cost of FCHS instruction. */
152 122, /* cost of FSQRT instruction. */
155 static const
156 struct processor_costs i486_cost = { /* 486 specific costs */
157 1, /* cost of an add instruction */
158 1, /* cost of a lea instruction */
159 3, /* variable shift costs */
160 2, /* constant shift costs */
161 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
162 1, /* cost of multiply per each bit set */
163 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
164 3, /* cost of movsx */
165 2, /* cost of movzx */
166 15, /* "large" insn */
167 3, /* MOVE_RATIO */
168 4, /* cost for loading QImode using movzbl */
169 {2, 4, 2}, /* cost of loading integer registers
170 in QImode, HImode and SImode.
171 Relative to reg-reg move (2). */
172 {2, 4, 2}, /* cost of storing integer registers */
173 2, /* cost of reg,reg fld/fst */
174 {8, 8, 8}, /* cost of loading fp registers
175 in SFmode, DFmode and XFmode */
176 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
177 2, /* cost of moving MMX register */
178 {4, 8}, /* cost of loading MMX registers
179 in SImode and DImode */
180 {4, 8}, /* cost of storing MMX registers
181 in SImode and DImode */
182 2, /* cost of moving SSE register */
183 {4, 8, 16}, /* cost of loading SSE registers
184 in SImode, DImode and TImode */
185 {4, 8, 16}, /* cost of storing SSE registers
186 in SImode, DImode and TImode */
187 3, /* MMX or SSE register to integer */
188 0, /* size of prefetch block */
189 0, /* number of parallel prefetches */
190 1, /* Branch cost */
191 8, /* cost of FADD and FSUB insns. */
192 16, /* cost of FMUL instruction. */
193 73, /* cost of FDIV instruction. */
194 3, /* cost of FABS instruction. */
195 3, /* cost of FCHS instruction. */
196 83, /* cost of FSQRT instruction. */
199 static const
200 struct processor_costs pentium_cost = {
201 1, /* cost of an add instruction */
202 1, /* cost of a lea instruction */
203 4, /* variable shift costs */
204 1, /* constant shift costs */
205 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
206 0, /* cost of multiply per each bit set */
207 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
208 3, /* cost of movsx */
209 2, /* cost of movzx */
210 8, /* "large" insn */
211 6, /* MOVE_RATIO */
212 6, /* cost for loading QImode using movzbl */
213 {2, 4, 2}, /* cost of loading integer registers
214 in QImode, HImode and SImode.
215 Relative to reg-reg move (2). */
216 {2, 4, 2}, /* cost of storing integer registers */
217 2, /* cost of reg,reg fld/fst */
218 {2, 2, 6}, /* cost of loading fp registers
219 in SFmode, DFmode and XFmode */
220 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
221 8, /* cost of moving MMX register */
222 {8, 8}, /* cost of loading MMX registers
223 in SImode and DImode */
224 {8, 8}, /* cost of storing MMX registers
225 in SImode and DImode */
226 2, /* cost of moving SSE register */
227 {4, 8, 16}, /* cost of loading SSE registers
228 in SImode, DImode and TImode */
229 {4, 8, 16}, /* cost of storing SSE registers
230 in SImode, DImode and TImode */
231 3, /* MMX or SSE register to integer */
232 0, /* size of prefetch block */
233 0, /* number of parallel prefetches */
234 2, /* Branch cost */
235 3, /* cost of FADD and FSUB insns. */
236 3, /* cost of FMUL instruction. */
237 39, /* cost of FDIV instruction. */
238 1, /* cost of FABS instruction. */
239 1, /* cost of FCHS instruction. */
240 70, /* cost of FSQRT instruction. */
243 static const
244 struct processor_costs pentiumpro_cost = {
245 1, /* cost of an add instruction */
246 1, /* cost of a lea instruction */
247 1, /* variable shift costs */
248 1, /* constant shift costs */
249 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
250 0, /* cost of multiply per each bit set */
251 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
252 1, /* cost of movsx */
253 1, /* cost of movzx */
254 8, /* "large" insn */
255 6, /* MOVE_RATIO */
256 2, /* cost for loading QImode using movzbl */
257 {4, 4, 4}, /* cost of loading integer registers
258 in QImode, HImode and SImode.
259 Relative to reg-reg move (2). */
260 {2, 2, 2}, /* cost of storing integer registers */
261 2, /* cost of reg,reg fld/fst */
262 {2, 2, 6}, /* cost of loading fp registers
263 in SFmode, DFmode and XFmode */
264 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
265 2, /* cost of moving MMX register */
266 {2, 2}, /* cost of loading MMX registers
267 in SImode and DImode */
268 {2, 2}, /* cost of storing MMX registers
269 in SImode and DImode */
270 2, /* cost of moving SSE register */
271 {2, 2, 8}, /* cost of loading SSE registers
272 in SImode, DImode and TImode */
273 {2, 2, 8}, /* cost of storing SSE registers
274 in SImode, DImode and TImode */
275 3, /* MMX or SSE register to integer */
276 32, /* size of prefetch block */
277 6, /* number of parallel prefetches */
278 2, /* Branch cost */
279 3, /* cost of FADD and FSUB insns. */
280 5, /* cost of FMUL instruction. */
281 56, /* cost of FDIV instruction. */
282 2, /* cost of FABS instruction. */
283 2, /* cost of FCHS instruction. */
284 56, /* cost of FSQRT instruction. */
287 static const
288 struct processor_costs k6_cost = {
289 1, /* cost of an add instruction */
290 2, /* cost of a lea instruction */
291 1, /* variable shift costs */
292 1, /* constant shift costs */
293 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
294 0, /* cost of multiply per each bit set */
295 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
296 2, /* cost of movsx */
297 2, /* cost of movzx */
298 8, /* "large" insn */
299 4, /* MOVE_RATIO */
300 3, /* cost for loading QImode using movzbl */
301 {4, 5, 4}, /* cost of loading integer registers
302 in QImode, HImode and SImode.
303 Relative to reg-reg move (2). */
304 {2, 3, 2}, /* cost of storing integer registers */
305 4, /* cost of reg,reg fld/fst */
306 {6, 6, 6}, /* cost of loading fp registers
307 in SFmode, DFmode and XFmode */
308 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
309 2, /* cost of moving MMX register */
310 {2, 2}, /* cost of loading MMX registers
311 in SImode and DImode */
312 {2, 2}, /* cost of storing MMX registers
313 in SImode and DImode */
314 2, /* cost of moving SSE register */
315 {2, 2, 8}, /* cost of loading SSE registers
316 in SImode, DImode and TImode */
317 {2, 2, 8}, /* cost of storing SSE registers
318 in SImode, DImode and TImode */
319 6, /* MMX or SSE register to integer */
320 32, /* size of prefetch block */
321 1, /* number of parallel prefetches */
322 1, /* Branch cost */
323 2, /* cost of FADD and FSUB insns. */
324 2, /* cost of FMUL instruction. */
325 56, /* cost of FDIV instruction. */
326 2, /* cost of FABS instruction. */
327 2, /* cost of FCHS instruction. */
328 56, /* cost of FSQRT instruction. */
331 static const
332 struct processor_costs athlon_cost = {
333 1, /* cost of an add instruction */
334 2, /* cost of a lea instruction */
335 1, /* variable shift costs */
336 1, /* constant shift costs */
337 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
338 0, /* cost of multiply per each bit set */
339 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
340 1, /* cost of movsx */
341 1, /* cost of movzx */
342 8, /* "large" insn */
343 9, /* MOVE_RATIO */
344 4, /* cost for loading QImode using movzbl */
345 {3, 4, 3}, /* cost of loading integer registers
346 in QImode, HImode and SImode.
347 Relative to reg-reg move (2). */
348 {3, 4, 3}, /* cost of storing integer registers */
349 4, /* cost of reg,reg fld/fst */
350 {4, 4, 12}, /* cost of loading fp registers
351 in SFmode, DFmode and XFmode */
352 {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
353 2, /* cost of moving MMX register */
354 {4, 4}, /* cost of loading MMX registers
355 in SImode and DImode */
356 {4, 4}, /* cost of storing MMX registers
357 in SImode and DImode */
358 2, /* cost of moving SSE register */
359 {4, 4, 6}, /* cost of loading SSE registers
360 in SImode, DImode and TImode */
361 {4, 4, 5}, /* cost of storing SSE registers
362 in SImode, DImode and TImode */
363 5, /* MMX or SSE register to integer */
364 64, /* size of prefetch block */
365 6, /* number of parallel prefetches */
366 5, /* Branch cost */
367 4, /* cost of FADD and FSUB insns. */
368 4, /* cost of FMUL instruction. */
369 24, /* cost of FDIV instruction. */
370 2, /* cost of FABS instruction. */
371 2, /* cost of FCHS instruction. */
372 35, /* cost of FSQRT instruction. */
375 static const
376 struct processor_costs k8_cost = {
377 1, /* cost of an add instruction */
378 2, /* cost of a lea instruction */
379 1, /* variable shift costs */
380 1, /* constant shift costs */
381 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
382 0, /* cost of multiply per each bit set */
383 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
384 1, /* cost of movsx */
385 1, /* cost of movzx */
386 8, /* "large" insn */
387 9, /* MOVE_RATIO */
388 4, /* cost for loading QImode using movzbl */
389 {3, 4, 3}, /* cost of loading integer registers
390 in QImode, HImode and SImode.
391 Relative to reg-reg move (2). */
392 {3, 4, 3}, /* cost of storing integer registers */
393 4, /* cost of reg,reg fld/fst */
394 {4, 4, 12}, /* cost of loading fp registers
395 in SFmode, DFmode and XFmode */
396 {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
397 2, /* cost of moving MMX register */
398 {3, 3}, /* cost of loading MMX registers
399 in SImode and DImode */
400 {4, 4}, /* cost of storing MMX registers
401 in SImode and DImode */
402 2, /* cost of moving SSE register */
403 {4, 3, 6}, /* cost of loading SSE registers
404 in SImode, DImode and TImode */
405 {4, 4, 5}, /* cost of storing SSE registers
406 in SImode, DImode and TImode */
407 5, /* MMX or SSE register to integer */
408 64, /* size of prefetch block */
409 6, /* number of parallel prefetches */
410 5, /* Branch cost */
411 4, /* cost of FADD and FSUB insns. */
412 4, /* cost of FMUL instruction. */
413 19, /* cost of FDIV instruction. */
414 2, /* cost of FABS instruction. */
415 2, /* cost of FCHS instruction. */
416 35, /* cost of FSQRT instruction. */
419 static const
420 struct processor_costs pentium4_cost = {
421 1, /* cost of an add instruction */
422 3, /* cost of a lea instruction */
423 4, /* variable shift costs */
424 4, /* constant shift costs */
425 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
426 0, /* cost of multiply per each bit set */
427 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
428 1, /* cost of movsx */
429 1, /* cost of movzx */
430 16, /* "large" insn */
431 6, /* MOVE_RATIO */
432 2, /* cost for loading QImode using movzbl */
433 {4, 5, 4}, /* cost of loading integer registers
434 in QImode, HImode and SImode.
435 Relative to reg-reg move (2). */
436 {2, 3, 2}, /* cost of storing integer registers */
437 2, /* cost of reg,reg fld/fst */
438 {2, 2, 6}, /* cost of loading fp registers
439 in SFmode, DFmode and XFmode */
440 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
441 2, /* cost of moving MMX register */
442 {2, 2}, /* cost of loading MMX registers
443 in SImode and DImode */
444 {2, 2}, /* cost of storing MMX registers
445 in SImode and DImode */
446 12, /* cost of moving SSE register */
447 {12, 12, 12}, /* cost of loading SSE registers
448 in SImode, DImode and TImode */
449 {2, 2, 8}, /* cost of storing SSE registers
450 in SImode, DImode and TImode */
451 10, /* MMX or SSE register to integer */
452 64, /* size of prefetch block */
453 6, /* number of parallel prefetches */
454 2, /* Branch cost */
455 5, /* cost of FADD and FSUB insns. */
456 7, /* cost of FMUL instruction. */
457 43, /* cost of FDIV instruction. */
458 2, /* cost of FABS instruction. */
459 2, /* cost of FCHS instruction. */
460 43, /* cost of FSQRT instruction. */
463 static const
464 struct processor_costs nocona_cost = {
465 1, /* cost of an add instruction */
466 1, /* cost of a lea instruction */
467 1, /* variable shift costs */
468 1, /* constant shift costs */
469 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
470 0, /* cost of multiply per each bit set */
471 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
472 1, /* cost of movsx */
473 1, /* cost of movzx */
474 16, /* "large" insn */
475 17, /* MOVE_RATIO */
476 4, /* cost for loading QImode using movzbl */
477 {4, 4, 4}, /* cost of loading integer registers
478 in QImode, HImode and SImode.
479 Relative to reg-reg move (2). */
480 {4, 4, 4}, /* cost of storing integer registers */
481 3, /* cost of reg,reg fld/fst */
482 {12, 12, 12}, /* cost of loading fp registers
483 in SFmode, DFmode and XFmode */
484 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
485 6, /* cost of moving MMX register */
486 {12, 12}, /* cost of loading MMX registers
487 in SImode and DImode */
488 {12, 12}, /* cost of storing MMX registers
489 in SImode and DImode */
490 6, /* cost of moving SSE register */
491 {12, 12, 12}, /* cost of loading SSE registers
492 in SImode, DImode and TImode */
493 {12, 12, 12}, /* cost of storing SSE registers
494 in SImode, DImode and TImode */
495 8, /* MMX or SSE register to integer */
496 128, /* size of prefetch block */
497 8, /* number of parallel prefetches */
498 1, /* Branch cost */
499 6, /* cost of FADD and FSUB insns. */
500 8, /* cost of FMUL instruction. */
501 40, /* cost of FDIV instruction. */
502 3, /* cost of FABS instruction. */
503 3, /* cost of FCHS instruction. */
504 44, /* cost of FSQRT instruction. */
507 const struct processor_costs *ix86_cost = &pentium_cost;
509 /* Processor feature/optimization bitmasks. */
510 #define m_386 (1<<PROCESSOR_I386)
511 #define m_486 (1<<PROCESSOR_I486)
512 #define m_PENT (1<<PROCESSOR_PENTIUM)
513 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
514 #define m_K6 (1<<PROCESSOR_K6)
515 #define m_ATHLON (1<<PROCESSOR_ATHLON)
516 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
517 #define m_K8 (1<<PROCESSOR_K8)
518 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
519 #define m_NOCONA (1<<PROCESSOR_NOCONA)
521 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
522 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
523 const int x86_zero_extend_with_and = m_486 | m_PENT;
524 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
525 const int x86_double_with_add = ~m_386;
526 const int x86_use_bit_test = m_386;
527 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
528 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
529 const int x86_fisttp = m_NOCONA;
530 const int x86_3dnow_a = m_ATHLON_K8;
531 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
532 /* Branch hints were put in P4 based on simulation results. But
533 after P4 was made, no performance benefit was observed with
534 branch hints; they also increase code size. As a result,
535 icc never generates branch hints. */
536 const int x86_branch_hints = 0;
537 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
538 const int x86_partial_reg_stall = m_PPRO;
539 const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
540 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
541 const int x86_use_mov0 = m_K6;
542 const int x86_use_cltd = ~(m_PENT | m_K6);
543 const int x86_read_modify_write = ~m_PENT;
544 const int x86_read_modify = ~(m_PENT | m_PPRO);
545 const int x86_split_long_moves = m_PPRO;
546 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
547 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
548 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
549 const int x86_qimode_math = ~(0);
550 const int x86_promote_qi_regs = 0;
551 const int x86_himode_math = ~(m_PPRO);
552 const int x86_promote_hi_regs = m_PPRO;
553 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
554 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
555 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
556 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
557 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
558 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
559 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
560 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
561 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
562 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
563 const int x86_decompose_lea = m_PENT4 | m_NOCONA;
564 const int x86_shift1 = ~m_486;
565 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
566 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
567 /* Set for machines where the type and dependencies are resolved on SSE
568 register parts instead of whole registers, so we may maintain just the
569 lower part of scalar values in the proper format, leaving the upper part
570 undefined. */
571 const int x86_sse_split_regs = m_ATHLON_K8;
572 const int x86_sse_typeless_stores = m_ATHLON_K8;
573 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
574 const int x86_use_ffreep = m_ATHLON_K8;
575 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
577 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
578 integer data in xmm registers. Which results in pretty abysmal code. */
579 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
581 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
582 /* Some CPU cores are not able to predict more than 4 branch instructions in
583 the 16 byte window. */
584 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
585 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT;
586 const int x86_use_bt = m_ATHLON_K8;
587 /* Compare and exchange was added for 80486. */
588 const int x86_cmpxchg = ~m_386;
589 /* Exchange and add was added for 80486. */
590 const int x86_xadd = ~m_386;
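/* Note added for exposition (not in the original source): each x86_* tunable
   above is a bitmask over the processors defined by the m_* macros, and the
   corresponding TARGET_* predicates in i386.h test it against the CPU being
   tuned for, roughly as in

     #define TUNEMASK (1 << ix86_tune)
     #define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)

   so a feature is enabled exactly for the CPUs OR'ed into its mask.  The
   macro spellings here are assumptions based on i386.h of this era.  */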
592 /* In case the average insn count for a single function invocation is
593 lower than this constant, emit fast (but longer) prologue and
594 epilogue code. */
595 #define FAST_PROLOGUE_INSN_COUNT 20
597 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
598 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
599 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
600 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
602 /* Array of the smallest class containing reg number REGNO, indexed by
603 REGNO. Used by REGNO_REG_CLASS in i386.h. */
605 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
607 /* ax, dx, cx, bx */
608 AREG, DREG, CREG, BREG,
609 /* si, di, bp, sp */
610 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
611 /* FP registers */
612 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
613 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
614 /* arg pointer */
615 NON_Q_REGS,
616 /* flags, fpsr, dirflag, frame */
617 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
618 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
619 SSE_REGS, SSE_REGS,
620 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
621 MMX_REGS, MMX_REGS,
622 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
623 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
624 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
625 SSE_REGS, SSE_REGS,
628 /* The "default" register map used in 32bit mode. */
630 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
632 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
633 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
634 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
635 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
636 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
637 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
638 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
641 static int const x86_64_int_parameter_registers[6] =
643 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
644 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
647 static int const x86_64_int_return_registers[4] =
649 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
652 /* The "default" register map used in 64bit mode. */
653 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
655 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
656 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
657 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
658 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
659 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
660 8,9,10,11,12,13,14,15, /* extended integer registers */
661 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
664 /* Define the register numbers to be used in Dwarf debugging information.
665 The SVR4 reference port C compiler uses the following register numbers
666 in its Dwarf output code:
667 0 for %eax (gcc regno = 0)
668 1 for %ecx (gcc regno = 2)
669 2 for %edx (gcc regno = 1)
670 3 for %ebx (gcc regno = 3)
671 4 for %esp (gcc regno = 7)
672 5 for %ebp (gcc regno = 6)
673 6 for %esi (gcc regno = 4)
674 7 for %edi (gcc regno = 5)
675 The following three DWARF register numbers are never generated by
676 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
677 believes these numbers have these meanings.
678 8 for %eip (no gcc equivalent)
679 9 for %eflags (gcc regno = 17)
680 10 for %trapno (no gcc equivalent)
681 It is not at all clear how we should number the FP stack registers
682 for the x86 architecture. If the version of SDB on x86/svr4 were
683 a bit less brain dead with respect to floating-point then we would
684 have a precedent to follow with respect to DWARF register numbers
685 for x86 FP registers, but the SDB on x86/svr4 is so completely
686 broken with respect to FP registers that it is hardly worth thinking
687 of it as something to strive for compatibility with.
688 The version of x86/svr4 SDB I have at the moment does (partially)
689 seem to believe that DWARF register number 11 is associated with
690 the x86 register %st(0), but that's about all. Higher DWARF
691 register numbers don't seem to be associated with anything in
692 particular, and even for DWARF regno 11, SDB only seems to under-
693 stand that it should say that a variable lives in %st(0) (when
694 asked via an `=' command) if we said it was in DWARF regno 11,
695 but SDB still prints garbage when asked for the value of the
696 variable in question (via a `/' command).
697 (Also note that the labels SDB prints for various FP stack regs
698 when doing an `x' command are all wrong.)
699 Note that these problems generally don't affect the native SVR4
700 C compiler because it doesn't allow the use of -O with -g and
701 because when it is *not* optimizing, it allocates a memory
702 location for each floating-point variable, and the memory
703 location is what gets described in the DWARF AT_location
704 attribute for the variable in question.
705 Regardless of the severe mental illness of the x86/svr4 SDB, we
706 do something sensible here and we use the following DWARF
707 register numbers. Note that these are all stack-top-relative
708 numbers.
709 11 for %st(0) (gcc regno = 8)
710 12 for %st(1) (gcc regno = 9)
711 13 for %st(2) (gcc regno = 10)
712 14 for %st(3) (gcc regno = 11)
713 15 for %st(4) (gcc regno = 12)
714 16 for %st(5) (gcc regno = 13)
715 17 for %st(6) (gcc regno = 14)
716 18 for %st(7) (gcc regno = 15)
718 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
720 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
721 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
722 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
723 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
724 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
725 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
726 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
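/* Note added for exposition (not in the original source): all three maps
   above are indexed by the gcc register number; for instance
   svr4_dbx_register_map[7] is 4, matching the "4 for %esp (gcc regno = 7)"
   entry in the DWARF numbering described in the comment above.  */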
729 /* Test and compare insns in i386.md store the information needed to
730 generate branch and scc insns here. */
732 rtx ix86_compare_op0 = NULL_RTX;
733 rtx ix86_compare_op1 = NULL_RTX;
734 rtx ix86_compare_emitted = NULL_RTX;
736 /* Size of the register save area. */
737 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
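/* Note added for exposition (not in the original source): with the usual
   x86-64 values REGPARM_MAX == 6 and SSE_REGPARM_MAX == 8 this evaluates to
   6*8 + 8*16 = 176 bytes, the size of the va_arg register save area.  */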
739 /* Define the structure for the machine field in struct function. */
741 struct stack_local_entry GTY(())
743 unsigned short mode;
744 unsigned short n;
745 rtx rtl;
746 struct stack_local_entry *next;
749 /* Structure describing stack frame layout.
750 Stack grows downward:
752 [arguments]
753 <- ARG_POINTER
754 saved pc
756 saved frame pointer if frame_pointer_needed
757 <- HARD_FRAME_POINTER
758 [saved regs]
760 [padding1] \
762 [va_arg registers] (
763 > to_allocate <- FRAME_POINTER
764 [frame] (
766 [padding2] /
768 struct ix86_frame
770 int nregs;
771 int padding1;
772 int va_arg_size;
773 HOST_WIDE_INT frame;
774 int padding2;
775 int outgoing_arguments_size;
776 int red_zone_size;
778 HOST_WIDE_INT to_allocate;
779 /* The offsets relative to ARG_POINTER. */
780 HOST_WIDE_INT frame_pointer_offset;
781 HOST_WIDE_INT hard_frame_pointer_offset;
782 HOST_WIDE_INT stack_pointer_offset;
784 /* When save_regs_using_mov is set, emit prologue using
785 move instead of push instructions. */
786 bool save_regs_using_mov;
789 /* Code model option. */
790 enum cmodel ix86_cmodel;
791 /* Asm dialect. */
792 enum asm_dialect ix86_asm_dialect = ASM_ATT;
793 /* TLS dialect. */
794 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
796 /* Which unit we are generating floating point math for. */
797 enum fpmath_unit ix86_fpmath;
799 /* Which cpu are we scheduling for. */
800 enum processor_type ix86_tune;
801 /* Which instruction set architecture to use. */
802 enum processor_type ix86_arch;
804 /* True if the SSE prefetch instruction is not a NOOP. */
805 int x86_prefetch_sse;
807 /* ix86_regparm_string as a number */
808 static int ix86_regparm;
810 /* Preferred alignment for stack boundary in bits. */
811 unsigned int ix86_preferred_stack_boundary;
813 /* Values 1-5: see jump.c */
814 int ix86_branch_cost;
816 /* Variables which are this size or smaller are put in the data/bss
817 or ldata/lbss sections. */
819 int ix86_section_threshold = 65536;
821 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
822 char internal_label_prefix[16];
823 int internal_label_prefix_len;
825 static bool ix86_handle_option (size_t, const char *, int);
826 static void output_pic_addr_const (FILE *, rtx, int);
827 static void put_condition_code (enum rtx_code, enum machine_mode,
828 int, int, FILE *);
829 static const char *get_some_local_dynamic_name (void);
830 static int get_some_local_dynamic_name_1 (rtx *, void *);
831 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
832 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
833 rtx *);
834 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
835 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
836 enum machine_mode);
837 static rtx get_thread_pointer (int);
838 static rtx legitimize_tls_address (rtx, enum tls_model, int);
839 static void get_pc_thunk_name (char [32], unsigned int);
840 static rtx gen_push (rtx);
841 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
842 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
843 static struct machine_function * ix86_init_machine_status (void);
844 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
845 static int ix86_nsaved_regs (void);
846 static void ix86_emit_save_regs (void);
847 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
848 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
849 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
850 static HOST_WIDE_INT ix86_GOT_alias_set (void);
851 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
852 static rtx ix86_expand_aligntest (rtx, int);
853 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
854 static int ix86_issue_rate (void);
855 static int ix86_adjust_cost (rtx, rtx, rtx, int);
856 static int ia32_multipass_dfa_lookahead (void);
857 static void ix86_init_mmx_sse_builtins (void);
858 static rtx x86_this_parameter (tree);
859 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
860 HOST_WIDE_INT, tree);
861 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
862 static void x86_file_start (void);
863 static void ix86_reorg (void);
864 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
865 static tree ix86_build_builtin_va_list (void);
866 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
867 tree, int *, int);
868 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
869 static bool ix86_vector_mode_supported_p (enum machine_mode);
871 static int ix86_address_cost (rtx);
872 static bool ix86_cannot_force_const_mem (rtx);
873 static rtx ix86_delegitimize_address (rtx);
875 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
877 struct builtin_description;
878 static rtx ix86_expand_sse_comi (const struct builtin_description *,
879 tree, rtx);
880 static rtx ix86_expand_sse_compare (const struct builtin_description *,
881 tree, rtx);
882 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
883 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
884 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
885 static rtx ix86_expand_store_builtin (enum insn_code, tree);
886 static rtx safe_vector_operand (rtx, enum machine_mode);
887 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
888 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
889 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
890 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
891 static int ix86_fp_comparison_cost (enum rtx_code code);
892 static unsigned int ix86_select_alt_pic_regnum (void);
893 static int ix86_save_reg (unsigned int, int);
894 static void ix86_compute_frame_layout (struct ix86_frame *);
895 static int ix86_comp_type_attributes (tree, tree);
896 static int ix86_function_regparm (tree, tree);
897 const struct attribute_spec ix86_attribute_table[];
898 static bool ix86_function_ok_for_sibcall (tree, tree);
899 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
900 static int ix86_value_regno (enum machine_mode, tree, tree);
901 static bool contains_128bit_aligned_vector_p (tree);
902 static rtx ix86_struct_value_rtx (tree, int);
903 static bool ix86_ms_bitfield_layout_p (tree);
904 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
905 static int extended_reg_mentioned_1 (rtx *, void *);
906 static bool ix86_rtx_costs (rtx, int, int, int *);
907 static int min_insn_size (rtx);
908 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
909 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
910 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
911 tree, bool);
912 static void ix86_init_builtins (void);
913 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
914 static const char *ix86_mangle_fundamental_type (tree);
915 static tree ix86_stack_protect_fail (void);
917 /* This function is only used on Solaris. */
918 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
919 ATTRIBUTE_UNUSED;
921 /* Register class used for passing a given 64bit part of the argument.
922 These represent the classes documented by the PS ABI, with the exception
923 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
924 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
926 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
927 whenever possible (the upper half does contain padding). */
929 enum x86_64_reg_class
931 X86_64_NO_CLASS,
932 X86_64_INTEGER_CLASS,
933 X86_64_INTEGERSI_CLASS,
934 X86_64_SSE_CLASS,
935 X86_64_SSESF_CLASS,
936 X86_64_SSEDF_CLASS,
937 X86_64_SSEUP_CLASS,
938 X86_64_X87_CLASS,
939 X86_64_X87UP_CLASS,
940 X86_64_COMPLEX_X87_CLASS,
941 X86_64_MEMORY_CLASS
943 static const char * const x86_64_reg_class_name[] = {
944 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
945 "sseup", "x87", "x87up", "cplx87", "no"
948 #define MAX_CLASSES 4
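/* Example added for exposition (not in the original source): under this
   scheme an argument such as

     struct s { double d; long l; };

   spans two eightbytes, classified roughly as X86_64_SSEDF_CLASS and
   X86_64_INTEGER_CLASS, so it is passed in one SSE and one integer
   register, while aggregates that cannot be given register classes end up
   as X86_64_MEMORY_CLASS and are passed on the stack.  MAX_CLASSES is 4
   because a single argument is classified over at most four eightbytes.  */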
950 /* Table of constants used by fldpi, fldln2, etc.... */
951 static REAL_VALUE_TYPE ext_80387_constants_table [5];
952 static bool ext_80387_constants_init = 0;
953 static void init_ext_80387_constants (void);
954 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
955 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
956 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
957 static void x86_64_elf_select_section (tree decl, int reloc,
958 unsigned HOST_WIDE_INT align)
959 ATTRIBUTE_UNUSED;
961 /* Initialize the GCC target structure. */
962 #undef TARGET_ATTRIBUTE_TABLE
963 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
964 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
965 # undef TARGET_MERGE_DECL_ATTRIBUTES
966 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
967 #endif
969 #undef TARGET_COMP_TYPE_ATTRIBUTES
970 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
972 #undef TARGET_INIT_BUILTINS
973 #define TARGET_INIT_BUILTINS ix86_init_builtins
974 #undef TARGET_EXPAND_BUILTIN
975 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
977 #undef TARGET_ASM_FUNCTION_EPILOGUE
978 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
980 #undef TARGET_ENCODE_SECTION_INFO
981 #ifndef SUBTARGET_ENCODE_SECTION_INFO
982 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
983 #else
984 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
985 #endif
987 #undef TARGET_ASM_OPEN_PAREN
988 #define TARGET_ASM_OPEN_PAREN ""
989 #undef TARGET_ASM_CLOSE_PAREN
990 #define TARGET_ASM_CLOSE_PAREN ""
992 #undef TARGET_ASM_ALIGNED_HI_OP
993 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
994 #undef TARGET_ASM_ALIGNED_SI_OP
995 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
996 #ifdef ASM_QUAD
997 #undef TARGET_ASM_ALIGNED_DI_OP
998 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
999 #endif
1001 #undef TARGET_ASM_UNALIGNED_HI_OP
1002 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1003 #undef TARGET_ASM_UNALIGNED_SI_OP
1004 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1005 #undef TARGET_ASM_UNALIGNED_DI_OP
1006 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1008 #undef TARGET_SCHED_ADJUST_COST
1009 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1010 #undef TARGET_SCHED_ISSUE_RATE
1011 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1012 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1013 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1014 ia32_multipass_dfa_lookahead
1016 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1017 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1019 #ifdef HAVE_AS_TLS
1020 #undef TARGET_HAVE_TLS
1021 #define TARGET_HAVE_TLS true
1022 #endif
1023 #undef TARGET_CANNOT_FORCE_CONST_MEM
1024 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1026 #undef TARGET_DELEGITIMIZE_ADDRESS
1027 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1029 #undef TARGET_MS_BITFIELD_LAYOUT_P
1030 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1032 #if TARGET_MACHO
1033 #undef TARGET_BINDS_LOCAL_P
1034 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1035 #endif
1037 #undef TARGET_ASM_OUTPUT_MI_THUNK
1038 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1039 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1040 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1042 #undef TARGET_ASM_FILE_START
1043 #define TARGET_ASM_FILE_START x86_file_start
1045 #undef TARGET_DEFAULT_TARGET_FLAGS
1046 #define TARGET_DEFAULT_TARGET_FLAGS \
1047 (TARGET_DEFAULT \
1048 | TARGET_64BIT_DEFAULT \
1049 | TARGET_SUBTARGET_DEFAULT \
1050 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1052 #undef TARGET_HANDLE_OPTION
1053 #define TARGET_HANDLE_OPTION ix86_handle_option
1055 #undef TARGET_RTX_COSTS
1056 #define TARGET_RTX_COSTS ix86_rtx_costs
1057 #undef TARGET_ADDRESS_COST
1058 #define TARGET_ADDRESS_COST ix86_address_cost
1060 #undef TARGET_FIXED_CONDITION_CODE_REGS
1061 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1062 #undef TARGET_CC_MODES_COMPATIBLE
1063 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1065 #undef TARGET_MACHINE_DEPENDENT_REORG
1066 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1068 #undef TARGET_BUILD_BUILTIN_VA_LIST
1069 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1071 #undef TARGET_MD_ASM_CLOBBERS
1072 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1074 #undef TARGET_PROMOTE_PROTOTYPES
1075 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1076 #undef TARGET_STRUCT_VALUE_RTX
1077 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1078 #undef TARGET_SETUP_INCOMING_VARARGS
1079 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1080 #undef TARGET_MUST_PASS_IN_STACK
1081 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1082 #undef TARGET_PASS_BY_REFERENCE
1083 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1085 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1086 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1088 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1089 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1091 #ifdef HAVE_AS_TLS
1092 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1093 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1094 #endif
1096 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1097 #undef TARGET_INSERT_ATTRIBUTES
1098 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1099 #endif
1101 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1102 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1104 #undef TARGET_STACK_PROTECT_FAIL
1105 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1107 #undef TARGET_FUNCTION_VALUE
1108 #define TARGET_FUNCTION_VALUE ix86_function_value
1110 struct gcc_target targetm = TARGET_INITIALIZER;
1113 /* The svr4 ABI for the i386 says that records and unions are returned
1114 in memory. */
1115 #ifndef DEFAULT_PCC_STRUCT_RETURN
1116 #define DEFAULT_PCC_STRUCT_RETURN 1
1117 #endif
1119 /* Implement TARGET_HANDLE_OPTION. */
1121 static bool
1122 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1124 switch (code)
1126 case OPT_m3dnow:
1127 if (!value)
1129 target_flags &= ~MASK_3DNOW_A;
1130 target_flags_explicit |= MASK_3DNOW_A;
1132 return true;
1134 case OPT_mmmx:
1135 if (!value)
1137 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1138 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1140 return true;
1142 case OPT_msse:
1143 if (!value)
1145 target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1146 target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1148 return true;
1150 case OPT_msse2:
1151 if (!value)
1153 target_flags &= ~MASK_SSE3;
1154 target_flags_explicit |= MASK_SSE3;
1156 return true;
1158 default:
1159 return true;
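/* Note added for exposition (not in the original source): the cascade above
   means that e.g. a single -mno-sse also clears MASK_SSE2 and MASK_SSE3 and
   marks them as explicitly set, so the -march/-mtune defaults applied later
   in override_options will not silently re-enable the higher SSE levels.  */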
1163 /* Sometimes certain combinations of command options do not make
1164 sense on a particular target machine. You can define a macro
1165 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1166 defined, is executed once just after all the command options have
1167 been parsed.
1169 Don't use this macro to turn on various extra optimizations for
1170 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1172 void
1173 override_options (void)
1175 int i;
1176 int ix86_tune_defaulted = 0;
1178 /* Comes from final.c -- no real reason to change it. */
1179 #define MAX_CODE_ALIGN 16
1181 static struct ptt
1183 const struct processor_costs *cost; /* Processor costs */
1184 const int target_enable; /* Target flags to enable. */
1185 const int target_disable; /* Target flags to disable. */
1186 const int align_loop; /* Default alignments. */
1187 const int align_loop_max_skip;
1188 const int align_jump;
1189 const int align_jump_max_skip;
1190 const int align_func;
1192 const processor_target_table[PROCESSOR_max] =
1194 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1195 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1196 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1197 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1198 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1199 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1200 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1201 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1202 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
1205 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1206 static struct pta
1208 const char *const name; /* processor name or nickname. */
1209 const enum processor_type processor;
1210 const enum pta_flags
1212 PTA_SSE = 1,
1213 PTA_SSE2 = 2,
1214 PTA_SSE3 = 4,
1215 PTA_MMX = 8,
1216 PTA_PREFETCH_SSE = 16,
1217 PTA_3DNOW = 32,
1218 PTA_3DNOW_A = 64,
1219 PTA_64BIT = 128
1220 } flags;
1222 const processor_alias_table[] =
1224 {"i386", PROCESSOR_I386, 0},
1225 {"i486", PROCESSOR_I486, 0},
1226 {"i586", PROCESSOR_PENTIUM, 0},
1227 {"pentium", PROCESSOR_PENTIUM, 0},
1228 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1229 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1230 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1231 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1232 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1233 {"i686", PROCESSOR_PENTIUMPRO, 0},
1234 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1235 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1236 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1237 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1238 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1239 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1240 | PTA_MMX | PTA_PREFETCH_SSE},
1241 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1242 | PTA_MMX | PTA_PREFETCH_SSE},
1243 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1244 | PTA_MMX | PTA_PREFETCH_SSE},
1245 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1246 | PTA_MMX | PTA_PREFETCH_SSE},
1247 {"k6", PROCESSOR_K6, PTA_MMX},
1248 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1249 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1250 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1251 | PTA_3DNOW_A},
1252 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1253 | PTA_3DNOW | PTA_3DNOW_A},
1254 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1255 | PTA_3DNOW_A | PTA_SSE},
1256 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1257 | PTA_3DNOW_A | PTA_SSE},
1258 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1259 | PTA_3DNOW_A | PTA_SSE},
1260 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1261 | PTA_SSE | PTA_SSE2 },
1262 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1263 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1264 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1265 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1266 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1267 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1268 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1269 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1272 int const pta_size = ARRAY_SIZE (processor_alias_table);
1274 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1275 SUBTARGET_OVERRIDE_OPTIONS;
1276 #endif
1278 /* Set the default values for switches whose default depends on TARGET_64BIT
1279 in case they weren't overwritten by command line options. */
1280 if (TARGET_64BIT)
1282 if (flag_omit_frame_pointer == 2)
1283 flag_omit_frame_pointer = 1;
1284 if (flag_asynchronous_unwind_tables == 2)
1285 flag_asynchronous_unwind_tables = 1;
1286 if (flag_pcc_struct_return == 2)
1287 flag_pcc_struct_return = 0;
1289 else
1291 if (flag_omit_frame_pointer == 2)
1292 flag_omit_frame_pointer = 0;
1293 if (flag_asynchronous_unwind_tables == 2)
1294 flag_asynchronous_unwind_tables = 0;
1295 if (flag_pcc_struct_return == 2)
1296 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1299 if (!ix86_tune_string && ix86_arch_string)
1300 ix86_tune_string = ix86_arch_string;
1301 if (!ix86_tune_string)
1303 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1304 ix86_tune_defaulted = 1;
1306 if (!ix86_arch_string)
1307 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1309 if (ix86_cmodel_string != 0)
1311 if (!strcmp (ix86_cmodel_string, "small"))
1312 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1313 else if (!strcmp (ix86_cmodel_string, "medium"))
1314 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1315 else if (flag_pic)
1316 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1317 else if (!strcmp (ix86_cmodel_string, "32"))
1318 ix86_cmodel = CM_32;
1319 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1320 ix86_cmodel = CM_KERNEL;
1321 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1322 ix86_cmodel = CM_LARGE;
1323 else
1324 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1326 else
1328 ix86_cmodel = CM_32;
1329 if (TARGET_64BIT)
1330 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1332 if (ix86_asm_string != 0)
1334 if (!strcmp (ix86_asm_string, "intel"))
1335 ix86_asm_dialect = ASM_INTEL;
1336 else if (!strcmp (ix86_asm_string, "att"))
1337 ix86_asm_dialect = ASM_ATT;
1338 else
1339 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1341 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1342 error ("code model %qs not supported in the %s bit mode",
1343 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1344 if (ix86_cmodel == CM_LARGE)
1345 sorry ("code model %<large%> not supported yet");
1346 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1347 sorry ("%i-bit mode not compiled in",
1348 (target_flags & MASK_64BIT) ? 64 : 32);
1350 for (i = 0; i < pta_size; i++)
1351 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1353 ix86_arch = processor_alias_table[i].processor;
1354 /* Default cpu tuning to the architecture. */
1355 ix86_tune = ix86_arch;
1356 if (processor_alias_table[i].flags & PTA_MMX
1357 && !(target_flags_explicit & MASK_MMX))
1358 target_flags |= MASK_MMX;
1359 if (processor_alias_table[i].flags & PTA_3DNOW
1360 && !(target_flags_explicit & MASK_3DNOW))
1361 target_flags |= MASK_3DNOW;
1362 if (processor_alias_table[i].flags & PTA_3DNOW_A
1363 && !(target_flags_explicit & MASK_3DNOW_A))
1364 target_flags |= MASK_3DNOW_A;
1365 if (processor_alias_table[i].flags & PTA_SSE
1366 && !(target_flags_explicit & MASK_SSE))
1367 target_flags |= MASK_SSE;
1368 if (processor_alias_table[i].flags & PTA_SSE2
1369 && !(target_flags_explicit & MASK_SSE2))
1370 target_flags |= MASK_SSE2;
1371 if (processor_alias_table[i].flags & PTA_SSE3
1372 && !(target_flags_explicit & MASK_SSE3))
1373 target_flags |= MASK_SSE3;
1374 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1375 x86_prefetch_sse = true;
1376 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1377 error ("CPU you selected does not support x86-64 "
1378 "instruction set");
1379 break;
1382 if (i == pta_size)
1383 error ("bad value (%s) for -march= switch", ix86_arch_string);
1385 for (i = 0; i < pta_size; i++)
1386 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1388 ix86_tune = processor_alias_table[i].processor;
1389 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1391 if (ix86_tune_defaulted)
1393 ix86_tune_string = "x86-64";
1394 for (i = 0; i < pta_size; i++)
1395 if (! strcmp (ix86_tune_string,
1396 processor_alias_table[i].name))
1397 break;
1398 ix86_tune = processor_alias_table[i].processor;
1400 else
1401 error ("CPU you selected does not support x86-64 "
1402 "instruction set");
1404 /* Intel CPUs have always interpreted SSE prefetch instructions as
1405 NOPs; so, we can enable SSE prefetch instructions even when
1406 -mtune (rather than -march) points us to a processor that has them.
1407 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1408 higher processors. */
1409 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1410 x86_prefetch_sse = true;
1411 break;
1413 if (i == pta_size)
1414 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1416 if (optimize_size)
1417 ix86_cost = &size_cost;
1418 else
1419 ix86_cost = processor_target_table[ix86_tune].cost;
1420 target_flags |= processor_target_table[ix86_tune].target_enable;
1421 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1423 /* Arrange to set up i386_stack_locals for all functions. */
1424 init_machine_status = ix86_init_machine_status;
1426 /* Validate -mregparm= value. */
1427 if (ix86_regparm_string)
1429 i = atoi (ix86_regparm_string);
1430 if (i < 0 || i > REGPARM_MAX)
1431 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1432 else
1433 ix86_regparm = i;
1435 else
1436 if (TARGET_64BIT)
1437 ix86_regparm = REGPARM_MAX;
1439 /* If the user has provided any of the -malign-* options,
1440 warn and use that value only if -falign-* is not set.
1441 Remove this code in GCC 3.2 or later. */
1442 if (ix86_align_loops_string)
1444 warning (0, "-malign-loops is obsolete, use -falign-loops");
1445 if (align_loops == 0)
1447 i = atoi (ix86_align_loops_string);
1448 if (i < 0 || i > MAX_CODE_ALIGN)
1449 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1450 else
1451 align_loops = 1 << i;
1455 if (ix86_align_jumps_string)
1457 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1458 if (align_jumps == 0)
1460 i = atoi (ix86_align_jumps_string);
1461 if (i < 0 || i > MAX_CODE_ALIGN)
1462 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1463 else
1464 align_jumps = 1 << i;
1468 if (ix86_align_funcs_string)
1470 warning (0, "-malign-functions is obsolete, use -falign-functions");
1471 if (align_functions == 0)
1473 i = atoi (ix86_align_funcs_string);
1474 if (i < 0 || i > MAX_CODE_ALIGN)
1475 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1476 else
1477 align_functions = 1 << i;
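/* Note added for exposition (not in the original source): the legacy
   -malign-* options take a log2 value, so -malign-functions=4 becomes
   align_functions = 1 << 4 = 16 bytes, whereas the -falign-* options that
   replace them take the byte count directly.  */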
1481 /* Default align_* from the processor table. */
1482 if (align_loops == 0)
1484 align_loops = processor_target_table[ix86_tune].align_loop;
1485 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1487 if (align_jumps == 0)
1489 align_jumps = processor_target_table[ix86_tune].align_jump;
1490 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1492 if (align_functions == 0)
1494 align_functions = processor_target_table[ix86_tune].align_func;
1497 /* Validate -mpreferred-stack-boundary= value, or provide default.
1498 The default of 128 bits is for Pentium III's SSE __m128, but we
1499 don't want additional code to keep the stack aligned when
1500 optimizing for code size. */
1501 ix86_preferred_stack_boundary = (optimize_size
1502 ? TARGET_64BIT ? 128 : 32
1503 : 128);
1504 if (ix86_preferred_stack_boundary_string)
1506 i = atoi (ix86_preferred_stack_boundary_string);
1507 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1508 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1509 TARGET_64BIT ? 4 : 2);
1510 else
1511 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
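/* Illustrative note (not part of the original source): the option value is
   the log2 of the boundary in bytes, so -mpreferred-stack-boundary=4 yields
   (1 << 4) * BITS_PER_UNIT = 128 bits, i.e. 16-byte stack alignment, while
   -mpreferred-stack-boundary=2 (the 32-bit minimum) yields 32 bits.  */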
1514 /* Validate -mbranch-cost= value, or provide default. */
1515 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1516 if (ix86_branch_cost_string)
1518 i = atoi (ix86_branch_cost_string);
1519 if (i < 0 || i > 5)
1520 error ("-mbranch-cost=%d is not between 0 and 5", i);
1521 else
1522 ix86_branch_cost = i;
1524 if (ix86_section_threshold_string)
1526 i = atoi (ix86_section_threshold_string);
1527 if (i < 0)
1528 error ("-mlarge-data-threshold=%d is negative", i);
1529 else
1530 ix86_section_threshold = i;
1533 if (ix86_tls_dialect_string)
1535 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1536 ix86_tls_dialect = TLS_DIALECT_GNU;
1537 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1538 ix86_tls_dialect = TLS_DIALECT_SUN;
1539 else
1540 error ("bad value (%s) for -mtls-dialect= switch",
1541 ix86_tls_dialect_string);
1544 /* Keep nonleaf frame pointers. */
1545 if (flag_omit_frame_pointer)
1546 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1547 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1548 flag_omit_frame_pointer = 1;
1550 /* If we're doing fast math, we don't care about comparison order
1551 wrt NaNs. This lets us use a shorter comparison sequence. */
1552 if (flag_unsafe_math_optimizations)
1553 target_flags &= ~MASK_IEEE_FP;
1555 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1556 since the insns won't need emulation. */
1557 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1558 target_flags &= ~MASK_NO_FANCY_MATH_387;
1560 /* Likewise, if the target doesn't have a 387, or we've specified
1561 software floating point, don't use 387 inline intrinsics. */
1562 if (!TARGET_80387)
1563 target_flags |= MASK_NO_FANCY_MATH_387;
1565 /* Turn on SSE2 builtins for -msse3. */
1566 if (TARGET_SSE3)
1567 target_flags |= MASK_SSE2;
1569 /* Turn on SSE builtins for -msse2. */
1570 if (TARGET_SSE2)
1571 target_flags |= MASK_SSE;
1573 /* Turn on MMX builtins for -msse. */
1574 if (TARGET_SSE)
1576 target_flags |= MASK_MMX & ~target_flags_explicit;
1577 x86_prefetch_sse = true;
1580 /* Turn on MMX builtins for 3Dnow. */
1581 if (TARGET_3DNOW)
1582 target_flags |= MASK_MMX;
1584 if (TARGET_64BIT)
1586 if (TARGET_ALIGN_DOUBLE)
1587 error ("-malign-double makes no sense in the 64bit mode");
1588 if (TARGET_RTD)
1589 error ("-mrtd calling convention not supported in the 64bit mode");
1591 /* Enable by default the SSE and MMX builtins. Do allow the user to
1592 explicitly disable any of these. In particular, disabling SSE and
1593 MMX for kernel code is extremely useful. */
1594 target_flags
1595 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1596 & ~target_flags_explicit);
1598 else
1600       /* The i386 ABI does not specify a red zone.  It still makes sense to use one
1601 	 when the programmer takes care to keep the stack from being destroyed.  */
1602 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1603 target_flags |= MASK_NO_RED_ZONE;
1606 /* Accept -msseregparm only if at least SSE support is enabled. */
1607 if (TARGET_SSEREGPARM
1608 && ! TARGET_SSE)
1609 error ("-msseregparm used without SSE enabled");
1611 ix86_fpmath = TARGET_FPMATH_DEFAULT;
1613 if (ix86_fpmath_string != 0)
1615 if (! strcmp (ix86_fpmath_string, "387"))
1616 ix86_fpmath = FPMATH_387;
1617 else if (! strcmp (ix86_fpmath_string, "sse"))
1619 if (!TARGET_SSE)
1621 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1622 ix86_fpmath = FPMATH_387;
1624 else
1625 ix86_fpmath = FPMATH_SSE;
1627 else if (! strcmp (ix86_fpmath_string, "387,sse")
1628 || ! strcmp (ix86_fpmath_string, "sse,387"))
1630 if (!TARGET_SSE)
1632 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1633 ix86_fpmath = FPMATH_387;
1635 else if (!TARGET_80387)
1637 warning (0, "387 instruction set disabled, using SSE arithmetics");
1638 ix86_fpmath = FPMATH_SSE;
1640 else
1641 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1643 else
1644 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1647 /* If the i387 is disabled, then do not return values in it. */
1648 if (!TARGET_80387)
1649 target_flags &= ~MASK_FLOAT_RETURNS;
1651 if ((x86_accumulate_outgoing_args & TUNEMASK)
1652 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1653 && !optimize_size)
1654 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1656 /* ??? Unwind info is not correct around the CFG unless either a frame
1657 pointer is present or M_A_O_A is set. Fixing this requires rewriting
1658 unwind info generation to be aware of the CFG and propagating states
1659 around edges. */
1660 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
1661 || flag_exceptions || flag_non_call_exceptions)
1662 && flag_omit_frame_pointer
1663 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
1665 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1666 warning (0, "unwind tables currently require either a frame pointer "
1667 "or -maccumulate-outgoing-args for correctness");
1668 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1671 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1673 char *p;
1674 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1675 p = strchr (internal_label_prefix, 'X');
1676 internal_label_prefix_len = p - internal_label_prefix;
1677 *p = '\0';
1680   /* When the scheduling description is not available, disable the scheduler pass
1681      so it won't slow down compilation and make x87 code slower.  */
1682 if (!TARGET_SCHEDULE)
1683 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
1686 /* Switch to the appropriate section for output of DECL.
1687 DECL is either a `VAR_DECL' node or a constant of some sort.
1688 RELOC indicates whether forming the initial value of DECL requires
1689 link-time relocations. */
1691 static void
1692 x86_64_elf_select_section (tree decl, int reloc,
1693 unsigned HOST_WIDE_INT align)
1695 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
1696 && ix86_in_large_data_p (decl))
1698 const char *sname = NULL;
1699 switch (categorize_decl_for_section (decl, reloc, flag_pic))
1701 case SECCAT_DATA:
1702 sname = ".ldata";
1703 break;
1704 case SECCAT_DATA_REL:
1705 sname = ".ldata.rel";
1706 break;
1707 case SECCAT_DATA_REL_LOCAL:
1708 sname = ".ldata.rel.local";
1709 break;
1710 case SECCAT_DATA_REL_RO:
1711 sname = ".ldata.rel.ro";
1712 break;
1713 case SECCAT_DATA_REL_RO_LOCAL:
1714 sname = ".ldata.rel.ro.local";
1715 break;
1716 case SECCAT_BSS:
1717 sname = ".lbss";
1718 break;
1719 case SECCAT_RODATA:
1720 case SECCAT_RODATA_MERGE_STR:
1721 case SECCAT_RODATA_MERGE_STR_INIT:
1722 case SECCAT_RODATA_MERGE_CONST:
1723 sname = ".lrodata";
1724 break;
1725 case SECCAT_SRODATA:
1726 case SECCAT_SDATA:
1727 case SECCAT_SBSS:
1728 gcc_unreachable ();
1729 case SECCAT_TEXT:
1730 case SECCAT_TDATA:
1731 case SECCAT_TBSS:
1732 	  /* We don't split these for the medium model.  Place them into
1733 	     default sections and hope for the best.  */
1734 break;
1736 if (sname)
1738 named_section (decl, sname, reloc);
1739 return;
1742 default_elf_select_section (decl, reloc, align);
1745 /* Build up a unique section name, expressed as a
1746 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
1747 RELOC indicates whether the initial value of EXP requires
1748 link-time relocations. */
1750 static void
1751 x86_64_elf_unique_section (tree decl, int reloc)
1753 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
1754 && ix86_in_large_data_p (decl))
1756 const char *prefix = NULL;
1757 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
1758 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
1760 switch (categorize_decl_for_section (decl, reloc, flag_pic))
1762 case SECCAT_DATA:
1763 case SECCAT_DATA_REL:
1764 case SECCAT_DATA_REL_LOCAL:
1765 case SECCAT_DATA_REL_RO:
1766 case SECCAT_DATA_REL_RO_LOCAL:
1767 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
1768 break;
1769 case SECCAT_BSS:
1770 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
1771 break;
1772 case SECCAT_RODATA:
1773 case SECCAT_RODATA_MERGE_STR:
1774 case SECCAT_RODATA_MERGE_STR_INIT:
1775 case SECCAT_RODATA_MERGE_CONST:
1776 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
1777 break;
1778 case SECCAT_SRODATA:
1779 case SECCAT_SDATA:
1780 case SECCAT_SBSS:
1781 gcc_unreachable ();
1782 case SECCAT_TEXT:
1783 case SECCAT_TDATA:
1784 case SECCAT_TBSS:
1785 	  /* We don't split these for the medium model.  Place them into
1786 	     default sections and hope for the best.  */
1787 break;
1789 if (prefix)
1791 const char *name;
1792 size_t nlen, plen;
1793 char *string;
1794 plen = strlen (prefix);
1796 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
1797 name = targetm.strip_name_encoding (name);
1798 nlen = strlen (name);
1800 string = alloca (nlen + plen + 1);
1801 memcpy (string, prefix, plen);
1802 memcpy (string + plen, name, nlen + 1);
1804 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
1805 return;
1808 default_unique_section (decl, reloc);
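/* Illustrative example (assumed, not from the original source): with
   -mcmodel=medium, -fdata-sections and a hypothetical object large enough
   to exceed the large-data threshold, say

       static char big_table[0x200000];

   a SECCAT_DATA classification above would produce the section name
   ".ldata.big_table" (or ".gnu.linkonce.ld.big_table" for a one-only
   object when COMDAT groups are unavailable).  */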
1811 #ifdef COMMON_ASM_OP
1812 /* This says how to output assembler code to declare an
1813 uninitialized external linkage data object.
1815    For medium model x86-64 we need to use the .largecomm directive for
1816 large objects. */
1817 void
1818 x86_elf_aligned_common (FILE *file,
1819 const char *name, unsigned HOST_WIDE_INT size,
1820 int align)
1822 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
1823 && size > (unsigned int)ix86_section_threshold)
1824 fprintf (file, ".largecomm\t");
1825 else
1826 fprintf (file, "%s", COMMON_ASM_OP);
1827 assemble_name (file, name);
1828 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
1829 size, align / BITS_PER_UNIT);
1832 /* Utility function for targets to use in implementing
1833 ASM_OUTPUT_ALIGNED_BSS. */
1835 void
1836 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
1837 const char *name, unsigned HOST_WIDE_INT size,
1838 int align)
1840 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
1841 && size > (unsigned int)ix86_section_threshold)
1842 named_section (decl, ".lbss", 0);
1843 else
1844 bss_section ();
1845 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
1846 #ifdef ASM_DECLARE_OBJECT_NAME
1847 last_assemble_variable_decl = decl;
1848 ASM_DECLARE_OBJECT_NAME (file, name, decl);
1849 #else
1850   /* The standard thing is to just output a label for the object.  */
1851 ASM_OUTPUT_LABEL (file, name);
1852 #endif /* ASM_DECLARE_OBJECT_NAME */
1853 ASM_OUTPUT_SKIP (file, size ? size : 1);
1855 #endif
1857 void
1858 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1860 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1861 make the problem with not enough registers even worse. */
1862 #ifdef INSN_SCHEDULING
1863 if (level > 1)
1864 flag_schedule_insns = 0;
1865 #endif
1867 if (TARGET_MACHO)
1868 /* The Darwin libraries never set errno, so we might as well
1869 avoid calling them when that's the only reason we would. */
1870 flag_errno_math = 0;
1872   /* The default values of these switches depend on TARGET_64BIT,
1873      which is not known at this point.  Mark these values with 2 and
1874      let the user override them.  If there is no command line option
1875      specifying them, we will set the defaults in override_options.  */
1876 if (optimize >= 1)
1877 flag_omit_frame_pointer = 2;
1878 flag_pcc_struct_return = 2;
1879 flag_asynchronous_unwind_tables = 2;
1880 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
1881 SUBTARGET_OPTIMIZATION_OPTIONS;
1882 #endif
1885 /* Table of valid machine attributes. */
1886 const struct attribute_spec ix86_attribute_table[] =
1888 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1889 /* Stdcall attribute says callee is responsible for popping arguments
1890 if they are not variable. */
1891 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
1892 /* Fastcall attribute says callee is responsible for popping arguments
1893 if they are not variable. */
1894 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
1895 /* Cdecl attribute says the callee is a normal C declaration */
1896 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
1897 /* Regparm attribute specifies how many integer arguments are to be
1898 passed in registers. */
1899 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
1900 /* Sseregparm attribute says we are using x86_64 calling conventions
1901 for FP arguments. */
1902 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
1903 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1904 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
1905 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
1906 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1907 #endif
1908 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1909 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1910 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1911 SUBTARGET_ATTRIBUTE_TABLE,
1912 #endif
1913 { NULL, 0, 0, false, false, false, NULL }
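/* Illustrative uses of the calling-convention attributes above (hypothetical
   32-bit declarations, not part of this file):

       int __attribute__((stdcall))    f1 (int a, int b);
       int __attribute__((fastcall))   f2 (int a, int b);
       int __attribute__((regparm (3))) f3 (int a, int b, int c);

   f1 makes the callee pop its own arguments, f2 passes the first two
   integer arguments in ECX and EDX, and f3 passes up to three integer
   arguments in EAX, EDX and ECX.  */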
1916 /* Decide whether we can make a sibling call to a function. DECL is the
1917 declaration of the function being targeted by the call and EXP is the
1918 CALL_EXPR representing the call. */
1920 static bool
1921 ix86_function_ok_for_sibcall (tree decl, tree exp)
1923 tree func;
1924 rtx a, b;
1926 /* If we are generating position-independent code, we cannot sibcall
1927 optimize any indirect call, or a direct call to a global function,
1928 as the PLT requires %ebx be live. */
1929 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1930 return false;
1932 if (decl)
1933 func = decl;
1934 else
1936 func = TREE_TYPE (TREE_OPERAND (exp, 0));
1937 if (POINTER_TYPE_P (func))
1938 func = TREE_TYPE (func);
1941   /* Check that the return value locations are the same.  For example,
1942      if we are returning floats on the 80387 register stack, we cannot
1943 make a sibcall from a function that doesn't return a float to a
1944 function that does or, conversely, from a function that does return
1945 a float to a function that doesn't; the necessary stack adjustment
1946 would not be executed. This is also the place we notice
1947 differences in the return value ABI. Note that it is ok for one
1948 of the functions to have void return type as long as the return
1949 value of the other is passed in a register. */
1950 a = ix86_function_value (TREE_TYPE (exp), func, false);
1951 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
1952 cfun->decl, false);
1953 if (STACK_REG_P (a) || STACK_REG_P (b))
1955 if (!rtx_equal_p (a, b))
1956 return false;
1958 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
1960 else if (!rtx_equal_p (a, b))
1961 return false;
1963 /* If this call is indirect, we'll need to be able to use a call-clobbered
1964 register for the address of the target function. Make sure that all
1965 such registers are not used for passing parameters. */
1966 if (!decl && !TARGET_64BIT)
1968 tree type;
1970 /* We're looking at the CALL_EXPR, we need the type of the function. */
1971 type = TREE_OPERAND (exp, 0); /* pointer expression */
1972 type = TREE_TYPE (type); /* pointer type */
1973 type = TREE_TYPE (type); /* function type */
1975 if (ix86_function_regparm (type, NULL) >= 3)
1977 /* ??? Need to count the actual number of registers to be used,
1978 not the possible number of registers. Fix later. */
1979 return false;
1983 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1984 /* Dllimport'd functions are also called indirectly. */
1985 if (decl && DECL_DLLIMPORT_P (decl)
1986 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
1987 return false;
1988 #endif
1990 /* Otherwise okay. That also includes certain types of indirect calls. */
1991 return true;
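/* Illustrative example (not from the original source): a call in tail
   position such as

       static double callee (double);
       double caller (double x) { return callee (x); }

   can be considered for a sibcall by the checks above: both functions
   return a double on the 80387 register stack, so the rtx_equal_p test
   passes, and the callee is file-local, so the PIC/PLT restriction on
   global functions does not apply.  */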
1994 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
1995 calling convention attributes;
1996 arguments as in struct attribute_spec.handler. */
1998 static tree
1999 ix86_handle_cconv_attribute (tree *node, tree name,
2000 tree args,
2001 int flags ATTRIBUTE_UNUSED,
2002 bool *no_add_attrs)
2004 if (TREE_CODE (*node) != FUNCTION_TYPE
2005 && TREE_CODE (*node) != METHOD_TYPE
2006 && TREE_CODE (*node) != FIELD_DECL
2007 && TREE_CODE (*node) != TYPE_DECL)
2009 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2010 IDENTIFIER_POINTER (name));
2011 *no_add_attrs = true;
2012 return NULL_TREE;
2015 /* Can combine regparm with all attributes but fastcall. */
2016 if (is_attribute_p ("regparm", name))
2018 tree cst;
2020 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2022 error ("fastcall and regparm attributes are not compatible");
2025 cst = TREE_VALUE (args);
2026 if (TREE_CODE (cst) != INTEGER_CST)
2028 warning (OPT_Wattributes,
2029 "%qs attribute requires an integer constant argument",
2030 IDENTIFIER_POINTER (name));
2031 *no_add_attrs = true;
2033 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2035 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2036 IDENTIFIER_POINTER (name), REGPARM_MAX);
2037 *no_add_attrs = true;
2040 return NULL_TREE;
2043 if (TARGET_64BIT)
2045 warning (OPT_Wattributes, "%qs attribute ignored",
2046 IDENTIFIER_POINTER (name));
2047 *no_add_attrs = true;
2048 return NULL_TREE;
2051 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2052 if (is_attribute_p ("fastcall", name))
2054 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2056 error ("fastcall and cdecl attributes are not compatible");
2058 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2060 error ("fastcall and stdcall attributes are not compatible");
2062 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2064 error ("fastcall and regparm attributes are not compatible");
2068 /* Can combine stdcall with fastcall (redundant), regparm and
2069 sseregparm. */
2070 else if (is_attribute_p ("stdcall", name))
2072 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2074 error ("stdcall and cdecl attributes are not compatible");
2076 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2078 error ("stdcall and fastcall attributes are not compatible");
2082 /* Can combine cdecl with regparm and sseregparm. */
2083 else if (is_attribute_p ("cdecl", name))
2085 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2087 error ("stdcall and cdecl attributes are not compatible");
2089 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2091 error ("fastcall and cdecl attributes are not compatible");
2095 /* Can combine sseregparm with all attributes. */
2097 return NULL_TREE;
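/* Illustrative example (hypothetical declaration): a combination that the
   checks above reject, e.g.

       void __attribute__((fastcall, regparm (2))) bad (int a, int b);

   triggers "fastcall and regparm attributes are not compatible", whereas
   stdcall combined with regparm or sseregparm is accepted.  */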
2100 /* Return 0 if the attributes for two types are incompatible, 1 if they
2101 are compatible, and 2 if they are nearly compatible (which causes a
2102 warning to be generated). */
2104 static int
2105 ix86_comp_type_attributes (tree type1, tree type2)
2107 /* Check for mismatch of non-default calling convention. */
2108 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2110 if (TREE_CODE (type1) != FUNCTION_TYPE)
2111 return 1;
2113 /* Check for mismatched fastcall/regparm types. */
2114 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2115 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2116 || (ix86_function_regparm (type1, NULL)
2117 != ix86_function_regparm (type2, NULL)))
2118 return 0;
2120 /* Check for mismatched sseregparm types. */
2121 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2122 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2123 return 0;
2125 /* Check for mismatched return types (cdecl vs stdcall). */
2126 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2127 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2128 return 0;
2130 return 1;
2133 /* Return the regparm value for a function with the indicated TYPE and DECL.
2134 DECL may be NULL when calling function indirectly
2135 or considering a libcall. */
2137 static int
2138 ix86_function_regparm (tree type, tree decl)
2140 tree attr;
2141 int regparm = ix86_regparm;
2142 bool user_convention = false;
2144 if (!TARGET_64BIT)
2146 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2147 if (attr)
2149 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2150 user_convention = true;
2153 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2155 regparm = 2;
2156 user_convention = true;
2159 /* Use register calling convention for local functions when possible. */
2160 if (!TARGET_64BIT && !user_convention && decl
2161 && flag_unit_at_a_time && !profile_flag)
2163 struct cgraph_local_info *i = cgraph_local_info (decl);
2164 if (i && i->local)
2166 int local_regparm, globals = 0, regno;
2168 /* Make sure no regparm register is taken by a global register
2169 variable. */
2170 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2171 if (global_regs[local_regparm])
2172 break;
2173 /* We can't use regparm(3) for nested functions as these use
2174 	     the static chain pointer in the third argument.  */
2175 if (local_regparm == 3
2176 && decl_function_context (decl)
2177 && !DECL_NO_STATIC_CHAIN (decl))
2178 local_regparm = 2;
2179 	  /* Each global register variable increases register pressure, so the
2180 	     more global register variables there are, the less useful the regparm
2181 	     optimization is, unless it was requested by the user explicitly.  */
2182 for (regno = 0; regno < 6; regno++)
2183 if (global_regs[regno])
2184 globals++;
2185 local_regparm
2186 = globals < local_regparm ? local_regparm - globals : 0;
2188 if (local_regparm > regparm)
2189 regparm = local_regparm;
2193 return regparm;
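/* Illustrative example (not from the original source): with
   -funit-at-a-time and no profiling, a file-local function such as

       static int sum3 (int a, int b, int c) { return a + b + c; }

   that is only called directly within the unit can be given regparm(3)
   automatically by the code above, so its callers pass a, b and c in
   EAX, EDX and ECX; each global register variable occupying one of those
   registers lowers the usable count accordingly.  */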
2196 /* Return 1 or 2, if we can pass up to 8 SFmode (1) and DFmode (2) arguments
2197 in SSE registers for a function with the indicated TYPE and DECL.
2198 DECL may be NULL when calling function indirectly
2199 or considering a libcall. Otherwise return 0. */
2201 static int
2202 ix86_function_sseregparm (tree type, tree decl)
2204 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2205 by the sseregparm attribute. */
2206 if (TARGET_SSEREGPARM
2207 || (type
2208 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2210 if (!TARGET_SSE)
2212 if (decl)
2213 	    error ("calling %qD with attribute sseregparm without "
2214 "SSE/SSE2 enabled", decl);
2215 else
2216 	    error ("calling %qT with attribute sseregparm without "
2217 "SSE/SSE2 enabled", type);
2218 return 0;
2221 return 2;
2224 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
2225 in SSE registers even for 32-bit mode and not just 3, but up to
2226 8 SSE arguments in registers. */
2227 if (!TARGET_64BIT && decl
2228 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2230 struct cgraph_local_info *i = cgraph_local_info (decl);
2231 if (i && i->local)
2232 return TARGET_SSE2 ? 2 : 1;
2235 return 0;
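/* Illustrative example (hypothetical 32-bit declaration): for

       double __attribute__((sseregparm)) dotx (double a, double b);

   the code above returns 2, asking for a and b to be passed in SSE
   registers; if SSE is not enabled, the error above is issued instead.
   Local functions compiled with -mfpmath=sse and -funit-at-a-time can
   receive similar treatment without the attribute.  */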
2238 /* Return true if EAX is live at the start of the function. Used by
2239 ix86_expand_prologue to determine if we need special help before
2240 calling allocate_stack_worker. */
2242 static bool
2243 ix86_eax_live_at_start_p (void)
2245 /* Cheat. Don't bother working forward from ix86_function_regparm
2246 to the function type to whether an actual argument is located in
2247 eax. Instead just look at cfg info, which is still close enough
2248 to correct at this point. This gives false positives for broken
2249 functions that might use uninitialized data that happens to be
2250 allocated in eax, but who cares? */
2251 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2254 /* Value is the number of bytes of arguments automatically
2255 popped when returning from a subroutine call.
2256 FUNDECL is the declaration node of the function (as a tree),
2257 FUNTYPE is the data type of the function (as a tree),
2258 or for a library call it is an identifier node for the subroutine name.
2259 SIZE is the number of bytes of arguments passed on the stack.
2261 On the 80386, the RTD insn may be used to pop them if the number
2262 of args is fixed, but if the number is variable then the caller
2263 must pop them all. RTD can't be used for library calls now
2264 because the library is compiled with the Unix compiler.
2265 Use of RTD is a selectable option, since it is incompatible with
2266 standard Unix calling sequences. If the option is not selected,
2267 the caller must always pop the args.
2269 The attribute stdcall is equivalent to RTD on a per module basis. */
2272 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2274 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2276 /* Cdecl functions override -mrtd, and never pop the stack. */
2277 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2279 /* Stdcall and fastcall functions will pop the stack if not
2280 variable args. */
2281 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2282 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2283 rtd = 1;
2285 if (rtd
2286 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2287 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2288 == void_type_node)))
2289 return size;
2292 /* Lose any fake structure return argument if it is passed on the stack. */
2293 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2294 && !TARGET_64BIT
2295 && !KEEP_AGGREGATE_RETURN_POINTER)
2297 int nregs = ix86_function_regparm (funtype, fundecl);
2299 if (!nregs)
2300 return GET_MODE_SIZE (Pmode);
2303 return 0;
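/* Illustrative example (not from the original source): for

       int __attribute__((stdcall)) f (int a, int b);

   SIZE is 8 and the argument list is fixed, so the function above returns 8
   and the callee pops its own arguments (typically a "ret 8"); a variadic
   or cdecl function returns 0 and leaves the popping to the caller.  */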
2306 /* Argument support functions. */
2308 /* Return true when register may be used to pass function parameters. */
2309 bool
2310 ix86_function_arg_regno_p (int regno)
2312 int i;
2313 if (!TARGET_64BIT)
2314 return (regno < REGPARM_MAX
2315 || (TARGET_MMX && MMX_REGNO_P (regno)
2316 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2317 || (TARGET_SSE && SSE_REGNO_P (regno)
2318 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2320 if (TARGET_SSE && SSE_REGNO_P (regno)
2321 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2322 return true;
2323 /* RAX is used as hidden argument to va_arg functions. */
2324 if (!regno)
2325 return true;
2326 for (i = 0; i < REGPARM_MAX; i++)
2327 if (regno == x86_64_int_parameter_registers[i])
2328 return true;
2329 return false;
2332 /* Return true if we do not know how to pass TYPE solely in registers.  */
2334 static bool
2335 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2337 if (must_pass_in_stack_var_size_or_pad (mode, type))
2338 return true;
2340 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2341 The layout_type routine is crafty and tries to trick us into passing
2342 currently unsupported vector types on the stack by using TImode. */
2343 return (!TARGET_64BIT && mode == TImode
2344 && type && TREE_CODE (type) != VECTOR_TYPE);
2347 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2348 for a call to a function whose data type is FNTYPE.
2349 For a library call, FNTYPE is 0. */
2351 void
2352 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2353 tree fntype, /* tree ptr for function decl */
2354 rtx libname, /* SYMBOL_REF of library name or 0 */
2355 tree fndecl)
2357 static CUMULATIVE_ARGS zero_cum;
2358 tree param, next_param;
2360 if (TARGET_DEBUG_ARG)
2362 fprintf (stderr, "\ninit_cumulative_args (");
2363 if (fntype)
2364 fprintf (stderr, "fntype code = %s, ret code = %s",
2365 tree_code_name[(int) TREE_CODE (fntype)],
2366 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2367 else
2368 fprintf (stderr, "no fntype");
2370 if (libname)
2371 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2374 *cum = zero_cum;
2376 /* Set up the number of registers to use for passing arguments. */
2377 cum->nregs = ix86_regparm;
2378 if (TARGET_SSE)
2379 cum->sse_nregs = SSE_REGPARM_MAX;
2380 if (TARGET_MMX)
2381 cum->mmx_nregs = MMX_REGPARM_MAX;
2382 cum->warn_sse = true;
2383 cum->warn_mmx = true;
2384 cum->maybe_vaarg = false;
2386 /* Use ecx and edx registers if function has fastcall attribute,
2387 else look for regparm information. */
2388 if (fntype && !TARGET_64BIT)
2390 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2392 cum->nregs = 2;
2393 cum->fastcall = 1;
2395 else
2396 cum->nregs = ix86_function_regparm (fntype, fndecl);
2399 /* Set up the number of SSE registers used for passing SFmode
2400 and DFmode arguments. Warn for mismatching ABI. */
2401 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2403 /* Determine if this function has variable arguments. This is
2404      indicated by the last argument being 'void_type_node' if there
2405 are no variable arguments. If there are variable arguments, then
2406 we won't pass anything in registers in 32-bit mode. */
2408 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2410 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2411 param != 0; param = next_param)
2413 next_param = TREE_CHAIN (param);
2414 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2416 if (!TARGET_64BIT)
2418 cum->nregs = 0;
2419 cum->sse_nregs = 0;
2420 cum->mmx_nregs = 0;
2421 cum->warn_sse = 0;
2422 cum->warn_mmx = 0;
2423 cum->fastcall = 0;
2424 cum->float_in_sse = 0;
2426 cum->maybe_vaarg = true;
2430 if ((!fntype && !libname)
2431 || (fntype && !TYPE_ARG_TYPES (fntype)))
2432 cum->maybe_vaarg = true;
2434 if (TARGET_DEBUG_ARG)
2435 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2437 return;
2440 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2441 But in the case of vector types, it is some vector mode.
2443 When we have only some of our vector isa extensions enabled, then there
2444 are some modes for which vector_mode_supported_p is false. For these
2445 modes, the generic vector support in gcc will choose some non-vector mode
2446 in order to implement the type. By computing the natural mode, we'll
2447 select the proper ABI location for the operand and not depend on whatever
2448 the middle-end decides to do with these vector types. */
2450 static enum machine_mode
2451 type_natural_mode (tree type)
2453 enum machine_mode mode = TYPE_MODE (type);
2455 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2457 HOST_WIDE_INT size = int_size_in_bytes (type);
2458 if ((size == 8 || size == 16)
2459 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2460 && TYPE_VECTOR_SUBPARTS (type) > 1)
2462 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2464 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2465 mode = MIN_MODE_VECTOR_FLOAT;
2466 else
2467 mode = MIN_MODE_VECTOR_INT;
2469 /* Get the mode which has this inner mode and number of units. */
2470 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2471 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2472 && GET_MODE_INNER (mode) == innermode)
2473 return mode;
2475 gcc_unreachable ();
2479 return mode;
2482 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2483 this may not agree with the mode that the type system has chosen for the
2484 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2485 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2487 static rtx
2488 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2489 unsigned int regno)
2491 rtx tmp;
2493 if (orig_mode != BLKmode)
2494 tmp = gen_rtx_REG (orig_mode, regno);
2495 else
2497 tmp = gen_rtx_REG (mode, regno);
2498 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2499 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2502 return tmp;
2505 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2506 of this code is to classify each 8bytes of incoming argument by the register
2507 class and assign registers accordingly. */
2509 /* Return the union class of CLASS1 and CLASS2.
2510 See the x86-64 PS ABI for details. */
2512 static enum x86_64_reg_class
2513 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2515 /* Rule #1: If both classes are equal, this is the resulting class. */
2516 if (class1 == class2)
2517 return class1;
2519 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2520 the other class. */
2521 if (class1 == X86_64_NO_CLASS)
2522 return class2;
2523 if (class2 == X86_64_NO_CLASS)
2524 return class1;
2526 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2527 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2528 return X86_64_MEMORY_CLASS;
2530 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2531 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2532 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2533 return X86_64_INTEGERSI_CLASS;
2534 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2535 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2536 return X86_64_INTEGER_CLASS;
2538 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2539 MEMORY is used. */
2540 if (class1 == X86_64_X87_CLASS
2541 || class1 == X86_64_X87UP_CLASS
2542 || class1 == X86_64_COMPLEX_X87_CLASS
2543 || class2 == X86_64_X87_CLASS
2544 || class2 == X86_64_X87UP_CLASS
2545 || class2 == X86_64_COMPLEX_X87_CLASS)
2546 return X86_64_MEMORY_CLASS;
2548 /* Rule #6: Otherwise class SSE is used. */
2549 return X86_64_SSE_CLASS;
2552 /* Classify the argument of type TYPE and mode MODE.
2553 CLASSES will be filled by the register class used to pass each word
2554 of the operand. The number of words is returned. In case the parameter
2555 should be passed in memory, 0 is returned. As a special case for zero
2556 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2558    BIT_OFFSET is used internally for handling records and specifies the
2559    offset in bits modulo 256 to avoid overflow cases.
2561 See the x86-64 PS ABI for details.
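/* Illustrative example (not from the original source): for

       struct s { double d; int i; };

   the 16 bytes form two eightbytes; the first is classified
   X86_64_SSEDF_CLASS (from the double) and the second
   X86_64_INTEGER_CLASS (from the int at offset 8), so the struct is
   passed in one SSE register and one integer register rather than in
   memory.  */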
2564 static int
2565 classify_argument (enum machine_mode mode, tree type,
2566 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2568 HOST_WIDE_INT bytes =
2569 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2570 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2572 /* Variable sized entities are always passed/returned in memory. */
2573 if (bytes < 0)
2574 return 0;
2576 if (mode != VOIDmode
2577 && targetm.calls.must_pass_in_stack (mode, type))
2578 return 0;
2580 if (type && AGGREGATE_TYPE_P (type))
2582 int i;
2583 tree field;
2584 enum x86_64_reg_class subclasses[MAX_CLASSES];
2586 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2587 if (bytes > 16)
2588 return 0;
2590 for (i = 0; i < words; i++)
2591 classes[i] = X86_64_NO_CLASS;
2593       /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
2594 	 signal the memory class, so handle it as a special case.  */
2595 if (!words)
2597 classes[0] = X86_64_NO_CLASS;
2598 return 1;
2601 /* Classify each field of record and merge classes. */
2602 switch (TREE_CODE (type))
2604 case RECORD_TYPE:
2605 /* For classes first merge in the field of the subclasses. */
2606 if (TYPE_BINFO (type))
2608 tree binfo, base_binfo;
2609 int basenum;
2611 for (binfo = TYPE_BINFO (type), basenum = 0;
2612 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2614 int num;
2615 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2616 tree type = BINFO_TYPE (base_binfo);
2618 num = classify_argument (TYPE_MODE (type),
2619 type, subclasses,
2620 (offset + bit_offset) % 256);
2621 if (!num)
2622 return 0;
2623 for (i = 0; i < num; i++)
2625 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2626 classes[i + pos] =
2627 merge_classes (subclasses[i], classes[i + pos]);
2631 /* And now merge the fields of structure. */
2632 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2634 if (TREE_CODE (field) == FIELD_DECL)
2636 int num;
2638 /* Bitfields are always classified as integer. Handle them
2639 early, since later code would consider them to be
2640 misaligned integers. */
2641 if (DECL_BIT_FIELD (field))
2643 for (i = int_bit_position (field) / 8 / 8;
2644 i < (int_bit_position (field)
2645 + tree_low_cst (DECL_SIZE (field), 0)
2646 + 63) / 8 / 8; i++)
2647 classes[i] =
2648 merge_classes (X86_64_INTEGER_CLASS,
2649 classes[i]);
2651 else
2653 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2654 TREE_TYPE (field), subclasses,
2655 (int_bit_position (field)
2656 + bit_offset) % 256);
2657 if (!num)
2658 return 0;
2659 for (i = 0; i < num; i++)
2661 int pos =
2662 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2663 classes[i + pos] =
2664 merge_classes (subclasses[i], classes[i + pos]);
2669 break;
2671 case ARRAY_TYPE:
2672 /* Arrays are handled as small records. */
2674 int num;
2675 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2676 TREE_TYPE (type), subclasses, bit_offset);
2677 if (!num)
2678 return 0;
2680 /* The partial classes are now full classes. */
2681 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2682 subclasses[0] = X86_64_SSE_CLASS;
2683 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2684 subclasses[0] = X86_64_INTEGER_CLASS;
2686 for (i = 0; i < words; i++)
2687 classes[i] = subclasses[i % num];
2689 break;
2691 case UNION_TYPE:
2692 case QUAL_UNION_TYPE:
2693 	  /* Unions are similar to RECORD_TYPE but the offset is always 0.  */
2696 /* Unions are not derived. */
2697 gcc_assert (!TYPE_BINFO (type)
2698 || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
2699 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2701 if (TREE_CODE (field) == FIELD_DECL)
2703 int num;
2704 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2705 TREE_TYPE (field), subclasses,
2706 bit_offset);
2707 if (!num)
2708 return 0;
2709 for (i = 0; i < num; i++)
2710 classes[i] = merge_classes (subclasses[i], classes[i]);
2713 break;
2715 default:
2716 gcc_unreachable ();
2719 /* Final merger cleanup. */
2720 for (i = 0; i < words; i++)
2722 /* If one class is MEMORY, everything should be passed in
2723 memory. */
2724 if (classes[i] == X86_64_MEMORY_CLASS)
2725 return 0;
2727 /* The X86_64_SSEUP_CLASS should be always preceded by
2728 X86_64_SSE_CLASS. */
2729 if (classes[i] == X86_64_SSEUP_CLASS
2730 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2731 classes[i] = X86_64_SSE_CLASS;
2733 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2734 if (classes[i] == X86_64_X87UP_CLASS
2735 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2736 classes[i] = X86_64_SSE_CLASS;
2738 return words;
2741   /* Compute the alignment needed.  We align all types to their natural boundaries,
2742      with the exception of XFmode that is aligned to 64bits.  */
2743 if (mode != VOIDmode && mode != BLKmode)
2745 int mode_alignment = GET_MODE_BITSIZE (mode);
2747 if (mode == XFmode)
2748 mode_alignment = 128;
2749 else if (mode == XCmode)
2750 mode_alignment = 256;
2751 if (COMPLEX_MODE_P (mode))
2752 mode_alignment /= 2;
2753 /* Misaligned fields are always returned in memory. */
2754 if (bit_offset % mode_alignment)
2755 return 0;
2758 /* for V1xx modes, just use the base mode */
2759 if (VECTOR_MODE_P (mode)
2760 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
2761 mode = GET_MODE_INNER (mode);
2763 /* Classification of atomic types. */
2764 switch (mode)
2766 case DImode:
2767 case SImode:
2768 case HImode:
2769 case QImode:
2770 case CSImode:
2771 case CHImode:
2772 case CQImode:
2773 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2774 classes[0] = X86_64_INTEGERSI_CLASS;
2775 else
2776 classes[0] = X86_64_INTEGER_CLASS;
2777 return 1;
2778 case CDImode:
2779 case TImode:
2780 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2781 return 2;
2782 case CTImode:
2783 return 0;
2784 case SFmode:
2785 if (!(bit_offset % 64))
2786 classes[0] = X86_64_SSESF_CLASS;
2787 else
2788 classes[0] = X86_64_SSE_CLASS;
2789 return 1;
2790 case DFmode:
2791 classes[0] = X86_64_SSEDF_CLASS;
2792 return 1;
2793 case XFmode:
2794 classes[0] = X86_64_X87_CLASS;
2795 classes[1] = X86_64_X87UP_CLASS;
2796 return 2;
2797 case TFmode:
2798 classes[0] = X86_64_SSE_CLASS;
2799 classes[1] = X86_64_SSEUP_CLASS;
2800 return 2;
2801 case SCmode:
2802 classes[0] = X86_64_SSE_CLASS;
2803 return 1;
2804 case DCmode:
2805 classes[0] = X86_64_SSEDF_CLASS;
2806 classes[1] = X86_64_SSEDF_CLASS;
2807 return 2;
2808 case XCmode:
2809 classes[0] = X86_64_COMPLEX_X87_CLASS;
2810 return 1;
2811 case TCmode:
2812       /* This mode is larger than 16 bytes.  */
2813 return 0;
2814 case V4SFmode:
2815 case V4SImode:
2816 case V16QImode:
2817 case V8HImode:
2818 case V2DFmode:
2819 case V2DImode:
2820 classes[0] = X86_64_SSE_CLASS;
2821 classes[1] = X86_64_SSEUP_CLASS;
2822 return 2;
2823 case V2SFmode:
2824 case V2SImode:
2825 case V4HImode:
2826 case V8QImode:
2827 classes[0] = X86_64_SSE_CLASS;
2828 return 1;
2829 case BLKmode:
2830 case VOIDmode:
2831 return 0;
2832 default:
2833 gcc_assert (VECTOR_MODE_P (mode));
2835 if (bytes > 16)
2836 return 0;
2838 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
2840 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2841 classes[0] = X86_64_INTEGERSI_CLASS;
2842 else
2843 classes[0] = X86_64_INTEGER_CLASS;
2844 classes[1] = X86_64_INTEGER_CLASS;
2845 return 1 + (bytes > 8);
2849 /* Examine the argument and return set number of register required in each
2850 class. Return 0 iff parameter should be passed in memory. */
2851 static int
2852 examine_argument (enum machine_mode mode, tree type, int in_return,
2853 int *int_nregs, int *sse_nregs)
2855 enum x86_64_reg_class class[MAX_CLASSES];
2856 int n = classify_argument (mode, type, class, 0);
2858 *int_nregs = 0;
2859 *sse_nregs = 0;
2860 if (!n)
2861 return 0;
2862 for (n--; n >= 0; n--)
2863 switch (class[n])
2865 case X86_64_INTEGER_CLASS:
2866 case X86_64_INTEGERSI_CLASS:
2867 (*int_nregs)++;
2868 break;
2869 case X86_64_SSE_CLASS:
2870 case X86_64_SSESF_CLASS:
2871 case X86_64_SSEDF_CLASS:
2872 (*sse_nregs)++;
2873 break;
2874 case X86_64_NO_CLASS:
2875 case X86_64_SSEUP_CLASS:
2876 break;
2877 case X86_64_X87_CLASS:
2878 case X86_64_X87UP_CLASS:
2879 if (!in_return)
2880 return 0;
2881 break;
2882 case X86_64_COMPLEX_X87_CLASS:
2883 return in_return ? 2 : 0;
2884 case X86_64_MEMORY_CLASS:
2885 gcc_unreachable ();
2887 return 1;
2890 /* Construct container for the argument used by GCC interface. See
2891 FUNCTION_ARG for the detailed description. */
2893 static rtx
2894 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
2895 tree type, int in_return, int nintregs, int nsseregs,
2896 const int *intreg, int sse_regno)
2898 enum machine_mode tmpmode;
2899 int bytes =
2900 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2901 enum x86_64_reg_class class[MAX_CLASSES];
2902 int n;
2903 int i;
2904 int nexps = 0;
2905 int needed_sseregs, needed_intregs;
2906 rtx exp[MAX_CLASSES];
2907 rtx ret;
2909 n = classify_argument (mode, type, class, 0);
2910 if (TARGET_DEBUG_ARG)
2912 if (!n)
2913 fprintf (stderr, "Memory class\n");
2914 else
2916 fprintf (stderr, "Classes:");
2917 for (i = 0; i < n; i++)
2919 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2921 fprintf (stderr, "\n");
2924 if (!n)
2925 return NULL;
2926 if (!examine_argument (mode, type, in_return, &needed_intregs,
2927 &needed_sseregs))
2928 return NULL;
2929 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2930 return NULL;
2932 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2933 some less clueful developer tries to use floating-point anyway. */
2934 if (needed_sseregs && !TARGET_SSE)
2936 static bool issued_error;
2937 if (!issued_error)
2939 issued_error = true;
2940 if (in_return)
2941 error ("SSE register return with SSE disabled");
2942 else
2943 error ("SSE register argument with SSE disabled");
2945 return NULL;
2948 /* First construct simple cases. Avoid SCmode, since we want to use
2949      a single register to pass this type.  */
2950 if (n == 1 && mode != SCmode)
2951 switch (class[0])
2953 case X86_64_INTEGER_CLASS:
2954 case X86_64_INTEGERSI_CLASS:
2955 return gen_rtx_REG (mode, intreg[0]);
2956 case X86_64_SSE_CLASS:
2957 case X86_64_SSESF_CLASS:
2958 case X86_64_SSEDF_CLASS:
2959 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
2960 case X86_64_X87_CLASS:
2961 case X86_64_COMPLEX_X87_CLASS:
2962 return gen_rtx_REG (mode, FIRST_STACK_REG);
2963 case X86_64_NO_CLASS:
2964 /* Zero sized array, struct or class. */
2965 return NULL;
2966 default:
2967 gcc_unreachable ();
2969 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2970 && mode != BLKmode)
2971 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2972 if (n == 2
2973 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2974 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2975 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2976 && class[1] == X86_64_INTEGER_CLASS
2977 && (mode == CDImode || mode == TImode || mode == TFmode)
2978 && intreg[0] + 1 == intreg[1])
2979 return gen_rtx_REG (mode, intreg[0]);
2981 /* Otherwise figure out the entries of the PARALLEL. */
2982 for (i = 0; i < n; i++)
2984 switch (class[i])
2986 case X86_64_NO_CLASS:
2987 break;
2988 case X86_64_INTEGER_CLASS:
2989 case X86_64_INTEGERSI_CLASS:
2990 /* Merge TImodes on aligned occasions here too. */
2991 if (i * 8 + 8 > bytes)
2992 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2993 else if (class[i] == X86_64_INTEGERSI_CLASS)
2994 tmpmode = SImode;
2995 else
2996 tmpmode = DImode;
2997 	  /* We've requested 24 bytes for which we don't have a mode.  Use DImode.  */
2998 if (tmpmode == BLKmode)
2999 tmpmode = DImode;
3000 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3001 gen_rtx_REG (tmpmode, *intreg),
3002 GEN_INT (i*8));
3003 intreg++;
3004 break;
3005 case X86_64_SSESF_CLASS:
3006 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3007 gen_rtx_REG (SFmode,
3008 SSE_REGNO (sse_regno)),
3009 GEN_INT (i*8));
3010 sse_regno++;
3011 break;
3012 case X86_64_SSEDF_CLASS:
3013 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3014 gen_rtx_REG (DFmode,
3015 SSE_REGNO (sse_regno)),
3016 GEN_INT (i*8));
3017 sse_regno++;
3018 break;
3019 case X86_64_SSE_CLASS:
3020 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3021 tmpmode = TImode;
3022 else
3023 tmpmode = DImode;
3024 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3025 gen_rtx_REG (tmpmode,
3026 SSE_REGNO (sse_regno)),
3027 GEN_INT (i*8));
3028 if (tmpmode == TImode)
3029 i++;
3030 sse_regno++;
3031 break;
3032 default:
3033 gcc_unreachable ();
3037 /* Empty aligned struct, union or class. */
3038 if (nexps == 0)
3039 return NULL;
3041 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3042 for (i = 0; i < nexps; i++)
3043 XVECEXP (ret, 0, i) = exp [i];
3044 return ret;
3047 /* Update the data in CUM to advance over an argument
3048 of mode MODE and data type TYPE.
3049 (TYPE is null for libcalls where that information may not be available.) */
3051 void
3052 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3053 tree type, int named)
3055 int bytes =
3056 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3057 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3059 if (type)
3060 mode = type_natural_mode (type);
3062 if (TARGET_DEBUG_ARG)
3063 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3064 "mode=%s, named=%d)\n\n",
3065 words, cum->words, cum->nregs, cum->sse_nregs,
3066 GET_MODE_NAME (mode), named);
3068 if (TARGET_64BIT)
3070 int int_nregs, sse_nregs;
3071 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3072 cum->words += words;
3073 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3075 cum->nregs -= int_nregs;
3076 cum->sse_nregs -= sse_nregs;
3077 cum->regno += int_nregs;
3078 cum->sse_regno += sse_nregs;
3080 else
3081 cum->words += words;
3083 else
3085 switch (mode)
3087 default:
3088 break;
3090 case BLKmode:
3091 if (bytes < 0)
3092 break;
3093 /* FALLTHRU */
3095 case DImode:
3096 case SImode:
3097 case HImode:
3098 case QImode:
3099 cum->words += words;
3100 cum->nregs -= words;
3101 cum->regno += words;
3103 if (cum->nregs <= 0)
3105 cum->nregs = 0;
3106 cum->regno = 0;
3108 break;
3110 case DFmode:
3111 if (cum->float_in_sse < 2)
3112 break;
3113 case SFmode:
3114 if (cum->float_in_sse < 1)
3115 break;
3116 /* FALLTHRU */
3118 case TImode:
3119 case V16QImode:
3120 case V8HImode:
3121 case V4SImode:
3122 case V2DImode:
3123 case V4SFmode:
3124 case V2DFmode:
3125 if (!type || !AGGREGATE_TYPE_P (type))
3127 cum->sse_words += words;
3128 cum->sse_nregs -= 1;
3129 cum->sse_regno += 1;
3130 if (cum->sse_nregs <= 0)
3132 cum->sse_nregs = 0;
3133 cum->sse_regno = 0;
3136 break;
3138 case V8QImode:
3139 case V4HImode:
3140 case V2SImode:
3141 case V2SFmode:
3142 if (!type || !AGGREGATE_TYPE_P (type))
3144 cum->mmx_words += words;
3145 cum->mmx_nregs -= 1;
3146 cum->mmx_regno += 1;
3147 if (cum->mmx_nregs <= 0)
3149 cum->mmx_nregs = 0;
3150 cum->mmx_regno = 0;
3153 break;
3158 /* Define where to put the arguments to a function.
3159 Value is zero to push the argument on the stack,
3160 or a hard register in which to store the argument.
3162 MODE is the argument's machine mode.
3163 TYPE is the data type of the argument (as a tree).
3164 This is null for libcalls where that information may
3165 not be available.
3166 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3167 the preceding args and about the function being called.
3168 NAMED is nonzero if this argument is a named parameter
3169 (otherwise it is an extra parameter matching an ellipsis). */
3172 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3173 tree type, int named)
3175 enum machine_mode mode = orig_mode;
3176 rtx ret = NULL_RTX;
3177 int bytes =
3178 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3179 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3180 static bool warnedsse, warnedmmx;
3182 /* To simplify the code below, represent vector types with a vector mode
3183 even if MMX/SSE are not active. */
3184 if (type && TREE_CODE (type) == VECTOR_TYPE)
3185 mode = type_natural_mode (type);
3187 /* Handle a hidden AL argument containing number of registers for varargs
3188 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
3189 any AL settings. */
3190 if (mode == VOIDmode)
3192 if (TARGET_64BIT)
3193 return GEN_INT (cum->maybe_vaarg
3194 ? (cum->sse_nregs < 0
3195 ? SSE_REGPARM_MAX
3196 : cum->sse_regno)
3197 : -1);
3198 else
3199 return constm1_rtx;
3201 if (TARGET_64BIT)
3202 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3203 cum->sse_nregs,
3204 &x86_64_int_parameter_registers [cum->regno],
3205 cum->sse_regno);
3206 else
3207 switch (mode)
3209 /* For now, pass fp/complex values on the stack. */
3210 default:
3211 break;
3213 case BLKmode:
3214 if (bytes < 0)
3215 break;
3216 /* FALLTHRU */
3217 case DImode:
3218 case SImode:
3219 case HImode:
3220 case QImode:
3221 if (words <= cum->nregs)
3223 int regno = cum->regno;
3225 /* Fastcall allocates the first two DWORD (SImode) or
3226 smaller arguments to ECX and EDX. */
3227 if (cum->fastcall)
3229 if (mode == BLKmode || mode == DImode)
3230 break;
3232 /* ECX not EAX is the first allocated register. */
3233 if (regno == 0)
3234 regno = 2;
3236 ret = gen_rtx_REG (mode, regno);
3238 break;
3239 case DFmode:
3240 if (cum->float_in_sse < 2)
3241 break;
3242 case SFmode:
3243 if (cum->float_in_sse < 1)
3244 break;
3245 /* FALLTHRU */
3246 case TImode:
3247 case V16QImode:
3248 case V8HImode:
3249 case V4SImode:
3250 case V2DImode:
3251 case V4SFmode:
3252 case V2DFmode:
3253 if (!type || !AGGREGATE_TYPE_P (type))
3255 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3257 warnedsse = true;
3258 warning (0, "SSE vector argument without SSE enabled "
3259 "changes the ABI");
3261 if (cum->sse_nregs)
3262 ret = gen_reg_or_parallel (mode, orig_mode,
3263 cum->sse_regno + FIRST_SSE_REG);
3265 break;
3266 case V8QImode:
3267 case V4HImode:
3268 case V2SImode:
3269 case V2SFmode:
3270 if (!type || !AGGREGATE_TYPE_P (type))
3272 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3274 warnedmmx = true;
3275 warning (0, "MMX vector argument without MMX enabled "
3276 "changes the ABI");
3278 if (cum->mmx_nregs)
3279 ret = gen_reg_or_parallel (mode, orig_mode,
3280 cum->mmx_regno + FIRST_MMX_REG);
3282 break;
3285 if (TARGET_DEBUG_ARG)
3287 fprintf (stderr,
3288 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3289 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3291 if (ret)
3292 print_simple_rtl (stderr, ret);
3293 else
3294 fprintf (stderr, ", stack");
3296 fprintf (stderr, " )\n");
3299 return ret;
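/* Illustrative example (hypothetical 32-bit declaration): for

       int __attribute__((fastcall)) f (int a, int b, int c);

   the code above places a in ECX (hard register 2 in this backend's
   numbering) and b in EDX, while c no longer fits in registers and is
   pushed on the stack.  */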
3302 /* A C expression that indicates when an argument must be passed by
3303 reference. If nonzero for an argument, a copy of that argument is
3304 made in memory and a pointer to the argument is passed instead of
3305 the argument itself. The pointer is passed in whatever way is
3306 appropriate for passing a pointer to that type. */
3308 static bool
3309 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3310 enum machine_mode mode ATTRIBUTE_UNUSED,
3311 tree type, bool named ATTRIBUTE_UNUSED)
3313 if (!TARGET_64BIT)
3314 return 0;
3316 if (type && int_size_in_bytes (type) == -1)
3318 if (TARGET_DEBUG_ARG)
3319 fprintf (stderr, "function_arg_pass_by_reference\n");
3320 return 1;
3323 return 0;
3326 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
3327 ABI. Only called if TARGET_SSE. */
3328 static bool
3329 contains_128bit_aligned_vector_p (tree type)
3331 enum machine_mode mode = TYPE_MODE (type);
3332 if (SSE_REG_MODE_P (mode)
3333 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3334 return true;
3335 if (TYPE_ALIGN (type) < 128)
3336 return false;
3338 if (AGGREGATE_TYPE_P (type))
3340 /* Walk the aggregates recursively. */
3341 switch (TREE_CODE (type))
3343 case RECORD_TYPE:
3344 case UNION_TYPE:
3345 case QUAL_UNION_TYPE:
3347 tree field;
3349 if (TYPE_BINFO (type))
3351 tree binfo, base_binfo;
3352 int i;
3354 for (binfo = TYPE_BINFO (type), i = 0;
3355 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
3356 if (contains_128bit_aligned_vector_p
3357 (BINFO_TYPE (base_binfo)))
3358 return true;
3360 /* And now merge the fields of structure. */
3361 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3363 if (TREE_CODE (field) == FIELD_DECL
3364 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3365 return true;
3367 break;
3370 case ARRAY_TYPE:
3371 	/* Just for use if some languages pass arrays by value.  */
3372 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3373 return true;
3374 break;
3376 default:
3377 gcc_unreachable ();
3380 return false;
3383 /* Gives the alignment boundary, in bits, of an argument with the
3384 specified mode and type. */
3387 ix86_function_arg_boundary (enum machine_mode mode, tree type)
3389 int align;
3390 if (type)
3391 align = TYPE_ALIGN (type);
3392 else
3393 align = GET_MODE_ALIGNMENT (mode);
3394 if (align < PARM_BOUNDARY)
3395 align = PARM_BOUNDARY;
3396 if (!TARGET_64BIT)
3398 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3399 make an exception for SSE modes since these require 128bit
3400 alignment.
3402 The handling here differs from field_alignment. ICC aligns MMX
3403 arguments to 4 byte boundaries, while structure fields are aligned
3404 to 8 byte boundaries. */
3405 if (!TARGET_SSE)
3406 align = PARM_BOUNDARY;
3407 else if (!type)
3409 if (!SSE_REG_MODE_P (mode))
3410 align = PARM_BOUNDARY;
3412 else
3414 if (!contains_128bit_aligned_vector_p (type))
3415 align = PARM_BOUNDARY;
3418 if (align > 128)
3419 align = 128;
3420 return align;
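/* Illustrative example (not from the original source): in 32-bit code a
   plain int argument keeps PARM_BOUNDARY (32-bit) alignment, whereas an
   argument of type __m128, or a struct containing such a 128-bit aligned
   vector, is given 128-bit alignment by the logic above when SSE is
   enabled; 64-bit code simply uses the natural alignment, at least
   PARM_BOUNDARY, capped at 128 bits.  */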
3423 /* Return true if N is a possible register number of function value. */
3424 bool
3425 ix86_function_value_regno_p (int regno)
3427 if (regno == 0
3428 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3429 || (regno == FIRST_SSE_REG && TARGET_SSE))
3430 return true;
3432 if (!TARGET_64BIT
3433 && (regno == FIRST_MMX_REG && TARGET_MMX))
3434 return true;
3436 return false;
3439 /* Define how to find the value returned by a function.
3440 VALTYPE is the data type of the value (as a tree).
3441 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3442 otherwise, FUNC is 0. */
3444 ix86_function_value (tree valtype, tree fntype_or_decl,
3445 bool outgoing ATTRIBUTE_UNUSED)
3447 enum machine_mode natmode = type_natural_mode (valtype);
3449 if (TARGET_64BIT)
3451 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3452 1, REGPARM_MAX, SSE_REGPARM_MAX,
3453 x86_64_int_return_registers, 0);
3454 /* For zero-sized structures, construct_container returns NULL, but we
3455 need to keep the rest of the compiler happy by returning a meaningful value. */
3456 if (!ret)
3457 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3458 return ret;
3460 else
3462 tree fn = NULL_TREE, fntype;
3463 if (fntype_or_decl
3464 && DECL_P (fntype_or_decl))
3465 fn = fntype_or_decl;
3466 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
3467 return gen_rtx_REG (TYPE_MODE (valtype),
3468 ix86_value_regno (natmode, fn, fntype));
3472 /* Return true iff TYPE is returned in memory. */
3474 ix86_return_in_memory (tree type)
3476 int needed_intregs, needed_sseregs, size;
3477 enum machine_mode mode = type_natural_mode (type);
3479 if (TARGET_64BIT)
3480 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3482 if (mode == BLKmode)
3483 return 1;
3485 size = int_size_in_bytes (type);
3487 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3488 return 0;
3490 if (VECTOR_MODE_P (mode) || mode == TImode)
3492 /* User-created vectors small enough to fit in EAX. */
3493 if (size < 8)
3494 return 0;
3496 /* MMX/3dNow values are returned in MM0,
3497 except when it doesn't exist. */
3498 if (size == 8)
3499 return (TARGET_MMX ? 0 : 1);
3501 /* SSE values are returned in XMM0, except when it doesn't exist. */
3502 if (size == 16)
3503 return (TARGET_SSE ? 0 : 1);
3506 if (mode == XFmode)
3507 return 0;
3509 if (size > 12)
3510 return 1;
3511 return 0;
3514 /* When returning SSE vector types, we have a choice of either
3515 (1) being ABI incompatible with a -march switch, or
3516 (2) generating an error.
3517 Given no good solution, I think the safest thing is one warning.
3518 The user won't be able to use -Werror, but....
3520 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3521 called in response to actually generating a caller or callee that
3522 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3523 via aggregate_value_p for general type probing from tree-ssa. */
3525 static rtx
3526 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3528 static bool warnedsse, warnedmmx;
3530 if (type)
3532 /* Look at the return type of the function, not the function type. */
3533 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3535 if (!TARGET_SSE && !warnedsse)
3537 if (mode == TImode
3538 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3540 warnedsse = true;
3541 warning (0, "SSE vector return without SSE enabled "
3542 "changes the ABI");
3546 if (!TARGET_MMX && !warnedmmx)
3548 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3550 warnedmmx = true;
3551 warning (0, "MMX vector return without MMX enabled "
3552 "changes the ABI");
3557 return NULL;
3560 /* Define how to find the value returned by a library function
3561 assuming the value has mode MODE. */
3563 ix86_libcall_value (enum machine_mode mode)
3565 if (TARGET_64BIT)
3567 switch (mode)
3569 case SFmode:
3570 case SCmode:
3571 case DFmode:
3572 case DCmode:
3573 case TFmode:
3574 return gen_rtx_REG (mode, FIRST_SSE_REG);
3575 case XFmode:
3576 case XCmode:
3577 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3578 case TCmode:
3579 return NULL;
3580 default:
3581 return gen_rtx_REG (mode, 0);
3584 else
3585 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
3588 /* Given a mode, return the register to use for a return value. */
3590 static int
3591 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
3593 gcc_assert (!TARGET_64BIT);
3595 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
3596 we prevent this case when mmx is not available. */
3597 if ((VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8))
3598 return FIRST_MMX_REG;
3600 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3601 we prevent this case when sse is not available. */
3602 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3603 return FIRST_SSE_REG;
3605 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
3606 if (GET_MODE_CLASS (mode) != MODE_FLOAT || !TARGET_FLOAT_RETURNS_IN_80387)
3607 return 0;
3609 /* Floating point return values in %st(0), except for local functions when
3610 SSE math is enabled or for functions with sseregparm attribute. */
3611 if ((func || fntype)
3612 && (mode == SFmode || mode == DFmode))
3614 int sse_level = ix86_function_sseregparm (fntype, func);
3615 if ((sse_level >= 1 && mode == SFmode)
3616 || (sse_level == 2 && mode == DFmode))
3617 return FIRST_SSE_REG;
3620 return FIRST_FLOAT_REG;
3623 /* Create the va_list data type. */
3625 static tree
3626 ix86_build_builtin_va_list (void)
3628 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3630 /* For i386 we use a plain pointer to the argument area. */
3631 if (!TARGET_64BIT)
3632 return build_pointer_type (char_type_node);
3634 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3635 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3637 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3638 unsigned_type_node);
3639 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3640 unsigned_type_node);
3641 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3642 ptr_type_node);
3643 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3644 ptr_type_node);
3646 va_list_gpr_counter_field = f_gpr;
3647 va_list_fpr_counter_field = f_fpr;
3649 DECL_FIELD_CONTEXT (f_gpr) = record;
3650 DECL_FIELD_CONTEXT (f_fpr) = record;
3651 DECL_FIELD_CONTEXT (f_ovf) = record;
3652 DECL_FIELD_CONTEXT (f_sav) = record;
3654 TREE_CHAIN (record) = type_decl;
3655 TYPE_NAME (record) = type_decl;
3656 TYPE_FIELDS (record) = f_gpr;
3657 TREE_CHAIN (f_gpr) = f_fpr;
3658 TREE_CHAIN (f_fpr) = f_ovf;
3659 TREE_CHAIN (f_ovf) = f_sav;
3661 layout_type (record);
3663 /* The correct type is an array type of one element. */
3664 return build_array_type (record, build_index_type (size_zero_node));
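/* For reference, the record built above corresponds roughly to the
   following C declaration of the x86-64 va_list (a sketch, not anything
   the compiler consumes):

     typedef struct __va_list_tag
     {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;

     typedef __va_list_tag __builtin_va_list[1];  */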
3667 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3669 static void
3670 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3671 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3672 int no_rtl)
3674 CUMULATIVE_ARGS next_cum;
3675 rtx save_area = NULL_RTX, mem;
3676 rtx label;
3677 rtx label_ref;
3678 rtx tmp_reg;
3679 rtx nsse_reg;
3680 int set;
3681 tree fntype;
3682 int stdarg_p;
3683 int i;
3685 if (!TARGET_64BIT)
3686 return;
3688 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
3689 return;
3691 /* Indicate that we need to allocate space on the stack for the varargs save area. */
3692 ix86_save_varrargs_registers = 1;
3694 cfun->stack_alignment_needed = 128;
3696 fntype = TREE_TYPE (current_function_decl);
3697 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3698 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3699 != void_type_node));
3701 /* For varargs, we do not want to skip the dummy va_dcl argument.
3702 For stdargs, we do want to skip the last named argument. */
3703 next_cum = *cum;
3704 if (stdarg_p)
3705 function_arg_advance (&next_cum, mode, type, 1);
3707 if (!no_rtl)
3708 save_area = frame_pointer_rtx;
3710 set = get_varargs_alias_set ();
3712 for (i = next_cum.regno;
3713 i < ix86_regparm
3714 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
3715 i++)
3717 mem = gen_rtx_MEM (Pmode,
3718 plus_constant (save_area, i * UNITS_PER_WORD));
3719 MEM_NOTRAP_P (mem) = 1;
3720 set_mem_alias_set (mem, set);
3721 emit_move_insn (mem, gen_rtx_REG (Pmode,
3722 x86_64_int_parameter_registers[i]));
3725 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
3727 /* Now emit code to save SSE registers. The AX parameter contains the number
3728 of SSE parameter registers used to call this function. We use the
3729 sse_prologue_save insn template, which produces a computed jump across
3730 the SSE saves. We need some preparation work to get this working. */
3732 label = gen_label_rtx ();
3733 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3735 /* Compute address to jump to :
3736 label - 5*eax + nnamed_sse_arguments*5 */
3737 tmp_reg = gen_reg_rtx (Pmode);
3738 nsse_reg = gen_reg_rtx (Pmode);
3739 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3740 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3741 gen_rtx_MULT (Pmode, nsse_reg,
3742 GEN_INT (4))));
3743 if (next_cum.sse_regno)
3744 emit_move_insn
3745 (nsse_reg,
3746 gen_rtx_CONST (DImode,
3747 gen_rtx_PLUS (DImode,
3748 label_ref,
3749 GEN_INT (next_cum.sse_regno * 4))));
3750 else
3751 emit_move_insn (nsse_reg, label_ref);
3752 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3754 /* Compute the address of the memory block we save into. We always use a
3755 pointer pointing 127 bytes after the first byte to store - this is needed
3756 to keep the instruction size limited to 4 bytes. */
3757 tmp_reg = gen_reg_rtx (Pmode);
3758 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3759 plus_constant (save_area,
3760 8 * REGPARM_MAX + 127)));
3761 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3762 MEM_NOTRAP_P (mem) = 1;
3763 set_mem_alias_set (mem, set);
3764 set_mem_align (mem, BITS_PER_WORD);
3766 /* And finally do the dirty job! */
3767 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3768 GEN_INT (next_cum.sse_regno), label));
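/* Layout of the register save area filled in above (a sketch; the offsets
   follow from i * UNITS_PER_WORD and 8 * REGPARM_MAX in the code):

     reg_save_area +   0 ... +47  : %rdi, %rsi, %rdx, %rcx, %r8, %r9
     reg_save_area +  48 ... +175 : %xmm0 ... %xmm7, 16 bytes each  */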
3773 /* Implement va_start. */
3775 void
3776 ix86_va_start (tree valist, rtx nextarg)
3778 HOST_WIDE_INT words, n_gpr, n_fpr;
3779 tree f_gpr, f_fpr, f_ovf, f_sav;
3780 tree gpr, fpr, ovf, sav, t;
3782 /* Only 64bit target needs something special. */
3783 if (!TARGET_64BIT)
3785 std_expand_builtin_va_start (valist, nextarg);
3786 return;
3789 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3790 f_fpr = TREE_CHAIN (f_gpr);
3791 f_ovf = TREE_CHAIN (f_fpr);
3792 f_sav = TREE_CHAIN (f_ovf);
3794 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3795 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3796 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3797 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3798 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3800 /* Count number of gp and fp argument registers used. */
3801 words = current_function_args_info.words;
3802 n_gpr = current_function_args_info.regno;
3803 n_fpr = current_function_args_info.sse_regno;
3805 if (TARGET_DEBUG_ARG)
3806 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3807 (int) words, (int) n_gpr, (int) n_fpr);
3809 if (cfun->va_list_gpr_size)
3811 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3812 build_int_cst (NULL_TREE, n_gpr * 8));
3813 TREE_SIDE_EFFECTS (t) = 1;
3814 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3817 if (cfun->va_list_fpr_size)
3819 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3820 build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
3821 TREE_SIDE_EFFECTS (t) = 1;
3822 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3825 /* Find the overflow area. */
3826 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3827 if (words != 0)
3828 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3829 build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
3830 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3831 TREE_SIDE_EFFECTS (t) = 1;
3832 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3834 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
3836 /* Find the register save area.
3837 The function prologue saves it right above the stack frame. */
3838 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3839 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3840 TREE_SIDE_EFFECTS (t) = 1;
3841 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
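/* Worked example (illustrative): for

     int f (int a, const char *fmt, ...);

   called with both named arguments in registers, the code above leaves
     gp_offset          = 2 * 8 = 16,
     fp_offset          = 8 * REGPARM_MAX = 48 (no named SSE arguments),
     overflow_arg_area  = address of the first stack-passed argument,
     reg_save_area      = the save block set up by the prologue.  */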
3845 /* Implement va_arg. */
3847 tree
3848 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3850 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3851 tree f_gpr, f_fpr, f_ovf, f_sav;
3852 tree gpr, fpr, ovf, sav, t;
3853 int size, rsize;
3854 tree lab_false, lab_over = NULL_TREE;
3855 tree addr, t2;
3856 rtx container;
3857 int indirect_p = 0;
3858 tree ptrtype;
3859 enum machine_mode nat_mode;
3861 /* Only 64bit target needs something special. */
3862 if (!TARGET_64BIT)
3863 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3865 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3866 f_fpr = TREE_CHAIN (f_gpr);
3867 f_ovf = TREE_CHAIN (f_fpr);
3868 f_sav = TREE_CHAIN (f_ovf);
3870 valist = build_va_arg_indirect_ref (valist);
3871 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3872 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3873 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3874 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3876 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
3877 if (indirect_p)
3878 type = build_pointer_type (type);
3879 size = int_size_in_bytes (type);
3880 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3882 nat_mode = type_natural_mode (type);
3883 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
3884 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3886 /* Pull the value out of the saved registers. */
3888 addr = create_tmp_var (ptr_type_node, "addr");
3889 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3891 if (container)
3893 int needed_intregs, needed_sseregs;
3894 bool need_temp;
3895 tree int_addr, sse_addr;
3897 lab_false = create_artificial_label ();
3898 lab_over = create_artificial_label ();
3900 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
3902 need_temp = (!REG_P (container)
3903 && ((needed_intregs && TYPE_ALIGN (type) > 64)
3904 || TYPE_ALIGN (type) > 128));
3906 /* In case we are passing a structure, verify that it is a consecutive block
3907 in the register save area. If not, we need to do moves. */
3908 if (!need_temp && !REG_P (container))
3910 /* Verify that all registers are strictly consecutive. */
3911 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3913 int i;
3915 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3917 rtx slot = XVECEXP (container, 0, i);
3918 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3919 || INTVAL (XEXP (slot, 1)) != i * 16)
3920 need_temp = 1;
3923 else
3925 int i;
3927 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3929 rtx slot = XVECEXP (container, 0, i);
3930 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3931 || INTVAL (XEXP (slot, 1)) != i * 8)
3932 need_temp = 1;
3936 if (!need_temp)
3938 int_addr = addr;
3939 sse_addr = addr;
3941 else
3943 int_addr = create_tmp_var (ptr_type_node, "int_addr");
3944 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3945 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3946 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3949 /* First ensure that we fit completely in registers. */
3950 if (needed_intregs)
3952 t = build_int_cst (TREE_TYPE (gpr),
3953 (REGPARM_MAX - needed_intregs + 1) * 8);
3954 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
3955 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3956 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3957 gimplify_and_add (t, pre_p);
3959 if (needed_sseregs)
3961 t = build_int_cst (TREE_TYPE (fpr),
3962 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
3963 + REGPARM_MAX * 8);
3964 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
3965 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3966 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3967 gimplify_and_add (t, pre_p);
3970 /* Compute index to start of area used for integer regs. */
3971 if (needed_intregs)
3973 /* int_addr = gpr + sav; */
3974 t = fold_convert (ptr_type_node, gpr);
3975 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
3976 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
3977 gimplify_and_add (t, pre_p);
3979 if (needed_sseregs)
3981 /* sse_addr = fpr + sav; */
3982 t = fold_convert (ptr_type_node, fpr);
3983 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
3984 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
3985 gimplify_and_add (t, pre_p);
3987 if (need_temp)
3989 int i;
3990 tree temp = create_tmp_var (type, "va_arg_tmp");
3992 /* addr = &temp; */
3993 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
3994 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
3995 gimplify_and_add (t, pre_p);
3997 for (i = 0; i < XVECLEN (container, 0); i++)
3999 rtx slot = XVECEXP (container, 0, i);
4000 rtx reg = XEXP (slot, 0);
4001 enum machine_mode mode = GET_MODE (reg);
4002 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4003 tree addr_type = build_pointer_type (piece_type);
4004 tree src_addr, src;
4005 int src_offset;
4006 tree dest_addr, dest;
4008 if (SSE_REGNO_P (REGNO (reg)))
4010 src_addr = sse_addr;
4011 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4013 else
4015 src_addr = int_addr;
4016 src_offset = REGNO (reg) * 8;
4018 src_addr = fold_convert (addr_type, src_addr);
4019 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4020 size_int (src_offset)));
4021 src = build_va_arg_indirect_ref (src_addr);
4023 dest_addr = fold_convert (addr_type, addr);
4024 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4025 size_int (INTVAL (XEXP (slot, 1)))));
4026 dest = build_va_arg_indirect_ref (dest_addr);
4028 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4029 gimplify_and_add (t, pre_p);
4033 if (needed_intregs)
4035 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4036 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4037 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4038 gimplify_and_add (t, pre_p);
4040 if (needed_sseregs)
4042 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4043 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4044 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4045 gimplify_and_add (t, pre_p);
4048 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4049 gimplify_and_add (t, pre_p);
4051 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4052 append_to_statement_list (t, pre_p);
4055 /* ... otherwise out of the overflow area. */
4057 /* Care for on-stack alignment if needed. */
4058 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
4059 t = ovf;
4060 else
4062 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4063 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4064 build_int_cst (TREE_TYPE (ovf), align - 1));
4065 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
4066 build_int_cst (TREE_TYPE (t), -align));
4068 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4070 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4071 gimplify_and_add (t2, pre_p);
4073 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4074 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4075 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4076 gimplify_and_add (t, pre_p);
4078 if (container)
4080 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4081 append_to_statement_list (t, pre_p);
4084 ptrtype = build_pointer_type (type);
4085 addr = fold_convert (ptrtype, addr);
4087 if (indirect_p)
4088 addr = build_va_arg_indirect_ref (addr);
4089 return build_va_arg_indirect_ref (addr);
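/* The gimplified sequence built above is roughly equivalent to the
   following pseudo-C for a type that can live in registers (a sketch
   only, using the variables named in this function):

     if (gpr >= (REGPARM_MAX - needed_intregs + 1) * 8
         || fpr >= (SSE_REGPARM_MAX - needed_sseregs + 1) * 16 + REGPARM_MAX * 8)
       goto overflow;
     addr = sav + gpr;                          (or sav + fpr)
     gpr += needed_intregs * 8;  fpr += needed_sseregs * 16;
     goto done;
   overflow:
     addr = align (ovf, FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8);
     ovf  = addr + rsize * UNITS_PER_WORD;
   done:
     result = *(type *) addr;  */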
4092 /* Return nonzero if OPNUM's MEM should be matched
4093 in movabs* patterns. */
4096 ix86_check_movabs (rtx insn, int opnum)
4098 rtx set, mem;
4100 set = PATTERN (insn);
4101 if (GET_CODE (set) == PARALLEL)
4102 set = XVECEXP (set, 0, 0);
4103 gcc_assert (GET_CODE (set) == SET);
4104 mem = XEXP (set, opnum);
4105 while (GET_CODE (mem) == SUBREG)
4106 mem = SUBREG_REG (mem);
4107 gcc_assert (GET_CODE (mem) == MEM);
4108 return (volatile_ok || !MEM_VOLATILE_P (mem));
4111 /* Initialize the table of extra 80387 mathematical constants. */
4113 static void
4114 init_ext_80387_constants (void)
4116 static const char * cst[5] =
4118 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4119 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4120 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4121 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4122 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4124 int i;
4126 for (i = 0; i < 5; i++)
4128 real_from_string (&ext_80387_constants_table[i], cst[i]);
4129 /* Ensure each constant is rounded to XFmode precision. */
4130 real_convert (&ext_80387_constants_table[i],
4131 XFmode, &ext_80387_constants_table[i]);
4134 ext_80387_constants_init = 1;
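/* For reference, the table entries above are log10(2), ln(2), log2(e),
   log2(10) and pi -- the values loaded by fldlg2, fldln2, fldl2e, fldl2t
   and fldpi respectively.  */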
4137 /* Return a nonzero code identifying the special 80387 instruction that can
4138 load the constant X, 0 if there is none, or -1 if X is not an FP constant. */
4141 standard_80387_constant_p (rtx x)
4143 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4144 return -1;
4146 if (x == CONST0_RTX (GET_MODE (x)))
4147 return 1;
4148 if (x == CONST1_RTX (GET_MODE (x)))
4149 return 2;
4151 /* For XFmode constants, try to find a special 80387 instruction when
4152 optimizing for size or on those CPUs that benefit from them. */
4153 if (GET_MODE (x) == XFmode
4154 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4156 REAL_VALUE_TYPE r;
4157 int i;
4159 if (! ext_80387_constants_init)
4160 init_ext_80387_constants ();
4162 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4163 for (i = 0; i < 5; i++)
4164 if (real_identical (&r, &ext_80387_constants_table[i]))
4165 return i + 3;
4168 return 0;
4171 /* Return the opcode of the special instruction to be used to load
4172 the constant X. */
4174 const char *
4175 standard_80387_constant_opcode (rtx x)
4177 switch (standard_80387_constant_p (x))
4179 case 1:
4180 return "fldz";
4181 case 2:
4182 return "fld1";
4183 case 3:
4184 return "fldlg2";
4185 case 4:
4186 return "fldln2";
4187 case 5:
4188 return "fldl2e";
4189 case 6:
4190 return "fldl2t";
4191 case 7:
4192 return "fldpi";
4193 default:
4194 gcc_unreachable ();
4198 /* Return the CONST_DOUBLE representing the 80387 constant that is
4199 loaded by the specified special instruction. The argument IDX
4200 matches the return value from standard_80387_constant_p. */
4203 standard_80387_constant_rtx (int idx)
4205 int i;
4207 if (! ext_80387_constants_init)
4208 init_ext_80387_constants ();
4210 switch (idx)
4212 case 3:
4213 case 4:
4214 case 5:
4215 case 6:
4216 case 7:
4217 i = idx - 3;
4218 break;
4220 default:
4221 gcc_unreachable ();
4224 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4225 XFmode);
4228 /* Return 1 if X is an FP constant we can load into an SSE register without using memory.
4231 standard_sse_constant_p (rtx x)
4233 if (x == const0_rtx)
4234 return 1;
4235 return (x == CONST0_RTX (GET_MODE (x)));
4238 /* Returns 1 if OP contains a symbol reference. */
4241 symbolic_reference_mentioned_p (rtx op)
4243 const char *fmt;
4244 int i;
4246 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4247 return 1;
4249 fmt = GET_RTX_FORMAT (GET_CODE (op));
4250 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4252 if (fmt[i] == 'E')
4254 int j;
4256 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4257 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4258 return 1;
4261 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4262 return 1;
4265 return 0;
4268 /* Return 1 if it is appropriate to emit `ret' instructions in the
4269 body of a function. Do this only if the epilogue is simple, needing a
4270 couple of insns. Prior to reloading, we can't tell how many registers
4271 must be saved, so return 0 then. Return 0 if there is no frame
4272 marker to de-allocate. */
4275 ix86_can_use_return_insn_p (void)
4277 struct ix86_frame frame;
4279 if (! reload_completed || frame_pointer_needed)
4280 return 0;
4282 /* Don't allow more than 32K bytes of pop, since that's all we can do
4283 with one instruction. */
4284 if (current_function_pops_args
4285 && current_function_args_size >= 32768)
4286 return 0;
4288 ix86_compute_frame_layout (&frame);
4289 return frame.to_allocate == 0 && frame.nregs == 0;
4292 /* Value should be nonzero if functions must have frame pointers.
4293 Zero means the frame pointer need not be set up (and parms may
4294 be accessed via the stack pointer) in functions that seem suitable. */
4297 ix86_frame_pointer_required (void)
4299 /* If we accessed previous frames, then the generated code expects
4300 to be able to access the saved ebp value in our frame. */
4301 if (cfun->machine->accesses_prev_frame)
4302 return 1;
4304 /* Several x86 OSes need a frame pointer for other reasons,
4305 usually pertaining to setjmp. */
4306 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4307 return 1;
4309 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4310 the frame pointer by default. Turn it back on now if we've not
4311 got a leaf function. */
4312 if (TARGET_OMIT_LEAF_FRAME_POINTER
4313 && (!current_function_is_leaf))
4314 return 1;
4316 if (current_function_profile)
4317 return 1;
4319 return 0;
4322 /* Record that the current function accesses previous call frames. */
4324 void
4325 ix86_setup_frame_addresses (void)
4327 cfun->machine->accesses_prev_frame = 1;
4330 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4331 # define USE_HIDDEN_LINKONCE 1
4332 #else
4333 # define USE_HIDDEN_LINKONCE 0
4334 #endif
4336 static int pic_labels_used;
4338 /* Fills in the label name that should be used for a pc thunk for
4339 the given register. */
4341 static void
4342 get_pc_thunk_name (char name[32], unsigned int regno)
4344 if (USE_HIDDEN_LINKONCE)
4345 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4346 else
4347 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4351 /* This function generates the pc thunks used for -fpic: for each register,
4352 code that loads it with the return address of the caller and then returns. */
4354 void
4355 ix86_file_end (void)
4357 rtx xops[2];
4358 int regno;
4360 for (regno = 0; regno < 8; ++regno)
4362 char name[32];
4364 if (! ((pic_labels_used >> regno) & 1))
4365 continue;
4367 get_pc_thunk_name (name, regno);
4369 if (USE_HIDDEN_LINKONCE)
4371 tree decl;
4373 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4374 error_mark_node);
4375 TREE_PUBLIC (decl) = 1;
4376 TREE_STATIC (decl) = 1;
4377 DECL_ONE_ONLY (decl) = 1;
4379 (*targetm.asm_out.unique_section) (decl, 0);
4380 named_section (decl, NULL, 0);
4382 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4383 fputs ("\t.hidden\t", asm_out_file);
4384 assemble_name (asm_out_file, name);
4385 fputc ('\n', asm_out_file);
4386 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4388 else
4390 text_section ();
4391 ASM_OUTPUT_LABEL (asm_out_file, name);
4394 xops[0] = gen_rtx_REG (SImode, regno);
4395 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4396 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4397 output_asm_insn ("ret", xops);
4400 if (NEED_INDICATE_EXEC_STACK)
4401 file_end_indicate_exec_stack ();
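/* For illustration, the pc thunk emitted above for, say, %ebx amounts to:

     __i686.get_pc_thunk.bx:
        movl  (%esp), %ebx
        ret  */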
4404 /* Emit code for the SET_GOT patterns. */
4406 const char *
4407 output_set_got (rtx dest)
4409 rtx xops[3];
4411 xops[0] = dest;
4412 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4414 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4416 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4418 if (!flag_pic)
4419 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4420 else
4421 output_asm_insn ("call\t%a2", xops);
4423 #if TARGET_MACHO
4424 /* Output the "canonical" label name ("Lxx$pb") here too. This
4425 is what will be referred to by the Mach-O PIC subsystem. */
4426 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4427 #endif
4428 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4429 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4431 if (flag_pic)
4432 output_asm_insn ("pop{l}\t%0", xops);
4434 else
4436 char name[32];
4437 get_pc_thunk_name (name, REGNO (dest));
4438 pic_labels_used |= 1 << REGNO (dest);
4440 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4441 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4442 output_asm_insn ("call\t%X2", xops);
4445 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4446 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4447 else if (!TARGET_MACHO)
4448 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
4450 return "";
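/* For illustration, with the PIC register in %ebx the code above emits
   roughly one of these sequences (a sketch):

       call  __i686.get_pc_thunk.bx          # deep branch prediction
       addl  $_GLOBAL_OFFSET_TABLE_, %ebx

       call  .L2                             # otherwise
   .L2: popl  %ebx
       addl  $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx  */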
4453 /* Generate a "push" pattern for input ARG. */
4455 static rtx
4456 gen_push (rtx arg)
4458 return gen_rtx_SET (VOIDmode,
4459 gen_rtx_MEM (Pmode,
4460 gen_rtx_PRE_DEC (Pmode,
4461 stack_pointer_rtx)),
4462 arg);
4465 /* Return the regno of an unused call-clobbered register if one is available
4466 for the entire function, or INVALID_REGNUM otherwise. */
4468 static unsigned int
4469 ix86_select_alt_pic_regnum (void)
4471 if (current_function_is_leaf && !current_function_profile)
4473 int i;
4474 for (i = 2; i >= 0; --i)
4475 if (!regs_ever_live[i])
4476 return i;
4479 return INVALID_REGNUM;
4482 /* Return 1 if we need to save REGNO. */
4483 static int
4484 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4486 if (pic_offset_table_rtx
4487 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4488 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4489 || current_function_profile
4490 || current_function_calls_eh_return
4491 || current_function_uses_const_pool))
4493 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4494 return 0;
4495 return 1;
4498 if (current_function_calls_eh_return && maybe_eh_return)
4500 unsigned i;
4501 for (i = 0; ; i++)
4503 unsigned test = EH_RETURN_DATA_REGNO (i);
4504 if (test == INVALID_REGNUM)
4505 break;
4506 if (test == regno)
4507 return 1;
4511 return (regs_ever_live[regno]
4512 && !call_used_regs[regno]
4513 && !fixed_regs[regno]
4514 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4517 /* Return number of registers to be saved on the stack. */
4519 static int
4520 ix86_nsaved_regs (void)
4522 int nregs = 0;
4523 int regno;
4525 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4526 if (ix86_save_reg (regno, true))
4527 nregs++;
4528 return nregs;
4531 /* Return the offset between two registers, one to be eliminated, and the other
4532 its replacement, at the start of a routine. */
4534 HOST_WIDE_INT
4535 ix86_initial_elimination_offset (int from, int to)
4537 struct ix86_frame frame;
4538 ix86_compute_frame_layout (&frame);
4540 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4541 return frame.hard_frame_pointer_offset;
4542 else if (from == FRAME_POINTER_REGNUM
4543 && to == HARD_FRAME_POINTER_REGNUM)
4544 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4545 else
4547 gcc_assert (to == STACK_POINTER_REGNUM);
4549 if (from == ARG_POINTER_REGNUM)
4550 return frame.stack_pointer_offset;
4552 gcc_assert (from == FRAME_POINTER_REGNUM);
4553 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4557 /* Fill the structure ix86_frame describing the frame of the current function. */
4559 static void
4560 ix86_compute_frame_layout (struct ix86_frame *frame)
4562 HOST_WIDE_INT total_size;
4563 unsigned int stack_alignment_needed;
4564 HOST_WIDE_INT offset;
4565 unsigned int preferred_alignment;
4566 HOST_WIDE_INT size = get_frame_size ();
4568 frame->nregs = ix86_nsaved_regs ();
4569 total_size = size;
4571 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4572 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4574 /* During reload iterations the number of registers saved can change.
4575 Recompute the value as needed. Do not recompute when the number of registers
4576 didn't change, as reload makes multiple calls to this function and does not
4577 expect the decision to change within a single iteration. */
4578 if (!optimize_size
4579 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4581 int count = frame->nregs;
4583 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4584 /* The fast prologue uses moves instead of pushes to save registers. This
4585 is significantly longer, but it also executes faster, as modern hardware
4586 can execute the moves in parallel but cannot do so for push/pop.
4588 Be careful about choosing which prologue to emit: when a function takes
4589 many instructions to execute we may use the slow version, as we may when
4590 the function is known to be outside a hot spot (known only with profile
4591 feedback). Weight the size of the function by the number of registers
4592 to save, as it is cheap to use one or two push instructions but very
4593 slow to use many of them. */
4594 if (count)
4595 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4596 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4597 || (flag_branch_probabilities
4598 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4599 cfun->machine->use_fast_prologue_epilogue = false;
4600 else
4601 cfun->machine->use_fast_prologue_epilogue
4602 = !expensive_function_p (count);
4604 if (TARGET_PROLOGUE_USING_MOVE
4605 && cfun->machine->use_fast_prologue_epilogue)
4606 frame->save_regs_using_mov = true;
4607 else
4608 frame->save_regs_using_mov = false;
4611 /* Skip return address and saved base pointer. */
4612 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4614 frame->hard_frame_pointer_offset = offset;
4616 /* Do some sanity checking of stack_alignment_needed and
4617 preferred_alignment, since the i386 port is the only one using these
4618 features, and they may break easily. */
4620 gcc_assert (!size || stack_alignment_needed);
4621 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
4622 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
4623 gcc_assert (stack_alignment_needed
4624 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
4626 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4627 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4629 /* Register save area */
4630 offset += frame->nregs * UNITS_PER_WORD;
4632 /* Va-arg area */
4633 if (ix86_save_varrargs_registers)
4635 offset += X86_64_VARARGS_SIZE;
4636 frame->va_arg_size = X86_64_VARARGS_SIZE;
4638 else
4639 frame->va_arg_size = 0;
4641 /* Align start of frame for local function. */
4642 frame->padding1 = ((offset + stack_alignment_needed - 1)
4643 & -stack_alignment_needed) - offset;
4645 offset += frame->padding1;
4647 /* Frame pointer points here. */
4648 frame->frame_pointer_offset = offset;
4650 offset += size;
4652 /* Add the outgoing arguments area. It can be skipped if we eliminated
4653 all the function calls as dead code.
4654 Skipping is, however, impossible when the function calls alloca: the
4655 alloca expander assumes that the last current_function_outgoing_args_size
4656 bytes of the stack frame are unused. */
4657 if (ACCUMULATE_OUTGOING_ARGS
4658 && (!current_function_is_leaf || current_function_calls_alloca))
4660 offset += current_function_outgoing_args_size;
4661 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4663 else
4664 frame->outgoing_arguments_size = 0;
4666 /* Align stack boundary. Only needed if we're calling another function
4667 or using alloca. */
4668 if (!current_function_is_leaf || current_function_calls_alloca)
4669 frame->padding2 = ((offset + preferred_alignment - 1)
4670 & -preferred_alignment) - offset;
4671 else
4672 frame->padding2 = 0;
4674 offset += frame->padding2;
4676 /* We've reached end of stack frame. */
4677 frame->stack_pointer_offset = offset;
4679 /* Size the prologue needs to allocate. */
4680 frame->to_allocate =
4681 (size + frame->padding1 + frame->padding2
4682 + frame->outgoing_arguments_size + frame->va_arg_size);
4684 if ((!frame->to_allocate && frame->nregs <= 1)
4685 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
4686 frame->save_regs_using_mov = false;
4688 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
4689 && current_function_is_leaf)
4691 frame->red_zone_size = frame->to_allocate;
4692 if (frame->save_regs_using_mov)
4693 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
4694 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4695 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4697 else
4698 frame->red_zone_size = 0;
4699 frame->to_allocate -= frame->red_zone_size;
4700 frame->stack_pointer_offset -= frame->red_zone_size;
4701 #if 0
4702 fprintf (stderr, "nregs: %i\n", frame->nregs);
4703 fprintf (stderr, "size: %i\n", size);
4704 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4705 fprintf (stderr, "padding1: %i\n", frame->padding1);
4706 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4707 fprintf (stderr, "padding2: %i\n", frame->padding2);
4708 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4709 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4710 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4711 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4712 frame->hard_frame_pointer_offset);
4713 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4714 #endif
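/* Sketch of the frame layout computed above, from higher to lower
   addresses (illustrative only):

     return address
     saved %ebp (if frame_pointer_needed)    <- hard_frame_pointer_offset
     saved registers (nregs words)
     va-arg register save area
     padding1
     local variables (get_frame_size ())     <- frame_pointer_offset
     outgoing argument area
     padding2                                <- stack_pointer_offset  */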
4717 /* Emit code to save registers in the prologue. */
4719 static void
4720 ix86_emit_save_regs (void)
4722 int regno;
4723 rtx insn;
4725 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4726 if (ix86_save_reg (regno, true))
4728 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4729 RTX_FRAME_RELATED_P (insn) = 1;
4733 /* Emit code to save registers using MOV insns. First register
4734 is saved at POINTER + OFFSET. */
4735 static void
4736 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
4738 int regno;
4739 rtx insn;
4741 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4742 if (ix86_save_reg (regno, true))
4744 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4745 Pmode, offset),
4746 gen_rtx_REG (Pmode, regno));
4747 RTX_FRAME_RELATED_P (insn) = 1;
4748 offset += UNITS_PER_WORD;
4752 /* Expand prologue or epilogue stack adjustment.
4753 The pattern exists to put a dependency on all ebp-based memory accesses.
4754 STYLE should be negative if instructions should be marked as frame related,
4755 zero if the %r11 register is live and cannot be freely used, and positive
4756 otherwise. */
4758 static void
4759 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
4761 rtx insn;
4763 if (! TARGET_64BIT)
4764 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
4765 else if (x86_64_immediate_operand (offset, DImode))
4766 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
4767 else
4769 rtx r11;
4770 /* r11 is used by indirect sibcall return as well, set before the
4771 epilogue and used after the epilogue. ATM indirect sibcall
4772 shouldn't be used together with huge frame sizes in one
4773 function because of the frame_size check in sibcall.c. */
4774 gcc_assert (style);
4775 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4776 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
4777 if (style < 0)
4778 RTX_FRAME_RELATED_P (insn) = 1;
4779 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
4780 offset));
4782 if (style < 0)
4783 RTX_FRAME_RELATED_P (insn) = 1;
4786 /* Expand the prologue into a bunch of separate insns. */
4788 void
4789 ix86_expand_prologue (void)
4791 rtx insn;
4792 bool pic_reg_used;
4793 struct ix86_frame frame;
4794 HOST_WIDE_INT allocate;
4796 ix86_compute_frame_layout (&frame);
4798 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4799 slower on all targets. Also sdb doesn't like it. */
4801 if (frame_pointer_needed)
4803 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4804 RTX_FRAME_RELATED_P (insn) = 1;
4806 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4807 RTX_FRAME_RELATED_P (insn) = 1;
4810 allocate = frame.to_allocate;
4812 if (!frame.save_regs_using_mov)
4813 ix86_emit_save_regs ();
4814 else
4815 allocate += frame.nregs * UNITS_PER_WORD;
4817 /* When using the red zone we may start saving registers before allocating
4818 the stack frame, saving one cycle of the prologue. */
4819 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
4820 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
4821 : stack_pointer_rtx,
4822 -frame.nregs * UNITS_PER_WORD);
4824 if (allocate == 0)
4826 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4827 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4828 GEN_INT (-allocate), -1);
4829 else
4831 /* Only valid for Win32. */
4832 rtx eax = gen_rtx_REG (SImode, 0);
4833 bool eax_live = ix86_eax_live_at_start_p ();
4834 rtx t;
4836 gcc_assert (!TARGET_64BIT);
4838 if (eax_live)
4840 emit_insn (gen_push (eax));
4841 allocate -= 4;
4844 emit_move_insn (eax, GEN_INT (allocate));
4846 insn = emit_insn (gen_allocate_stack_worker (eax));
4847 RTX_FRAME_RELATED_P (insn) = 1;
4848 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
4849 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
4850 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
4851 t, REG_NOTES (insn));
4853 if (eax_live)
4855 if (frame_pointer_needed)
4856 t = plus_constant (hard_frame_pointer_rtx,
4857 allocate
4858 - frame.to_allocate
4859 - frame.nregs * UNITS_PER_WORD);
4860 else
4861 t = plus_constant (stack_pointer_rtx, allocate);
4862 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
4866 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
4868 if (!frame_pointer_needed || !frame.to_allocate)
4869 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4870 else
4871 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4872 -frame.nregs * UNITS_PER_WORD);
4875 pic_reg_used = false;
4876 if (pic_offset_table_rtx
4877 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4878 || current_function_profile))
4880 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4882 if (alt_pic_reg_used != INVALID_REGNUM)
4883 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4885 pic_reg_used = true;
4888 if (pic_reg_used)
4890 if (TARGET_64BIT)
4891 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
4892 else
4893 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4895 /* Even with accurate pre-reload life analysis, we can wind up
4896 deleting all references to the pic register after reload.
4897 Consider if cross-jumping unifies two sides of a branch
4898 controlled by a comparison vs the only read from a global.
4899 In which case, allow the set_got to be deleted, though we're
4900 too late to do anything about the ebx save in the prologue. */
4901 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4904 /* Prevent function calls from being scheduled before the call to mcount.
4905 In the pic_reg_used case, make sure that the got load isn't deleted. */
4906 if (current_function_profile)
4907 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
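/* A typical frame-pointer prologue produced by the expander above, with
   the classic push-based register save, looks like (illustrative):

     pushl  %ebp
     movl   %esp, %ebp
     pushl  %ebx              # ix86_emit_save_regs
     subl   $N, %esp          # N == frame.to_allocate  */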
4910 /* Emit code to restore saved registers using MOV insns. First register
4911 is restored from POINTER + OFFSET. */
4912 static void
4913 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
4914 int maybe_eh_return)
4916 int regno;
4917 rtx base_address = gen_rtx_MEM (Pmode, pointer);
4919 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4920 if (ix86_save_reg (regno, maybe_eh_return))
4922 /* Ensure that adjust_address won't be forced to produce a pointer
4923 out of the range allowed by the x86-64 instruction set. */
4924 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
4926 rtx r11;
4928 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4929 emit_move_insn (r11, GEN_INT (offset));
4930 emit_insn (gen_adddi3 (r11, r11, pointer));
4931 base_address = gen_rtx_MEM (Pmode, r11);
4932 offset = 0;
4934 emit_move_insn (gen_rtx_REG (Pmode, regno),
4935 adjust_address (base_address, Pmode, offset));
4936 offset += UNITS_PER_WORD;
4940 /* Restore function stack, frame, and registers. */
4942 void
4943 ix86_expand_epilogue (int style)
4945 int regno;
4946 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4947 struct ix86_frame frame;
4948 HOST_WIDE_INT offset;
4950 ix86_compute_frame_layout (&frame);
4952 /* Calculate start of saved registers relative to ebp. Special care
4953 must be taken for the normal return case of a function using
4954 eh_return: the eax and edx registers are marked as saved, but not
4955 restored along this path. */
4956 offset = frame.nregs;
4957 if (current_function_calls_eh_return && style != 2)
4958 offset -= 2;
4959 offset *= -UNITS_PER_WORD;
4961 /* If we're only restoring one register and sp is not valid, then
4962 use a move instruction to restore the register, since it's
4963 less work than reloading sp and popping the register.
4965 The default code results in a stack adjustment using an add/lea instruction,
4966 while this code results in a LEAVE instruction (or discrete equivalent),
4967 so it is profitable in some other cases as well, especially when there
4968 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4969 is set and there is exactly one register to pop. This heuristic may need
4970 some tuning in the future. */
4971 if ((!sp_valid && frame.nregs <= 1)
4972 || (TARGET_EPILOGUE_USING_MOVE
4973 && cfun->machine->use_fast_prologue_epilogue
4974 && (frame.nregs > 1 || frame.to_allocate))
4975 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4976 || (frame_pointer_needed && TARGET_USE_LEAVE
4977 && cfun->machine->use_fast_prologue_epilogue
4978 && frame.nregs == 1)
4979 || current_function_calls_eh_return)
4981 /* Restore registers. We can use ebp or esp to address the memory
4982 locations. If both are available, default to ebp, since offsets
4983 are known to be small. The only exception is esp pointing directly to the
4984 end of the block of saved registers, where we may simplify the addressing
4985 mode. */
4987 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4988 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4989 frame.to_allocate, style == 2);
4990 else
4991 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4992 offset, style == 2);
4994 /* eh_return epilogues need %ecx added to the stack pointer. */
4995 if (style == 2)
4997 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4999 if (frame_pointer_needed)
5001 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5002 tmp = plus_constant (tmp, UNITS_PER_WORD);
5003 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5005 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5006 emit_move_insn (hard_frame_pointer_rtx, tmp);
5008 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5009 const0_rtx, style);
5011 else
5013 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5014 tmp = plus_constant (tmp, (frame.to_allocate
5015 + frame.nregs * UNITS_PER_WORD));
5016 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5019 else if (!frame_pointer_needed)
5020 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5021 GEN_INT (frame.to_allocate
5022 + frame.nregs * UNITS_PER_WORD),
5023 style);
5024 /* If not an i386, mov & pop is faster than "leave". */
5025 else if (TARGET_USE_LEAVE || optimize_size
5026 || !cfun->machine->use_fast_prologue_epilogue)
5027 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5028 else
5030 pro_epilogue_adjust_stack (stack_pointer_rtx,
5031 hard_frame_pointer_rtx,
5032 const0_rtx, style);
5033 if (TARGET_64BIT)
5034 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5035 else
5036 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5039 else
5041 /* First step is to deallocate the stack frame so that we can
5042 pop the registers. */
5043 if (!sp_valid)
5045 gcc_assert (frame_pointer_needed);
5046 pro_epilogue_adjust_stack (stack_pointer_rtx,
5047 hard_frame_pointer_rtx,
5048 GEN_INT (offset), style);
5050 else if (frame.to_allocate)
5051 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5052 GEN_INT (frame.to_allocate), style);
5054 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5055 if (ix86_save_reg (regno, false))
5057 if (TARGET_64BIT)
5058 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5059 else
5060 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5062 if (frame_pointer_needed)
5064 /* Leave results in shorter dependency chains on CPUs that are
5065 able to grok it fast. */
5066 if (TARGET_USE_LEAVE)
5067 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5068 else if (TARGET_64BIT)
5069 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5070 else
5071 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5075 /* Sibcall epilogues don't want a return instruction. */
5076 if (style == 0)
5077 return;
5079 if (current_function_pops_args && current_function_args_size)
5081 rtx popc = GEN_INT (current_function_pops_args);
5083 /* i386 can only pop 64K bytes. If asked to pop more, pop the
5084 return address, do an explicit add, and jump indirectly to the
5085 caller. */
5087 if (current_function_pops_args >= 65536)
5089 rtx ecx = gen_rtx_REG (SImode, 2);
5091 /* There is no "pascal" calling convention in the 64bit ABI. */
5092 gcc_assert (!TARGET_64BIT);
5094 emit_insn (gen_popsi1 (ecx));
5095 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5096 emit_jump_insn (gen_return_indirect_internal (ecx));
5098 else
5099 emit_jump_insn (gen_return_pop_internal (popc));
5101 else
5102 emit_jump_insn (gen_return_internal ());
5105 /* Undo the PIC register renumbering done while compiling the function. */
5107 static void
5108 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5109 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5111 if (pic_offset_table_rtx)
5112 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5115 /* Extract the parts of an RTL expression that is a valid memory address
5116 for an instruction. Return 0 if the structure of the address is
5117 grossly off. Return -1 if the address contains ASHIFT, so it is not
5118 strictly valid, but is still used for computing the length of the lea instruction. */
5121 ix86_decompose_address (rtx addr, struct ix86_address *out)
5123 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
5124 rtx base_reg, index_reg;
5125 HOST_WIDE_INT scale = 1;
5126 rtx scale_rtx = NULL_RTX;
5127 int retval = 1;
5128 enum ix86_address_seg seg = SEG_DEFAULT;
5130 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5131 base = addr;
5132 else if (GET_CODE (addr) == PLUS)
5134 rtx addends[4], op;
5135 int n = 0, i;
5137 op = addr;
5140 if (n >= 4)
5141 return 0;
5142 addends[n++] = XEXP (op, 1);
5143 op = XEXP (op, 0);
5145 while (GET_CODE (op) == PLUS);
5146 if (n >= 4)
5147 return 0;
5148 addends[n] = op;
5150 for (i = n; i >= 0; --i)
5152 op = addends[i];
5153 switch (GET_CODE (op))
5155 case MULT:
5156 if (index)
5157 return 0;
5158 index = XEXP (op, 0);
5159 scale_rtx = XEXP (op, 1);
5160 break;
5162 case UNSPEC:
5163 if (XINT (op, 1) == UNSPEC_TP
5164 && TARGET_TLS_DIRECT_SEG_REFS
5165 && seg == SEG_DEFAULT)
5166 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5167 else
5168 return 0;
5169 break;
5171 case REG:
5172 case SUBREG:
5173 if (!base)
5174 base = op;
5175 else if (!index)
5176 index = op;
5177 else
5178 return 0;
5179 break;
5181 case CONST:
5182 case CONST_INT:
5183 case SYMBOL_REF:
5184 case LABEL_REF:
5185 if (disp)
5186 return 0;
5187 disp = op;
5188 break;
5190 default:
5191 return 0;
5195 else if (GET_CODE (addr) == MULT)
5197 index = XEXP (addr, 0); /* index*scale */
5198 scale_rtx = XEXP (addr, 1);
5200 else if (GET_CODE (addr) == ASHIFT)
5202 rtx tmp;
5204 /* We're called for lea too, which implements ashift on occasion. */
5205 index = XEXP (addr, 0);
5206 tmp = XEXP (addr, 1);
5207 if (GET_CODE (tmp) != CONST_INT)
5208 return 0;
5209 scale = INTVAL (tmp);
5210 if ((unsigned HOST_WIDE_INT) scale > 3)
5211 return 0;
5212 scale = 1 << scale;
5213 retval = -1;
5215 else
5216 disp = addr; /* displacement */
5218 /* Extract the integral value of scale. */
5219 if (scale_rtx)
5221 if (GET_CODE (scale_rtx) != CONST_INT)
5222 return 0;
5223 scale = INTVAL (scale_rtx);
5226 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
5227 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
5229 /* Allow arg pointer and stack pointer as index if there is no scaling. */
5230 if (base_reg && index_reg && scale == 1
5231 && (index_reg == arg_pointer_rtx
5232 || index_reg == frame_pointer_rtx
5233 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
5235 rtx tmp;
5236 tmp = base, base = index, index = tmp;
5237 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
5240 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5241 if ((base_reg == hard_frame_pointer_rtx
5242 || base_reg == frame_pointer_rtx
5243 || base_reg == arg_pointer_rtx) && !disp)
5244 disp = const0_rtx;
5246 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
5247 Avoid this by transforming to [%esi+0]. */
5248 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5249 && base_reg && !index_reg && !disp
5250 && REG_P (base_reg)
5251 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
5252 disp = const0_rtx;
5254 /* Special case: encode reg+reg instead of reg*2. */
5255 if (!base && index && scale && scale == 2)
5256 base = index, base_reg = index_reg, scale = 1;
5258 /* Special case: scaling cannot be encoded without base or displacement. */
5259 if (!base && !disp && index && scale != 1)
5260 disp = const0_rtx;
5262 out->base = base;
5263 out->index = index;
5264 out->disp = disp;
5265 out->scale = scale;
5266 out->seg = seg;
5268 return retval;
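/* Example (illustrative): the address

     (plus (plus (mult (reg B) (const_int 4)) (reg A)) (const_int 12))

   i.e. 12(%eax,%ebx,4) with A in %eax and B in %ebx, decomposes into
   base = A, index = B, scale = 4, disp = 12, seg = SEG_DEFAULT.  */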
5271 /* Return cost of the memory address x.
5272 For i386, it is better to use a complex address than let gcc copy
5273 the address into a reg and make a new pseudo. But not if the address
5274 requires two regs - that would mean more pseudos with longer
5275 lifetimes. */
5276 static int
5277 ix86_address_cost (rtx x)
5279 struct ix86_address parts;
5280 int cost = 1;
5281 int ok = ix86_decompose_address (x, &parts);
5283 gcc_assert (ok);
5285 if (parts.base && GET_CODE (parts.base) == SUBREG)
5286 parts.base = SUBREG_REG (parts.base);
5287 if (parts.index && GET_CODE (parts.index) == SUBREG)
5288 parts.index = SUBREG_REG (parts.index);
5290 /* More complex memory references are better. */
5291 if (parts.disp && parts.disp != const0_rtx)
5292 cost--;
5293 if (parts.seg != SEG_DEFAULT)
5294 cost--;
5296 /* Attempt to minimize number of registers in the address. */
5297 if ((parts.base
5298 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5299 || (parts.index
5300 && (!REG_P (parts.index)
5301 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5302 cost++;
5304 if (parts.base
5305 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5306 && parts.index
5307 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5308 && parts.base != parts.index)
5309 cost++;
5311 /* The AMD K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
5312 since its predecode logic can't detect the length of such instructions
5313 and decoding degenerates to the vector decoder. Increase the cost of such
5314 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5315 to split such addresses or even to refuse them entirely.
5317 The following addressing modes are affected:
5318 [base+scale*index]
5319 [scale*index+disp]
5320 [base+index]
5322 The first and last cases may be avoidable by explicitly coding the zero into
5323 the memory address, but I don't have an AMD K6 machine handy to check this
5324 theory. */
5326 if (TARGET_K6
5327 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5328 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5329 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5330 cost += 10;
5332 return cost;
5335 /* If X is a machine specific address (i.e. a symbol or label being
5336 referenced as a displacement from the GOT implemented using an
5337 UNSPEC), then return the base term. Otherwise return X. */
5340 ix86_find_base_term (rtx x)
5342 rtx term;
5344 if (TARGET_64BIT)
5346 if (GET_CODE (x) != CONST)
5347 return x;
5348 term = XEXP (x, 0);
5349 if (GET_CODE (term) == PLUS
5350 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5351 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5352 term = XEXP (term, 0);
5353 if (GET_CODE (term) != UNSPEC
5354 || XINT (term, 1) != UNSPEC_GOTPCREL)
5355 return x;
5357 term = XVECEXP (term, 0, 0);
5359 if (GET_CODE (term) != SYMBOL_REF
5360 && GET_CODE (term) != LABEL_REF)
5361 return x;
5363 return term;
5366 term = ix86_delegitimize_address (x);
5368 if (GET_CODE (term) != SYMBOL_REF
5369 && GET_CODE (term) != LABEL_REF)
5370 return x;
5372 return term;
5375 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
5376 this is used to form addresses to local data when -fPIC is in
5377 use. */
5379 static bool
5380 darwin_local_data_pic (rtx disp)
5382 if (GET_CODE (disp) == MINUS)
5384 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5385 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5386 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5388 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5389 if (! strcmp (sym_name, "<pic base>"))
5390 return true;
5394 return false;
5397 /* Determine if a given RTX is a valid constant. We already know this
5398 satisfies CONSTANT_P. */
5400 bool
5401 legitimate_constant_p (rtx x)
5403 switch (GET_CODE (x))
5405 case CONST:
5406 x = XEXP (x, 0);
5408 if (GET_CODE (x) == PLUS)
5410 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5411 return false;
5412 x = XEXP (x, 0);
5415 if (TARGET_MACHO && darwin_local_data_pic (x))
5416 return true;
5418 /* Only some unspecs are valid as "constants". */
5419 if (GET_CODE (x) == UNSPEC)
5420 switch (XINT (x, 1))
5422 case UNSPEC_GOTOFF:
5423 return TARGET_64BIT;
5424 case UNSPEC_TPOFF:
5425 case UNSPEC_NTPOFF:
5426 x = XVECEXP (x, 0, 0);
5427 return (GET_CODE (x) == SYMBOL_REF
5428 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
5429 case UNSPEC_DTPOFF:
5430 x = XVECEXP (x, 0, 0);
5431 return (GET_CODE (x) == SYMBOL_REF
5432 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
5433 default:
5434 return false;
5437 /* We must have drilled down to a symbol. */
5438 if (GET_CODE (x) == LABEL_REF)
5439 return true;
5440 if (GET_CODE (x) != SYMBOL_REF)
5441 return false;
5442 /* FALLTHRU */
5444 case SYMBOL_REF:
5445 /* TLS symbols are never valid. */
5446 if (SYMBOL_REF_TLS_MODEL (x))
5447 return false;
5448 break;
5450 default:
5451 break;
5454 /* Otherwise we handle everything else in the move patterns. */
5455 return true;
5458 /* Determine if it's legal to put X into the constant pool. This
5459 is not possible for the address of thread-local symbols, which
5460 is checked above. */
5462 static bool
5463 ix86_cannot_force_const_mem (rtx x)
5465 return !legitimate_constant_p (x);
5468 /* Determine if a given RTX is a valid constant address. */
5470 bool
5471 constant_address_p (rtx x)
5473 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5476 /* Nonzero if the constant value X is a legitimate general operand
5477 when generating PIC code. It is given that flag_pic is on and
5478 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5480 bool
5481 legitimate_pic_operand_p (rtx x)
5483 rtx inner;
5485 switch (GET_CODE (x))
5487 case CONST:
5488 inner = XEXP (x, 0);
5489 if (GET_CODE (inner) == PLUS
5490 && GET_CODE (XEXP (inner, 1)) == CONST_INT)
5491 inner = XEXP (inner, 0);
5493 /* Only some unspecs are valid as "constants". */
5494 if (GET_CODE (inner) == UNSPEC)
5495 switch (XINT (inner, 1))
5497 case UNSPEC_GOTOFF:
5498 return TARGET_64BIT;
5499 case UNSPEC_TPOFF:
5500 x = XVECEXP (inner, 0, 0);
5501 return (GET_CODE (x) == SYMBOL_REF
5502 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
5503 default:
5504 return false;
5506 /* FALLTHRU */
5508 case SYMBOL_REF:
5509 case LABEL_REF:
5510 return legitimate_pic_address_disp_p (x);
5512 default:
5513 return true;
5517 /* Determine if a given CONST RTX is a valid memory displacement
5518 in PIC mode. */
5521 legitimate_pic_address_disp_p (rtx disp)
5523 bool saw_plus;
5525 /* In 64bit mode we can allow direct addresses of symbols and labels
5526 when they are not dynamic symbols. */
5527 if (TARGET_64BIT)
5529 rtx op0 = disp, op1;
5531 switch (GET_CODE (disp))
5533 case LABEL_REF:
5534 return true;
5536 case CONST:
5537 if (GET_CODE (XEXP (disp, 0)) != PLUS)
5538 break;
5539 op0 = XEXP (XEXP (disp, 0), 0);
5540 op1 = XEXP (XEXP (disp, 0), 1);
5541 if (GET_CODE (op1) != CONST_INT
5542 || INTVAL (op1) >= 16*1024*1024
5543 || INTVAL (op1) < -16*1024*1024)
5544 break;
5545 if (GET_CODE (op0) == LABEL_REF)
5546 return true;
5547 if (GET_CODE (op0) != SYMBOL_REF)
5548 break;
5549 /* FALLTHRU */
5551 case SYMBOL_REF:
5552 /* TLS references should always be enclosed in UNSPEC. */
5553 if (SYMBOL_REF_TLS_MODEL (op0))
5554 return false;
5555 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
5556 return true;
5557 break;
5559 default:
5560 break;
5563 if (GET_CODE (disp) != CONST)
5564 return 0;
5565 disp = XEXP (disp, 0);
5567 if (TARGET_64BIT)
5569 /* It is not safe to allow PLUS expressions here; this limits the allowed
5570 distance of GOT tables.  We should not need these anyway. */
5571 if (GET_CODE (disp) != UNSPEC
5572 || (XINT (disp, 1) != UNSPEC_GOTPCREL
5573 && XINT (disp, 1) != UNSPEC_GOTOFF))
5574 return 0;
5576 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5577 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5578 return 0;
5579 return 1;
5582 saw_plus = false;
5583 if (GET_CODE (disp) == PLUS)
5585 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5586 return 0;
5587 disp = XEXP (disp, 0);
5588 saw_plus = true;
5591 if (TARGET_MACHO && darwin_local_data_pic (disp))
5592 return 1;
5594 if (GET_CODE (disp) != UNSPEC)
5595 return 0;
5597 switch (XINT (disp, 1))
5599 case UNSPEC_GOT:
5600 if (saw_plus)
5601 return false;
5602 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5603 case UNSPEC_GOTOFF:
5604 /* Refuse GOTOFF in 64bit mode, since there it is always 64 bits wide when
5605 used.  While the ABI also specifies a 32bit relocation, we don't produce
5606 it in the small PIC model at all. */
5607 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5608 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5609 && !TARGET_64BIT)
5610 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5611 return false;
5612 case UNSPEC_GOTTPOFF:
5613 case UNSPEC_GOTNTPOFF:
5614 case UNSPEC_INDNTPOFF:
5615 if (saw_plus)
5616 return false;
5617 disp = XVECEXP (disp, 0, 0);
5618 return (GET_CODE (disp) == SYMBOL_REF
5619 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
5620 case UNSPEC_NTPOFF:
5621 disp = XVECEXP (disp, 0, 0);
5622 return (GET_CODE (disp) == SYMBOL_REF
5623 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
5624 case UNSPEC_DTPOFF:
5625 disp = XVECEXP (disp, 0, 0);
5626 return (GET_CODE (disp) == SYMBOL_REF
5627 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
5630 return 0;
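
/* A tiny standalone sketch of the +-16 MiB window checked above for
   "symbol + constant" displacements in 64bit PIC code.  Illustration only;
   the real check operates on CONST_INT rtx values and the sketch_* name is
   hypothetical.  */

static int
sketch_pic_offset_in_range (long long offset)
{
  return offset >= -16LL * 1024 * 1024 && offset < 16LL * 1024 * 1024;
}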
5633 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5634 memory address for an instruction. The MODE argument is the machine mode
5635 for the MEM expression that wants to use this address.
5637 It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
5638 convert common non-canonical forms to canonical form so that they will
5639 be recognized. */
5642 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5644 struct ix86_address parts;
5645 rtx base, index, disp;
5646 HOST_WIDE_INT scale;
5647 const char *reason = NULL;
5648 rtx reason_rtx = NULL_RTX;
5650 if (TARGET_DEBUG_ADDR)
5652 fprintf (stderr,
5653 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5654 GET_MODE_NAME (mode), strict);
5655 debug_rtx (addr);
5658 if (ix86_decompose_address (addr, &parts) <= 0)
5660 reason = "decomposition failed";
5661 goto report_error;
5664 base = parts.base;
5665 index = parts.index;
5666 disp = parts.disp;
5667 scale = parts.scale;
5669 /* Validate base register.
5671 Don't allow SUBREG's that span more than a word here. It can lead to spill
5672 failures when the base is one word out of a two word structure, which is
5673 represented internally as a DImode int. */
5675 if (base)
5677 rtx reg;
5678 reason_rtx = base;
5680 if (REG_P (base))
5681 reg = base;
5682 else if (GET_CODE (base) == SUBREG
5683 && REG_P (SUBREG_REG (base))
5684 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
5685 <= UNITS_PER_WORD)
5686 reg = SUBREG_REG (base);
5687 else
5689 reason = "base is not a register";
5690 goto report_error;
5693 if (GET_MODE (base) != Pmode)
5695 reason = "base is not in Pmode";
5696 goto report_error;
5699 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5700 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5702 reason = "base is not valid";
5703 goto report_error;
5707 /* Validate index register.
5709 Don't allow SUBREG's that span more than a word here -- same as above. */
5711 if (index)
5713 rtx reg;
5714 reason_rtx = index;
5716 if (REG_P (index))
5717 reg = index;
5718 else if (GET_CODE (index) == SUBREG
5719 && REG_P (SUBREG_REG (index))
5720 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
5721 <= UNITS_PER_WORD)
5722 reg = SUBREG_REG (index);
5723 else
5725 reason = "index is not a register";
5726 goto report_error;
5729 if (GET_MODE (index) != Pmode)
5731 reason = "index is not in Pmode";
5732 goto report_error;
5735 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5736 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5738 reason = "index is not valid";
5739 goto report_error;
5743 /* Validate scale factor. */
5744 if (scale != 1)
5746 reason_rtx = GEN_INT (scale);
5747 if (!index)
5749 reason = "scale without index";
5750 goto report_error;
5753 if (scale != 2 && scale != 4 && scale != 8)
5755 reason = "scale is not a valid multiplier";
5756 goto report_error;
5760 /* Validate displacement. */
5761 if (disp)
5763 reason_rtx = disp;
5765 if (GET_CODE (disp) == CONST
5766 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5767 switch (XINT (XEXP (disp, 0), 1))
5769 /* Refuse GOTOFF and GOT in 64bit mode, since there they are always 64 bits
5770 wide when used.  While the ABI also specifies 32bit relocations, we don't
5771 produce them at all and use IP-relative addressing instead. */
5772 case UNSPEC_GOT:
5773 case UNSPEC_GOTOFF:
5774 gcc_assert (flag_pic);
5775 if (!TARGET_64BIT)
5776 goto is_legitimate_pic;
5777 reason = "64bit address unspec";
5778 goto report_error;
5780 case UNSPEC_GOTPCREL:
5781 gcc_assert (flag_pic);
5782 goto is_legitimate_pic;
5784 case UNSPEC_GOTTPOFF:
5785 case UNSPEC_GOTNTPOFF:
5786 case UNSPEC_INDNTPOFF:
5787 case UNSPEC_NTPOFF:
5788 case UNSPEC_DTPOFF:
5789 break;
5791 default:
5792 reason = "invalid address unspec";
5793 goto report_error;
5796 else if (flag_pic && (SYMBOLIC_CONST (disp)
5797 #if TARGET_MACHO
5798 && !machopic_operand_p (disp)
5799 #endif
5802 is_legitimate_pic:
5803 if (TARGET_64BIT && (index || base))
5805 /* foo@dtpoff(%rX) is ok. */
5806 if (GET_CODE (disp) != CONST
5807 || GET_CODE (XEXP (disp, 0)) != PLUS
5808 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5809 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5810 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5811 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5813 reason = "non-constant pic memory reference";
5814 goto report_error;
5817 else if (! legitimate_pic_address_disp_p (disp))
5819 reason = "displacement is an invalid pic construct";
5820 goto report_error;
5823 /* This code used to verify that a symbolic pic displacement
5824 includes the pic_offset_table_rtx register.
5826 While this is a good idea, unfortunately these constructs may
5827 be created by the "adds using lea" optimization for incorrect
5828 code like:
5830 int a;
5831 int foo(int i)
5833 return *(&a+i);
5836 This code is nonsensical, but results in addressing the
5837 GOT table with pic_offset_table_rtx as the base.  We can't
5838 easily just refuse it, since it gets matched by the
5839 "addsi3" pattern, which later gets split to lea when the
5840 output register differs from the input.  While this
5841 could be handled by a separate addsi pattern for this case
5842 that never results in lea, disabling this test seems to be
5843 the easier and correct fix for the crash. */
5845 else if (GET_CODE (disp) != LABEL_REF
5846 && GET_CODE (disp) != CONST_INT
5847 && (GET_CODE (disp) != CONST
5848 || !legitimate_constant_p (disp))
5849 && (GET_CODE (disp) != SYMBOL_REF
5850 || !legitimate_constant_p (disp)))
5852 reason = "displacement is not constant";
5853 goto report_error;
5855 else if (TARGET_64BIT
5856 && !x86_64_immediate_operand (disp, VOIDmode))
5858 reason = "displacement is out of range";
5859 goto report_error;
5863 /* Everything looks valid. */
5864 if (TARGET_DEBUG_ADDR)
5865 fprintf (stderr, "Success.\n");
5866 return TRUE;
5868 report_error:
5869 if (TARGET_DEBUG_ADDR)
5871 fprintf (stderr, "Error: %s\n", reason);
5872 debug_rtx (reason_rtx);
5874 return FALSE;
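
/* A standalone sketch of the scale checks performed above; illustration
   only, not the GCC code (the sketch_* name is hypothetical).  A scale
   factor other than 1 needs an index register, and the hardware only
   encodes scale factors of 1, 2, 4 and 8.  */

#include <stdbool.h>

static bool
sketch_scale_ok (int scale, bool has_index)
{
  if (scale == 1)
    return true;
  if (!has_index)
    return false;                       /* "scale without index" */
  return scale == 2 || scale == 4 || scale == 8;
}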
5877 /* Return a unique alias set for the GOT. */
5879 static HOST_WIDE_INT
5880 ix86_GOT_alias_set (void)
5882 static HOST_WIDE_INT set = -1;
5883 if (set == -1)
5884 set = new_alias_set ();
5885 return set;
5888 /* Return a legitimate reference for ORIG (an address) using the
5889 register REG. If REG is 0, a new pseudo is generated.
5891 There are two types of references that must be handled:
5893 1. Global data references must load the address from the GOT, via
5894 the PIC reg. An insn is emitted to do this load, and the reg is
5895 returned.
5897 2. Static data references, constant pool addresses, and code labels
5898 compute the address as an offset from the GOT, whose base is in
5899 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
5900 differentiate them from global data objects. The returned
5901 address is the PIC reg + an unspec constant.
5903 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5904 reg also appears in the address. */
5906 static rtx
5907 legitimize_pic_address (rtx orig, rtx reg)
5909 rtx addr = orig;
5910 rtx new = orig;
5911 rtx base;
5913 #if TARGET_MACHO
5914 if (reg == 0)
5915 reg = gen_reg_rtx (Pmode);
5916 /* Use the generic Mach-O PIC machinery. */
5917 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5918 #endif
5920 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5921 new = addr;
5922 else if (TARGET_64BIT
5923 && ix86_cmodel != CM_SMALL_PIC
5924 && local_symbolic_operand (addr, Pmode))
5926 rtx tmpreg;
5927 /* This symbol may be referenced via a displacement from the PIC
5928 base address (@GOTOFF). */
5930 if (reload_in_progress)
5931 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5932 if (GET_CODE (addr) == CONST)
5933 addr = XEXP (addr, 0);
5934 if (GET_CODE (addr) == PLUS)
5936 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
5937 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
5939 else
5940 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5941 new = gen_rtx_CONST (Pmode, new);
5942 if (!reg)
5943 tmpreg = gen_reg_rtx (Pmode);
5944 else
5945 tmpreg = reg;
5946 emit_move_insn (tmpreg, new);
5948 if (reg != 0)
5950 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
5951 tmpreg, 1, OPTAB_DIRECT);
5952 new = reg;
5954 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
5956 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5958 /* This symbol may be referenced via a displacement from the PIC
5959 base address (@GOTOFF). */
5961 if (reload_in_progress)
5962 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5963 if (GET_CODE (addr) == CONST)
5964 addr = XEXP (addr, 0);
5965 if (GET_CODE (addr) == PLUS)
5967 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
5968 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
5970 else
5971 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5972 new = gen_rtx_CONST (Pmode, new);
5973 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5975 if (reg != 0)
5977 emit_move_insn (reg, new);
5978 new = reg;
5981 else if (GET_CODE (addr) == SYMBOL_REF)
5983 if (TARGET_64BIT)
5985 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5986 new = gen_rtx_CONST (Pmode, new);
5987 new = gen_const_mem (Pmode, new);
5988 set_mem_alias_set (new, ix86_GOT_alias_set ());
5990 if (reg == 0)
5991 reg = gen_reg_rtx (Pmode);
5992 /* Use gen_movsi directly, otherwise the address is loaded
5993 into a register for CSE.  We don't want to CSE these addresses;
5994 instead we CSE addresses loaded from the GOT table, so skip this. */
5995 emit_insn (gen_movsi (reg, new));
5996 new = reg;
5998 else
6000 /* This symbol must be referenced via a load from the
6001 Global Offset Table (@GOT). */
6003 if (reload_in_progress)
6004 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6005 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6006 new = gen_rtx_CONST (Pmode, new);
6007 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6008 new = gen_const_mem (Pmode, new);
6009 set_mem_alias_set (new, ix86_GOT_alias_set ());
6011 if (reg == 0)
6012 reg = gen_reg_rtx (Pmode);
6013 emit_move_insn (reg, new);
6014 new = reg;
6017 else
6019 if (GET_CODE (addr) == CONST)
6021 addr = XEXP (addr, 0);
6023 /* We must match what we generated before.  Assume the only
6024 unspecs that can get here are ours. Not that we could do
6025 anything with them anyway.... */
6026 if (GET_CODE (addr) == UNSPEC
6027 || (GET_CODE (addr) == PLUS
6028 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6029 return orig;
6030 gcc_assert (GET_CODE (addr) == PLUS);
6032 if (GET_CODE (addr) == PLUS)
6034 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6036 /* Check first to see if this is a constant offset from a @GOTOFF
6037 symbol reference. */
6038 if (local_symbolic_operand (op0, Pmode)
6039 && GET_CODE (op1) == CONST_INT)
6041 if (!TARGET_64BIT)
6043 if (reload_in_progress)
6044 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6045 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6046 UNSPEC_GOTOFF);
6047 new = gen_rtx_PLUS (Pmode, new, op1);
6048 new = gen_rtx_CONST (Pmode, new);
6049 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6051 if (reg != 0)
6053 emit_move_insn (reg, new);
6054 new = reg;
6057 else
6059 if (INTVAL (op1) < -16*1024*1024
6060 || INTVAL (op1) >= 16*1024*1024)
6061 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
6064 else
6066 base = legitimize_pic_address (XEXP (addr, 0), reg);
6067 new = legitimize_pic_address (XEXP (addr, 1),
6068 base == reg ? NULL_RTX : reg);
6070 if (GET_CODE (new) == CONST_INT)
6071 new = plus_constant (base, INTVAL (new));
6072 else
6074 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6076 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6077 new = XEXP (new, 1);
6079 new = gen_rtx_PLUS (Pmode, base, new);
6084 return new;
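
/* A minimal C usage example of the two reference kinds documented above
   legitimize_pic_address, assuming an ELF/i386 target compiled with -fPIC
   (the variable and function names are hypothetical).  The global symbol is
   preemptible, so its address is loaded from the GOT (@GOT); the static
   symbol is local, so it is addressed as the PIC register plus a @GOTOFF
   displacement.  */

extern int sketch_global_counter;       /* global data: address loaded via @GOT */
static int sketch_local_counter;        /* static data: PIC reg + @GOTOFF */

int
sketch_bump_counters (void)
{
  return ++sketch_global_counter + ++sketch_local_counter;
}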
6087 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6089 static rtx
6090 get_thread_pointer (int to_reg)
6092 rtx tp, reg, insn;
6094 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6095 if (!to_reg)
6096 return tp;
6098 reg = gen_reg_rtx (Pmode);
6099 insn = gen_rtx_SET (VOIDmode, reg, tp);
6100 insn = emit_insn (insn);
6102 return reg;
6105 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6106 false if we expect this to be used for a memory address and true if
6107 we expect to load the address into a register. */
6109 static rtx
6110 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6112 rtx dest, base, off, pic;
6113 int type;
6115 switch (model)
6117 case TLS_MODEL_GLOBAL_DYNAMIC:
6118 dest = gen_reg_rtx (Pmode);
6119 if (TARGET_64BIT)
6121 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6123 start_sequence ();
6124 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6125 insns = get_insns ();
6126 end_sequence ();
6128 emit_libcall_block (insns, dest, rax, x);
6130 else
6131 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6132 break;
6134 case TLS_MODEL_LOCAL_DYNAMIC:
6135 base = gen_reg_rtx (Pmode);
6136 if (TARGET_64BIT)
6138 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6140 start_sequence ();
6141 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6142 insns = get_insns ();
6143 end_sequence ();
6145 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6146 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6147 emit_libcall_block (insns, base, rax, note);
6149 else
6150 emit_insn (gen_tls_local_dynamic_base_32 (base));
6152 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6153 off = gen_rtx_CONST (Pmode, off);
6155 return gen_rtx_PLUS (Pmode, base, off);
6157 case TLS_MODEL_INITIAL_EXEC:
6158 if (TARGET_64BIT)
6160 pic = NULL;
6161 type = UNSPEC_GOTNTPOFF;
6163 else if (flag_pic)
6165 if (reload_in_progress)
6166 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6167 pic = pic_offset_table_rtx;
6168 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6170 else if (!TARGET_GNU_TLS)
6172 pic = gen_reg_rtx (Pmode);
6173 emit_insn (gen_set_got (pic));
6174 type = UNSPEC_GOTTPOFF;
6176 else
6178 pic = NULL;
6179 type = UNSPEC_INDNTPOFF;
6182 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6183 off = gen_rtx_CONST (Pmode, off);
6184 if (pic)
6185 off = gen_rtx_PLUS (Pmode, pic, off);
6186 off = gen_const_mem (Pmode, off);
6187 set_mem_alias_set (off, ix86_GOT_alias_set ());
6189 if (TARGET_64BIT || TARGET_GNU_TLS)
6191 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6192 off = force_reg (Pmode, off);
6193 return gen_rtx_PLUS (Pmode, base, off);
6195 else
6197 base = get_thread_pointer (true);
6198 dest = gen_reg_rtx (Pmode);
6199 emit_insn (gen_subsi3 (dest, base, off));
6201 break;
6203 case TLS_MODEL_LOCAL_EXEC:
6204 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6205 (TARGET_64BIT || TARGET_GNU_TLS)
6206 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6207 off = gen_rtx_CONST (Pmode, off);
6209 if (TARGET_64BIT || TARGET_GNU_TLS)
6211 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6212 return gen_rtx_PLUS (Pmode, base, off);
6214 else
6216 base = get_thread_pointer (true);
6217 dest = gen_reg_rtx (Pmode);
6218 emit_insn (gen_subsi3 (dest, base, off));
6220 break;
6222 default:
6223 gcc_unreachable ();
6226 return dest;
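
/* A minimal usage example for the TLS access models handled above, assuming
   an ELF target with TLS support (the names are hypothetical).  Which model
   is chosen depends on -fPIC and -ftls-model: -fPIC typically selects
   global-dynamic for a variable like this, while a non-PIC executable can
   use the cheaper local-exec sequence built from the @NTPOFF/@TPOFF offsets
   emitted by this function.  */

__thread int sketch_tls_counter;

int
sketch_bump_tls (void)
{
  return ++sketch_tls_counter;
}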
6229 /* Try machine-dependent ways of modifying an illegitimate address
6230 to be legitimate. If we find one, return the new, valid address.
6231 This macro is used in only one place: `memory_address' in explow.c.
6233 OLDX is the address as it was before break_out_memory_refs was called.
6234 In some cases it is useful to look at this to decide what needs to be done.
6236 MODE and WIN are passed so that this macro can use
6237 GO_IF_LEGITIMATE_ADDRESS.
6239 It is always safe for this macro to do nothing. It exists to recognize
6240 opportunities to optimize the output.
6242 For the 80386, we handle X+REG by loading X into a register R and
6243 using R+REG. R will go in a general reg and indexing will be used.
6244 However, if REG is a broken-out memory address or multiplication,
6245 nothing needs to be done because REG can certainly go in a general reg.
6247 When -fpic is used, special handling is needed for symbolic references.
6248 See comments by legitimize_pic_address in i386.c for details. */
6251 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6253 int changed = 0;
6254 unsigned log;
6256 if (TARGET_DEBUG_ADDR)
6258 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6259 GET_MODE_NAME (mode));
6260 debug_rtx (x);
6263 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
6264 if (log)
6265 return legitimize_tls_address (x, log, false);
6266 if (GET_CODE (x) == CONST
6267 && GET_CODE (XEXP (x, 0)) == PLUS
6268 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
6269 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
6271 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
6272 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
6275 if (flag_pic && SYMBOLIC_CONST (x))
6276 return legitimize_pic_address (x, 0);
6278 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6279 if (GET_CODE (x) == ASHIFT
6280 && GET_CODE (XEXP (x, 1)) == CONST_INT
6281 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
6283 changed = 1;
6284 log = INTVAL (XEXP (x, 1));
6285 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6286 GEN_INT (1 << log));
6289 if (GET_CODE (x) == PLUS)
6291 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6293 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6294 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6295 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
6297 changed = 1;
6298 log = INTVAL (XEXP (XEXP (x, 0), 1));
6299 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6300 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6301 GEN_INT (1 << log));
6304 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6305 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6306 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
6308 changed = 1;
6309 log = INTVAL (XEXP (XEXP (x, 1), 1));
6310 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6311 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6312 GEN_INT (1 << log));
6315 /* Put multiply first if it isn't already. */
6316 if (GET_CODE (XEXP (x, 1)) == MULT)
6318 rtx tmp = XEXP (x, 0);
6319 XEXP (x, 0) = XEXP (x, 1);
6320 XEXP (x, 1) = tmp;
6321 changed = 1;
6324 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6325 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6326 created by virtual register instantiation, register elimination, and
6327 similar optimizations. */
6328 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6330 changed = 1;
6331 x = gen_rtx_PLUS (Pmode,
6332 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6333 XEXP (XEXP (x, 1), 0)),
6334 XEXP (XEXP (x, 1), 1));
6337 /* Canonicalize
6338 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6339 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6340 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6341 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6342 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6343 && CONSTANT_P (XEXP (x, 1)))
6345 rtx constant;
6346 rtx other = NULL_RTX;
6348 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6350 constant = XEXP (x, 1);
6351 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6353 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6355 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6356 other = XEXP (x, 1);
6358 else
6359 constant = 0;
6361 if (constant)
6363 changed = 1;
6364 x = gen_rtx_PLUS (Pmode,
6365 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6366 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6367 plus_constant (other, INTVAL (constant)));
6371 if (changed && legitimate_address_p (mode, x, FALSE))
6372 return x;
6374 if (GET_CODE (XEXP (x, 0)) == MULT)
6376 changed = 1;
6377 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6380 if (GET_CODE (XEXP (x, 1)) == MULT)
6382 changed = 1;
6383 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6386 if (changed
6387 && GET_CODE (XEXP (x, 1)) == REG
6388 && GET_CODE (XEXP (x, 0)) == REG)
6389 return x;
6391 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6393 changed = 1;
6394 x = legitimize_pic_address (x, 0);
6397 if (changed && legitimate_address_p (mode, x, FALSE))
6398 return x;
6400 if (GET_CODE (XEXP (x, 0)) == REG)
6402 rtx temp = gen_reg_rtx (Pmode);
6403 rtx val = force_operand (XEXP (x, 1), temp);
6404 if (val != temp)
6405 emit_move_insn (temp, val);
6407 XEXP (x, 1) = temp;
6408 return x;
6411 else if (GET_CODE (XEXP (x, 1)) == REG)
6413 rtx temp = gen_reg_rtx (Pmode);
6414 rtx val = force_operand (XEXP (x, 0), temp);
6415 if (val != temp)
6416 emit_move_insn (temp, val);
6418 XEXP (x, 0) = temp;
6419 return x;
6423 return x;
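
/* A standalone sketch of the shift-to-multiply canonicalization done above
   (illustration only; the sketch_* name is hypothetical).  Shifts by 0..3
   are rewritten as multiplies by 1, 2, 4 or 8 so that they can be matched
   as the index*scale part of an address.  */

static long
sketch_canonicalize_shift (long value, int shift_count)
{
  if (shift_count >= 0 && shift_count < 4)
    return value * (1L << shift_count);     /* index * {1,2,4,8} */
  return value << shift_count;              /* otherwise keep the shift */
}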
6426 /* Print an integer constant expression in assembler syntax. Addition
6427 and subtraction are the only arithmetic that may appear in these
6428 expressions. FILE is the stdio stream to write to, X is the rtx, and
6429 CODE is the operand print code from the output string. */
6431 static void
6432 output_pic_addr_const (FILE *file, rtx x, int code)
6434 char buf[256];
6436 switch (GET_CODE (x))
6438 case PC:
6439 gcc_assert (flag_pic);
6440 putc ('.', file);
6441 break;
6443 case SYMBOL_REF:
6444 assemble_name (file, XSTR (x, 0));
6445 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6446 fputs ("@PLT", file);
6447 break;
6449 case LABEL_REF:
6450 x = XEXP (x, 0);
6451 /* FALLTHRU */
6452 case CODE_LABEL:
6453 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6454 assemble_name (asm_out_file, buf);
6455 break;
6457 case CONST_INT:
6458 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6459 break;
6461 case CONST:
6462 /* This used to output parentheses around the expression,
6463 but that does not work on the 386 (either ATT or BSD assembler). */
6464 output_pic_addr_const (file, XEXP (x, 0), code);
6465 break;
6467 case CONST_DOUBLE:
6468 if (GET_MODE (x) == VOIDmode)
6470 /* We can use %d if the number is <32 bits and positive. */
6471 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6472 fprintf (file, "0x%lx%08lx",
6473 (unsigned long) CONST_DOUBLE_HIGH (x),
6474 (unsigned long) CONST_DOUBLE_LOW (x));
6475 else
6476 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6478 else
6479 /* We can't handle floating point constants;
6480 PRINT_OPERAND must handle them. */
6481 output_operand_lossage ("floating constant misused");
6482 break;
6484 case PLUS:
6485 /* Some assemblers need integer constants to appear first. */
6486 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6488 output_pic_addr_const (file, XEXP (x, 0), code);
6489 putc ('+', file);
6490 output_pic_addr_const (file, XEXP (x, 1), code);
6492 else
6494 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
6495 output_pic_addr_const (file, XEXP (x, 1), code);
6496 putc ('+', file);
6497 output_pic_addr_const (file, XEXP (x, 0), code);
6499 break;
6501 case MINUS:
6502 if (!TARGET_MACHO)
6503 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6504 output_pic_addr_const (file, XEXP (x, 0), code);
6505 putc ('-', file);
6506 output_pic_addr_const (file, XEXP (x, 1), code);
6507 if (!TARGET_MACHO)
6508 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6509 break;
6511 case UNSPEC:
6512 gcc_assert (XVECLEN (x, 0) == 1);
6513 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6514 switch (XINT (x, 1))
6516 case UNSPEC_GOT:
6517 fputs ("@GOT", file);
6518 break;
6519 case UNSPEC_GOTOFF:
6520 fputs ("@GOTOFF", file);
6521 break;
6522 case UNSPEC_GOTPCREL:
6523 fputs ("@GOTPCREL(%rip)", file);
6524 break;
6525 case UNSPEC_GOTTPOFF:
6526 /* FIXME: This might be @TPOFF in Sun ld too. */
6527 fputs ("@GOTTPOFF", file);
6528 break;
6529 case UNSPEC_TPOFF:
6530 fputs ("@TPOFF", file);
6531 break;
6532 case UNSPEC_NTPOFF:
6533 if (TARGET_64BIT)
6534 fputs ("@TPOFF", file);
6535 else
6536 fputs ("@NTPOFF", file);
6537 break;
6538 case UNSPEC_DTPOFF:
6539 fputs ("@DTPOFF", file);
6540 break;
6541 case UNSPEC_GOTNTPOFF:
6542 if (TARGET_64BIT)
6543 fputs ("@GOTTPOFF(%rip)", file);
6544 else
6545 fputs ("@GOTNTPOFF", file);
6546 break;
6547 case UNSPEC_INDNTPOFF:
6548 fputs ("@INDNTPOFF", file);
6549 break;
6550 default:
6551 output_operand_lossage ("invalid UNSPEC as operand");
6552 break;
6554 break;
6556 default:
6557 output_operand_lossage ("invalid expression as operand");
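
/* A condensed reference sketch of the relocation suffixes emitted for the
   PIC/TLS unspecs handled above (32bit forms; illustration only, the
   sketch_* name is hypothetical).  */

static const struct { const char *unspec; const char *suffix; }
sketch_pic_suffixes[] =
{
  { "UNSPEC_GOT",      "@GOT"      },
  { "UNSPEC_GOTOFF",   "@GOTOFF"   },
  { "UNSPEC_GOTTPOFF", "@GOTTPOFF" },
  { "UNSPEC_TPOFF",    "@TPOFF"    },
  { "UNSPEC_NTPOFF",   "@NTPOFF"   },
  { "UNSPEC_DTPOFF",   "@DTPOFF"   },
  { "UNSPEC_GOTNTPOFF","@GOTNTPOFF"},
  { "UNSPEC_INDNTPOFF","@INDNTPOFF"},
};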
6561 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
6562 We need to emit DTP-relative relocations. */
6564 static void
6565 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6567 fputs (ASM_LONG, file);
6568 output_addr_const (file, x);
6569 fputs ("@DTPOFF", file);
6570 switch (size)
6572 case 4:
6573 break;
6574 case 8:
6575 fputs (", 0", file);
6576 break;
6577 default:
6578 gcc_unreachable ();
6582 /* In the name of slightly smaller debug output, and to cater to
6583 general assembler lossage, recognize PIC+GOTOFF and turn it back
6584 into a direct symbol reference. */
6586 static rtx
6587 ix86_delegitimize_address (rtx orig_x)
6589 rtx x = orig_x, y;
6591 if (GET_CODE (x) == MEM)
6592 x = XEXP (x, 0);
6594 if (TARGET_64BIT)
6596 if (GET_CODE (x) != CONST
6597 || GET_CODE (XEXP (x, 0)) != UNSPEC
6598 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6599 || GET_CODE (orig_x) != MEM)
6600 return orig_x;
6601 return XVECEXP (XEXP (x, 0), 0, 0);
6604 if (GET_CODE (x) != PLUS
6605 || GET_CODE (XEXP (x, 1)) != CONST)
6606 return orig_x;
6608 if (GET_CODE (XEXP (x, 0)) == REG
6609 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6610 /* %ebx + GOT/GOTOFF */
6611 y = NULL;
6612 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6614 /* %ebx + %reg * scale + GOT/GOTOFF */
6615 y = XEXP (x, 0);
6616 if (GET_CODE (XEXP (y, 0)) == REG
6617 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6618 y = XEXP (y, 1);
6619 else if (GET_CODE (XEXP (y, 1)) == REG
6620 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6621 y = XEXP (y, 0);
6622 else
6623 return orig_x;
6624 if (GET_CODE (y) != REG
6625 && GET_CODE (y) != MULT
6626 && GET_CODE (y) != ASHIFT)
6627 return orig_x;
6629 else
6630 return orig_x;
6632 x = XEXP (XEXP (x, 1), 0);
6633 if (GET_CODE (x) == UNSPEC
6634 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6635 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6637 if (y)
6638 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6639 return XVECEXP (x, 0, 0);
6642 if (GET_CODE (x) == PLUS
6643 && GET_CODE (XEXP (x, 0)) == UNSPEC
6644 && GET_CODE (XEXP (x, 1)) == CONST_INT
6645 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6646 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6647 && GET_CODE (orig_x) != MEM)))
6649 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6650 if (y)
6651 return gen_rtx_PLUS (Pmode, y, x);
6652 return x;
6655 return orig_x;
6658 static void
6659 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6660 int fp, FILE *file)
6662 const char *suffix;
6664 if (mode == CCFPmode || mode == CCFPUmode)
6666 enum rtx_code second_code, bypass_code;
6667 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6668 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
6669 code = ix86_fp_compare_code_to_integer (code);
6670 mode = CCmode;
6672 if (reverse)
6673 code = reverse_condition (code);
6675 switch (code)
6677 case EQ:
6678 suffix = "e";
6679 break;
6680 case NE:
6681 suffix = "ne";
6682 break;
6683 case GT:
6684 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
6685 suffix = "g";
6686 break;
6687 case GTU:
6688 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6689 Those same assemblers have the same but opposite lossage on cmov. */
6690 gcc_assert (mode == CCmode);
6691 suffix = fp ? "nbe" : "a";
6692 break;
6693 case LT:
6694 switch (mode)
6696 case CCNOmode:
6697 case CCGOCmode:
6698 suffix = "s";
6699 break;
6701 case CCmode:
6702 case CCGCmode:
6703 suffix = "l";
6704 break;
6706 default:
6707 gcc_unreachable ();
6709 break;
6710 case LTU:
6711 gcc_assert (mode == CCmode);
6712 suffix = "b";
6713 break;
6714 case GE:
6715 switch (mode)
6717 case CCNOmode:
6718 case CCGOCmode:
6719 suffix = "ns";
6720 break;
6722 case CCmode:
6723 case CCGCmode:
6724 suffix = "ge";
6725 break;
6727 default:
6728 gcc_unreachable ();
6730 break;
6731 case GEU:
6732 /* ??? As above. */
6733 gcc_assert (mode == CCmode);
6734 suffix = fp ? "nb" : "ae";
6735 break;
6736 case LE:
6737 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
6738 suffix = "le";
6739 break;
6740 case LEU:
6741 gcc_assert (mode == CCmode);
6742 suffix = "be";
6743 break;
6744 case UNORDERED:
6745 suffix = fp ? "u" : "p";
6746 break;
6747 case ORDERED:
6748 suffix = fp ? "nu" : "np";
6749 break;
6750 default:
6751 gcc_unreachable ();
6753 fputs (suffix, file);
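
/* A condensed reference table mirroring the switch above: the suffix
   appended to set/j/cmov mnemonics for the common integer comparisons
   (CCmode).  Illustration only; the sketch_* name is hypothetical.  */

static const struct { const char *rtx_code; const char *suffix; }
sketch_cc_suffixes[] =
{
  { "EQ", "e"  }, { "NE",  "ne" },
  { "GT", "g"  }, { "GTU", "a"  },
  { "LT", "l"  }, { "LTU", "b"  },
  { "GE", "ge" }, { "GEU", "ae" },
  { "LE", "le" }, { "LEU", "be" },
  { "UNORDERED", "p" }, { "ORDERED", "np" },
};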
6756 /* Print the name of register X to FILE based on its machine mode and number.
6757 If CODE is 'w', pretend the mode is HImode.
6758 If CODE is 'b', pretend the mode is QImode.
6759 If CODE is 'k', pretend the mode is SImode.
6760 If CODE is 'q', pretend the mode is DImode.
6761 If CODE is 'h', pretend the reg is the 'high' byte register.
6762 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
6764 void
6765 print_reg (rtx x, int code, FILE *file)
6767 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
6768 && REGNO (x) != FRAME_POINTER_REGNUM
6769 && REGNO (x) != FLAGS_REG
6770 && REGNO (x) != FPSR_REG);
6772 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6773 putc ('%', file);
6775 if (code == 'w' || MMX_REG_P (x))
6776 code = 2;
6777 else if (code == 'b')
6778 code = 1;
6779 else if (code == 'k')
6780 code = 4;
6781 else if (code == 'q')
6782 code = 8;
6783 else if (code == 'y')
6784 code = 3;
6785 else if (code == 'h')
6786 code = 0;
6787 else
6788 code = GET_MODE_SIZE (GET_MODE (x));
6790 /* Irritatingly, AMD extended registers use a different naming convention
6791 from the normal registers. */
6792 if (REX_INT_REG_P (x))
6794 gcc_assert (TARGET_64BIT);
6795 switch (code)
6797 case 0:
6798 error ("extended registers have no high halves");
6799 break;
6800 case 1:
6801 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6802 break;
6803 case 2:
6804 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6805 break;
6806 case 4:
6807 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6808 break;
6809 case 8:
6810 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6811 break;
6812 default:
6813 error ("unsupported operand size for extended register");
6814 break;
6816 return;
6818 switch (code)
6820 case 3:
6821 if (STACK_TOP_P (x))
6823 fputs ("st(0)", file);
6824 break;
6826 /* FALLTHRU */
6827 case 8:
6828 case 4:
6829 case 12:
6830 if (! ANY_FP_REG_P (x))
6831 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6832 /* FALLTHRU */
6833 case 16:
6834 case 2:
6835 normal:
6836 fputs (hi_reg_name[REGNO (x)], file);
6837 break;
6838 case 1:
6839 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
6840 goto normal;
6841 fputs (qi_reg_name[REGNO (x)], file);
6842 break;
6843 case 0:
6844 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
6845 goto normal;
6846 fputs (qi_high_reg_name[REGNO (x)], file);
6847 break;
6848 default:
6849 gcc_unreachable ();
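
/* A quick reference sketch for the size-override letters handled above,
   using hard register 0 (the ax family) as the example.  Illustration only;
   the sketch_* name is hypothetical.  */

static const struct { char letter; const char *reg0_name; }
sketch_size_letters[] =
{
  { 'b', "al"  },       /* QImode, low byte            */
  { 'h', "ah"  },       /* QImode, high byte           */
  { 'w', "ax"  },       /* HImode                      */
  { 'k', "eax" },       /* SImode                      */
  { 'q', "rax" },       /* DImode, 64bit targets only  */
};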
6853 /* Locate some local-dynamic symbol still in use by this function
6854 so that we can print its name in some tls_local_dynamic_base
6855 pattern. */
6857 static const char *
6858 get_some_local_dynamic_name (void)
6860 rtx insn;
6862 if (cfun->machine->some_ld_name)
6863 return cfun->machine->some_ld_name;
6865 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6866 if (INSN_P (insn)
6867 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6868 return cfun->machine->some_ld_name;
6870 gcc_unreachable ();
6873 static int
6874 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
6876 rtx x = *px;
6878 if (GET_CODE (x) == SYMBOL_REF
6879 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
6881 cfun->machine->some_ld_name = XSTR (x, 0);
6882 return 1;
6885 return 0;
6888 /* Meaning of CODE:
6889 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6890 C -- print opcode suffix for set/cmov insn.
6891 c -- like C, but print reversed condition
6892 F,f -- likewise, but for floating-point.
6893 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
6894 otherwise nothing
6895 R -- print the prefix for register names.
6896 z -- print the opcode suffix for the size of the current operand.
6897 * -- print a star (in certain assembler syntax)
6898 A -- print an absolute memory reference.
6899 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6900 s -- print a shift double count, followed by the assembler's argument
6901 delimiter.
6902 b -- print the QImode name of the register for the indicated operand.
6903 %b0 would print %al if operands[0] is reg 0.
6904 w -- likewise, print the HImode name of the register.
6905 k -- likewise, print the SImode name of the register.
6906 q -- likewise, print the DImode name of the register.
6907 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6908 y -- print "st(0)" instead of "st" as a register.
6909 D -- print condition for SSE cmp instruction.
6910 P -- if PIC, print an @PLT suffix.
6911 X -- don't print any sort of PIC '@' suffix for a symbol.
6912 & -- print some in-use local-dynamic symbol name.
6913 H -- print a memory address offset by 8; used for sse high-parts
6916 void
6917 print_operand (FILE *file, rtx x, int code)
6919 if (code)
6921 switch (code)
6923 case '*':
6924 if (ASSEMBLER_DIALECT == ASM_ATT)
6925 putc ('*', file);
6926 return;
6928 case '&':
6929 assemble_name (file, get_some_local_dynamic_name ());
6930 return;
6932 case 'A':
6933 switch (ASSEMBLER_DIALECT)
6935 case ASM_ATT:
6936 putc ('*', file);
6937 break;
6939 case ASM_INTEL:
6940 /* Intel syntax.  For absolute addresses, registers should not
6941 be surrounded by brackets. */
6942 if (GET_CODE (x) != REG)
6944 putc ('[', file);
6945 PRINT_OPERAND (file, x, 0);
6946 putc (']', file);
6947 return;
6949 break;
6951 default:
6952 gcc_unreachable ();
6955 PRINT_OPERAND (file, x, 0);
6956 return;
6959 case 'L':
6960 if (ASSEMBLER_DIALECT == ASM_ATT)
6961 putc ('l', file);
6962 return;
6964 case 'W':
6965 if (ASSEMBLER_DIALECT == ASM_ATT)
6966 putc ('w', file);
6967 return;
6969 case 'B':
6970 if (ASSEMBLER_DIALECT == ASM_ATT)
6971 putc ('b', file);
6972 return;
6974 case 'Q':
6975 if (ASSEMBLER_DIALECT == ASM_ATT)
6976 putc ('l', file);
6977 return;
6979 case 'S':
6980 if (ASSEMBLER_DIALECT == ASM_ATT)
6981 putc ('s', file);
6982 return;
6984 case 'T':
6985 if (ASSEMBLER_DIALECT == ASM_ATT)
6986 putc ('t', file);
6987 return;
6989 case 'z':
6990 /* 387 opcodes don't get size suffixes if the operands are
6991 registers. */
6992 if (STACK_REG_P (x))
6993 return;
6995 /* Likewise if using Intel opcodes. */
6996 if (ASSEMBLER_DIALECT == ASM_INTEL)
6997 return;
6999 /* Derive the instruction size suffix from the size of the operand. */
7000 switch (GET_MODE_SIZE (GET_MODE (x)))
7002 case 2:
7003 #ifdef HAVE_GAS_FILDS_FISTS
7004 putc ('s', file);
7005 #endif
7006 return;
7008 case 4:
7009 if (GET_MODE (x) == SFmode)
7011 putc ('s', file);
7012 return;
7014 else
7015 putc ('l', file);
7016 return;
7018 case 12:
7019 case 16:
7020 putc ('t', file);
7021 return;
7023 case 8:
7024 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7026 #ifdef GAS_MNEMONICS
7027 putc ('q', file);
7028 #else
7029 putc ('l', file);
7030 putc ('l', file);
7031 #endif
7033 else
7034 putc ('l', file);
7035 return;
7037 default:
7038 gcc_unreachable ();
7041 case 'b':
7042 case 'w':
7043 case 'k':
7044 case 'q':
7045 case 'h':
7046 case 'y':
7047 case 'X':
7048 case 'P':
7049 break;
7051 case 's':
7052 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7054 PRINT_OPERAND (file, x, 0);
7055 putc (',', file);
7057 return;
7059 case 'D':
7060 /* A little bit of brain damage here.  The SSE compare instructions
7061 use completely different names for the comparisons than the
7062 fp conditional moves do. */
7063 switch (GET_CODE (x))
7065 case EQ:
7066 case UNEQ:
7067 fputs ("eq", file);
7068 break;
7069 case LT:
7070 case UNLT:
7071 fputs ("lt", file);
7072 break;
7073 case LE:
7074 case UNLE:
7075 fputs ("le", file);
7076 break;
7077 case UNORDERED:
7078 fputs ("unord", file);
7079 break;
7080 case NE:
7081 case LTGT:
7082 fputs ("neq", file);
7083 break;
7084 case UNGE:
7085 case GE:
7086 fputs ("nlt", file);
7087 break;
7088 case UNGT:
7089 case GT:
7090 fputs ("nle", file);
7091 break;
7092 case ORDERED:
7093 fputs ("ord", file);
7094 break;
7095 default:
7096 gcc_unreachable ();
7098 return;
7099 case 'O':
7100 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7101 if (ASSEMBLER_DIALECT == ASM_ATT)
7103 switch (GET_MODE (x))
7105 case HImode: putc ('w', file); break;
7106 case SImode:
7107 case SFmode: putc ('l', file); break;
7108 case DImode:
7109 case DFmode: putc ('q', file); break;
7110 default: gcc_unreachable ();
7112 putc ('.', file);
7114 #endif
7115 return;
7116 case 'C':
7117 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7118 return;
7119 case 'F':
7120 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7121 if (ASSEMBLER_DIALECT == ASM_ATT)
7122 putc ('.', file);
7123 #endif
7124 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7125 return;
7127 /* Like above, but reverse condition */
7128 case 'c':
7129 /* Check to see if argument to %c is really a constant
7130 and not a condition code which needs to be reversed. */
7131 if (!COMPARISON_P (x))
7133 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7134 return;
7136 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7137 return;
7138 case 'f':
7139 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7140 if (ASSEMBLER_DIALECT == ASM_ATT)
7141 putc ('.', file);
7142 #endif
7143 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7144 return;
7146 case 'H':
7147 /* It doesn't actually matter what mode we use here, as we're
7148 only going to use this for printing. */
7149 x = adjust_address_nv (x, DImode, 8);
7150 break;
7152 case '+':
7154 rtx x;
7156 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7157 return;
7159 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7160 if (x)
7162 int pred_val = INTVAL (XEXP (x, 0));
7164 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7165 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7167 int taken = pred_val > REG_BR_PROB_BASE / 2;
7168 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7170 /* Emit hints only in the case where the default branch prediction
7171 heuristics would fail. */
7172 if (taken != cputaken)
7174 /* We use the 3e (DS) prefix for taken branches and
7175 the 2e (CS) prefix for not-taken branches. */
7176 if (taken)
7177 fputs ("ds ; ", file);
7178 else
7179 fputs ("cs ; ", file);
7183 return;
7185 default:
7186 output_operand_lossage ("invalid operand code '%c'", code);
7190 if (GET_CODE (x) == REG)
7191 print_reg (x, code, file);
7193 else if (GET_CODE (x) == MEM)
7195 /* No `byte ptr' prefix for call instructions. */
7196 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7198 const char * size;
7199 switch (GET_MODE_SIZE (GET_MODE (x)))
7201 case 1: size = "BYTE"; break;
7202 case 2: size = "WORD"; break;
7203 case 4: size = "DWORD"; break;
7204 case 8: size = "QWORD"; break;
7205 case 12: size = "XWORD"; break;
7206 case 16: size = "XMMWORD"; break;
7207 default:
7208 gcc_unreachable ();
7211 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7212 if (code == 'b')
7213 size = "BYTE";
7214 else if (code == 'w')
7215 size = "WORD";
7216 else if (code == 'k')
7217 size = "DWORD";
7219 fputs (size, file);
7220 fputs (" PTR ", file);
7223 x = XEXP (x, 0);
7224 /* Avoid (%rip) for call operands. */
7225 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7226 && GET_CODE (x) != CONST_INT)
7227 output_addr_const (file, x);
7228 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7229 output_operand_lossage ("invalid constraints for operand");
7230 else
7231 output_address (x);
7234 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7236 REAL_VALUE_TYPE r;
7237 long l;
7239 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7240 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7242 if (ASSEMBLER_DIALECT == ASM_ATT)
7243 putc ('$', file);
7244 fprintf (file, "0x%08lx", l);
7247 /* These float cases don't actually occur as immediate operands. */
7248 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7250 char dstr[30];
7252 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7253 fprintf (file, "%s", dstr);
7256 else if (GET_CODE (x) == CONST_DOUBLE
7257 && GET_MODE (x) == XFmode)
7259 char dstr[30];
7261 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7262 fprintf (file, "%s", dstr);
7265 else
7267 /* We have patterns that allow zero sets of memory, for instance.
7268 In 64-bit mode, we should probably support all 8-byte vectors,
7269 since we can in fact encode that into an immediate. */
7270 if (GET_CODE (x) == CONST_VECTOR)
7272 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
7273 x = const0_rtx;
7276 if (code != 'P')
7278 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7280 if (ASSEMBLER_DIALECT == ASM_ATT)
7281 putc ('$', file);
7283 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7284 || GET_CODE (x) == LABEL_REF)
7286 if (ASSEMBLER_DIALECT == ASM_ATT)
7287 putc ('$', file);
7288 else
7289 fputs ("OFFSET FLAT:", file);
7292 if (GET_CODE (x) == CONST_INT)
7293 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7294 else if (flag_pic)
7295 output_pic_addr_const (file, x, code);
7296 else
7297 output_addr_const (file, x);
7301 /* Print a memory operand whose address is ADDR. */
7303 void
7304 print_operand_address (FILE *file, rtx addr)
7306 struct ix86_address parts;
7307 rtx base, index, disp;
7308 int scale;
7309 int ok = ix86_decompose_address (addr, &parts);
7311 gcc_assert (ok);
7313 base = parts.base;
7314 index = parts.index;
7315 disp = parts.disp;
7316 scale = parts.scale;
7318 switch (parts.seg)
7320 case SEG_DEFAULT:
7321 break;
7322 case SEG_FS:
7323 case SEG_GS:
7324 if (USER_LABEL_PREFIX[0] == 0)
7325 putc ('%', file);
7326 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7327 break;
7328 default:
7329 gcc_unreachable ();
7332 if (!base && !index)
7334 /* A displacement-only address requires special attention. */
7336 if (GET_CODE (disp) == CONST_INT)
7338 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7340 if (USER_LABEL_PREFIX[0] == 0)
7341 putc ('%', file);
7342 fputs ("ds:", file);
7344 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7346 else if (flag_pic)
7347 output_pic_addr_const (file, disp, 0);
7348 else
7349 output_addr_const (file, disp);
7351 /* Use the one-byte-shorter RIP-relative addressing in 64bit mode. */
7352 if (TARGET_64BIT)
7354 if (GET_CODE (disp) == CONST
7355 && GET_CODE (XEXP (disp, 0)) == PLUS
7356 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7357 disp = XEXP (XEXP (disp, 0), 0);
7358 if (GET_CODE (disp) == LABEL_REF
7359 || (GET_CODE (disp) == SYMBOL_REF
7360 && SYMBOL_REF_TLS_MODEL (disp) == 0))
7361 fputs ("(%rip)", file);
7364 else
7366 if (ASSEMBLER_DIALECT == ASM_ATT)
7368 if (disp)
7370 if (flag_pic)
7371 output_pic_addr_const (file, disp, 0);
7372 else if (GET_CODE (disp) == LABEL_REF)
7373 output_asm_label (disp);
7374 else
7375 output_addr_const (file, disp);
7378 putc ('(', file);
7379 if (base)
7380 print_reg (base, 0, file);
7381 if (index)
7383 putc (',', file);
7384 print_reg (index, 0, file);
7385 if (scale != 1)
7386 fprintf (file, ",%d", scale);
7388 putc (')', file);
7390 else
7392 rtx offset = NULL_RTX;
7394 if (disp)
7396 /* Pull out the offset of a symbol; print any symbol itself. */
7397 if (GET_CODE (disp) == CONST
7398 && GET_CODE (XEXP (disp, 0)) == PLUS
7399 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7401 offset = XEXP (XEXP (disp, 0), 1);
7402 disp = gen_rtx_CONST (VOIDmode,
7403 XEXP (XEXP (disp, 0), 0));
7406 if (flag_pic)
7407 output_pic_addr_const (file, disp, 0);
7408 else if (GET_CODE (disp) == LABEL_REF)
7409 output_asm_label (disp);
7410 else if (GET_CODE (disp) == CONST_INT)
7411 offset = disp;
7412 else
7413 output_addr_const (file, disp);
7416 putc ('[', file);
7417 if (base)
7419 print_reg (base, 0, file);
7420 if (offset)
7422 if (INTVAL (offset) >= 0)
7423 putc ('+', file);
7424 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7427 else if (offset)
7428 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7429 else
7430 putc ('0', file);
7432 if (index)
7434 putc ('+', file);
7435 print_reg (index, 0, file);
7436 if (scale != 1)
7437 fprintf (file, "*%d", scale);
7439 putc (']', file);
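
/* A standalone formatting sketch of the two dialects handled above for a
   full base+index*scale+disp operand.  Illustration only, not the GCC code;
   the sketch_* name is hypothetical.  */

#include <stdio.h>

static void
sketch_format_address (char *buf, size_t len, int intel,
                       const char *base, const char *index,
                       int scale, long disp)
{
  if (intel)
    /* Intel style: [base+index*scale+disp]  */
    snprintf (buf, len, "[%s+%s*%d+%ld]", base, index, scale, disp);
  else
    /* AT&T style: disp(%base,%index,scale)  */
    snprintf (buf, len, "%ld(%%%s,%%%s,%d)", disp, base, index, scale);
}

/* E.g. base "ebx", index "ecx", scale 4, disp 8 formats as
   "8(%ebx,%ecx,4)" in AT&T syntax and "[ebx+ecx*4+8]" in Intel syntax.  */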
7444 bool
7445 output_addr_const_extra (FILE *file, rtx x)
7447 rtx op;
7449 if (GET_CODE (x) != UNSPEC)
7450 return false;
7452 op = XVECEXP (x, 0, 0);
7453 switch (XINT (x, 1))
7455 case UNSPEC_GOTTPOFF:
7456 output_addr_const (file, op);
7457 /* FIXME: This might be @TPOFF in Sun ld. */
7458 fputs ("@GOTTPOFF", file);
7459 break;
7460 case UNSPEC_TPOFF:
7461 output_addr_const (file, op);
7462 fputs ("@TPOFF", file);
7463 break;
7464 case UNSPEC_NTPOFF:
7465 output_addr_const (file, op);
7466 if (TARGET_64BIT)
7467 fputs ("@TPOFF", file);
7468 else
7469 fputs ("@NTPOFF", file);
7470 break;
7471 case UNSPEC_DTPOFF:
7472 output_addr_const (file, op);
7473 fputs ("@DTPOFF", file);
7474 break;
7475 case UNSPEC_GOTNTPOFF:
7476 output_addr_const (file, op);
7477 if (TARGET_64BIT)
7478 fputs ("@GOTTPOFF(%rip)", file);
7479 else
7480 fputs ("@GOTNTPOFF", file);
7481 break;
7482 case UNSPEC_INDNTPOFF:
7483 output_addr_const (file, op);
7484 fputs ("@INDNTPOFF", file);
7485 break;
7487 default:
7488 return false;
7491 return true;
7494 /* Split one or more DImode RTL references into pairs of SImode
7495 references. The RTL can be REG, offsettable MEM, integer constant, or
7496 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7497 split and "num" is its length. lo_half and hi_half are output arrays
7498 that parallel "operands". */
7500 void
7501 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7503 while (num--)
7505 rtx op = operands[num];
7507 /* simplify_subreg refuses to split volatile memory addresses,
7508 but we still have to handle them. */
7509 if (GET_CODE (op) == MEM)
7511 lo_half[num] = adjust_address (op, SImode, 0);
7512 hi_half[num] = adjust_address (op, SImode, 4);
7514 else
7516 lo_half[num] = simplify_gen_subreg (SImode, op,
7517 GET_MODE (op) == VOIDmode
7518 ? DImode : GET_MODE (op), 0);
7519 hi_half[num] = simplify_gen_subreg (SImode, op,
7520 GET_MODE (op) == VOIDmode
7521 ? DImode : GET_MODE (op), 4);
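
/* A standalone sketch of the constant case handled by split_di above: a
   64-bit value is split into its low and high 32-bit halves, matching the
   little-endian layout adjust_address uses for the MEM case.  Illustration
   only; the sketch_* name is hypothetical.  */

#include <stdint.h>

static void
sketch_split_di (uint64_t value, uint32_t *lo, uint32_t *hi)
{
  *lo = (uint32_t) (value & 0xffffffffu);       /* bits 0..31  */
  *hi = (uint32_t) (value >> 32);               /* bits 32..63 */
}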
7525 /* Split one or more TImode RTL references into pairs of DImode
7526 references. The RTL can be REG, offsettable MEM, integer constant, or
7527 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
7528 split and "num" is its length. lo_half and hi_half are output arrays
7529 that parallel "operands". */
7531 void
7532 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7534 while (num--)
7536 rtx op = operands[num];
7538 /* simplify_subreg refuses to split volatile memory addresses, but we
7539 still have to handle them. */
7540 if (GET_CODE (op) == MEM)
7542 lo_half[num] = adjust_address (op, DImode, 0);
7543 hi_half[num] = adjust_address (op, DImode, 8);
7545 else
7547 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7548 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7553 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7554 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7555 is the expression of the binary operation. The output may either be
7556 emitted here, or returned to the caller, like all output_* functions.
7558 There is no guarantee that the operands are the same mode, as they
7559 might be within FLOAT or FLOAT_EXTEND expressions. */
7561 #ifndef SYSV386_COMPAT
7562 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7563 wants to fix the assemblers because that causes incompatibility
7564 with gcc. No-one wants to fix gcc because that causes
7565 incompatibility with assemblers... You can use the option of
7566 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7567 #define SYSV386_COMPAT 1
7568 #endif
7570 const char *
7571 output_387_binary_op (rtx insn, rtx *operands)
7573 static char buf[30];
7574 const char *p;
7575 const char *ssep;
7576 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
7578 #ifdef ENABLE_CHECKING
7579 /* Even if we do not want to check the inputs, this documents the input
7580 constraints, which helps in understanding the following code. */
7581 if (STACK_REG_P (operands[0])
7582 && ((REG_P (operands[1])
7583 && REGNO (operands[0]) == REGNO (operands[1])
7584 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7585 || (REG_P (operands[2])
7586 && REGNO (operands[0]) == REGNO (operands[2])
7587 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7588 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7589 ; /* ok */
7590 else
7591 gcc_assert (is_sse);
7592 #endif
7594 switch (GET_CODE (operands[3]))
7596 case PLUS:
7597 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7598 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7599 p = "fiadd";
7600 else
7601 p = "fadd";
7602 ssep = "add";
7603 break;
7605 case MINUS:
7606 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7607 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7608 p = "fisub";
7609 else
7610 p = "fsub";
7611 ssep = "sub";
7612 break;
7614 case MULT:
7615 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7616 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7617 p = "fimul";
7618 else
7619 p = "fmul";
7620 ssep = "mul";
7621 break;
7623 case DIV:
7624 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7625 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7626 p = "fidiv";
7627 else
7628 p = "fdiv";
7629 ssep = "div";
7630 break;
7632 default:
7633 gcc_unreachable ();
7636 if (is_sse)
7638 strcpy (buf, ssep);
7639 if (GET_MODE (operands[0]) == SFmode)
7640 strcat (buf, "ss\t{%2, %0|%0, %2}");
7641 else
7642 strcat (buf, "sd\t{%2, %0|%0, %2}");
7643 return buf;
7645 strcpy (buf, p);
7647 switch (GET_CODE (operands[3]))
7649 case MULT:
7650 case PLUS:
7651 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7653 rtx temp = operands[2];
7654 operands[2] = operands[1];
7655 operands[1] = temp;
7658 /* We know operands[0] == operands[1]. */
7660 if (GET_CODE (operands[2]) == MEM)
7662 p = "%z2\t%2";
7663 break;
7666 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7668 if (STACK_TOP_P (operands[0]))
7669 /* How is it that we are storing to a dead operand[2]?
7670 Well, presumably operands[1] is dead too. We can't
7671 store the result to st(0) as st(0) gets popped on this
7672 instruction. Instead store to operands[2] (which I
7673 think has to be st(1)). st(1) will be popped later.
7674 gcc <= 2.8.1 didn't have this check and generated
7675 assembly code that the Unixware assembler rejected. */
7676 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7677 else
7678 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7679 break;
7682 if (STACK_TOP_P (operands[0]))
7683 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7684 else
7685 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7686 break;
7688 case MINUS:
7689 case DIV:
7690 if (GET_CODE (operands[1]) == MEM)
7692 p = "r%z1\t%1";
7693 break;
7696 if (GET_CODE (operands[2]) == MEM)
7698 p = "%z2\t%2";
7699 break;
7702 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7704 #if SYSV386_COMPAT
7705 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7706 derived assemblers, confusingly reverse the direction of
7707 the operation for fsub{r} and fdiv{r} when the
7708 destination register is not st(0). The Intel assembler
7709 doesn't have this brain damage. Read !SYSV386_COMPAT to
7710 figure out what the hardware really does. */
7711 if (STACK_TOP_P (operands[0]))
7712 p = "{p\t%0, %2|rp\t%2, %0}";
7713 else
7714 p = "{rp\t%2, %0|p\t%0, %2}";
7715 #else
7716 if (STACK_TOP_P (operands[0]))
7717 /* As above for fmul/fadd, we can't store to st(0). */
7718 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7719 else
7720 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7721 #endif
7722 break;
7725 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7727 #if SYSV386_COMPAT
7728 if (STACK_TOP_P (operands[0]))
7729 p = "{rp\t%0, %1|p\t%1, %0}";
7730 else
7731 p = "{p\t%1, %0|rp\t%0, %1}";
7732 #else
7733 if (STACK_TOP_P (operands[0]))
7734 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7735 else
7736 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7737 #endif
7738 break;
7741 if (STACK_TOP_P (operands[0]))
7743 if (STACK_TOP_P (operands[1]))
7744 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7745 else
7746 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7747 break;
7749 else if (STACK_TOP_P (operands[1]))
7751 #if SYSV386_COMPAT
7752 p = "{\t%1, %0|r\t%0, %1}";
7753 #else
7754 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7755 #endif
7757 else
7759 #if SYSV386_COMPAT
7760 p = "{r\t%2, %0|\t%0, %2}";
7761 #else
7762 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7763 #endif
7765 break;
7767 default:
7768 gcc_unreachable ();
7771 strcat (buf, p);
7772 return buf;
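/* For example, a DFmode PLUS with SSE operands comes out as
   "addsd\t{%2, %0|%0, %2}", while the x87 path produces "fadd"/"faddp"
   (or the reversed fsub/fdiv forms) with operand order and popping chosen
   by the cases above.  */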
7775 /* Return needed mode for entity in optimize_mode_switching pass. */
7778 ix86_mode_needed (int entity, rtx insn)
7780 enum attr_i387_cw mode;
7782 /* The mode UNINITIALIZED is used to store the control word after a
7783 function call or ASM pattern. The mode ANY specifies that the function
7784 has no requirements on the control word and makes no changes in the
7785 bits we are interested in. */
7787 if (CALL_P (insn)
7788 || (NONJUMP_INSN_P (insn)
7789 && (asm_noperands (PATTERN (insn)) >= 0
7790 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
7791 return I387_CW_UNINITIALIZED;
7793 if (recog_memoized (insn) < 0)
7794 return I387_CW_ANY;
7796 mode = get_attr_i387_cw (insn);
7798 switch (entity)
7800 case I387_TRUNC:
7801 if (mode == I387_CW_TRUNC)
7802 return mode;
7803 break;
7805 case I387_FLOOR:
7806 if (mode == I387_CW_FLOOR)
7807 return mode;
7808 break;
7810 case I387_CEIL:
7811 if (mode == I387_CW_CEIL)
7812 return mode;
7813 break;
7815 case I387_MASK_PM:
7816 if (mode == I387_CW_MASK_PM)
7817 return mode;
7818 break;
7820 default:
7821 gcc_unreachable ();
7824 return I387_CW_ANY;
7827 /* Output code to initialize the control word copies used by the trunc?f?i
7828 and rounding patterns. CURRENT_MODE is set to the current control word,
7829 while NEW_MODE is set to the new control word. */
7831 void
7832 emit_i387_cw_initialization (int mode)
7834 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
7835 rtx new_mode;
7837 int slot;
7839 rtx reg = gen_reg_rtx (HImode);
7841 emit_insn (gen_x86_fnstcw_1 (stored_mode));
7842 emit_move_insn (reg, stored_mode);
7844 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
7846 switch (mode)
7848 case I387_CW_TRUNC:
7849 /* round toward zero (truncate) */
7850 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
7851 slot = SLOT_CW_TRUNC;
7852 break;
7854 case I387_CW_FLOOR:
7855 /* round down toward -oo */
7856 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7857 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
7858 slot = SLOT_CW_FLOOR;
7859 break;
7861 case I387_CW_CEIL:
7862 /* round up toward +oo */
7863 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7864 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
7865 slot = SLOT_CW_CEIL;
7866 break;
7868 case I387_CW_MASK_PM:
7869 /* mask precision exception for nearbyint() */
7870 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7871 slot = SLOT_CW_MASK_PM;
7872 break;
7874 default:
7875 gcc_unreachable ();
7878 else
7880 switch (mode)
7882 case I387_CW_TRUNC:
7883 /* round toward zero (truncate) */
7884 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7885 slot = SLOT_CW_TRUNC;
7886 break;
7888 case I387_CW_FLOOR:
7889 /* round down toward -oo */
7890 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
7891 slot = SLOT_CW_FLOOR;
7892 break;
7894 case I387_CW_CEIL:
7895 /* round up toward +oo */
7896 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
7897 slot = SLOT_CW_CEIL;
7898 break;
7900 case I387_CW_MASK_PM:
7901 /* mask precision exception for nearbyint() */
7902 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7903 slot = SLOT_CW_MASK_PM;
7904 break;
7906 default:
7907 gcc_unreachable ();
7911 gcc_assert (slot < MAX_386_STACK_LOCALS);
7913 new_mode = assign_386_stack_local (HImode, slot);
7914 emit_move_insn (new_mode, reg);
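/* The constants used above follow the i387 control word layout: bits 10-11
   form the rounding-control field (00 = to nearest, 01 = down, 10 = up,
   11 = toward zero/truncate), and bit 5 (0x0020) is the precision-exception
   mask bit used for nearbyint.  */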
7917 /* Output code for INSN to convert a float to a signed int. OPERANDS
7918 are the insn operands. The output may be [HSD]Imode and the input
7919 operand may be [SDX]Fmode. */
7921 const char *
7922 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
7924 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7925 int dimode_p = GET_MODE (operands[0]) == DImode;
7926 int round_mode = get_attr_i387_cw (insn);
7928 /* Jump through a hoop or two for DImode, since the hardware has no
7929 non-popping instruction. We used to do this a different way, but
7930 that was somewhat fragile and broke with post-reload splitters. */
7931 if ((dimode_p || fisttp) && !stack_top_dies)
7932 output_asm_insn ("fld\t%y1", operands);
7934 gcc_assert (STACK_TOP_P (operands[1]));
7935 gcc_assert (GET_CODE (operands[0]) == MEM);
7937 if (fisttp)
7938 output_asm_insn ("fisttp%z0\t%0", operands);
7939 else
7941 if (round_mode != I387_CW_ANY)
7942 output_asm_insn ("fldcw\t%3", operands);
7943 if (stack_top_dies || dimode_p)
7944 output_asm_insn ("fistp%z0\t%0", operands);
7945 else
7946 output_asm_insn ("fist%z0\t%0", operands);
7947 if (round_mode != I387_CW_ANY)
7948 output_asm_insn ("fldcw\t%2", operands);
7951 return "";
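/* Roughly, for a DImode result without fisttp and with a rounding-mode
   change required, the sequence emitted above is
       fldcw  %3        (load the truncating control word)
       fistp%z0 %0
       fldcw  %2        (restore the original control word)
   preceded by "fld %y1" when the value must stay live, since fistp
   always pops st(0).  */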
7954 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7955 should be used. UNORDERED_P is true when fucom should be used. */
7957 const char *
7958 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
7960 int stack_top_dies;
7961 rtx cmp_op0, cmp_op1;
7962 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
7964 if (eflags_p)
7966 cmp_op0 = operands[0];
7967 cmp_op1 = operands[1];
7969 else
7971 cmp_op0 = operands[1];
7972 cmp_op1 = operands[2];
7975 if (is_sse)
7977 if (GET_MODE (operands[0]) == SFmode)
7978 if (unordered_p)
7979 return "ucomiss\t{%1, %0|%0, %1}";
7980 else
7981 return "comiss\t{%1, %0|%0, %1}";
7982 else
7983 if (unordered_p)
7984 return "ucomisd\t{%1, %0|%0, %1}";
7985 else
7986 return "comisd\t{%1, %0|%0, %1}";
7989 gcc_assert (STACK_TOP_P (cmp_op0));
7991 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7993 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
7995 if (stack_top_dies)
7997 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
7998 return TARGET_USE_FFREEP ? "ffreep\t%y1" : "fstp\t%y1";
8000 else
8001 return "ftst\n\tfnstsw\t%0";
8004 if (STACK_REG_P (cmp_op1)
8005 && stack_top_dies
8006 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8007 && REGNO (cmp_op1) != FIRST_STACK_REG)
8009 /* If the top of the 387 stack dies, and the other operand
8010 is also a stack register that dies, then this must be an
8011 `fcompp' float compare. */
8013 if (eflags_p)
8015 /* There is no double popping fcomi variant. Fortunately,
8016 eflags is immune from the fstp's cc clobbering. */
8017 if (unordered_p)
8018 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8019 else
8020 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8021 return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
8023 else
8025 if (unordered_p)
8026 return "fucompp\n\tfnstsw\t%0";
8027 else
8028 return "fcompp\n\tfnstsw\t%0";
8031 else
8033 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8035 static const char * const alt[16] =
8037 "fcom%z2\t%y2\n\tfnstsw\t%0",
8038 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8039 "fucom%z2\t%y2\n\tfnstsw\t%0",
8040 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8042 "ficom%z2\t%y2\n\tfnstsw\t%0",
8043 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8044 NULL,
8045 NULL,
8047 "fcomi\t{%y1, %0|%0, %y1}",
8048 "fcomip\t{%y1, %0|%0, %y1}",
8049 "fucomi\t{%y1, %0|%0, %y1}",
8050 "fucomip\t{%y1, %0|%0, %y1}",
8052 NULL,
8053 NULL,
8054 NULL,
8055 NULL
8058 int mask;
8059 const char *ret;
8061 mask = eflags_p << 3;
8062 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
8063 mask |= unordered_p << 1;
8064 mask |= stack_top_dies;
8066 gcc_assert (mask < 16);
8067 ret = alt[mask];
8068 gcc_assert (ret);
8070 return ret;
8074 void
8075 ix86_output_addr_vec_elt (FILE *file, int value)
8077 const char *directive = ASM_LONG;
8079 #ifdef ASM_QUAD
8080 if (TARGET_64BIT)
8081 directive = ASM_QUAD;
8082 #else
8083 gcc_assert (!TARGET_64BIT);
8084 #endif
8086 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8089 void
8090 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8092 if (TARGET_64BIT)
8093 fprintf (file, "%s%s%d-%s%d\n",
8094 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8095 else if (HAVE_AS_GOTOFF_IN_DATA)
8096 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8097 #if TARGET_MACHO
8098 else if (TARGET_MACHO)
8100 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8101 machopic_output_function_base_name (file);
8102 fprintf(file, "\n");
8104 #endif
8105 else
8106 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8107 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
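/* Assuming the usual definitions ASM_LONG == "\t.long\t" and LPREFIX == ".L",
   a 64-bit entry prints as e.g. ".long .L4-.L2" and a 32-bit GOTOFF entry
   as ".long .L4@GOTOFF"; the label numbers here are only illustrative.  */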
8110 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8111 for the target. */
8113 void
8114 ix86_expand_clear (rtx dest)
8116 rtx tmp;
8118 /* We play register width games, which are only valid after reload. */
8119 gcc_assert (reload_completed);
8121 /* Avoid HImode and its attendant prefix byte. */
8122 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8123 dest = gen_rtx_REG (SImode, REGNO (dest));
8125 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8127 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8128 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8130 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8131 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8134 emit_insn (tmp);
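/* The xor form clobbers the flags, hence the PARALLEL with a CLOBBER of
   hard register 17 (the flags register), matching the movsi_xor and
   movdi_xor_rex64 patterns mentioned above; otherwise a plain "mov $0"
   style SET is emitted.  */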
8137 /* X is an unchanging MEM. If it is a constant pool reference, return
8138 the constant pool rtx, else NULL. */
8141 maybe_get_pool_constant (rtx x)
8143 x = ix86_delegitimize_address (XEXP (x, 0));
8145 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8146 return get_pool_constant (x);
8148 return NULL_RTX;
8151 void
8152 ix86_expand_move (enum machine_mode mode, rtx operands[])
8154 int strict = (reload_in_progress || reload_completed);
8155 rtx op0, op1;
8156 enum tls_model model;
8158 op0 = operands[0];
8159 op1 = operands[1];
8161 if (GET_CODE (op1) == SYMBOL_REF)
8163 model = SYMBOL_REF_TLS_MODEL (op1);
8164 if (model)
8166 op1 = legitimize_tls_address (op1, model, true);
8167 op1 = force_operand (op1, op0);
8168 if (op1 == op0)
8169 return;
8172 else if (GET_CODE (op1) == CONST
8173 && GET_CODE (XEXP (op1, 0)) == PLUS
8174 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
8176 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
8177 if (model)
8179 rtx addend = XEXP (XEXP (op1, 0), 1);
8180 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
8181 op1 = force_operand (op1, NULL);
8182 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
8183 op0, 1, OPTAB_DIRECT);
8184 if (op1 == op0)
8185 return;
8189 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8191 #if TARGET_MACHO
8192 if (MACHOPIC_PURE)
8194 rtx temp = ((reload_in_progress
8195 || ((op0 && GET_CODE (op0) == REG)
8196 && mode == Pmode))
8197 ? op0 : gen_reg_rtx (Pmode));
8198 op1 = machopic_indirect_data_reference (op1, temp);
8199 op1 = machopic_legitimize_pic_address (op1, mode,
8200 temp == op1 ? 0 : temp);
8202 else if (MACHOPIC_INDIRECT)
8203 op1 = machopic_indirect_data_reference (op1, 0);
8204 if (op0 == op1)
8205 return;
8206 #else
8207 if (GET_CODE (op0) == MEM)
8208 op1 = force_reg (Pmode, op1);
8209 else
8210 op1 = legitimize_address (op1, op1, Pmode);
8211 #endif /* TARGET_MACHO */
8213 else
8215 if (GET_CODE (op0) == MEM
8216 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8217 || !push_operand (op0, mode))
8218 && GET_CODE (op1) == MEM)
8219 op1 = force_reg (mode, op1);
8221 if (push_operand (op0, mode)
8222 && ! general_no_elim_operand (op1, mode))
8223 op1 = copy_to_mode_reg (mode, op1);
8225 /* Force large constants in 64-bit compilation into a register
8226 to get them CSEd. */
8227 if (TARGET_64BIT && mode == DImode
8228 && immediate_operand (op1, mode)
8229 && !x86_64_zext_immediate_operand (op1, VOIDmode)
8230 && !register_operand (op0, mode)
8231 && optimize && !reload_completed && !reload_in_progress)
8232 op1 = copy_to_mode_reg (mode, op1);
8234 if (FLOAT_MODE_P (mode))
8236 /* If we are loading a floating point constant to a register,
8237 force the value to memory now, since we'll get better code
8238 out of the back end. */
8240 if (strict)
8242 else if (GET_CODE (op1) == CONST_DOUBLE)
8244 op1 = validize_mem (force_const_mem (mode, op1));
8245 if (!register_operand (op0, mode))
8247 rtx temp = gen_reg_rtx (mode);
8248 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8249 emit_move_insn (op0, temp);
8250 return;
8256 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8259 void
8260 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8262 rtx op0 = operands[0], op1 = operands[1];
8264 /* Force constants other than zero into memory. We do not know how
8265 the instructions used to build constants modify the upper 64 bits
8266 of the register; once we have that information we may be able
8267 to handle some of them more efficiently. */
8268 if ((reload_in_progress | reload_completed) == 0
8269 && register_operand (op0, mode)
8270 && CONSTANT_P (op1) && op1 != CONST0_RTX (mode))
8271 op1 = validize_mem (force_const_mem (mode, op1));
8273 /* Make operand1 a register if it isn't already. */
8274 if (!no_new_pseudos
8275 && !register_operand (op0, mode)
8276 && !register_operand (op1, mode))
8278 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
8279 return;
8282 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8285 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
8286 straight to ix86_expand_vector_move. */
8288 void
8289 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
8291 rtx op0, op1, m;
8293 op0 = operands[0];
8294 op1 = operands[1];
8296 if (MEM_P (op1))
8298 /* If we're optimizing for size, movups is the smallest. */
8299 if (optimize_size)
8301 op0 = gen_lowpart (V4SFmode, op0);
8302 op1 = gen_lowpart (V4SFmode, op1);
8303 emit_insn (gen_sse_movups (op0, op1));
8304 return;
8307 /* ??? If we have typed data, then it would appear that using
8308 movdqu is the only way to get unaligned data loaded with
8309 integer type. */
8310 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
8312 op0 = gen_lowpart (V16QImode, op0);
8313 op1 = gen_lowpart (V16QImode, op1);
8314 emit_insn (gen_sse2_movdqu (op0, op1));
8315 return;
8318 if (TARGET_SSE2 && mode == V2DFmode)
8320 rtx zero;
8322 /* When SSE registers are split into halves, we can avoid
8323 writing to the top half twice. */
8324 if (TARGET_SSE_SPLIT_REGS)
8326 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
8327 zero = op0;
8329 else
8331 /* ??? Not sure about the best option for the Intel chips.
8332 The following would seem to satisfy; the register is
8333 entirely cleared, breaking the dependency chain. We
8334 then store to the upper half, with a dependency depth
8335 of one. A rumor has it that Intel recommends two movsd
8336 followed by an unpacklpd, but this is unconfirmed. And
8337 given that the dependency depth of the unpacklpd would
8338 still be one, I'm not sure why this would be better. */
8339 zero = CONST0_RTX (V2DFmode);
8342 m = adjust_address (op1, DFmode, 0);
8343 emit_insn (gen_sse2_loadlpd (op0, zero, m));
8344 m = adjust_address (op1, DFmode, 8);
8345 emit_insn (gen_sse2_loadhpd (op0, op0, m));
8347 else
8349 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
8350 emit_move_insn (op0, CONST0_RTX (mode));
8351 else
8352 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
8354 if (mode != V4SFmode)
8355 op0 = gen_lowpart (V4SFmode, op0);
8356 m = adjust_address (op1, V2SFmode, 0);
8357 emit_insn (gen_sse_loadlps (op0, op0, m));
8358 m = adjust_address (op1, V2SFmode, 8);
8359 emit_insn (gen_sse_loadhps (op0, op0, m));
8362 else if (MEM_P (op0))
8364 /* If we're optimizing for size, movups is the smallest. */
8365 if (optimize_size)
8367 op0 = gen_lowpart (V4SFmode, op0);
8368 op1 = gen_lowpart (V4SFmode, op1);
8369 emit_insn (gen_sse_movups (op0, op1));
8370 return;
8373 /* ??? Similar to above, only less clear because of quote
8374 typeless stores unquote. */
8375 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
8376 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
8378 op0 = gen_lowpart (V16QImode, op0);
8379 op1 = gen_lowpart (V16QImode, op1);
8380 emit_insn (gen_sse2_movdqu (op0, op1));
8381 return;
8384 if (TARGET_SSE2 && mode == V2DFmode)
8386 m = adjust_address (op0, DFmode, 0);
8387 emit_insn (gen_sse2_storelpd (m, op1));
8388 m = adjust_address (op0, DFmode, 8);
8389 emit_insn (gen_sse2_storehpd (m, op1));
8391 else
8393 if (mode != V4SFmode)
8394 op1 = gen_lowpart (V4SFmode, op1);
8395 m = adjust_address (op0, V2SFmode, 0);
8396 emit_insn (gen_sse_storelps (m, op1));
8397 m = adjust_address (op0, V2SFmode, 8);
8398 emit_insn (gen_sse_storehps (m, op1));
8401 else
8402 gcc_unreachable ();
8405 /* Expand a push in MODE. This is some mode for which we do not support
8406 proper push instructions, at least from the registers that we expect
8407 the value to live in. */
8409 void
8410 ix86_expand_push (enum machine_mode mode, rtx x)
8412 rtx tmp;
8414 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
8415 GEN_INT (-GET_MODE_SIZE (mode)),
8416 stack_pointer_rtx, 1, OPTAB_DIRECT);
8417 if (tmp != stack_pointer_rtx)
8418 emit_move_insn (stack_pointer_rtx, tmp);
8420 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
8421 emit_move_insn (tmp, x);
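/* Sketch of the result on a 32-bit target, assuming a 4-byte mode:
       subl $4, %esp
       movl <x>, (%esp)
   i.e. an explicit stack-pointer adjustment followed by a store, used
   because no suitable push instruction exists for this case.  */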
8424 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
8425 destination to use for the operation. If different from the true
8426 destination in operands[0], a copy operation will be required. */
8429 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
8430 rtx operands[])
8432 int matching_memory;
8433 rtx src1, src2, dst;
8435 dst = operands[0];
8436 src1 = operands[1];
8437 src2 = operands[2];
8439 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8440 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8441 && (rtx_equal_p (dst, src2)
8442 || immediate_operand (src1, mode)))
8444 rtx temp = src1;
8445 src1 = src2;
8446 src2 = temp;
8449 /* If the destination is memory, and we do not have matching source
8450 operands, do things in registers. */
8451 matching_memory = 0;
8452 if (GET_CODE (dst) == MEM)
8454 if (rtx_equal_p (dst, src1))
8455 matching_memory = 1;
8456 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8457 && rtx_equal_p (dst, src2))
8458 matching_memory = 2;
8459 else
8460 dst = gen_reg_rtx (mode);
8463 /* Both source operands cannot be in memory. */
8464 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8466 if (matching_memory != 2)
8467 src2 = force_reg (mode, src2);
8468 else
8469 src1 = force_reg (mode, src1);
8472 /* If the operation is not commutative, source 1 cannot be a constant
8473 or non-matching memory. */
8474 if ((CONSTANT_P (src1)
8475 || (!matching_memory && GET_CODE (src1) == MEM))
8476 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8477 src1 = force_reg (mode, src1);
8479 src1 = operands[1] = src1;
8480 src2 = operands[2] = src2;
8481 return dst;
8484 /* Similarly, but assume that the destination has already been
8485 set up properly. */
8487 void
8488 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
8489 enum machine_mode mode, rtx operands[])
8491 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
8492 gcc_assert (dst == operands[0]);
8495 /* Attempt to expand a binary operator. Make the expansion closer to the
8496 actual machine than just general_operand, which would allow 3 separate
8497 memory references (one output, two inputs) in a single insn. */
8499 void
8500 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8501 rtx operands[])
8503 rtx src1, src2, dst, op, clob;
8505 dst = ix86_fixup_binary_operands (code, mode, operands);
8506 src1 = operands[1];
8507 src2 = operands[2];
8509 /* Emit the instruction. */
8511 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8512 if (reload_in_progress)
8514 /* Reload doesn't know about the flags register, and doesn't know that
8515 it doesn't want to clobber it. We can only do this with PLUS. */
8516 gcc_assert (code == PLUS);
8517 emit_insn (op);
8519 else
8521 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8522 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8525 /* Fix up the destination if needed. */
8526 if (dst != operands[0])
8527 emit_move_insn (operands[0], dst);
8530 /* Return TRUE or FALSE depending on whether the binary operator meets the
8531 appropriate constraints. */
8534 ix86_binary_operator_ok (enum rtx_code code,
8535 enum machine_mode mode ATTRIBUTE_UNUSED,
8536 rtx operands[3])
8538 /* Both source operands cannot be in memory. */
8539 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8540 return 0;
8541 /* If the operation is not commutative, source 1 cannot be a constant. */
8542 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8543 return 0;
8544 /* If the destination is memory, we must have a matching source operand. */
8545 if (GET_CODE (operands[0]) == MEM
8546 && ! (rtx_equal_p (operands[0], operands[1])
8547 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8548 && rtx_equal_p (operands[0], operands[2]))))
8549 return 0;
8550 /* If the operation is not commutative and source 1 is memory, we must
8551 have a matching destination. */
8552 if (GET_CODE (operands[1]) == MEM
8553 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
8554 && ! rtx_equal_p (operands[0], operands[1]))
8555 return 0;
8556 return 1;
8559 /* Attempt to expand a unary operator. Make the expansion closer to the
8560 actual machine than just general_operand, which would allow 2 separate
8561 memory references (one output, one input) in a single insn. */
8563 void
8564 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8565 rtx operands[])
8567 int matching_memory;
8568 rtx src, dst, op, clob;
8570 dst = operands[0];
8571 src = operands[1];
8573 /* If the destination is memory, and we do not have matching source
8574 operands, do things in registers. */
8575 matching_memory = 0;
8576 if (MEM_P (dst))
8578 if (rtx_equal_p (dst, src))
8579 matching_memory = 1;
8580 else
8581 dst = gen_reg_rtx (mode);
8584 /* When the source operand is memory, the destination must match. */
8585 if (MEM_P (src) && !matching_memory)
8586 src = force_reg (mode, src);
8588 /* Emit the instruction. */
8590 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8591 if (reload_in_progress || code == NOT)
8593 /* Reload doesn't know about the flags register, and doesn't know that
8594 it doesn't want to clobber it. */
8595 gcc_assert (code == NOT);
8596 emit_insn (op);
8598 else
8600 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8601 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8604 /* Fix up the destination if needed. */
8605 if (dst != operands[0])
8606 emit_move_insn (operands[0], dst);
8609 /* Return TRUE or FALSE depending on whether the unary operator meets the
8610 appropriate constraints. */
8613 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8614 enum machine_mode mode ATTRIBUTE_UNUSED,
8615 rtx operands[2] ATTRIBUTE_UNUSED)
8617 /* If one of operands is memory, source and destination must match. */
8618 if ((GET_CODE (operands[0]) == MEM
8619 || GET_CODE (operands[1]) == MEM)
8620 && ! rtx_equal_p (operands[0], operands[1]))
8621 return FALSE;
8622 return TRUE;
8625 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
8626 Create a mask for the sign bit in MODE for an SSE register. If VECT is
8627 true, then replicate the mask for all elements of the vector register.
8628 If INVERT is true, then create a mask excluding the sign bit. */
8631 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
8633 enum machine_mode vec_mode;
8634 HOST_WIDE_INT hi, lo;
8635 int shift = 63;
8636 rtvec v;
8637 rtx mask;
8639 /* Find the sign bit, sign extended to 2*HWI. */
8640 if (mode == SFmode)
8641 lo = 0x80000000, hi = lo < 0;
8642 else if (HOST_BITS_PER_WIDE_INT >= 64)
8643 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
8644 else
8645 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
8647 if (invert)
8648 lo = ~lo, hi = ~hi;
8650 /* Force this value into the low part of a fp vector constant. */
8651 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
8652 mask = gen_lowpart (mode, mask);
8654 if (mode == SFmode)
8656 if (vect)
8657 v = gen_rtvec (4, mask, mask, mask, mask);
8658 else
8659 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
8660 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
8661 vec_mode = V4SFmode;
8663 else
8665 if (vect)
8666 v = gen_rtvec (2, mask, mask);
8667 else
8668 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
8669 vec_mode = V2DFmode;
8672 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
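/* The (non-inverted) mask is a vector constant whose selected elements have
   only the sign bit set: bit pattern 0x8000000000000000 per DFmode element
   or 0x80000000 per SFmode element.  With INVERT everything but the sign
   bit is set instead, and with VECT the mask covers all elements rather
   than just the lowest one.  */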
8675 /* Generate code for floating point ABS or NEG. */
8677 void
8678 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
8679 rtx operands[])
8681 rtx mask, set, use, clob, dst, src;
8682 bool matching_memory;
8683 bool use_sse = false;
8684 bool vector_mode = VECTOR_MODE_P (mode);
8685 enum machine_mode elt_mode = mode;
8687 if (vector_mode)
8689 elt_mode = GET_MODE_INNER (mode);
8690 use_sse = true;
8692 else if (TARGET_SSE_MATH)
8693 use_sse = SSE_FLOAT_MODE_P (mode);
8695 /* NEG and ABS performed with SSE use bitwise mask operations.
8696 Create the appropriate mask now. */
8697 if (use_sse)
8698 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
8699 else
8701 /* When not using SSE, we don't use the mask, but prefer to keep the
8702 same general form of the insn pattern to reduce duplication when
8703 it comes time to split. */
8704 mask = const0_rtx;
8707 dst = operands[0];
8708 src = operands[1];
8710 /* If the destination is memory, and we don't have matching source
8711 operands, do things in registers. */
8712 matching_memory = false;
8713 if (MEM_P (dst))
8715 if (rtx_equal_p (dst, src))
8716 matching_memory = true;
8717 else
8718 dst = gen_reg_rtx (mode);
8720 if (MEM_P (src) && !matching_memory)
8721 src = force_reg (mode, src);
8723 if (vector_mode)
8725 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
8726 set = gen_rtx_SET (VOIDmode, dst, set);
8727 emit_insn (set);
8729 else
8731 set = gen_rtx_fmt_e (code, mode, src);
8732 set = gen_rtx_SET (VOIDmode, dst, set);
8733 use = gen_rtx_USE (VOIDmode, mask);
8734 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8735 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));
8738 if (dst != operands[0])
8739 emit_move_insn (operands[0], dst);
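/* For vector modes the insn emitted above is already the final bitwise
   form: an XOR with the sign-bit mask for NEG, an AND with the inverted
   mask for ABS.  For scalar modes the mask is carried only as a USE,
   keeping one pattern shape so the splitter can later choose between the
   SSE bitwise form and the i387 fchs/fabs instructions.  */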
8742 /* Expand a copysign operation. Special case operand 0 being a constant. */
8744 void
8745 ix86_expand_copysign (rtx operands[])
8747 enum machine_mode mode, vmode;
8748 rtx dest, op0, op1, mask, nmask;
8750 dest = operands[0];
8751 op0 = operands[1];
8752 op1 = operands[2];
8754 mode = GET_MODE (dest);
8755 vmode = mode == SFmode ? V4SFmode : V2DFmode;
8757 if (GET_CODE (op0) == CONST_DOUBLE)
8759 rtvec v;
8761 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
8762 op0 = simplify_unary_operation (ABS, mode, op0, mode);
8764 if (op0 == CONST0_RTX (mode))
8765 op0 = CONST0_RTX (vmode);
8766 else
8768 if (mode == SFmode)
8769 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
8770 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
8771 else
8772 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
8773 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
8776 mask = ix86_build_signbit_mask (mode, 0, 0);
8778 if (mode == SFmode)
8779 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
8780 else
8781 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
8783 else
8785 nmask = ix86_build_signbit_mask (mode, 0, 1);
8786 mask = ix86_build_signbit_mask (mode, 0, 0);
8788 if (mode == SFmode)
8789 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
8790 else
8791 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
8795 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
8796 be a constant, and so has already been expanded into a vector constant. */
8798 void
8799 ix86_split_copysign_const (rtx operands[])
8801 enum machine_mode mode, vmode;
8802 rtx dest, op0, op1, mask, x;
8804 dest = operands[0];
8805 op0 = operands[1];
8806 op1 = operands[2];
8807 mask = operands[3];
8809 mode = GET_MODE (dest);
8810 vmode = GET_MODE (mask);
8812 dest = simplify_gen_subreg (vmode, dest, mode, 0);
8813 x = gen_rtx_AND (vmode, dest, mask);
8814 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8816 if (op0 != CONST0_RTX (vmode))
8818 x = gen_rtx_IOR (vmode, dest, op0);
8819 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8823 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
8824 so we have to do two masks. */
8826 void
8827 ix86_split_copysign_var (rtx operands[])
8829 enum machine_mode mode, vmode;
8830 rtx dest, scratch, op0, op1, mask, nmask, x;
8832 dest = operands[0];
8833 scratch = operands[1];
8834 op0 = operands[2];
8835 op1 = operands[3];
8836 nmask = operands[4];
8837 mask = operands[5];
8839 mode = GET_MODE (dest);
8840 vmode = GET_MODE (mask);
8842 if (rtx_equal_p (op0, op1))
8844 /* Shouldn't happen often (it's useless, obviously), but when it does
8845 we'd generate incorrect code if we continue below. */
8846 emit_move_insn (dest, op0);
8847 return;
8850 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
8852 gcc_assert (REGNO (op1) == REGNO (scratch));
8854 x = gen_rtx_AND (vmode, scratch, mask);
8855 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
8857 dest = mask;
8858 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
8859 x = gen_rtx_NOT (vmode, dest);
8860 x = gen_rtx_AND (vmode, x, op0);
8861 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8863 else
8865 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
8867 x = gen_rtx_AND (vmode, scratch, mask);
8869 else /* alternative 2,4 */
8871 gcc_assert (REGNO (mask) == REGNO (scratch));
8872 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
8873 x = gen_rtx_AND (vmode, scratch, op1);
8875 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
8877 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
8879 dest = simplify_gen_subreg (vmode, op0, mode, 0);
8880 x = gen_rtx_AND (vmode, dest, nmask);
8882 else /* alternative 3,4 */
8884 gcc_assert (REGNO (nmask) == REGNO (dest));
8885 dest = nmask;
8886 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
8887 x = gen_rtx_AND (vmode, dest, op0);
8889 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8892 x = gen_rtx_IOR (vmode, dest, scratch);
8893 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
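/* In every alternative the net effect is
       dest = (op0 & nmask) | (op1 & mask)
   i.e. the magnitude of op0 combined with the sign of op1, using the two
   masks prepared by ix86_expand_copysign.  */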
8896 /* Return TRUE or FALSE depending on whether the first SET in INSN
8897 has source and destination with matching CC modes and whether the
8898 CC mode is at least as constrained as REQ_MODE. */
8901 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8903 rtx set;
8904 enum machine_mode set_mode;
8906 set = PATTERN (insn);
8907 if (GET_CODE (set) == PARALLEL)
8908 set = XVECEXP (set, 0, 0);
8909 gcc_assert (GET_CODE (set) == SET);
8910 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
8912 set_mode = GET_MODE (SET_DEST (set));
8913 switch (set_mode)
8915 case CCNOmode:
8916 if (req_mode != CCNOmode
8917 && (req_mode != CCmode
8918 || XEXP (SET_SRC (set), 1) != const0_rtx))
8919 return 0;
8920 break;
8921 case CCmode:
8922 if (req_mode == CCGCmode)
8923 return 0;
8924 /* FALLTHRU */
8925 case CCGCmode:
8926 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8927 return 0;
8928 /* FALLTHRU */
8929 case CCGOCmode:
8930 if (req_mode == CCZmode)
8931 return 0;
8932 /* FALLTHRU */
8933 case CCZmode:
8934 break;
8936 default:
8937 gcc_unreachable ();
8940 return (GET_MODE (SET_SRC (set)) == set_mode);
8943 /* Generate insn patterns to do an integer compare of OPERANDS. */
8945 static rtx
8946 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8948 enum machine_mode cmpmode;
8949 rtx tmp, flags;
8951 cmpmode = SELECT_CC_MODE (code, op0, op1);
8952 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8954 /* This is very simple, but making the interface the same as in the
8955 FP case makes the rest of the code easier. */
8956 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8957 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8959 /* Return the test that should be put into the flags user, i.e.
8960 the bcc, scc, or cmov instruction. */
8961 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8964 /* Figure out whether to use ordered or unordered fp comparisons.
8965 Return the appropriate mode to use. */
8967 enum machine_mode
8968 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8970 /* ??? In order to make all comparisons reversible, we do all comparisons
8971 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8972 all forms of trapping and nontrapping comparisons, we can make inequality
8973 comparisons trapping again, since that results in better code when using
8974 FCOM based compares. */
8975 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8978 enum machine_mode
8979 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8981 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8982 return ix86_fp_compare_mode (code);
8983 switch (code)
8985 /* Only zero flag is needed. */
8986 case EQ: /* ZF=0 */
8987 case NE: /* ZF!=0 */
8988 return CCZmode;
8989 /* Codes needing carry flag. */
8990 case GEU: /* CF=0 */
8991 case GTU: /* CF=0 & ZF=0 */
8992 case LTU: /* CF=1 */
8993 case LEU: /* CF=1 | ZF=1 */
8994 return CCmode;
8995 /* Codes possibly doable only with sign flag when
8996 comparing against zero. */
8997 case GE: /* SF=OF or SF=0 */
8998 case LT: /* SF<>OF or SF=1 */
8999 if (op1 == const0_rtx)
9000 return CCGOCmode;
9001 else
9002 /* For other cases Carry flag is not required. */
9003 return CCGCmode;
9004 /* Codes doable only with the sign flag when comparing
9005 against zero, but we lack a jump instruction for it,
9006 so we need to use relational tests against overflow,
9007 which thus needs to be zero. */
9008 case GT: /* ZF=0 & SF=OF */
9009 case LE: /* ZF=1 | SF<>OF */
9010 if (op1 == const0_rtx)
9011 return CCNOmode;
9012 else
9013 return CCGCmode;
9014 /* The strcmp pattern does (use flags) and combine may ask us for the
9015 proper mode. */
9016 case USE:
9017 return CCmode;
9018 default:
9019 gcc_unreachable ();
9023 /* Return the fixed registers used for condition codes. */
9025 static bool
9026 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
9028 *p1 = FLAGS_REG;
9029 *p2 = FPSR_REG;
9030 return true;
9033 /* If two condition code modes are compatible, return a condition code
9034 mode which is compatible with both. Otherwise, return
9035 VOIDmode. */
9037 static enum machine_mode
9038 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
9040 if (m1 == m2)
9041 return m1;
9043 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
9044 return VOIDmode;
9046 if ((m1 == CCGCmode && m2 == CCGOCmode)
9047 || (m1 == CCGOCmode && m2 == CCGCmode))
9048 return CCGCmode;
9050 switch (m1)
9052 default:
9053 gcc_unreachable ();
9055 case CCmode:
9056 case CCGCmode:
9057 case CCGOCmode:
9058 case CCNOmode:
9059 case CCZmode:
9060 switch (m2)
9062 default:
9063 return VOIDmode;
9065 case CCmode:
9066 case CCGCmode:
9067 case CCGOCmode:
9068 case CCNOmode:
9069 case CCZmode:
9070 return CCmode;
9073 case CCFPmode:
9074 case CCFPUmode:
9075 /* These are only compatible with themselves, which we already
9076 checked above. */
9077 return VOIDmode;
9081 /* Return true if we should use an FCOMI instruction for this fp comparison. */
9084 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
9086 enum rtx_code swapped_code = swap_condition (code);
9087 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
9088 || (ix86_fp_comparison_cost (swapped_code)
9089 == ix86_fp_comparison_fcomi_cost (swapped_code)));
9092 /* Swap, force into registers, or otherwise massage the two operands
9093 of an fp comparison. The operands are updated in place; the new
9094 comparison code is returned. */
9096 static enum rtx_code
9097 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
9099 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
9100 rtx op0 = *pop0, op1 = *pop1;
9101 enum machine_mode op_mode = GET_MODE (op0);
9102 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
9104 /* All of the unordered compare instructions only work on registers.
9105 The same is true of the fcomi compare instructions. The XFmode
9106 compare instructions require registers except when comparing
9107 against zero or when converting operand 1 from fixed point to
9108 floating point. */
9110 if (!is_sse
9111 && (fpcmp_mode == CCFPUmode
9112 || (op_mode == XFmode
9113 && ! (standard_80387_constant_p (op0) == 1
9114 || standard_80387_constant_p (op1) == 1)
9115 && GET_CODE (op1) != FLOAT)
9116 || ix86_use_fcomi_compare (code)))
9118 op0 = force_reg (op_mode, op0);
9119 op1 = force_reg (op_mode, op1);
9121 else
9123 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
9124 things around if they appear profitable, otherwise force op0
9125 into a register. */
9127 if (standard_80387_constant_p (op0) == 0
9128 || (GET_CODE (op0) == MEM
9129 && ! (standard_80387_constant_p (op1) == 0
9130 || GET_CODE (op1) == MEM)))
9132 rtx tmp;
9133 tmp = op0, op0 = op1, op1 = tmp;
9134 code = swap_condition (code);
9137 if (GET_CODE (op0) != REG)
9138 op0 = force_reg (op_mode, op0);
9140 if (CONSTANT_P (op1))
9142 int tmp = standard_80387_constant_p (op1);
9143 if (tmp == 0)
9144 op1 = validize_mem (force_const_mem (op_mode, op1));
9145 else if (tmp == 1)
9147 if (TARGET_CMOVE)
9148 op1 = force_reg (op_mode, op1);
9150 else
9151 op1 = force_reg (op_mode, op1);
9155 /* Try to rearrange the comparison to make it cheaper. */
9156 if (ix86_fp_comparison_cost (code)
9157 > ix86_fp_comparison_cost (swap_condition (code))
9158 && (GET_CODE (op1) == REG || !no_new_pseudos))
9160 rtx tmp;
9161 tmp = op0, op0 = op1, op1 = tmp;
9162 code = swap_condition (code);
9163 if (GET_CODE (op0) != REG)
9164 op0 = force_reg (op_mode, op0);
9167 *pop0 = op0;
9168 *pop1 = op1;
9169 return code;
9172 /* Convert the comparison codes we use to represent FP comparisons to the
9173 integer codes that will result in proper branches. Return UNKNOWN if no such code
9174 is available. */
9176 enum rtx_code
9177 ix86_fp_compare_code_to_integer (enum rtx_code code)
9179 switch (code)
9181 case GT:
9182 return GTU;
9183 case GE:
9184 return GEU;
9185 case ORDERED:
9186 case UNORDERED:
9187 return code;
9188 break;
9189 case UNEQ:
9190 return EQ;
9191 break;
9192 case UNLT:
9193 return LTU;
9194 break;
9195 case UNLE:
9196 return LEU;
9197 break;
9198 case LTGT:
9199 return NE;
9200 break;
9201 default:
9202 return UNKNOWN;
9206 /* Split comparison code CODE into comparisons we can do using branch
9207 instructions. BYPASS_CODE is the comparison code for the branch that will
9208 branch around FIRST_CODE and SECOND_CODE. If one of the branches
9209 is not required, its value is set to UNKNOWN.
9210 We never require more than two branches. */
9212 void
9213 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
9214 enum rtx_code *first_code,
9215 enum rtx_code *second_code)
9217 *first_code = code;
9218 *bypass_code = UNKNOWN;
9219 *second_code = UNKNOWN;
9221 /* The fcomi comparison sets flags as follows:
9223 cmp ZF PF CF
9224 > 0 0 0
9225 < 0 0 1
9226 = 1 0 0
9227 un 1 1 1 */
9229 switch (code)
9231 case GT: /* GTU - CF=0 & ZF=0 */
9232 case GE: /* GEU - CF=0 */
9233 case ORDERED: /* PF=0 */
9234 case UNORDERED: /* PF=1 */
9235 case UNEQ: /* EQ - ZF=1 */
9236 case UNLT: /* LTU - CF=1 */
9237 case UNLE: /* LEU - CF=1 | ZF=1 */
9238 case LTGT: /* EQ - ZF=0 */
9239 break;
9240 case LT: /* LTU - CF=1 - fails on unordered */
9241 *first_code = UNLT;
9242 *bypass_code = UNORDERED;
9243 break;
9244 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
9245 *first_code = UNLE;
9246 *bypass_code = UNORDERED;
9247 break;
9248 case EQ: /* EQ - ZF=1 - fails on unordered */
9249 *first_code = UNEQ;
9250 *bypass_code = UNORDERED;
9251 break;
9252 case NE: /* NE - ZF=0 - fails on unordered */
9253 *first_code = LTGT;
9254 *second_code = UNORDERED;
9255 break;
9256 case UNGE: /* GEU - CF=0 - fails on unordered */
9257 *first_code = GE;
9258 *second_code = UNORDERED;
9259 break;
9260 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
9261 *first_code = GT;
9262 *second_code = UNORDERED;
9263 break;
9264 default:
9265 gcc_unreachable ();
9267 if (!TARGET_IEEE_FP)
9269 *second_code = UNKNOWN;
9270 *bypass_code = UNKNOWN;
9274 /* Return the cost of a comparison done using fcom + arithmetic operations on AX.
9275 All following functions use the number of instructions as a cost metric.
9276 In the future this should be tweaked to compute bytes for optimize_size and
9277 take into account the performance of various instructions on various CPUs. */
9278 static int
9279 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9281 if (!TARGET_IEEE_FP)
9282 return 4;
9283 /* The cost of code output by ix86_expand_fp_compare. */
9284 switch (code)
9286 case UNLE:
9287 case UNLT:
9288 case LTGT:
9289 case GT:
9290 case GE:
9291 case UNORDERED:
9292 case ORDERED:
9293 case UNEQ:
9294 return 4;
9295 break;
9296 case LT:
9297 case NE:
9298 case EQ:
9299 case UNGE:
9300 return 5;
9301 break;
9302 case LE:
9303 case UNGT:
9304 return 6;
9305 break;
9306 default:
9307 gcc_unreachable ();
9311 /* Return cost of comparison done using fcomi operation.
9312 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9313 static int
9314 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9316 enum rtx_code bypass_code, first_code, second_code;
9317 /* Return an arbitrarily high cost when the instruction is not supported - this
9318 prevents gcc from using it. */
9319 if (!TARGET_CMOVE)
9320 return 1024;
9321 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9322 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
9325 /* Return cost of comparison done using sahf operation.
9326 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9327 static int
9328 ix86_fp_comparison_sahf_cost (enum rtx_code code)
9330 enum rtx_code bypass_code, first_code, second_code;
9331 /* Return an arbitrarily high cost when the instruction is not preferred - this
9332 prevents gcc from using it. */
9333 if (!TARGET_USE_SAHF && !optimize_size)
9334 return 1024;
9335 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9336 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
9339 /* Compute cost of the comparison done using any method.
9340 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9341 static int
9342 ix86_fp_comparison_cost (enum rtx_code code)
9344 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9345 int min;
9347 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9348 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9350 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9351 if (min > sahf_cost)
9352 min = sahf_cost;
9353 if (min > fcomi_cost)
9354 min = fcomi_cost;
9355 return min;
9358 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9360 static rtx
9361 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9362 rtx *second_test, rtx *bypass_test)
9364 enum machine_mode fpcmp_mode, intcmp_mode;
9365 rtx tmp, tmp2;
9366 int cost = ix86_fp_comparison_cost (code);
9367 enum rtx_code bypass_code, first_code, second_code;
9369 fpcmp_mode = ix86_fp_compare_mode (code);
9370 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9372 if (second_test)
9373 *second_test = NULL_RTX;
9374 if (bypass_test)
9375 *bypass_test = NULL_RTX;
9377 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9379 /* Do fcomi/sahf based test when profitable. */
9380 if ((bypass_code == UNKNOWN || bypass_test)
9381 && (second_code == UNKNOWN || second_test)
9382 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9384 if (TARGET_CMOVE)
9386 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9387 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9388 tmp);
9389 emit_insn (tmp);
9391 else
9393 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9394 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9395 if (!scratch)
9396 scratch = gen_reg_rtx (HImode);
9397 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9398 emit_insn (gen_x86_sahf_1 (scratch));
9401 /* The FP codes work out to act like unsigned. */
9402 intcmp_mode = fpcmp_mode;
9403 code = first_code;
9404 if (bypass_code != UNKNOWN)
9405 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9406 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9407 const0_rtx);
9408 if (second_code != UNKNOWN)
9409 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9410 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9411 const0_rtx);
9413 else
9415 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9416 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9417 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9418 if (!scratch)
9419 scratch = gen_reg_rtx (HImode);
9420 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9422 /* In the unordered case, we have to check C2 for NaN's, which
9423 doesn't happen to work out to anything nice combination-wise.
9424 So do some bit twiddling on the value we've got in AH to come
9425 up with an appropriate set of condition codes. */
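/* After fnstsw the AH byte holds the i387 condition codes as C0 = 0x01,
   C2 = 0x04 and C3 = 0x40; an unordered compare sets all three, which is
   what the masks below (0x45, 0x44, 0x40, 0x05, 0x01) are built around.  */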
9427 intcmp_mode = CCNOmode;
9428 switch (code)
9430 case GT:
9431 case UNGT:
9432 if (code == GT || !TARGET_IEEE_FP)
9434 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9435 code = EQ;
9437 else
9439 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9440 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9441 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9442 intcmp_mode = CCmode;
9443 code = GEU;
9445 break;
9446 case LT:
9447 case UNLT:
9448 if (code == LT && TARGET_IEEE_FP)
9450 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9451 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9452 intcmp_mode = CCmode;
9453 code = EQ;
9455 else
9457 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9458 code = NE;
9460 break;
9461 case GE:
9462 case UNGE:
9463 if (code == GE || !TARGET_IEEE_FP)
9465 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9466 code = EQ;
9468 else
9470 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9471 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9472 GEN_INT (0x01)));
9473 code = NE;
9475 break;
9476 case LE:
9477 case UNLE:
9478 if (code == LE && TARGET_IEEE_FP)
9480 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9481 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9482 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9483 intcmp_mode = CCmode;
9484 code = LTU;
9486 else
9488 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9489 code = NE;
9491 break;
9492 case EQ:
9493 case UNEQ:
9494 if (code == EQ && TARGET_IEEE_FP)
9496 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9497 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9498 intcmp_mode = CCmode;
9499 code = EQ;
9501 else
9503 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9504 code = NE;
9505 break;
9507 break;
9508 case NE:
9509 case LTGT:
9510 if (code == NE && TARGET_IEEE_FP)
9512 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9513 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9514 GEN_INT (0x40)));
9515 code = NE;
9517 else
9519 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9520 code = EQ;
9522 break;
9524 case UNORDERED:
9525 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9526 code = NE;
9527 break;
9528 case ORDERED:
9529 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9530 code = EQ;
9531 break;
9533 default:
9534 gcc_unreachable ();
9538 /* Return the test that should be put into the flags user, i.e.
9539 the bcc, scc, or cmov instruction. */
9540 return gen_rtx_fmt_ee (code, VOIDmode,
9541 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9542 const0_rtx);
9546 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9548 rtx op0, op1, ret;
9549 op0 = ix86_compare_op0;
9550 op1 = ix86_compare_op1;
9552 if (second_test)
9553 *second_test = NULL_RTX;
9554 if (bypass_test)
9555 *bypass_test = NULL_RTX;
9557 if (ix86_compare_emitted)
9559 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
9560 ix86_compare_emitted = NULL_RTX;
9562 else if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9563 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9564 second_test, bypass_test);
9565 else
9566 ret = ix86_expand_int_compare (code, op0, op1);
9568 return ret;
9571 /* Return true if CODE will result in a nontrivial jump sequence. */
9572 bool
9573 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9575 enum rtx_code bypass_code, first_code, second_code;
9576 if (!TARGET_CMOVE)
9577 return true;
9578 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9579 return bypass_code != UNKNOWN || second_code != UNKNOWN;
9582 void
9583 ix86_expand_branch (enum rtx_code code, rtx label)
9585 rtx tmp;
9587 switch (GET_MODE (ix86_compare_op0))
9589 case QImode:
9590 case HImode:
9591 case SImode:
9592 simple:
9593 tmp = ix86_expand_compare (code, NULL, NULL);
9594 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9595 gen_rtx_LABEL_REF (VOIDmode, label),
9596 pc_rtx);
9597 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9598 return;
9600 case SFmode:
9601 case DFmode:
9602 case XFmode:
9604 rtvec vec;
9605 int use_fcomi;
9606 enum rtx_code bypass_code, first_code, second_code;
9608 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9609 &ix86_compare_op1);
9611 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9613 /* Check whether we will use the natural sequence with one jump. If
9614 so, we can expand the jump early. Otherwise delay expansion by
9615 creating a compound insn so as not to confuse the optimizers. */
9616 if (bypass_code == UNKNOWN && second_code == UNKNOWN
9617 && TARGET_CMOVE)
9619 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9620 gen_rtx_LABEL_REF (VOIDmode, label),
9621 pc_rtx, NULL_RTX, NULL_RTX);
9623 else
9625 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9626 ix86_compare_op0, ix86_compare_op1);
9627 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9628 gen_rtx_LABEL_REF (VOIDmode, label),
9629 pc_rtx);
9630 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9632 use_fcomi = ix86_use_fcomi_compare (code);
9633 vec = rtvec_alloc (3 + !use_fcomi);
9634 RTVEC_ELT (vec, 0) = tmp;
9635 RTVEC_ELT (vec, 1)
9636 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9637 RTVEC_ELT (vec, 2)
9638 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9639 if (! use_fcomi)
9640 RTVEC_ELT (vec, 3)
9641 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9643 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9645 return;
9648 case DImode:
9649 if (TARGET_64BIT)
9650 goto simple;
9651 case TImode:
9652 /* Expand DImode/TImode branch into multiple compare+branch. */
9654 rtx lo[2], hi[2], label2;
9655 enum rtx_code code1, code2, code3;
9656 enum machine_mode submode;
9658 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9660 tmp = ix86_compare_op0;
9661 ix86_compare_op0 = ix86_compare_op1;
9662 ix86_compare_op1 = tmp;
9663 code = swap_condition (code);
9665 if (GET_MODE (ix86_compare_op0) == DImode)
9667 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9668 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9669 submode = SImode;
9671 else
9673 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
9674 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
9675 submode = DImode;
9678 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9679 avoid two branches. This costs one extra insn, so disable when
9680 optimizing for size. */
9682 if ((code == EQ || code == NE)
9683 && (!optimize_size
9684 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9686 rtx xor0, xor1;
9688 xor1 = hi[0];
9689 if (hi[1] != const0_rtx)
9690 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
9691 NULL_RTX, 0, OPTAB_WIDEN);
9693 xor0 = lo[0];
9694 if (lo[1] != const0_rtx)
9695 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
9696 NULL_RTX, 0, OPTAB_WIDEN);
9698 tmp = expand_binop (submode, ior_optab, xor1, xor0,
9699 NULL_RTX, 0, OPTAB_WIDEN);
9701 ix86_compare_op0 = tmp;
9702 ix86_compare_op1 = const0_rtx;
9703 ix86_expand_branch (code, label);
9704 return;
9707 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9708 op1 is a constant and the low word is zero, then we can just
9709 examine the high word. */
9711 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9712 switch (code)
9714 case LT: case LTU: case GE: case GEU:
9715 ix86_compare_op0 = hi[0];
9716 ix86_compare_op1 = hi[1];
9717 ix86_expand_branch (code, label);
9718 return;
9719 default:
9720 break;
9723 /* Otherwise, we need two or three jumps. */
9725 label2 = gen_label_rtx ();
9727 code1 = code;
9728 code2 = swap_condition (code);
9729 code3 = unsigned_condition (code);
9731 switch (code)
9733 case LT: case GT: case LTU: case GTU:
9734 break;
9736 case LE: code1 = LT; code2 = GT; break;
9737 case GE: code1 = GT; code2 = LT; break;
9738 case LEU: code1 = LTU; code2 = GTU; break;
9739 case GEU: code1 = GTU; code2 = LTU; break;
9741 case EQ: code1 = UNKNOWN; code2 = NE; break;
9742 case NE: code2 = UNKNOWN; break;
9744 default:
9745 gcc_unreachable ();
9749 * a < b =>
9750 * if (hi(a) < hi(b)) goto true;
9751 * if (hi(a) > hi(b)) goto false;
9752 * if (lo(a) < lo(b)) goto true;
9753 * false:
9756 ix86_compare_op0 = hi[0];
9757 ix86_compare_op1 = hi[1];
9759 if (code1 != UNKNOWN)
9760 ix86_expand_branch (code1, label);
9761 if (code2 != UNKNOWN)
9762 ix86_expand_branch (code2, label2);
9764 ix86_compare_op0 = lo[0];
9765 ix86_compare_op1 = lo[1];
9766 ix86_expand_branch (code3, label);
9768 if (code2 != UNKNOWN)
9769 emit_label (label2);
9770 return;
9773 default:
9774 gcc_unreachable ();
9778 /* Split branch based on floating point condition. */
9779 void
9780 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9781 rtx target1, rtx target2, rtx tmp, rtx pushed)
9783 rtx second, bypass;
9784 rtx label = NULL_RTX;
9785 rtx condition;
9786 int bypass_probability = -1, second_probability = -1, probability = -1;
9787 rtx i;
9789 if (target2 != pc_rtx)
9791 rtx tmp = target2;
9792 code = reverse_condition_maybe_unordered (code);
9793 target2 = target1;
9794 target1 = tmp;
9797 condition = ix86_expand_fp_compare (code, op1, op2,
9798 tmp, &second, &bypass);
9800 /* Remove pushed operand from stack. */
9801 if (pushed)
9802 ix86_free_from_memory (GET_MODE (pushed));
9804 if (split_branch_probability >= 0)
9806 /* Distribute the probabilities across the jumps.
9807 Assume that the BYPASS and SECOND tests always check
9808 for UNORDERED. */
9809 probability = split_branch_probability;
9811 /* A value of 1 is low enough that the probability does not need
9812 to be updated. Later we may run some experiments and see
9813 whether unordered values are more frequent in practice. */
9814 if (bypass)
9815 bypass_probability = 1;
9816 if (second)
9817 second_probability = 1;
9819 if (bypass != NULL_RTX)
9821 label = gen_label_rtx ();
9822 i = emit_jump_insn (gen_rtx_SET
9823 (VOIDmode, pc_rtx,
9824 gen_rtx_IF_THEN_ELSE (VOIDmode,
9825 bypass,
9826 gen_rtx_LABEL_REF (VOIDmode,
9827 label),
9828 pc_rtx)));
9829 if (bypass_probability >= 0)
9830 REG_NOTES (i)
9831 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9832 GEN_INT (bypass_probability),
9833 REG_NOTES (i));
9835 i = emit_jump_insn (gen_rtx_SET
9836 (VOIDmode, pc_rtx,
9837 gen_rtx_IF_THEN_ELSE (VOIDmode,
9838 condition, target1, target2)));
9839 if (probability >= 0)
9840 REG_NOTES (i)
9841 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9842 GEN_INT (probability),
9843 REG_NOTES (i));
9844 if (second != NULL_RTX)
9846 i = emit_jump_insn (gen_rtx_SET
9847 (VOIDmode, pc_rtx,
9848 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9849 target2)));
9850 if (second_probability >= 0)
9851 REG_NOTES (i)
9852 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9853 GEN_INT (second_probability),
9854 REG_NOTES (i));
9856 if (label != NULL_RTX)
9857 emit_label (label);
9861 ix86_expand_setcc (enum rtx_code code, rtx dest)
9863 rtx ret, tmp, tmpreg, equiv;
9864 rtx second_test, bypass_test;
9866 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
9867 return 0; /* FAIL */
9869 gcc_assert (GET_MODE (dest) == QImode);
9871 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9872 PUT_MODE (ret, QImode);
9874 tmp = dest;
9875 tmpreg = dest;
9877 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9878 if (bypass_test || second_test)
9880 rtx test = second_test;
9881 int bypass = 0;
9882 rtx tmp2 = gen_reg_rtx (QImode);
9883 if (bypass_test)
9885 gcc_assert (!second_test);
9886 test = bypass_test;
9887 bypass = 1;
9888 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9890 PUT_MODE (test, QImode);
9891 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9893 if (bypass)
9894 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9895 else
9896 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9899 /* Attach a REG_EQUAL note describing the comparison result. */
9900 if (ix86_compare_op0 && ix86_compare_op1)
9902 equiv = simplify_gen_relational (code, QImode,
9903 GET_MODE (ix86_compare_op0),
9904 ix86_compare_op0, ix86_compare_op1);
9905 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9908 return 1; /* DONE */
9911 /* Expand a comparison setting or clearing the carry flag. Return true
9912 when successful and set *POP to the comparison operation. */
9913 static bool
9914 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9916 enum machine_mode mode =
9917 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9919 /* Do not handle DImode compares that go through the special path. Also we
9920 can't deal with FP compares yet; this would be possible to add. */
9921 if (mode == (TARGET_64BIT ? TImode : DImode))
9922 return false;
9923 if (FLOAT_MODE_P (mode))
9925 rtx second_test = NULL, bypass_test = NULL;
9926 rtx compare_op, compare_seq;
9928 /* Shortcut: the following common codes never translate into carry flag compares. */
9929 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9930 || code == ORDERED || code == UNORDERED)
9931 return false;
9933 /* These comparisons would require the zero flag; swap the operands so they won't. */
9934 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9935 && !TARGET_IEEE_FP)
9937 rtx tmp = op0;
9938 op0 = op1;
9939 op1 = tmp;
9940 code = swap_condition (code);
9943 /* Try to expand the comparison and verify that we end up with a carry flag
9944 based comparison. This fails to be true only when we decide to expand the
9945 comparison using arithmetic, which is not a common scenario. */
9946 start_sequence ();
9947 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9948 &second_test, &bypass_test);
9949 compare_seq = get_insns ();
9950 end_sequence ();
9952 if (second_test || bypass_test)
9953 return false;
9954 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9955 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9956 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9957 else
9958 code = GET_CODE (compare_op);
9959 if (code != LTU && code != GEU)
9960 return false;
9961 emit_insn (compare_seq);
9962 *pop = compare_op;
9963 return true;
9965 if (!INTEGRAL_MODE_P (mode))
9966 return false;
9967 switch (code)
9969 case LTU:
9970 case GEU:
9971 break;
9973 /* Convert a==0 into (unsigned)a<1. */
9974 case EQ:
9975 case NE:
9976 if (op1 != const0_rtx)
9977 return false;
9978 op1 = const1_rtx;
9979 code = (code == EQ ? LTU : GEU);
9980 break;
9982 /* Convert a>b into b<a or a>=b+1. */
9983 case GTU:
9984 case LEU:
9985 if (GET_CODE (op1) == CONST_INT)
9987 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9988 /* Bail out on overflow. We could still swap the operands, but that
9989 would force loading the constant into a register. */
9990 if (op1 == const0_rtx
9991 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9992 return false;
9993 code = (code == GTU ? GEU : LTU);
9995 else
9997 rtx tmp = op1;
9998 op1 = op0;
9999 op0 = tmp;
10000 code = (code == GTU ? LTU : GEU);
10002 break;
10004 /* Convert a>=0 into (unsigned)a<0x80000000. */
10005 case LT:
10006 case GE:
10007 if (mode == DImode || op1 != const0_rtx)
10008 return false;
10009 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10010 code = (code == LT ? GEU : LTU);
10011 break;
10012 case LE:
10013 case GT:
10014 if (mode == DImode || op1 != constm1_rtx)
10015 return false;
10016 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10017 code = (code == LE ? GEU : LTU);
10018 break;
10020 default:
10021 return false;
10023 /* Swapping operands may cause a constant to appear as the first operand. */
10024 if (!nonimmediate_operand (op0, VOIDmode))
10026 if (no_new_pseudos)
10027 return false;
10028 op0 = force_reg (mode, op0);
10030 ix86_compare_op0 = op0;
10031 ix86_compare_op1 = op1;
10032 *pop = ix86_expand_compare (code, NULL, NULL);
10033 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
10034 return true;
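/* As an illustration of the conversions above: an unsigned test such as
   "x > 41" is rewritten into "x >= 42" (GEU), and "x == 0" into
   "x < 1" (LTU).  Both forms leave their result purely in the carry
   flag of a single cmp instruction, where a later sbb or adc can
   consume it without any setcc or branch.  */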
10038 ix86_expand_int_movcc (rtx operands[])
10040 enum rtx_code code = GET_CODE (operands[1]), compare_code;
10041 rtx compare_seq, compare_op;
10042 rtx second_test, bypass_test;
10043 enum machine_mode mode = GET_MODE (operands[0]);
10044 bool sign_bit_compare_p = false;
10046 start_sequence ();
10047 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10048 compare_seq = get_insns ();
10049 end_sequence ();
10051 compare_code = GET_CODE (compare_op);
10053 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
10054 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
10055 sign_bit_compare_p = true;
10057 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
10058 HImode insns, we'd be swallowed in word prefix ops. */
10060 if ((mode != HImode || TARGET_FAST_PREFIX)
10061 && (mode != (TARGET_64BIT ? TImode : DImode))
10062 && GET_CODE (operands[2]) == CONST_INT
10063 && GET_CODE (operands[3]) == CONST_INT)
10065 rtx out = operands[0];
10066 HOST_WIDE_INT ct = INTVAL (operands[2]);
10067 HOST_WIDE_INT cf = INTVAL (operands[3]);
10068 HOST_WIDE_INT diff;
10070 diff = ct - cf;
10071 /* Sign bit compares are better done using shifts than by using
10072 sbb. */
10073 if (sign_bit_compare_p
10074 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10075 ix86_compare_op1, &compare_op))
10077 /* Detect overlap between destination and compare sources. */
10078 rtx tmp = out;
10080 if (!sign_bit_compare_p)
10082 bool fpcmp = false;
10084 compare_code = GET_CODE (compare_op);
10086 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10087 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10089 fpcmp = true;
10090 compare_code = ix86_fp_compare_code_to_integer (compare_code);
10093 /* To simplify the rest of the code, restrict to the GEU case. */
10094 if (compare_code == LTU)
10096 HOST_WIDE_INT tmp = ct;
10097 ct = cf;
10098 cf = tmp;
10099 compare_code = reverse_condition (compare_code);
10100 code = reverse_condition (code);
10102 else
10104 if (fpcmp)
10105 PUT_CODE (compare_op,
10106 reverse_condition_maybe_unordered
10107 (GET_CODE (compare_op)));
10108 else
10109 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10111 diff = ct - cf;
10113 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
10114 || reg_overlap_mentioned_p (out, ix86_compare_op1))
10115 tmp = gen_reg_rtx (mode);
10117 if (mode == DImode)
10118 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
10119 else
10120 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
10122 else
10124 if (code == GT || code == GE)
10125 code = reverse_condition (code);
10126 else
10128 HOST_WIDE_INT tmp = ct;
10129 ct = cf;
10130 cf = tmp;
10131 diff = ct - cf;
10133 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
10134 ix86_compare_op1, VOIDmode, 0, -1);
10137 if (diff == 1)
10140 * cmpl op0,op1
10141 * sbbl dest,dest
10142 * [addl dest, ct]
10144 * Size 5 - 8.
10146 if (ct)
10147 tmp = expand_simple_binop (mode, PLUS,
10148 tmp, GEN_INT (ct),
10149 copy_rtx (tmp), 1, OPTAB_DIRECT);
10151 else if (cf == -1)
10154 * cmpl op0,op1
10155 * sbbl dest,dest
10156 * orl $ct, dest
10158 * Size 8.
10160 tmp = expand_simple_binop (mode, IOR,
10161 tmp, GEN_INT (ct),
10162 copy_rtx (tmp), 1, OPTAB_DIRECT);
10164 else if (diff == -1 && ct)
10167 * cmpl op0,op1
10168 * sbbl dest,dest
10169 * notl dest
10170 * [addl dest, cf]
10172 * Size 8 - 11.
10174 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10175 if (cf)
10176 tmp = expand_simple_binop (mode, PLUS,
10177 copy_rtx (tmp), GEN_INT (cf),
10178 copy_rtx (tmp), 1, OPTAB_DIRECT);
10180 else
10183 * cmpl op0,op1
10184 * sbbl dest,dest
10185 * [notl dest]
10186 * andl cf - ct, dest
10187 * [addl dest, ct]
10189 * Size 8 - 11.
10192 if (cf == 0)
10194 cf = ct;
10195 ct = 0;
10196 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10199 tmp = expand_simple_binop (mode, AND,
10200 copy_rtx (tmp),
10201 gen_int_mode (cf - ct, mode),
10202 copy_rtx (tmp), 1, OPTAB_DIRECT);
10203 if (ct)
10204 tmp = expand_simple_binop (mode, PLUS,
10205 copy_rtx (tmp), GEN_INT (ct),
10206 copy_rtx (tmp), 1, OPTAB_DIRECT);
10209 if (!rtx_equal_p (tmp, out))
10210 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
10212 return 1; /* DONE */
10215 if (diff < 0)
10217 HOST_WIDE_INT tmp;
10218 tmp = ct, ct = cf, cf = tmp;
10219 diff = -diff;
10220 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10222 /* We may be reversing an unordered compare to a normal compare, which
10223 is not valid in general (we may convert a non-trapping condition
10224 into a trapping one); however, on i386 we currently emit all
10225 comparisons unordered. */
10226 compare_code = reverse_condition_maybe_unordered (compare_code);
10227 code = reverse_condition_maybe_unordered (code);
10229 else
10231 compare_code = reverse_condition (compare_code);
10232 code = reverse_condition (code);
10236 compare_code = UNKNOWN;
10237 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
10238 && GET_CODE (ix86_compare_op1) == CONST_INT)
10240 if (ix86_compare_op1 == const0_rtx
10241 && (code == LT || code == GE))
10242 compare_code = code;
10243 else if (ix86_compare_op1 == constm1_rtx)
10245 if (code == LE)
10246 compare_code = LT;
10247 else if (code == GT)
10248 compare_code = GE;
10252 /* Optimize dest = (op0 < 0) ? -1 : cf. */
10253 if (compare_code != UNKNOWN
10254 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
10255 && (cf == -1 || ct == -1))
10257 /* If the lea code below could be used, only optimize
10258 if it results in a 2-insn sequence. */
10260 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
10261 || diff == 3 || diff == 5 || diff == 9)
10262 || (compare_code == LT && ct == -1)
10263 || (compare_code == GE && cf == -1))
10266 * notl op1 (if necessary)
10267 * sarl $31, op1
10268 * orl cf, op1
10270 if (ct != -1)
10272 cf = ct;
10273 ct = -1;
10274 code = reverse_condition (code);
10277 out = emit_store_flag (out, code, ix86_compare_op0,
10278 ix86_compare_op1, VOIDmode, 0, -1);
10280 out = expand_simple_binop (mode, IOR,
10281 out, GEN_INT (cf),
10282 out, 1, OPTAB_DIRECT);
10283 if (out != operands[0])
10284 emit_move_insn (operands[0], out);
10286 return 1; /* DONE */
10291 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
10292 || diff == 3 || diff == 5 || diff == 9)
10293 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
10294 && (mode != DImode
10295 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
10298 * xorl dest,dest
10299 * cmpl op1,op2
10300 * setcc dest
10301 * lea cf(dest*(ct-cf)),dest
10303 * Size 14.
10305 * This also catches the degenerate setcc-only case.
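/* For instance, with ct = 5 and cf = 1 (diff = 4) the sequence is
   "setcc dest" producing 0 or 1, followed by "lea 1(,dest,4), dest",
   yielding 1 when the condition is false and 5 when it is true.  */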
10308 rtx tmp;
10309 int nops;
10311 out = emit_store_flag (out, code, ix86_compare_op0,
10312 ix86_compare_op1, VOIDmode, 0, 1);
10314 nops = 0;
10315 /* On x86_64 the lea instruction operates on Pmode, so we need
10316 the arithmetic done in the proper mode to match. */
10317 if (diff == 1)
10318 tmp = copy_rtx (out);
10319 else
10321 rtx out1;
10322 out1 = copy_rtx (out);
10323 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10324 nops++;
10325 if (diff & 1)
10327 tmp = gen_rtx_PLUS (mode, tmp, out1);
10328 nops++;
10331 if (cf != 0)
10333 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10334 nops++;
10336 if (!rtx_equal_p (tmp, out))
10338 if (nops == 1)
10339 out = force_operand (tmp, copy_rtx (out));
10340 else
10341 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10343 if (!rtx_equal_p (out, operands[0]))
10344 emit_move_insn (operands[0], copy_rtx (out));
10346 return 1; /* DONE */
10350 * General case: Jumpful:
10351 * xorl dest,dest cmpl op1, op2
10352 * cmpl op1, op2 movl ct, dest
10353 * setcc dest jcc 1f
10354 * decl dest movl cf, dest
10355 * andl (cf-ct),dest 1:
10356 * addl ct,dest
10358 * Size 20. Size 14.
10360 * This is reasonably steep, but branch mispredict costs are
10361 * high on modern cpus, so consider failing only if optimizing
10362 * for space.
10365 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10366 && BRANCH_COST >= 2)
10368 if (cf == 0)
10370 cf = ct;
10371 ct = 0;
10372 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10373 /* We may be reversing an unordered compare to a normal compare,
10374 which is not valid in general (we may convert a non-trapping
10375 condition into a trapping one); however, on i386 we currently
10376 emit all comparisons unordered. */
10377 code = reverse_condition_maybe_unordered (code);
10378 else
10380 code = reverse_condition (code);
10381 if (compare_code != UNKNOWN)
10382 compare_code = reverse_condition (compare_code);
10386 if (compare_code != UNKNOWN)
10388 /* notl op1 (if needed)
10389 sarl $31, op1
10390 andl (cf-ct), op1
10391 addl ct, op1
10393 For x < 0 (resp. x <= -1) there will be no notl,
10394 so if possible swap the constants to get rid of the
10395 complement.
10396 True/false will be -1/0 while code below (store flag
10397 followed by decrement) is 0/-1, so the constants need
10398 to be exchanged once more. */
10400 if (compare_code == GE || !cf)
10402 code = reverse_condition (code);
10403 compare_code = LT;
10405 else
10407 HOST_WIDE_INT tmp = cf;
10408 cf = ct;
10409 ct = tmp;
10412 out = emit_store_flag (out, code, ix86_compare_op0,
10413 ix86_compare_op1, VOIDmode, 0, -1);
10415 else
10417 out = emit_store_flag (out, code, ix86_compare_op0,
10418 ix86_compare_op1, VOIDmode, 0, 1);
10420 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10421 copy_rtx (out), 1, OPTAB_DIRECT);
10424 out = expand_simple_binop (mode, AND, copy_rtx (out),
10425 gen_int_mode (cf - ct, mode),
10426 copy_rtx (out), 1, OPTAB_DIRECT);
10427 if (ct)
10428 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10429 copy_rtx (out), 1, OPTAB_DIRECT);
10430 if (!rtx_equal_p (out, operands[0]))
10431 emit_move_insn (operands[0], copy_rtx (out));
10433 return 1; /* DONE */
10437 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10439 /* Try a few more things with specific constants and a variable. */
10441 optab op;
10442 rtx var, orig_out, out, tmp;
10444 if (BRANCH_COST <= 2)
10445 return 0; /* FAIL */
10447 /* If one of the two operands is an interesting constant, load a
10448 constant with the above and mask it in with a logical operation. */
10450 if (GET_CODE (operands[2]) == CONST_INT)
10452 var = operands[3];
10453 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10454 operands[3] = constm1_rtx, op = and_optab;
10455 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10456 operands[3] = const0_rtx, op = ior_optab;
10457 else
10458 return 0; /* FAIL */
10460 else if (GET_CODE (operands[3]) == CONST_INT)
10462 var = operands[2];
10463 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10464 operands[2] = constm1_rtx, op = and_optab;
10465 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
10466 operands[2] = const0_rtx, op = ior_optab;
10467 else
10468 return 0; /* FAIL */
10470 else
10471 return 0; /* FAIL */
10473 orig_out = operands[0];
10474 tmp = gen_reg_rtx (mode);
10475 operands[0] = tmp;
10477 /* Recurse to get the constant loaded. */
10478 if (ix86_expand_int_movcc (operands) == 0)
10479 return 0; /* FAIL */
10481 /* Mask in the interesting variable. */
10482 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10483 OPTAB_WIDEN);
10484 if (!rtx_equal_p (out, orig_out))
10485 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10487 return 1; /* DONE */
10491 * For comparison with above,
10493 * movl cf,dest
10494 * movl ct,tmp
10495 * cmpl op1,op2
10496 * cmovcc tmp,dest
10498 * Size 15.
10501 if (! nonimmediate_operand (operands[2], mode))
10502 operands[2] = force_reg (mode, operands[2]);
10503 if (! nonimmediate_operand (operands[3], mode))
10504 operands[3] = force_reg (mode, operands[3]);
10506 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10508 rtx tmp = gen_reg_rtx (mode);
10509 emit_move_insn (tmp, operands[3]);
10510 operands[3] = tmp;
10512 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10514 rtx tmp = gen_reg_rtx (mode);
10515 emit_move_insn (tmp, operands[2]);
10516 operands[2] = tmp;
10519 if (! register_operand (operands[2], VOIDmode)
10520 && (mode == QImode
10521 || ! register_operand (operands[3], VOIDmode)))
10522 operands[2] = force_reg (mode, operands[2]);
10524 if (mode == QImode
10525 && ! register_operand (operands[3], VOIDmode))
10526 operands[3] = force_reg (mode, operands[3]);
10528 emit_insn (compare_seq);
10529 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10530 gen_rtx_IF_THEN_ELSE (mode,
10531 compare_op, operands[2],
10532 operands[3])));
10533 if (bypass_test)
10534 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10535 gen_rtx_IF_THEN_ELSE (mode,
10536 bypass_test,
10537 copy_rtx (operands[3]),
10538 copy_rtx (operands[0]))));
10539 if (second_test)
10540 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10541 gen_rtx_IF_THEN_ELSE (mode,
10542 second_test,
10543 copy_rtx (operands[2]),
10544 copy_rtx (operands[0]))));
10546 return 1; /* DONE */
10549 /* Swap, force into registers, or otherwise massage the two operands
10550 to an sse comparison with a mask result. Thus we differ a bit from
10551 ix86_prepare_fp_compare_args which expects to produce a flags result.
10553 The DEST operand exists to help determine whether to commute commutative
10554 operators. The POP0/POP1 operands are updated in place. The new
10555 comparison code is returned, or UNKNOWN if not implementable. */
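/* For example, GE, GT, UNLE and UNLT have no direct cmpss/cmpps
   predicate, so a comparison like "a >= b" is rewritten as "b <= a" by
   swapping the operands, while LT, LE, UNGT and UNGE are emitted
   directly.  */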
10557 static enum rtx_code
10558 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
10559 rtx *pop0, rtx *pop1)
10561 rtx tmp;
10563 switch (code)
10565 case LTGT:
10566 case UNEQ:
10567 /* We have no LTGT as an operator. We could implement it with
10568 NE & ORDERED, but this requires an extra temporary. It's
10569 not clear that it's worth it. */
10570 return UNKNOWN;
10572 case LT:
10573 case LE:
10574 case UNGT:
10575 case UNGE:
10576 /* These are supported directly. */
10577 break;
10579 case EQ:
10580 case NE:
10581 case UNORDERED:
10582 case ORDERED:
10583 /* For commutative operators, try to canonicalize the destination
10584 operand to be first in the comparison - this helps reload to
10585 avoid extra moves. */
10586 if (!dest || !rtx_equal_p (dest, *pop1))
10587 break;
10588 /* FALLTHRU */
10590 case GE:
10591 case GT:
10592 case UNLE:
10593 case UNLT:
10594 /* These are not supported directly. Swap the comparison operands
10595 to transform into something that is supported. */
10596 tmp = *pop0;
10597 *pop0 = *pop1;
10598 *pop1 = tmp;
10599 code = swap_condition (code);
10600 break;
10602 default:
10603 gcc_unreachable ();
10606 return code;
10609 /* Detect conditional moves that exactly match min/max operational
10610 semantics. Note that this is IEEE safe, as long as we don't
10611 interchange the operands.
10613 Returns FALSE if this conditional move doesn't match a MIN/MAX,
10614 and TRUE if the operation is successful and instructions are emitted. */
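/* For example, the conditional move "d = (a < b) ? a : b"
   (cmp_op0 == if_true, cmp_op1 == if_false) is recognized as a minimum
   and collapses to a single min operation (or to the UNSPEC_IEEE_MIN
   form when NaNs and signed zeros must be honored), while
   "d = (a < b) ? b : a" becomes the corresponding maximum.  */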
10616 static bool
10617 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
10618 rtx cmp_op1, rtx if_true, rtx if_false)
10620 enum machine_mode mode;
10621 bool is_min;
10622 rtx tmp;
10624 if (code == LT)
10626 else if (code == UNGE)
10628 tmp = if_true;
10629 if_true = if_false;
10630 if_false = tmp;
10632 else
10633 return false;
10635 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
10636 is_min = true;
10637 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
10638 is_min = false;
10639 else
10640 return false;
10642 mode = GET_MODE (dest);
10644 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
10645 but MODE may be a vector mode and thus not appropriate. */
10646 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
10648 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
10649 rtvec v;
10651 if_true = force_reg (mode, if_true);
10652 v = gen_rtvec (2, if_true, if_false);
10653 tmp = gen_rtx_UNSPEC (mode, v, u);
10655 else
10657 code = is_min ? SMIN : SMAX;
10658 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
10661 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
10662 return true;
10665 /* Expand an sse vector comparison. Return the register with the result. */
10667 static rtx
10668 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
10669 rtx op_true, rtx op_false)
10671 enum machine_mode mode = GET_MODE (dest);
10672 rtx x;
10674 cmp_op0 = force_reg (mode, cmp_op0);
10675 if (!nonimmediate_operand (cmp_op1, mode))
10676 cmp_op1 = force_reg (mode, cmp_op1);
10678 if (optimize
10679 || reg_overlap_mentioned_p (dest, op_true)
10680 || reg_overlap_mentioned_p (dest, op_false))
10681 dest = gen_reg_rtx (mode);
10683 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
10684 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10686 return dest;
10689 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
10690 operations. This is used for both scalar and vector conditional moves. */
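/* The comparison result CMP is an all-ones / all-zeros mask per element,
   so the general case below computes, in C terms,

       dest = (cmp & op_true) | (~cmp & op_false);

   with the single-AND shortcuts used when one arm is the zero vector.  */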
10692 static void
10693 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
10695 enum machine_mode mode = GET_MODE (dest);
10696 rtx t2, t3, x;
10698 if (op_false == CONST0_RTX (mode))
10700 op_true = force_reg (mode, op_true);
10701 x = gen_rtx_AND (mode, cmp, op_true);
10702 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10704 else if (op_true == CONST0_RTX (mode))
10706 op_false = force_reg (mode, op_false);
10707 x = gen_rtx_NOT (mode, cmp);
10708 x = gen_rtx_AND (mode, x, op_false);
10709 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10711 else
10713 op_true = force_reg (mode, op_true);
10714 op_false = force_reg (mode, op_false);
10716 t2 = gen_reg_rtx (mode);
10717 if (optimize)
10718 t3 = gen_reg_rtx (mode);
10719 else
10720 t3 = dest;
10722 x = gen_rtx_AND (mode, op_true, cmp);
10723 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
10725 x = gen_rtx_NOT (mode, cmp);
10726 x = gen_rtx_AND (mode, x, op_false);
10727 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
10729 x = gen_rtx_IOR (mode, t3, t2);
10730 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10734 /* Expand a floating-point conditional move. Return true if successful. */
10737 ix86_expand_fp_movcc (rtx operands[])
10739 enum machine_mode mode = GET_MODE (operands[0]);
10740 enum rtx_code code = GET_CODE (operands[1]);
10741 rtx tmp, compare_op, second_test, bypass_test;
10743 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
10745 enum machine_mode cmode;
10747 /* Since we have no cmove for sse registers, don't force bad register
10748 allocation just to gain access to it. Deny movcc when the
10749 comparison mode doesn't match the move mode. */
10750 cmode = GET_MODE (ix86_compare_op0);
10751 if (cmode == VOIDmode)
10752 cmode = GET_MODE (ix86_compare_op1);
10753 if (cmode != mode)
10754 return 0;
10756 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
10757 &ix86_compare_op0,
10758 &ix86_compare_op1);
10759 if (code == UNKNOWN)
10760 return 0;
10762 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
10763 ix86_compare_op1, operands[2],
10764 operands[3]))
10765 return 1;
10767 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
10768 ix86_compare_op1, operands[2], operands[3]);
10769 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
10770 return 1;
10773 /* The floating point conditional move instructions don't directly
10774 support conditions resulting from a signed integer comparison. */
10776 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10781 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10783 gcc_assert (!second_test && !bypass_test);
10784 tmp = gen_reg_rtx (QImode);
10785 ix86_expand_setcc (code, tmp);
10786 code = NE;
10787 ix86_compare_op0 = tmp;
10788 ix86_compare_op1 = const0_rtx;
10789 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10791 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10793 tmp = gen_reg_rtx (mode);
10794 emit_move_insn (tmp, operands[3]);
10795 operands[3] = tmp;
10797 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10799 tmp = gen_reg_rtx (mode);
10800 emit_move_insn (tmp, operands[2]);
10801 operands[2] = tmp;
10804 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10805 gen_rtx_IF_THEN_ELSE (mode, compare_op,
10806 operands[2], operands[3])));
10807 if (bypass_test)
10808 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10809 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
10810 operands[3], operands[0])));
10811 if (second_test)
10812 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10813 gen_rtx_IF_THEN_ELSE (mode, second_test,
10814 operands[2], operands[0])));
10816 return 1;
10819 /* Expand a floating-point vector conditional move; a vcond operation
10820 rather than a movcc operation. */
10822 bool
10823 ix86_expand_fp_vcond (rtx operands[])
10825 enum rtx_code code = GET_CODE (operands[3]);
10826 rtx cmp;
10828 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
10829 &operands[4], &operands[5]);
10830 if (code == UNKNOWN)
10831 return false;
10833 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
10834 operands[5], operands[1], operands[2]))
10835 return true;
10837 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
10838 operands[1], operands[2]);
10839 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
10840 return true;
10843 /* Expand a signed integral vector conditional move. */
10845 bool
10846 ix86_expand_int_vcond (rtx operands[])
10848 enum machine_mode mode = GET_MODE (operands[0]);
10849 enum rtx_code code = GET_CODE (operands[3]);
10850 bool negate = false;
10851 rtx x, cop0, cop1;
10853 cop0 = operands[4];
10854 cop1 = operands[5];
10856 /* Canonicalize the comparison to EQ, GT, GTU. */
10857 switch (code)
10859 case EQ:
10860 case GT:
10861 case GTU:
10862 break;
10864 case NE:
10865 case LE:
10866 case LEU:
10867 code = reverse_condition (code);
10868 negate = true;
10869 break;
10871 case GE:
10872 case GEU:
10873 code = reverse_condition (code);
10874 negate = true;
10875 /* FALLTHRU */
10877 case LT:
10878 case LTU:
10879 code = swap_condition (code);
10880 x = cop0, cop0 = cop1, cop1 = x;
10881 break;
10883 default:
10884 gcc_unreachable ();
10887 /* Unsigned parallel compare is not supported by the hardware. Play some
10888 tricks to turn this into a signed comparison against 0. */
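/* The V16QImode/V8HImode arm below relies on the identity
   a >u b  <==>  (a -us b) != 0, where -us is the unsigned saturating
   subtraction: e.g. for unsigned bytes a = 200, b = 100 the saturating
   difference is 100 (nonzero), while for a = 100, b = 200 it saturates
   to 0.  The comparison is then emitted as an equality test against
   zero, with the selected operands swapped via NEGATE.  */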
10889 if (code == GTU)
10891 switch (mode)
10893 case V4SImode:
10895 rtx t1, t2, mask;
10897 /* Perform a parallel modulo subtraction. */
10898 t1 = gen_reg_rtx (mode);
10899 emit_insn (gen_subv4si3 (t1, cop0, cop1));
10901 /* Extract the original sign bit of op0. */
10902 mask = GEN_INT (-0x80000000);
10903 mask = gen_rtx_CONST_VECTOR (mode,
10904 gen_rtvec (4, mask, mask, mask, mask));
10905 mask = force_reg (mode, mask);
10906 t2 = gen_reg_rtx (mode);
10907 emit_insn (gen_andv4si3 (t2, cop0, mask));
10909 /* XOR it back into the result of the subtraction. This results
10910 in the sign bit set iff we saw unsigned underflow. */
10911 x = gen_reg_rtx (mode);
10912 emit_insn (gen_xorv4si3 (x, t1, t2));
10914 code = GT;
10916 break;
10918 case V16QImode:
10919 case V8HImode:
10920 /* Perform a parallel unsigned saturating subtraction. */
10921 x = gen_reg_rtx (mode);
10922 emit_insn (gen_rtx_SET (VOIDmode, x,
10923 gen_rtx_US_MINUS (mode, cop0, cop1)));
10925 code = EQ;
10926 negate = !negate;
10927 break;
10929 default:
10930 gcc_unreachable ();
10933 cop0 = x;
10934 cop1 = CONST0_RTX (mode);
10937 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
10938 operands[1+negate], operands[2-negate]);
10940 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
10941 operands[2-negate]);
10942 return true;
10945 /* Expand conditional increment or decrement using adc/sbb instructions.
10946 The default case using setcc followed by the conditional move can be
10947 done by generic code. */
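/* For example, with unsigned operands a statement such as
   "count += (a < b);" can be emitted as a compare followed by
   "adc $0, count", and "count -= (a < b);" as "sbb $0, count",
   consuming the carry flag left by ix86_expand_carry_flag_compare
   instead of materializing the condition with setcc.  */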
10949 ix86_expand_int_addcc (rtx operands[])
10951 enum rtx_code code = GET_CODE (operands[1]);
10952 rtx compare_op;
10953 rtx val = const0_rtx;
10954 bool fpcmp = false;
10955 enum machine_mode mode = GET_MODE (operands[0]);
10957 if (operands[3] != const1_rtx
10958 && operands[3] != constm1_rtx)
10959 return 0;
10960 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10961 ix86_compare_op1, &compare_op))
10962 return 0;
10963 code = GET_CODE (compare_op);
10965 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10966 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10968 fpcmp = true;
10969 code = ix86_fp_compare_code_to_integer (code);
10972 if (code != LTU)
10974 val = constm1_rtx;
10975 if (fpcmp)
10976 PUT_CODE (compare_op,
10977 reverse_condition_maybe_unordered
10978 (GET_CODE (compare_op)));
10979 else
10980 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10982 PUT_MODE (compare_op, mode);
10984 /* Construct either adc or sbb insn. */
10985 if ((code == LTU) == (operands[3] == constm1_rtx))
10987 switch (GET_MODE (operands[0]))
10989 case QImode:
10990 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10991 break;
10992 case HImode:
10993 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10994 break;
10995 case SImode:
10996 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10997 break;
10998 case DImode:
10999 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11000 break;
11001 default:
11002 gcc_unreachable ();
11005 else
11007 switch (GET_MODE (operands[0]))
11009 case QImode:
11010 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
11011 break;
11012 case HImode:
11013 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
11014 break;
11015 case SImode:
11016 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
11017 break;
11018 case DImode:
11019 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11020 break;
11021 default:
11022 gcc_unreachable ();
11025 return 1; /* DONE */
11029 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
11030 works for floating point parameters and non-offsettable memories.
11031 For pushes, it returns just stack offsets; the values will be saved
11032 in the right order. At most three parts are generated. */
11034 static int
11035 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
11037 int size;
11039 if (!TARGET_64BIT)
11040 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
11041 else
11042 size = (GET_MODE_SIZE (mode) + 4) / 8;
11044 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
11045 gcc_assert (size >= 2 && size <= 3);
11047 /* Optimize constant pool references into immediates. This is used by fp
11048 moves, which force all constants to memory to allow combining. */
11049 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
11051 rtx tmp = maybe_get_pool_constant (operand);
11052 if (tmp)
11053 operand = tmp;
11056 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
11058 /* The only non-offsettable memories we handle are pushes. */
11059 int ok = push_operand (operand, VOIDmode);
11061 gcc_assert (ok);
11063 operand = copy_rtx (operand);
11064 PUT_MODE (operand, Pmode);
11065 parts[0] = parts[1] = parts[2] = operand;
11066 return size;
11069 if (GET_CODE (operand) == CONST_VECTOR)
11071 enum machine_mode imode = int_mode_for_mode (mode);
11072 /* Caution: if we looked through a constant pool memory above,
11073 the operand may actually have a different mode now. That's
11074 ok, since we want to pun this all the way back to an integer. */
11075 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
11076 gcc_assert (operand != NULL);
11077 mode = imode;
11080 if (!TARGET_64BIT)
11082 if (mode == DImode)
11083 split_di (&operand, 1, &parts[0], &parts[1]);
11084 else
11086 if (REG_P (operand))
11088 gcc_assert (reload_completed);
11089 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
11090 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
11091 if (size == 3)
11092 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
11094 else if (offsettable_memref_p (operand))
11096 operand = adjust_address (operand, SImode, 0);
11097 parts[0] = operand;
11098 parts[1] = adjust_address (operand, SImode, 4);
11099 if (size == 3)
11100 parts[2] = adjust_address (operand, SImode, 8);
11102 else if (GET_CODE (operand) == CONST_DOUBLE)
11104 REAL_VALUE_TYPE r;
11105 long l[4];
11107 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11108 switch (mode)
11110 case XFmode:
11111 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
11112 parts[2] = gen_int_mode (l[2], SImode);
11113 break;
11114 case DFmode:
11115 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
11116 break;
11117 default:
11118 gcc_unreachable ();
11120 parts[1] = gen_int_mode (l[1], SImode);
11121 parts[0] = gen_int_mode (l[0], SImode);
11123 else
11124 gcc_unreachable ();
11127 else
11129 if (mode == TImode)
11130 split_ti (&operand, 1, &parts[0], &parts[1]);
11131 if (mode == XFmode || mode == TFmode)
11133 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
11134 if (REG_P (operand))
11136 gcc_assert (reload_completed);
11137 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
11138 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
11140 else if (offsettable_memref_p (operand))
11142 operand = adjust_address (operand, DImode, 0);
11143 parts[0] = operand;
11144 parts[1] = adjust_address (operand, upper_mode, 8);
11146 else if (GET_CODE (operand) == CONST_DOUBLE)
11148 REAL_VALUE_TYPE r;
11149 long l[4];
11151 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11152 real_to_target (l, &r, mode);
11154 /* Do not use shift by 32 to avoid warning on 32bit systems. */
11155 if (HOST_BITS_PER_WIDE_INT >= 64)
11156 parts[0]
11157 = gen_int_mode
11158 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
11159 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
11160 DImode);
11161 else
11162 parts[0] = immed_double_const (l[0], l[1], DImode);
11164 if (upper_mode == SImode)
11165 parts[1] = gen_int_mode (l[2], SImode);
11166 else if (HOST_BITS_PER_WIDE_INT >= 64)
11167 parts[1]
11168 = gen_int_mode
11169 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
11170 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
11171 DImode);
11172 else
11173 parts[1] = immed_double_const (l[2], l[3], DImode);
11175 else
11176 gcc_unreachable ();
11180 return size;
11183 /* Emit insns to perform a move or push of DI, DF, and XF values.
11184 Return false when normal moves are needed; true when all required
11185 insns have been emitted. Operands 2-4 contain the input values
11186 in the correct order; operands 5-7 contain the output values. */
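/* For instance, on ia32 a DImode load such as "x = *p", where the
   register holding P has also been allocated as one of the halves of X,
   is split into two SImode moves; the halves are then copied in the
   order that reads through P before it is overwritten, or, with more
   than one collision, the address is first copied into the last
   destination part with an lea.  */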
11188 void
11189 ix86_split_long_move (rtx operands[])
11191 rtx part[2][3];
11192 int nparts;
11193 int push = 0;
11194 int collisions = 0;
11195 enum machine_mode mode = GET_MODE (operands[0]);
11197 /* The DFmode expanders may ask us to move a double.
11198 For a 64-bit target this is a single move. By hiding the fact
11199 here we simplify the i386.md splitters. */
11200 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
11202 /* Optimize constant pool references into immediates. This is used by
11203 fp moves, which force all constants to memory to allow combining. */
11205 if (GET_CODE (operands[1]) == MEM
11206 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
11207 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
11208 operands[1] = get_pool_constant (XEXP (operands[1], 0));
11209 if (push_operand (operands[0], VOIDmode))
11211 operands[0] = copy_rtx (operands[0]);
11212 PUT_MODE (operands[0], Pmode);
11214 else
11215 operands[0] = gen_lowpart (DImode, operands[0]);
11216 operands[1] = gen_lowpart (DImode, operands[1]);
11217 emit_move_insn (operands[0], operands[1]);
11218 return;
11221 /* The only non-offsettable memory we handle is push. */
11222 if (push_operand (operands[0], VOIDmode))
11223 push = 1;
11224 else
11225 gcc_assert (GET_CODE (operands[0]) != MEM
11226 || offsettable_memref_p (operands[0]));
11228 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
11229 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
11231 /* When emitting a push, be careful with source operands on the stack. */
11232 if (push && GET_CODE (operands[1]) == MEM
11233 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
11235 if (nparts == 3)
11236 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
11237 XEXP (part[1][2], 0));
11238 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
11239 XEXP (part[1][1], 0));
11242 /* We need to do the copy in the right order in case an address register
11243 of the source overlaps the destination. */
11244 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
11246 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
11247 collisions++;
11248 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
11249 collisions++;
11250 if (nparts == 3
11251 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
11252 collisions++;
11254 /* Collision in the middle part can be handled by reordering. */
11255 if (collisions == 1 && nparts == 3
11256 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
11258 rtx tmp;
11259 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
11260 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
11263 /* If there are more collisions, we can't handle them by reordering.
11264 Do an lea into the last part and use only one colliding move. */
11265 else if (collisions > 1)
11267 rtx base;
11269 collisions = 1;
11271 base = part[0][nparts - 1];
11273 /* Handle the case when the last part isn't valid for lea.
11274 Happens in 64-bit mode storing the 12-byte XFmode. */
11275 if (GET_MODE (base) != Pmode)
11276 base = gen_rtx_REG (Pmode, REGNO (base));
11278 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
11279 part[1][0] = replace_equiv_address (part[1][0], base);
11280 part[1][1] = replace_equiv_address (part[1][1],
11281 plus_constant (base, UNITS_PER_WORD));
11282 if (nparts == 3)
11283 part[1][2] = replace_equiv_address (part[1][2],
11284 plus_constant (base, 8));
11288 if (push)
11290 if (!TARGET_64BIT)
11292 if (nparts == 3)
11294 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
11295 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
11296 emit_move_insn (part[0][2], part[1][2]);
11299 else
11301 /* In 64-bit mode we don't have a 32-bit push available. In case this is a
11302 register, that is OK - we will just use the larger counterpart. We also
11303 retype memory - this comes from an attempt to avoid a REX prefix on
11304 moving the second half of a TFmode value. */
11305 if (GET_MODE (part[1][1]) == SImode)
11307 switch (GET_CODE (part[1][1]))
11309 case MEM:
11310 part[1][1] = adjust_address (part[1][1], DImode, 0);
11311 break;
11313 case REG:
11314 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
11315 break;
11317 default:
11318 gcc_unreachable ();
11321 if (GET_MODE (part[1][0]) == SImode)
11322 part[1][0] = part[1][1];
11325 emit_move_insn (part[0][1], part[1][1]);
11326 emit_move_insn (part[0][0], part[1][0]);
11327 return;
11330 /* Choose the correct order so we do not overwrite the source before it is copied. */
11331 if ((REG_P (part[0][0])
11332 && REG_P (part[1][1])
11333 && (REGNO (part[0][0]) == REGNO (part[1][1])
11334 || (nparts == 3
11335 && REGNO (part[0][0]) == REGNO (part[1][2]))))
11336 || (collisions > 0
11337 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
11339 if (nparts == 3)
11341 operands[2] = part[0][2];
11342 operands[3] = part[0][1];
11343 operands[4] = part[0][0];
11344 operands[5] = part[1][2];
11345 operands[6] = part[1][1];
11346 operands[7] = part[1][0];
11348 else
11350 operands[2] = part[0][1];
11351 operands[3] = part[0][0];
11352 operands[5] = part[1][1];
11353 operands[6] = part[1][0];
11356 else
11358 if (nparts == 3)
11360 operands[2] = part[0][0];
11361 operands[3] = part[0][1];
11362 operands[4] = part[0][2];
11363 operands[5] = part[1][0];
11364 operands[6] = part[1][1];
11365 operands[7] = part[1][2];
11367 else
11369 operands[2] = part[0][0];
11370 operands[3] = part[0][1];
11371 operands[5] = part[1][0];
11372 operands[6] = part[1][1];
11376 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
11377 if (optimize_size)
11379 if (GET_CODE (operands[5]) == CONST_INT
11380 && operands[5] != const0_rtx
11381 && REG_P (operands[2]))
11383 if (GET_CODE (operands[6]) == CONST_INT
11384 && INTVAL (operands[6]) == INTVAL (operands[5]))
11385 operands[6] = operands[2];
11387 if (nparts == 3
11388 && GET_CODE (operands[7]) == CONST_INT
11389 && INTVAL (operands[7]) == INTVAL (operands[5]))
11390 operands[7] = operands[2];
11393 if (nparts == 3
11394 && GET_CODE (operands[6]) == CONST_INT
11395 && operands[6] != const0_rtx
11396 && REG_P (operands[3])
11397 && GET_CODE (operands[7]) == CONST_INT
11398 && INTVAL (operands[7]) == INTVAL (operands[6]))
11399 operands[7] = operands[3];
11402 emit_move_insn (operands[2], operands[5]);
11403 emit_move_insn (operands[3], operands[6]);
11404 if (nparts == 3)
11405 emit_move_insn (operands[4], operands[7]);
11407 return;
11410 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
11411 left shift by a constant, either using a single shift or
11412 a sequence of add instructions. */
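/* For example, when not optimizing for size and additions are cheap
   enough, a word shift such as "x << 2" is emitted as two
   self-additions ("add x, x" twice) instead of a single shift,
   following the cost comparison below.  */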
11414 static void
11415 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
11417 if (count == 1)
11419 emit_insn ((mode == DImode
11420 ? gen_addsi3
11421 : gen_adddi3) (operand, operand, operand));
11423 else if (!optimize_size
11424 && count * ix86_cost->add <= ix86_cost->shift_const)
11426 int i;
11427 for (i=0; i<count; i++)
11429 emit_insn ((mode == DImode
11430 ? gen_addsi3
11431 : gen_adddi3) (operand, operand, operand));
11434 else
11435 emit_insn ((mode == DImode
11436 ? gen_ashlsi3
11437 : gen_ashldi3) (operand, operand, GEN_INT (count)));
11440 void
11441 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
11443 rtx low[2], high[2];
11444 int count;
11445 const int single_width = mode == DImode ? 32 : 64;
11447 if (GET_CODE (operands[2]) == CONST_INT)
11449 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
11450 count = INTVAL (operands[2]) & (single_width * 2 - 1);
11452 if (count >= single_width)
11454 emit_move_insn (high[0], low[1]);
11455 emit_move_insn (low[0], const0_rtx);
11457 if (count > single_width)
11458 ix86_expand_ashl_const (high[0], count - single_width, mode);
11460 else
11462 if (!rtx_equal_p (operands[0], operands[1]))
11463 emit_move_insn (operands[0], operands[1]);
11464 emit_insn ((mode == DImode
11465 ? gen_x86_shld_1
11466 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
11467 ix86_expand_ashl_const (low[0], count, mode);
11469 return;
11472 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
11474 if (operands[1] == const1_rtx)
11476 /* Assuming we've chosen QImode-capable registers, 1 << N
11477 can be done with two 32/64-bit shifts, no branches, no cmoves. */
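/* Concretely, for a DImode "1 << n" on ia32 the code below tests bit 5
   of the count, sets the low word to (n & 32) == 0 and the high word to
   (n & 32) != 0, and then shifts both words left by N; the hardware
   masks 32-bit shift counts to 5 bits, so e.g. n = 35 leaves the low
   word 0 and the high word 1 << 3 = 8, i.e. the value 2^35.  */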
11478 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
11480 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
11482 ix86_expand_clear (low[0]);
11483 ix86_expand_clear (high[0]);
11484 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
11486 d = gen_lowpart (QImode, low[0]);
11487 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
11488 s = gen_rtx_EQ (QImode, flags, const0_rtx);
11489 emit_insn (gen_rtx_SET (VOIDmode, d, s));
11491 d = gen_lowpart (QImode, high[0]);
11492 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
11493 s = gen_rtx_NE (QImode, flags, const0_rtx);
11494 emit_insn (gen_rtx_SET (VOIDmode, d, s));
11497 /* Otherwise, we can get the same results by manually performing
11498 a bit extract operation on bit 5/6, and then performing the two
11499 shifts. The two methods of getting 0/1 into low/high are exactly
11500 the same size. Avoiding the shift in the bit extract case helps
11501 pentium4 a bit; no one else seems to care much either way. */
11502 else
11504 rtx x;
11506 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
11507 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
11508 else
11509 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
11510 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
11512 emit_insn ((mode == DImode
11513 ? gen_lshrsi3
11514 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
11515 emit_insn ((mode == DImode
11516 ? gen_andsi3
11517 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
11518 emit_move_insn (low[0], high[0]);
11519 emit_insn ((mode == DImode
11520 ? gen_xorsi3
11521 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
11524 emit_insn ((mode == DImode
11525 ? gen_ashlsi3
11526 : gen_ashldi3) (low[0], low[0], operands[2]));
11527 emit_insn ((mode == DImode
11528 ? gen_ashlsi3
11529 : gen_ashldi3) (high[0], high[0], operands[2]));
11530 return;
11533 if (operands[1] == constm1_rtx)
11535 /* For -1 << N, we can avoid the shld instruction, because we
11536 know that we're shifting 0...31/63 ones into a -1. */
11537 emit_move_insn (low[0], constm1_rtx);
11538 if (optimize_size)
11539 emit_move_insn (high[0], low[0]);
11540 else
11541 emit_move_insn (high[0], constm1_rtx);
11543 else
11545 if (!rtx_equal_p (operands[0], operands[1]))
11546 emit_move_insn (operands[0], operands[1]);
11548 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
11549 emit_insn ((mode == DImode
11550 ? gen_x86_shld_1
11551 : gen_x86_64_shld) (high[0], low[0], operands[2]));
11554 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
11556 if (TARGET_CMOVE && scratch)
11558 ix86_expand_clear (scratch);
11559 emit_insn ((mode == DImode
11560 ? gen_x86_shift_adj_1
11561 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
11563 else
11564 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
11567 void
11568 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
11570 rtx low[2], high[2];
11571 int count;
11572 const int single_width = mode == DImode ? 32 : 64;
11574 if (GET_CODE (operands[2]) == CONST_INT)
11576 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
11577 count = INTVAL (operands[2]) & (single_width * 2 - 1);
11579 if (count == single_width * 2 - 1)
11581 emit_move_insn (high[0], high[1]);
11582 emit_insn ((mode == DImode
11583 ? gen_ashrsi3
11584 : gen_ashrdi3) (high[0], high[0],
11585 GEN_INT (single_width - 1)));
11586 emit_move_insn (low[0], high[0]);
11589 else if (count >= single_width)
11591 emit_move_insn (low[0], high[1]);
11592 emit_move_insn (high[0], low[0]);
11593 emit_insn ((mode == DImode
11594 ? gen_ashrsi3
11595 : gen_ashrdi3) (high[0], high[0],
11596 GEN_INT (single_width - 1)));
11597 if (count > single_width)
11598 emit_insn ((mode == DImode
11599 ? gen_ashrsi3
11600 : gen_ashrdi3) (low[0], low[0],
11601 GEN_INT (count - single_width)));
11603 else
11605 if (!rtx_equal_p (operands[0], operands[1]))
11606 emit_move_insn (operands[0], operands[1]);
11607 emit_insn ((mode == DImode
11608 ? gen_x86_shrd_1
11609 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
11610 emit_insn ((mode == DImode
11611 ? gen_ashrsi3
11612 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
11615 else
11617 if (!rtx_equal_p (operands[0], operands[1]))
11618 emit_move_insn (operands[0], operands[1]);
11620 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
11622 emit_insn ((mode == DImode
11623 ? gen_x86_shrd_1
11624 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
11625 emit_insn ((mode == DImode
11626 ? gen_ashrsi3
11627 : gen_ashrdi3) (high[0], high[0], operands[2]));
11629 if (TARGET_CMOVE && scratch)
11631 emit_move_insn (scratch, high[0]);
11632 emit_insn ((mode == DImode
11633 ? gen_ashrsi3
11634 : gen_ashrdi3) (scratch, scratch,
11635 GEN_INT (single_width - 1)));
11636 emit_insn ((mode == DImode
11637 ? gen_x86_shift_adj_1
11638 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
11639 scratch));
11641 else
11642 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
11646 void
11647 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
11649 rtx low[2], high[2];
11650 int count;
11651 const int single_width = mode == DImode ? 32 : 64;
11653 if (GET_CODE (operands[2]) == CONST_INT)
11655 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
11656 count = INTVAL (operands[2]) & (single_width * 2 - 1);
11658 if (count >= single_width)
11660 emit_move_insn (low[0], high[1]);
11661 ix86_expand_clear (high[0]);
11663 if (count > single_width)
11664 emit_insn ((mode == DImode
11665 ? gen_lshrsi3
11666 : gen_lshrdi3) (low[0], low[0],
11667 GEN_INT (count - single_width)));
11669 else
11671 if (!rtx_equal_p (operands[0], operands[1]))
11672 emit_move_insn (operands[0], operands[1]);
11673 emit_insn ((mode == DImode
11674 ? gen_x86_shrd_1
11675 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
11676 emit_insn ((mode == DImode
11677 ? gen_lshrsi3
11678 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
11681 else
11683 if (!rtx_equal_p (operands[0], operands[1]))
11684 emit_move_insn (operands[0], operands[1]);
11686 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
11688 emit_insn ((mode == DImode
11689 ? gen_x86_shrd_1
11690 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
11691 emit_insn ((mode == DImode
11692 ? gen_lshrsi3
11693 : gen_lshrdi3) (high[0], high[0], operands[2]));
11695 /* Heh. By reversing the arguments, we can reuse this pattern. */
11696 if (TARGET_CMOVE && scratch)
11698 ix86_expand_clear (scratch);
11699 emit_insn ((mode == DImode
11700 ? gen_x86_shift_adj_1
11701 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
11702 scratch));
11704 else
11705 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
11709 /* Helper function for the string operations below. Test VARIABLE for whether
11710 it is aligned to VALUE bytes. If true, jump to the label. */
11711 static rtx
11712 ix86_expand_aligntest (rtx variable, int value)
11714 rtx label = gen_label_rtx ();
11715 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
11716 if (GET_MODE (variable) == DImode)
11717 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
11718 else
11719 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
11720 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
11721 1, label);
11722 return label;
11725 /* Adjust COUNTER by the VALUE. */
11726 static void
11727 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
11729 if (GET_MODE (countreg) == DImode)
11730 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
11731 else
11732 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
11735 /* Zero extend possibly SImode EXP to Pmode register. */
11737 ix86_zero_extend_to_Pmode (rtx exp)
11739 rtx r;
11740 if (GET_MODE (exp) == VOIDmode)
11741 return force_reg (Pmode, exp);
11742 if (GET_MODE (exp) == Pmode)
11743 return copy_to_mode_reg (Pmode, exp);
11744 r = gen_reg_rtx (Pmode);
11745 emit_insn (gen_zero_extendsidi2 (r, exp));
11746 return r;
11749 /* Expand string move (memcpy) operation. Use i386 string operations when
11750 profitable. expand_clrmem contains similar code. */
11752 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
11754 rtx srcreg, destreg, countreg, srcexp, destexp;
11755 enum machine_mode counter_mode;
11756 HOST_WIDE_INT align = 0;
11757 unsigned HOST_WIDE_INT count = 0;
11759 if (GET_CODE (align_exp) == CONST_INT)
11760 align = INTVAL (align_exp);
11762 /* Can't use any of this if the user has appropriated esi or edi. */
11763 if (global_regs[4] || global_regs[5])
11764 return 0;
11766 /* This simple hack avoids all inlining code and simplifies code below. */
11767 if (!TARGET_ALIGN_STRINGOPS)
11768 align = 64;
11770 if (GET_CODE (count_exp) == CONST_INT)
11772 count = INTVAL (count_exp);
11773 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11774 return 0;
11777 /* Figure out the proper mode for the counter. For 32 bits it is always SImode,
11778 for 64 bits use SImode when possible, otherwise DImode.
11779 Set count to the number of bytes copied when known at compile time. */
11780 if (!TARGET_64BIT
11781 || GET_MODE (count_exp) == SImode
11782 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
11783 counter_mode = SImode;
11784 else
11785 counter_mode = DImode;
11787 gcc_assert (counter_mode == SImode || counter_mode == DImode);
11789 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11790 if (destreg != XEXP (dst, 0))
11791 dst = replace_equiv_address_nv (dst, destreg);
11792 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11793 if (srcreg != XEXP (src, 0))
11794 src = replace_equiv_address_nv (src, srcreg);
11796 /* When optimizing for size emit a simple rep; movsb instruction for
11797 counts not divisible by 4, except when the (movsl;)*(movsw;)?(movsb;)?
11798 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
11799 Size of the (movsl;)*(movsw;)?(movsb;)? sequence is
11800 count / 4 + (count & 3); the other sequence is either 4 or 7 bytes,
11801 depending on whether the upper 24 (resp. 56) bits of %ecx are known
11802 to be zero. The rep; movsb sequence causes higher register
11803 pressure though, so take that into account. */
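   /* Illustration (not in the original source) of the heuristic below: at -Os
      and count == 19 the unrolled form would need (19 & 3) + 19 / 4 = 3 + 4
      = 7 > 6 moves, so the rep; movsb form is chosen; at count == 18 it is
      2 + 4 = 6, so control falls through to the unrolled copy instead.  */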
11805 if ((!optimize || optimize_size)
11806 && (count == 0
11807 || ((count & 0x03)
11808 && (!optimize_size
11809 || count > 5 * 4
11810 || (count & 3) + count / 4 > 6))))
11812 emit_insn (gen_cld ());
11813 countreg = ix86_zero_extend_to_Pmode (count_exp);
11814 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11815 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11816 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11817 destexp, srcexp));
11820 /* For constant aligned (or small unaligned) copies use rep movsl
11821 followed by code copying the rest. For PentiumPro ensure 8 byte
11822 alignment to allow rep movsl acceleration. */
11824 else if (count != 0
11825 && (align >= 8
11826 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11827 || optimize_size || count < (unsigned int) 64))
11829 unsigned HOST_WIDE_INT offset = 0;
11830 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11831 rtx srcmem, dstmem;
11833 emit_insn (gen_cld ());
11834 if (count & ~(size - 1))
11836 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
11838 enum machine_mode movs_mode = size == 4 ? SImode : DImode;
11840 while (offset < (count & ~(size - 1)))
11842 srcmem = adjust_automodify_address_nv (src, movs_mode,
11843 srcreg, offset);
11844 dstmem = adjust_automodify_address_nv (dst, movs_mode,
11845 destreg, offset);
11846 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11847 offset += size;
11850 else
11852 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
11853 & (TARGET_64BIT ? -1 : 0x3fffffff));
11854 countreg = copy_to_mode_reg (counter_mode, countreg);
11855 countreg = ix86_zero_extend_to_Pmode (countreg);
11857 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11858 GEN_INT (size == 4 ? 2 : 3));
11859 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11860 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11862 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11863 countreg, destexp, srcexp));
11864 offset = count & ~(size - 1);
11867 if (size == 8 && (count & 0x04))
11869 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11870 offset);
11871 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11872 offset);
11873 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11874 offset += 4;
11876 if (count & 0x02)
11878 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11879 offset);
11880 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11881 offset);
11882 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11883 offset += 2;
11885 if (count & 0x01)
11887 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11888 offset);
11889 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11890 offset);
11891 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11894 /* The generic code based on the glibc implementation:
11895 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11896 allowing accelerated copying there)
11897 - copy the data using rep movsl
11898 - copy the rest. */
11899 else
11901 rtx countreg2;
11902 rtx label = NULL;
11903 rtx srcmem, dstmem;
11904 int desired_alignment = (TARGET_PENTIUMPRO
11905 && (count == 0 || count >= (unsigned int) 260)
11906 ? 8 : UNITS_PER_WORD);
11907 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11908 dst = change_address (dst, BLKmode, destreg);
11909 src = change_address (src, BLKmode, srcreg);
11911 /* In case we don't know anything about the alignment, default to
11912 the library version, since it is usually equally fast and results in
11913 shorter code.
11915 Also emit call when we know that the count is large and call overhead
11916 will not be important. */
11917 if (!TARGET_INLINE_ALL_STRINGOPS
11918 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11919 return 0;
11921 if (TARGET_SINGLE_STRINGOP)
11922 emit_insn (gen_cld ());
11924 countreg2 = gen_reg_rtx (Pmode);
11925 countreg = copy_to_mode_reg (counter_mode, count_exp);
11927 /* We don't use loops to align destination and to copy parts smaller
11928 than 4 bytes, because gcc is able to optimize such code better (in
11929 the case the destination or the count really is aligned, gcc is often
11930 able to predict the branches) and also it is friendlier to the
11931 hardware branch prediction.
11933 Using loops is beneficial for generic case, because we can
11934 handle small counts using the loops. Many CPUs (such as Athlon)
11935 have large REP prefix setup costs.
11937 This is quite costly. Maybe we can revisit this decision later or
11938 add some customizability to this code. */
11940 if (count == 0 && align < desired_alignment)
11942 label = gen_label_rtx ();
11943 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11944 LEU, 0, counter_mode, 1, label);
11946 if (align <= 1)
11948 rtx label = ix86_expand_aligntest (destreg, 1);
11949 srcmem = change_address (src, QImode, srcreg);
11950 dstmem = change_address (dst, QImode, destreg);
11951 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11952 ix86_adjust_counter (countreg, 1);
11953 emit_label (label);
11954 LABEL_NUSES (label) = 1;
11956 if (align <= 2)
11958 rtx label = ix86_expand_aligntest (destreg, 2);
11959 srcmem = change_address (src, HImode, srcreg);
11960 dstmem = change_address (dst, HImode, destreg);
11961 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11962 ix86_adjust_counter (countreg, 2);
11963 emit_label (label);
11964 LABEL_NUSES (label) = 1;
11966 if (align <= 4 && desired_alignment > 4)
11968 rtx label = ix86_expand_aligntest (destreg, 4);
11969 srcmem = change_address (src, SImode, srcreg);
11970 dstmem = change_address (dst, SImode, destreg);
11971 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11972 ix86_adjust_counter (countreg, 4);
11973 emit_label (label);
11974 LABEL_NUSES (label) = 1;
11977 if (label && desired_alignment > 4 && !TARGET_64BIT)
11979 emit_label (label);
11980 LABEL_NUSES (label) = 1;
11981 label = NULL_RTX;
11983 if (!TARGET_SINGLE_STRINGOP)
11984 emit_insn (gen_cld ());
11985 if (TARGET_64BIT)
11987 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11988 GEN_INT (3)));
11989 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11991 else
11993 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11994 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11996 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11997 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11998 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11999 countreg2, destexp, srcexp));
12001 if (label)
12003 emit_label (label);
12004 LABEL_NUSES (label) = 1;
12006 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12008 srcmem = change_address (src, SImode, srcreg);
12009 dstmem = change_address (dst, SImode, destreg);
12010 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12012 if ((align <= 4 || count == 0) && TARGET_64BIT)
12014 rtx label = ix86_expand_aligntest (countreg, 4);
12015 srcmem = change_address (src, SImode, srcreg);
12016 dstmem = change_address (dst, SImode, destreg);
12017 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12018 emit_label (label);
12019 LABEL_NUSES (label) = 1;
12021 if (align > 2 && count != 0 && (count & 2))
12023 srcmem = change_address (src, HImode, srcreg);
12024 dstmem = change_address (dst, HImode, destreg);
12025 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12027 if (align <= 2 || count == 0)
12029 rtx label = ix86_expand_aligntest (countreg, 2);
12030 srcmem = change_address (src, HImode, srcreg);
12031 dstmem = change_address (dst, HImode, destreg);
12032 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12033 emit_label (label);
12034 LABEL_NUSES (label) = 1;
12036 if (align > 1 && count != 0 && (count & 1))
12038 srcmem = change_address (src, QImode, srcreg);
12039 dstmem = change_address (dst, QImode, destreg);
12040 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12042 if (align <= 1 || count == 0)
12044 rtx label = ix86_expand_aligntest (countreg, 1);
12045 srcmem = change_address (src, QImode, srcreg);
12046 dstmem = change_address (dst, QImode, destreg);
12047 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12048 emit_label (label);
12049 LABEL_NUSES (label) = 1;
12053 return 1;
12056 /* Expand string clear operation (bzero). Use i386 string operations when
12057 profitable. ix86_expand_movmem contains similar code. */
12059 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
12061 rtx destreg, zeroreg, countreg, destexp;
12062 enum machine_mode counter_mode;
12063 HOST_WIDE_INT align = 0;
12064 unsigned HOST_WIDE_INT count = 0;
12066 if (GET_CODE (align_exp) == CONST_INT)
12067 align = INTVAL (align_exp);
12069 /* Can't use any of this if the user has appropriated esi. */
12070 if (global_regs[4])
12071 return 0;
12073 /* This simple hack avoids all inlining code and simplifies code below. */
12074 if (!TARGET_ALIGN_STRINGOPS)
12075 align = 32;
12077 if (GET_CODE (count_exp) == CONST_INT)
12079 count = INTVAL (count_exp);
12080 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12081 return 0;
12083 /* Figure out proper mode for counter. For 32bits it is always SImode,
12084 for 64bits use SImode when possible, otherwise DImode.
12085 Set count to number of bytes copied when known at compile time. */
12086 if (!TARGET_64BIT
12087 || GET_MODE (count_exp) == SImode
12088 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12089 counter_mode = SImode;
12090 else
12091 counter_mode = DImode;
12093 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12094 if (destreg != XEXP (dst, 0))
12095 dst = replace_equiv_address_nv (dst, destreg);
12098 /* When optimizing for size emit a simple rep; stosb instruction for
12099 counts not divisible by 4. The movl $N, %ecx; rep; stosb
12100 sequence is 7 bytes long, so if optimizing for size and count is
12101 small enough that some stosl, stosw and stosb instructions without
12102 rep are shorter, fall back into the next if. */
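   /* Illustration (not in the original source): at -Os and count == 29 the
      non-rep sequence would need (29 & 3) + (29 >> 2) = 1 + 7 = 8 > 7 stores,
      so rep; stosb is emitted; at count == 25 it is 1 + 6 = 7 and control
      falls through to the unrolled path below.  */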
12104 if ((!optimize || optimize_size)
12105 && (count == 0
12106 || ((count & 0x03)
12107 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
12109 emit_insn (gen_cld ());
12111 countreg = ix86_zero_extend_to_Pmode (count_exp);
12112 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
12113 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12114 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
12116 else if (count != 0
12117 && (align >= 8
12118 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12119 || optimize_size || count < (unsigned int) 64))
12121 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12122 unsigned HOST_WIDE_INT offset = 0;
12124 emit_insn (gen_cld ());
12126 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
12127 if (count & ~(size - 1))
12129 unsigned HOST_WIDE_INT repcount;
12130 unsigned int max_nonrep;
12132 repcount = count >> (size == 4 ? 2 : 3);
12133 if (!TARGET_64BIT)
12134 repcount &= 0x3fffffff;
12136 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
12137 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
12138 bytes. In both cases the latter seems to be faster for small
12139 values of N. */
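	 /* Editorial note: the defaults below follow from the sizes quoted
	    above -- up to 7 plain stosl (7 bytes) is never larger than the
	    7-byte rep form, and up to 4 stosq (8 bytes) is never larger than
	    the 8-byte rep form.  */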
12140 max_nonrep = size == 4 ? 7 : 4;
12141 if (!optimize_size)
12142 switch (ix86_tune)
12144 case PROCESSOR_PENTIUM4:
12145 case PROCESSOR_NOCONA:
12146 max_nonrep = 3;
12147 break;
12148 default:
12149 break;
12152 if (repcount <= max_nonrep)
12153 while (repcount-- > 0)
12155 rtx mem = adjust_automodify_address_nv (dst,
12156 GET_MODE (zeroreg),
12157 destreg, offset);
12158 emit_insn (gen_strset (destreg, mem, zeroreg));
12159 offset += size;
12161 else
12163 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
12164 countreg = ix86_zero_extend_to_Pmode (countreg);
12165 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12166 GEN_INT (size == 4 ? 2 : 3));
12167 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12168 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
12169 destexp));
12170 offset = count & ~(size - 1);
12173 if (size == 8 && (count & 0x04))
12175 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
12176 offset);
12177 emit_insn (gen_strset (destreg, mem,
12178 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12179 offset += 4;
12181 if (count & 0x02)
12183 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
12184 offset);
12185 emit_insn (gen_strset (destreg, mem,
12186 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12187 offset += 2;
12189 if (count & 0x01)
12191 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
12192 offset);
12193 emit_insn (gen_strset (destreg, mem,
12194 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12197 else
12199 rtx countreg2;
12200 rtx label = NULL;
12201 /* Compute desired alignment of the string operation. */
12202 int desired_alignment = (TARGET_PENTIUMPRO
12203 && (count == 0 || count >= (unsigned int) 260)
12204 ? 8 : UNITS_PER_WORD);
12206 /* In case we don't know anything about the alignment, default to
12207 the library version, since it is usually equally fast and results in
12208 shorter code.
12210 Also emit call when we know that the count is large and call overhead
12211 will not be important. */
12212 if (!TARGET_INLINE_ALL_STRINGOPS
12213 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12214 return 0;
12216 if (TARGET_SINGLE_STRINGOP)
12217 emit_insn (gen_cld ());
12219 countreg2 = gen_reg_rtx (Pmode);
12220 countreg = copy_to_mode_reg (counter_mode, count_exp);
12221 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
12222 /* Get rid of MEM_OFFSET, it won't be accurate. */
12223 dst = change_address (dst, BLKmode, destreg);
12225 if (count == 0 && align < desired_alignment)
12227 label = gen_label_rtx ();
12228 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12229 LEU, 0, counter_mode, 1, label);
12231 if (align <= 1)
12233 rtx label = ix86_expand_aligntest (destreg, 1);
12234 emit_insn (gen_strset (destreg, dst,
12235 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12236 ix86_adjust_counter (countreg, 1);
12237 emit_label (label);
12238 LABEL_NUSES (label) = 1;
12240 if (align <= 2)
12242 rtx label = ix86_expand_aligntest (destreg, 2);
12243 emit_insn (gen_strset (destreg, dst,
12244 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12245 ix86_adjust_counter (countreg, 2);
12246 emit_label (label);
12247 LABEL_NUSES (label) = 1;
12249 if (align <= 4 && desired_alignment > 4)
12251 rtx label = ix86_expand_aligntest (destreg, 4);
12252 emit_insn (gen_strset (destreg, dst,
12253 (TARGET_64BIT
12254 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
12255 : zeroreg)));
12256 ix86_adjust_counter (countreg, 4);
12257 emit_label (label);
12258 LABEL_NUSES (label) = 1;
12261 if (label && desired_alignment > 4 && !TARGET_64BIT)
12263 emit_label (label);
12264 LABEL_NUSES (label) = 1;
12265 label = NULL_RTX;
12268 if (!TARGET_SINGLE_STRINGOP)
12269 emit_insn (gen_cld ());
12270 if (TARGET_64BIT)
12272 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12273 GEN_INT (3)));
12274 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12276 else
12278 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12279 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12281 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12282 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
12284 if (label)
12286 emit_label (label);
12287 LABEL_NUSES (label) = 1;
12290 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12291 emit_insn (gen_strset (destreg, dst,
12292 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12293 if (TARGET_64BIT && (align <= 4 || count == 0))
12295 rtx label = ix86_expand_aligntest (countreg, 4);
12296 emit_insn (gen_strset (destreg, dst,
12297 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12298 emit_label (label);
12299 LABEL_NUSES (label) = 1;
12301 if (align > 2 && count != 0 && (count & 2))
12302 emit_insn (gen_strset (destreg, dst,
12303 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12304 if (align <= 2 || count == 0)
12306 rtx label = ix86_expand_aligntest (countreg, 2);
12307 emit_insn (gen_strset (destreg, dst,
12308 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12309 emit_label (label);
12310 LABEL_NUSES (label) = 1;
12312 if (align > 1 && count != 0 && (count & 1))
12313 emit_insn (gen_strset (destreg, dst,
12314 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12315 if (align <= 1 || count == 0)
12317 rtx label = ix86_expand_aligntest (countreg, 1);
12318 emit_insn (gen_strset (destreg, dst,
12319 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12320 emit_label (label);
12321 LABEL_NUSES (label) = 1;
12324 return 1;
12327 /* Expand strlen. */
12329 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
12331 rtx addr, scratch1, scratch2, scratch3, scratch4;
12333 /* The generic case of the strlen expander is long. Avoid expanding
12334 it unless TARGET_INLINE_ALL_STRINGOPS. */
12336 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
12337 && !TARGET_INLINE_ALL_STRINGOPS
12338 && !optimize_size
12339 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
12340 return 0;
12342 addr = force_reg (Pmode, XEXP (src, 0));
12343 scratch1 = gen_reg_rtx (Pmode);
12345 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
12346 && !optimize_size)
12348 /* Well it seems that some optimizer does not combine a call like
12349 foo(strlen(bar), strlen(bar));
12350 when the move and the subtraction are done here. It does calculate
12351 the length just once when these instructions are done inside
12352 output_strlen_unroll(). But since &bar[strlen(bar)] is often used,
12353 and this uses one fewer register for the lifetime of
12354 output_strlen_unroll(), this is better. */
12356 emit_move_insn (out, addr);
12358 ix86_expand_strlensi_unroll_1 (out, src, align);
12360 /* strlensi_unroll_1 returns the address of the zero at the end of
12361 the string, like memchr(), so compute the length by subtracting
12362 the start address. */
12363 if (TARGET_64BIT)
12364 emit_insn (gen_subdi3 (out, out, addr));
12365 else
12366 emit_insn (gen_subsi3 (out, out, addr));
12368 else
12370 rtx unspec;
12371 scratch2 = gen_reg_rtx (Pmode);
12372 scratch3 = gen_reg_rtx (Pmode);
12373 scratch4 = force_reg (Pmode, constm1_rtx);
12375 emit_move_insn (scratch3, addr);
12376 eoschar = force_reg (QImode, eoschar);
12378 emit_insn (gen_cld ());
12379 src = replace_equiv_address_nv (src, scratch3);
12381 /* If .md starts supporting :P, this can be done in .md. */
12382 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
12383 scratch4), UNSPEC_SCAS);
12384 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
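      /* Editorial note: %ecx starts at -1 and repnz; scasb decrements it once
	 per byte scanned, including the terminating zero, leaving -(len + 2)
	 in SCRATCH1.  The one's complement plus -1 below therefore yields
	 ~(-(len + 2)) - 1 = (len + 1) - 1 = len.  */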
12385 if (TARGET_64BIT)
12387 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
12388 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
12390 else
12392 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
12393 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
12396 return 1;
12399 /* Expand the appropriate insns for doing strlen if not just doing
12400 repnz; scasb
12402 out = result, initialized with the start address
12403 align_rtx = alignment of the address.
12404 scratch = scratch register, initialized with the start address when
12405 not aligned, otherwise undefined
12407 This is just the body. It needs the initializations mentioned above and
12408 some address computing at the end. These things are done in i386.md. */
12410 static void
12411 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
12413 int align;
12414 rtx tmp;
12415 rtx align_2_label = NULL_RTX;
12416 rtx align_3_label = NULL_RTX;
12417 rtx align_4_label = gen_label_rtx ();
12418 rtx end_0_label = gen_label_rtx ();
12419 rtx mem;
12420 rtx tmpreg = gen_reg_rtx (SImode);
12421 rtx scratch = gen_reg_rtx (SImode);
12422 rtx cmp;
12424 align = 0;
12425 if (GET_CODE (align_rtx) == CONST_INT)
12426 align = INTVAL (align_rtx);
12428 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
12430 /* Is there a known alignment and is it less than 4? */
12431 if (align < 4)
12433 rtx scratch1 = gen_reg_rtx (Pmode);
12434 emit_move_insn (scratch1, out);
12435 /* Is there a known alignment and is it not 2? */
12436 if (align != 2)
12438 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
12439 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
12441 /* Leave just the 3 lower bits. */
12442 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
12443 NULL_RTX, 0, OPTAB_WIDEN);
12445 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
12446 Pmode, 1, align_4_label);
12447 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
12448 Pmode, 1, align_2_label);
12449 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
12450 Pmode, 1, align_3_label);
12452 else
12454 /* Since the alignment is 2, we have to check 2 or 0 bytes;
12455 check whether it is aligned to a 4-byte boundary. */
12457 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
12458 NULL_RTX, 0, OPTAB_WIDEN);
12460 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
12461 Pmode, 1, align_4_label);
12464 mem = change_address (src, QImode, out);
12466 /* Now compare the bytes. */
12468 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
12469 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
12470 QImode, 1, end_0_label);
12472 /* Increment the address. */
12473 if (TARGET_64BIT)
12474 emit_insn (gen_adddi3 (out, out, const1_rtx));
12475 else
12476 emit_insn (gen_addsi3 (out, out, const1_rtx));
12478 /* Not needed with an alignment of 2 */
12479 if (align != 2)
12481 emit_label (align_2_label);
12483 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
12484 end_0_label);
12486 if (TARGET_64BIT)
12487 emit_insn (gen_adddi3 (out, out, const1_rtx));
12488 else
12489 emit_insn (gen_addsi3 (out, out, const1_rtx));
12491 emit_label (align_3_label);
12494 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
12495 end_0_label);
12497 if (TARGET_64BIT)
12498 emit_insn (gen_adddi3 (out, out, const1_rtx));
12499 else
12500 emit_insn (gen_addsi3 (out, out, const1_rtx));
12503 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
12504 align this loop; that only makes the program larger and does not help
12505 speed. */
12506 emit_label (align_4_label);
12508 mem = change_address (src, SImode, out);
12509 emit_move_insn (scratch, mem);
12510 if (TARGET_64BIT)
12511 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
12512 else
12513 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
12515 /* This formula yields a nonzero result iff one of the bytes is zero.
12516 This saves three branches inside the loop and many cycles. */
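  /* Editorial expansion (not in the original source): the insns below compute
     (x - 0x01010101) & ~x & 0x80808080 for the loaded word x.  A zero byte
     underflows to 0xff in the subtraction while its ~x byte is 0xff, so its
     0x80 bit survives the final mask and the result is nonzero.  With no
     zero byte there are no borrows, each byte simply becomes b - 1, and
     b - 1 has its top bit set only when b already did, which the ~x term
     clears -- so the result is zero.  */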
12518 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
12519 emit_insn (gen_one_cmplsi2 (scratch, scratch));
12520 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
12521 emit_insn (gen_andsi3 (tmpreg, tmpreg,
12522 gen_int_mode (0x80808080, SImode)));
12523 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
12524 align_4_label);
12526 if (TARGET_CMOVE)
12528 rtx reg = gen_reg_rtx (SImode);
12529 rtx reg2 = gen_reg_rtx (Pmode);
12530 emit_move_insn (reg, tmpreg);
12531 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
12533 /* If zero is not in the first two bytes, move two bytes forward. */
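      /* Editorial note: the 0x8080 test below looks only at the zero-byte
	 markers of the two low bytes; when neither is set, the zero lies in
	 the upper half, so the marker word (reg, already shifted right by 16)
	 and the advanced address (reg2 = out + 2) are selected branchlessly
	 with cmov.  */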
12534 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
12535 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
12536 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
12537 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
12538 gen_rtx_IF_THEN_ELSE (SImode, tmp,
12539 reg,
12540 tmpreg)));
12541 /* Emit lea manually to avoid clobbering of flags. */
12542 emit_insn (gen_rtx_SET (SImode, reg2,
12543 gen_rtx_PLUS (Pmode, out, const2_rtx)));
12545 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
12546 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
12547 emit_insn (gen_rtx_SET (VOIDmode, out,
12548 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
12549 reg2,
12550 out)));
12553 else
12555 rtx end_2_label = gen_label_rtx ();
12556 /* Is zero in the first two bytes? */
12558 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
12559 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
12560 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
12561 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12562 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
12563 pc_rtx);
12564 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
12565 JUMP_LABEL (tmp) = end_2_label;
12567 /* Not in the first two. Move two bytes forward. */
12568 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
12569 if (TARGET_64BIT)
12570 emit_insn (gen_adddi3 (out, out, const2_rtx));
12571 else
12572 emit_insn (gen_addsi3 (out, out, const2_rtx));
12574 emit_label (end_2_label);
12578 /* Avoid branch in fixing the byte. */
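  /* Editorial note: here OUT is 4 past the start of the two-byte pair that
     contains the zero, and the low byte of tmpreg is 0x80 exactly when the
     first byte of that pair is the zero.  Doubling that byte copies its top
     bit into the carry flag, and the subtract-with-borrow of 3 then rewinds
     OUT by 4 or 3 so that it points at the terminating zero byte.  */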
12579 tmpreg = gen_lowpart (QImode, tmpreg);
12580 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
12581 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
12582 if (TARGET_64BIT)
12583 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
12584 else
12585 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
12587 emit_label (end_0_label);
12590 void
12591 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
12592 rtx callarg2 ATTRIBUTE_UNUSED,
12593 rtx pop, int sibcall)
12595 rtx use = NULL, call;
12597 if (pop == const0_rtx)
12598 pop = NULL;
12599 gcc_assert (!TARGET_64BIT || !pop);
12601 #if TARGET_MACHO
12602 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
12603 fnaddr = machopic_indirect_call_target (fnaddr);
12604 #else
12605 /* Static functions and indirect calls don't need the pic register. */
12606 if (! TARGET_64BIT && flag_pic
12607 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
12608 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
12609 use_reg (&use, pic_offset_table_rtx);
12611 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
12613 rtx al = gen_rtx_REG (QImode, 0);
12614 emit_move_insn (al, callarg2);
12615 use_reg (&use, al);
12617 #endif /* TARGET_MACHO */
12619 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
12621 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
12622 fnaddr = gen_rtx_MEM (QImode, fnaddr);
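  /* Editorial note on the sibcall case below: a 64-bit indirect sibcall needs
     a register that is neither an argument register nor callee-saved, because
     the jump happens after the epilogue; R11 is the conventional scratch
     register for that.  */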
12624 if (sibcall && TARGET_64BIT
12625 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
12627 rtx addr;
12628 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
12629 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
12630 emit_move_insn (fnaddr, addr);
12631 fnaddr = gen_rtx_MEM (QImode, fnaddr);
12634 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
12635 if (retval)
12636 call = gen_rtx_SET (VOIDmode, retval, call);
12637 if (pop)
12639 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
12640 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
12641 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
12644 call = emit_call_insn (call);
12645 if (use)
12646 CALL_INSN_FUNCTION_USAGE (call) = use;
12650 /* Clear stack slot assignments remembered from previous functions.
12651 This is called from INIT_EXPANDERS once before RTL is emitted for each
12652 function. */
12654 static struct machine_function *
12655 ix86_init_machine_status (void)
12657 struct machine_function *f;
12659 f = ggc_alloc_cleared (sizeof (struct machine_function));
12660 f->use_fast_prologue_epilogue_nregs = -1;
12662 return f;
12665 /* Return a MEM corresponding to a stack slot with mode MODE.
12666 Allocate a new slot if necessary.
12668 The RTL for a function can have several slots available: N is
12669 which slot to use. */
12672 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
12674 struct stack_local_entry *s;
12676 gcc_assert (n < MAX_386_STACK_LOCALS);
12678 for (s = ix86_stack_locals; s; s = s->next)
12679 if (s->mode == mode && s->n == n)
12680 return s->rtl;
12682 s = (struct stack_local_entry *)
12683 ggc_alloc (sizeof (struct stack_local_entry));
12684 s->n = n;
12685 s->mode = mode;
12686 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
12688 s->next = ix86_stack_locals;
12689 ix86_stack_locals = s;
12690 return s->rtl;
12693 /* Construct the SYMBOL_REF for the tls_get_addr function. */
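/* Editorial note: on 32-bit targets with GNU TLS the triple-underscore
   ___tls_get_addr entry point is used, which follows the GNU register-based
   argument convention; other configurations call the standard
   __tls_get_addr.  */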
12695 static GTY(()) rtx ix86_tls_symbol;
12697 ix86_tls_get_addr (void)
12700 if (!ix86_tls_symbol)
12702 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
12703 (TARGET_GNU_TLS && !TARGET_64BIT)
12704 ? "___tls_get_addr"
12705 : "__tls_get_addr");
12708 return ix86_tls_symbol;
12711 /* Calculate the length of the memory address in the instruction
12712 encoding. Does not include the one-byte modrm, opcode, or prefix. */
12715 memory_address_length (rtx addr)
12717 struct ix86_address parts;
12718 rtx base, index, disp;
12719 int len;
12720 int ok;
12722 if (GET_CODE (addr) == PRE_DEC
12723 || GET_CODE (addr) == POST_INC
12724 || GET_CODE (addr) == PRE_MODIFY
12725 || GET_CODE (addr) == POST_MODIFY)
12726 return 0;
12728 ok = ix86_decompose_address (addr, &parts);
12729 gcc_assert (ok);
12731 if (parts.base && GET_CODE (parts.base) == SUBREG)
12732 parts.base = SUBREG_REG (parts.base);
12733 if (parts.index && GET_CODE (parts.index) == SUBREG)
12734 parts.index = SUBREG_REG (parts.index);
12736 base = parts.base;
12737 index = parts.index;
12738 disp = parts.disp;
12739 len = 0;
12741 /* Rule of thumb:
12742 - esp as the base always wants an index,
12743 - ebp as the base always wants a displacement. */
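  /* Editorial examples (not in the original source): (%eax) needs neither a
     SIB byte nor a displacement, giving 0 below; (%esp) needs a SIB byte and
     8(%ebp) a disp8, each giving 1; an absolute address needs a disp32,
     giving 4; 4(%esp) needs both the SIB byte and a disp8, giving 2.  */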
12745 /* Register Indirect. */
12746 if (base && !index && !disp)
12748 /* esp (for its index) and ebp (for its displacement) need
12749 the two-byte modrm form. */
12750 if (addr == stack_pointer_rtx
12751 || addr == arg_pointer_rtx
12752 || addr == frame_pointer_rtx
12753 || addr == hard_frame_pointer_rtx)
12754 len = 1;
12757 /* Direct Addressing. */
12758 else if (disp && !base && !index)
12759 len = 4;
12761 else
12763 /* Find the length of the displacement constant. */
12764 if (disp)
12766 if (GET_CODE (disp) == CONST_INT
12767 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
12768 && base)
12769 len = 1;
12770 else
12771 len = 4;
12773 /* ebp always wants a displacement. */
12774 else if (base == hard_frame_pointer_rtx)
12775 len = 1;
12777 /* An index requires the two-byte modrm form.... */
12778 if (index
12779 /* ...like esp, which always wants an index. */
12780 || base == stack_pointer_rtx
12781 || base == arg_pointer_rtx
12782 || base == frame_pointer_rtx)
12783 len += 1;
12786 return len;
12789 /* Compute default value for "length_immediate" attribute. When SHORTFORM
12790 is set, expect that the insn has an 8-bit immediate alternative. */
12792 ix86_attr_length_immediate_default (rtx insn, int shortform)
12794 int len = 0;
12795 int i;
12796 extract_insn_cached (insn);
12797 for (i = recog_data.n_operands - 1; i >= 0; --i)
12798 if (CONSTANT_P (recog_data.operand[i]))
12800 gcc_assert (!len);
12801 if (shortform
12802 && GET_CODE (recog_data.operand[i]) == CONST_INT
12803 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
12804 len = 1;
12805 else
12807 switch (get_attr_mode (insn))
12809 case MODE_QI:
12810 len+=1;
12811 break;
12812 case MODE_HI:
12813 len+=2;
12814 break;
12815 case MODE_SI:
12816 len+=4;
12817 break;
12818 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
12819 case MODE_DI:
12820 len+=4;
12821 break;
12822 default:
12823 fatal_insn ("unknown insn mode", insn);
12827 return len;
12829 /* Compute default value for "length_address" attribute. */
12831 ix86_attr_length_address_default (rtx insn)
12833 int i;
12835 if (get_attr_type (insn) == TYPE_LEA)
12837 rtx set = PATTERN (insn);
12839 if (GET_CODE (set) == PARALLEL)
12840 set = XVECEXP (set, 0, 0);
12842 gcc_assert (GET_CODE (set) == SET);
12844 return memory_address_length (SET_SRC (set));
12847 extract_insn_cached (insn);
12848 for (i = recog_data.n_operands - 1; i >= 0; --i)
12849 if (GET_CODE (recog_data.operand[i]) == MEM)
12851 return memory_address_length (XEXP (recog_data.operand[i], 0));
12852 break;
12854 return 0;
12857 /* Return the maximum number of instructions a cpu can issue. */
12859 static int
12860 ix86_issue_rate (void)
12862 switch (ix86_tune)
12864 case PROCESSOR_PENTIUM:
12865 case PROCESSOR_K6:
12866 return 2;
12868 case PROCESSOR_PENTIUMPRO:
12869 case PROCESSOR_PENTIUM4:
12870 case PROCESSOR_ATHLON:
12871 case PROCESSOR_K8:
12872 case PROCESSOR_NOCONA:
12873 return 3;
12875 default:
12876 return 1;
12880 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
12881 by DEP_INSN and nothing else set by DEP_INSN. */
12883 static int
12884 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12886 rtx set, set2;
12888 /* Simplify the test for uninteresting insns. */
12889 if (insn_type != TYPE_SETCC
12890 && insn_type != TYPE_ICMOV
12891 && insn_type != TYPE_FCMOV
12892 && insn_type != TYPE_IBR)
12893 return 0;
12895 if ((set = single_set (dep_insn)) != 0)
12897 set = SET_DEST (set);
12898 set2 = NULL_RTX;
12900 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12901 && XVECLEN (PATTERN (dep_insn), 0) == 2
12902 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12903 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12905 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12906 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
12908 else
12909 return 0;
12911 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12912 return 0;
12914 /* This test is true if the dependent insn reads the flags but
12915 not any other potentially set register. */
12916 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12917 return 0;
12919 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12920 return 0;
12922 return 1;
12925 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12926 address with operands set by DEP_INSN. */
12928 static int
12929 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12931 rtx addr;
12933 if (insn_type == TYPE_LEA
12934 && TARGET_PENTIUM)
12936 addr = PATTERN (insn);
12938 if (GET_CODE (addr) == PARALLEL)
12939 addr = XVECEXP (addr, 0, 0);
12941 gcc_assert (GET_CODE (addr) == SET);
12943 addr = SET_SRC (addr);
12945 else
12947 int i;
12948 extract_insn_cached (insn);
12949 for (i = recog_data.n_operands - 1; i >= 0; --i)
12950 if (GET_CODE (recog_data.operand[i]) == MEM)
12952 addr = XEXP (recog_data.operand[i], 0);
12953 goto found;
12955 return 0;
12956 found:;
12959 return modified_in_p (addr, dep_insn);
12962 static int
12963 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12965 enum attr_type insn_type, dep_insn_type;
12966 enum attr_memory memory;
12967 rtx set, set2;
12968 int dep_insn_code_number;
12970 /* Anti and output dependencies have zero cost on all CPUs. */
12971 if (REG_NOTE_KIND (link) != 0)
12972 return 0;
12974 dep_insn_code_number = recog_memoized (dep_insn);
12976 /* If we can't recognize the insns, we can't really do anything. */
12977 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12978 return cost;
12980 insn_type = get_attr_type (insn);
12981 dep_insn_type = get_attr_type (dep_insn);
12983 switch (ix86_tune)
12985 case PROCESSOR_PENTIUM:
12986 /* Address Generation Interlock adds a cycle of latency. */
12987 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12988 cost += 1;
12990 /* ??? Compares pair with jump/setcc. */
12991 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12992 cost = 0;
12994 /* Floating point stores require value to be ready one cycle earlier. */
12995 if (insn_type == TYPE_FMOV
12996 && get_attr_memory (insn) == MEMORY_STORE
12997 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12998 cost += 1;
12999 break;
13001 case PROCESSOR_PENTIUMPRO:
13002 memory = get_attr_memory (insn);
13004 /* INT->FP conversion is expensive. */
13005 if (get_attr_fp_int_src (dep_insn))
13006 cost += 5;
13008 /* There is one cycle extra latency between an FP op and a store. */
13009 if (insn_type == TYPE_FMOV
13010 && (set = single_set (dep_insn)) != NULL_RTX
13011 && (set2 = single_set (insn)) != NULL_RTX
13012 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
13013 && GET_CODE (SET_DEST (set2)) == MEM)
13014 cost += 1;
13016 /* Show the ability of the reorder buffer to hide the latency of a load
13017 by executing it in parallel with the previous instruction when the
13018 previous instruction is not needed to compute the address. */
13019 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13020 && !ix86_agi_dependant (insn, dep_insn, insn_type))
13022 /* Claim moves to take one cycle, as the core can issue one load
13023 at a time and the next load can start a cycle later. */
13024 if (dep_insn_type == TYPE_IMOV
13025 || dep_insn_type == TYPE_FMOV)
13026 cost = 1;
13027 else if (cost > 1)
13028 cost--;
13030 break;
13032 case PROCESSOR_K6:
13033 memory = get_attr_memory (insn);
13035 /* The esp dependency is resolved before the instruction is really
13036 finished. */
13037 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
13038 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
13039 return 1;
13041 /* INT->FP conversion is expensive. */
13042 if (get_attr_fp_int_src (dep_insn))
13043 cost += 5;
13045 /* Show the ability of the reorder buffer to hide the latency of a load
13046 by executing it in parallel with the previous instruction when the
13047 previous instruction is not needed to compute the address. */
13048 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13049 && !ix86_agi_dependant (insn, dep_insn, insn_type))
13051 /* Claim moves to take one cycle, as the core can issue one load
13052 at a time and the next load can start a cycle later. */
13053 if (dep_insn_type == TYPE_IMOV
13054 || dep_insn_type == TYPE_FMOV)
13055 cost = 1;
13056 else if (cost > 2)
13057 cost -= 2;
13058 else
13059 cost = 1;
13061 break;
13063 case PROCESSOR_ATHLON:
13064 case PROCESSOR_K8:
13065 memory = get_attr_memory (insn);
13067 /* Show the ability of the reorder buffer to hide the latency of a load
13068 by executing it in parallel with the previous instruction when the
13069 previous instruction is not needed to compute the address. */
13070 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13071 && !ix86_agi_dependant (insn, dep_insn, insn_type))
13073 enum attr_unit unit = get_attr_unit (insn);
13074 int loadcost = 3;
13076 /* Because of the difference between the length of integer and
13077 floating unit pipeline preparation stages, the memory operands
13078 for floating point are cheaper.
13080 ??? For Athlon the difference is most probably 2. */
13081 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
13082 loadcost = 3;
13083 else
13084 loadcost = TARGET_ATHLON ? 2 : 0;
13086 if (cost >= loadcost)
13087 cost -= loadcost;
13088 else
13089 cost = 0;
13092 default:
13093 break;
13096 return cost;
13099 /* How many alternative schedules to try. This should be as wide as the
13100 scheduling freedom in the DFA, but no wider. Making this value too
13101 large results in extra work for the scheduler. */
13103 static int
13104 ia32_multipass_dfa_lookahead (void)
13106 if (ix86_tune == PROCESSOR_PENTIUM)
13107 return 2;
13109 if (ix86_tune == PROCESSOR_PENTIUMPRO
13110 || ix86_tune == PROCESSOR_K6)
13111 return 1;
13113 else
13114 return 0;
13118 /* Compute the alignment given to a constant that is being placed in memory.
13119 EXP is the constant and ALIGN is the alignment that the object would
13120 ordinarily have.
13121 The value of this function is used instead of that alignment to align
13122 the object. */
13125 ix86_constant_alignment (tree exp, int align)
13127 if (TREE_CODE (exp) == REAL_CST)
13129 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
13130 return 64;
13131 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
13132 return 128;
13134 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
13135 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
13136 return BITS_PER_WORD;
13138 return align;
13141 /* Compute the alignment for a static variable.
13142 TYPE is the data type, and ALIGN is the alignment that
13143 the object would ordinarily have. The value of this function is used
13144 instead of that alignment to align the object. */
13147 ix86_data_alignment (tree type, int align)
13149 if (AGGREGATE_TYPE_P (type)
13150 && TYPE_SIZE (type)
13151 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13152 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
13153 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
13154 return 256;
13156 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
13157 to a 16-byte boundary. */
13158 if (TARGET_64BIT)
13160 if (AGGREGATE_TYPE_P (type)
13161 && TYPE_SIZE (type)
13162 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13163 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
13164 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
13165 return 128;
13168 if (TREE_CODE (type) == ARRAY_TYPE)
13170 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
13171 return 64;
13172 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
13173 return 128;
13175 else if (TREE_CODE (type) == COMPLEX_TYPE)
13178 if (TYPE_MODE (type) == DCmode && align < 64)
13179 return 64;
13180 if (TYPE_MODE (type) == XCmode && align < 128)
13181 return 128;
13183 else if ((TREE_CODE (type) == RECORD_TYPE
13184 || TREE_CODE (type) == UNION_TYPE
13185 || TREE_CODE (type) == QUAL_UNION_TYPE)
13186 && TYPE_FIELDS (type))
13188 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
13189 return 64;
13190 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
13191 return 128;
13193 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
13194 || TREE_CODE (type) == INTEGER_TYPE)
13196 if (TYPE_MODE (type) == DFmode && align < 64)
13197 return 64;
13198 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
13199 return 128;
13202 return align;
13205 /* Compute the alignment for a local variable.
13206 TYPE is the data type, and ALIGN is the alignment that
13207 the object would ordinarily have. The value of this macro is used
13208 instead of that alignment to align the object. */
13211 ix86_local_alignment (tree type, int align)
13213 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
13214 to a 16-byte boundary. */
13215 if (TARGET_64BIT)
13217 if (AGGREGATE_TYPE_P (type)
13218 && TYPE_SIZE (type)
13219 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13220 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
13221 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
13222 return 128;
13224 if (TREE_CODE (type) == ARRAY_TYPE)
13226 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
13227 return 64;
13228 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
13229 return 128;
13231 else if (TREE_CODE (type) == COMPLEX_TYPE)
13233 if (TYPE_MODE (type) == DCmode && align < 64)
13234 return 64;
13235 if (TYPE_MODE (type) == XCmode && align < 128)
13236 return 128;
13238 else if ((TREE_CODE (type) == RECORD_TYPE
13239 || TREE_CODE (type) == UNION_TYPE
13240 || TREE_CODE (type) == QUAL_UNION_TYPE)
13241 && TYPE_FIELDS (type))
13243 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
13244 return 64;
13245 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
13246 return 128;
13248 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
13249 || TREE_CODE (type) == INTEGER_TYPE)
13252 if (TYPE_MODE (type) == DFmode && align < 64)
13253 return 64;
13254 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
13255 return 128;
13257 return align;
13260 /* Emit RTL insns to initialize the variable parts of a trampoline.
13261 FNADDR is an RTX for the address of the function's pure code.
13262 CXT is an RTX for the static chain value for the function. */
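/* Editorial sketch (derived from the immediates used below) of the emitted
   byte layouts:
     32-bit:  b9 <cxt32>          movl  $cxt, %ecx
              e9 <rel32>          jmp   fnaddr
     64-bit:  49 bb <imm64>       movabsq $fnaddr, %r11
              (or 41 bb <imm32>   movl  $fnaddr, %r11d)
              49 ba <imm64>       movabsq $cxt, %r10
              49 ff e3            jmpq  *%r11  */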
13263 void
13264 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
13266 if (!TARGET_64BIT)
13268 /* Compute offset from the end of the jmp to the target function. */
13269 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
13270 plus_constant (tramp, 10),
13271 NULL_RTX, 1, OPTAB_DIRECT);
13272 emit_move_insn (gen_rtx_MEM (QImode, tramp),
13273 gen_int_mode (0xb9, QImode));
13274 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
13275 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
13276 gen_int_mode (0xe9, QImode));
13277 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
13279 else
13281 int offset = 0;
13282 /* Try to load address using shorter movl instead of movabs.
13283 We may want to support movq for kernel mode, but the kernel does not use
13284 trampolines at the moment. */
13285 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
13287 fnaddr = copy_to_mode_reg (DImode, fnaddr);
13288 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13289 gen_int_mode (0xbb41, HImode));
13290 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
13291 gen_lowpart (SImode, fnaddr));
13292 offset += 6;
13294 else
13296 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13297 gen_int_mode (0xbb49, HImode));
13298 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
13299 fnaddr);
13300 offset += 10;
13302 /* Load static chain using movabs to r10. */
13303 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13304 gen_int_mode (0xba49, HImode));
13305 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
13306 cxt);
13307 offset += 10;
13308 /* Jump to r11. */
13309 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13310 gen_int_mode (0xff49, HImode));
13311 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
13312 gen_int_mode (0xe3, QImode));
13313 offset += 3;
13314 gcc_assert (offset <= TRAMPOLINE_SIZE);
13317 #ifdef ENABLE_EXECUTE_STACK
13318 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
13319 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
13320 #endif
13323 /* Codes for all the SSE/MMX builtins. */
13324 enum ix86_builtins
13326 IX86_BUILTIN_ADDPS,
13327 IX86_BUILTIN_ADDSS,
13328 IX86_BUILTIN_DIVPS,
13329 IX86_BUILTIN_DIVSS,
13330 IX86_BUILTIN_MULPS,
13331 IX86_BUILTIN_MULSS,
13332 IX86_BUILTIN_SUBPS,
13333 IX86_BUILTIN_SUBSS,
13335 IX86_BUILTIN_CMPEQPS,
13336 IX86_BUILTIN_CMPLTPS,
13337 IX86_BUILTIN_CMPLEPS,
13338 IX86_BUILTIN_CMPGTPS,
13339 IX86_BUILTIN_CMPGEPS,
13340 IX86_BUILTIN_CMPNEQPS,
13341 IX86_BUILTIN_CMPNLTPS,
13342 IX86_BUILTIN_CMPNLEPS,
13343 IX86_BUILTIN_CMPNGTPS,
13344 IX86_BUILTIN_CMPNGEPS,
13345 IX86_BUILTIN_CMPORDPS,
13346 IX86_BUILTIN_CMPUNORDPS,
13347 IX86_BUILTIN_CMPNEPS,
13348 IX86_BUILTIN_CMPEQSS,
13349 IX86_BUILTIN_CMPLTSS,
13350 IX86_BUILTIN_CMPLESS,
13351 IX86_BUILTIN_CMPNEQSS,
13352 IX86_BUILTIN_CMPNLTSS,
13353 IX86_BUILTIN_CMPNLESS,
13354 IX86_BUILTIN_CMPNGTSS,
13355 IX86_BUILTIN_CMPNGESS,
13356 IX86_BUILTIN_CMPORDSS,
13357 IX86_BUILTIN_CMPUNORDSS,
13358 IX86_BUILTIN_CMPNESS,
13360 IX86_BUILTIN_COMIEQSS,
13361 IX86_BUILTIN_COMILTSS,
13362 IX86_BUILTIN_COMILESS,
13363 IX86_BUILTIN_COMIGTSS,
13364 IX86_BUILTIN_COMIGESS,
13365 IX86_BUILTIN_COMINEQSS,
13366 IX86_BUILTIN_UCOMIEQSS,
13367 IX86_BUILTIN_UCOMILTSS,
13368 IX86_BUILTIN_UCOMILESS,
13369 IX86_BUILTIN_UCOMIGTSS,
13370 IX86_BUILTIN_UCOMIGESS,
13371 IX86_BUILTIN_UCOMINEQSS,
13373 IX86_BUILTIN_CVTPI2PS,
13374 IX86_BUILTIN_CVTPS2PI,
13375 IX86_BUILTIN_CVTSI2SS,
13376 IX86_BUILTIN_CVTSI642SS,
13377 IX86_BUILTIN_CVTSS2SI,
13378 IX86_BUILTIN_CVTSS2SI64,
13379 IX86_BUILTIN_CVTTPS2PI,
13380 IX86_BUILTIN_CVTTSS2SI,
13381 IX86_BUILTIN_CVTTSS2SI64,
13383 IX86_BUILTIN_MAXPS,
13384 IX86_BUILTIN_MAXSS,
13385 IX86_BUILTIN_MINPS,
13386 IX86_BUILTIN_MINSS,
13388 IX86_BUILTIN_LOADUPS,
13389 IX86_BUILTIN_STOREUPS,
13390 IX86_BUILTIN_MOVSS,
13392 IX86_BUILTIN_MOVHLPS,
13393 IX86_BUILTIN_MOVLHPS,
13394 IX86_BUILTIN_LOADHPS,
13395 IX86_BUILTIN_LOADLPS,
13396 IX86_BUILTIN_STOREHPS,
13397 IX86_BUILTIN_STORELPS,
13399 IX86_BUILTIN_MASKMOVQ,
13400 IX86_BUILTIN_MOVMSKPS,
13401 IX86_BUILTIN_PMOVMSKB,
13403 IX86_BUILTIN_MOVNTPS,
13404 IX86_BUILTIN_MOVNTQ,
13406 IX86_BUILTIN_LOADDQU,
13407 IX86_BUILTIN_STOREDQU,
13409 IX86_BUILTIN_PACKSSWB,
13410 IX86_BUILTIN_PACKSSDW,
13411 IX86_BUILTIN_PACKUSWB,
13413 IX86_BUILTIN_PADDB,
13414 IX86_BUILTIN_PADDW,
13415 IX86_BUILTIN_PADDD,
13416 IX86_BUILTIN_PADDQ,
13417 IX86_BUILTIN_PADDSB,
13418 IX86_BUILTIN_PADDSW,
13419 IX86_BUILTIN_PADDUSB,
13420 IX86_BUILTIN_PADDUSW,
13421 IX86_BUILTIN_PSUBB,
13422 IX86_BUILTIN_PSUBW,
13423 IX86_BUILTIN_PSUBD,
13424 IX86_BUILTIN_PSUBQ,
13425 IX86_BUILTIN_PSUBSB,
13426 IX86_BUILTIN_PSUBSW,
13427 IX86_BUILTIN_PSUBUSB,
13428 IX86_BUILTIN_PSUBUSW,
13430 IX86_BUILTIN_PAND,
13431 IX86_BUILTIN_PANDN,
13432 IX86_BUILTIN_POR,
13433 IX86_BUILTIN_PXOR,
13435 IX86_BUILTIN_PAVGB,
13436 IX86_BUILTIN_PAVGW,
13438 IX86_BUILTIN_PCMPEQB,
13439 IX86_BUILTIN_PCMPEQW,
13440 IX86_BUILTIN_PCMPEQD,
13441 IX86_BUILTIN_PCMPGTB,
13442 IX86_BUILTIN_PCMPGTW,
13443 IX86_BUILTIN_PCMPGTD,
13445 IX86_BUILTIN_PMADDWD,
13447 IX86_BUILTIN_PMAXSW,
13448 IX86_BUILTIN_PMAXUB,
13449 IX86_BUILTIN_PMINSW,
13450 IX86_BUILTIN_PMINUB,
13452 IX86_BUILTIN_PMULHUW,
13453 IX86_BUILTIN_PMULHW,
13454 IX86_BUILTIN_PMULLW,
13456 IX86_BUILTIN_PSADBW,
13457 IX86_BUILTIN_PSHUFW,
13459 IX86_BUILTIN_PSLLW,
13460 IX86_BUILTIN_PSLLD,
13461 IX86_BUILTIN_PSLLQ,
13462 IX86_BUILTIN_PSRAW,
13463 IX86_BUILTIN_PSRAD,
13464 IX86_BUILTIN_PSRLW,
13465 IX86_BUILTIN_PSRLD,
13466 IX86_BUILTIN_PSRLQ,
13467 IX86_BUILTIN_PSLLWI,
13468 IX86_BUILTIN_PSLLDI,
13469 IX86_BUILTIN_PSLLQI,
13470 IX86_BUILTIN_PSRAWI,
13471 IX86_BUILTIN_PSRADI,
13472 IX86_BUILTIN_PSRLWI,
13473 IX86_BUILTIN_PSRLDI,
13474 IX86_BUILTIN_PSRLQI,
13476 IX86_BUILTIN_PUNPCKHBW,
13477 IX86_BUILTIN_PUNPCKHWD,
13478 IX86_BUILTIN_PUNPCKHDQ,
13479 IX86_BUILTIN_PUNPCKLBW,
13480 IX86_BUILTIN_PUNPCKLWD,
13481 IX86_BUILTIN_PUNPCKLDQ,
13483 IX86_BUILTIN_SHUFPS,
13485 IX86_BUILTIN_RCPPS,
13486 IX86_BUILTIN_RCPSS,
13487 IX86_BUILTIN_RSQRTPS,
13488 IX86_BUILTIN_RSQRTSS,
13489 IX86_BUILTIN_SQRTPS,
13490 IX86_BUILTIN_SQRTSS,
13492 IX86_BUILTIN_UNPCKHPS,
13493 IX86_BUILTIN_UNPCKLPS,
13495 IX86_BUILTIN_ANDPS,
13496 IX86_BUILTIN_ANDNPS,
13497 IX86_BUILTIN_ORPS,
13498 IX86_BUILTIN_XORPS,
13500 IX86_BUILTIN_EMMS,
13501 IX86_BUILTIN_LDMXCSR,
13502 IX86_BUILTIN_STMXCSR,
13503 IX86_BUILTIN_SFENCE,
13505 /* 3DNow! Original */
13506 IX86_BUILTIN_FEMMS,
13507 IX86_BUILTIN_PAVGUSB,
13508 IX86_BUILTIN_PF2ID,
13509 IX86_BUILTIN_PFACC,
13510 IX86_BUILTIN_PFADD,
13511 IX86_BUILTIN_PFCMPEQ,
13512 IX86_BUILTIN_PFCMPGE,
13513 IX86_BUILTIN_PFCMPGT,
13514 IX86_BUILTIN_PFMAX,
13515 IX86_BUILTIN_PFMIN,
13516 IX86_BUILTIN_PFMUL,
13517 IX86_BUILTIN_PFRCP,
13518 IX86_BUILTIN_PFRCPIT1,
13519 IX86_BUILTIN_PFRCPIT2,
13520 IX86_BUILTIN_PFRSQIT1,
13521 IX86_BUILTIN_PFRSQRT,
13522 IX86_BUILTIN_PFSUB,
13523 IX86_BUILTIN_PFSUBR,
13524 IX86_BUILTIN_PI2FD,
13525 IX86_BUILTIN_PMULHRW,
13527 /* 3DNow! Athlon Extensions */
13528 IX86_BUILTIN_PF2IW,
13529 IX86_BUILTIN_PFNACC,
13530 IX86_BUILTIN_PFPNACC,
13531 IX86_BUILTIN_PI2FW,
13532 IX86_BUILTIN_PSWAPDSI,
13533 IX86_BUILTIN_PSWAPDSF,
13535 /* SSE2 */
13536 IX86_BUILTIN_ADDPD,
13537 IX86_BUILTIN_ADDSD,
13538 IX86_BUILTIN_DIVPD,
13539 IX86_BUILTIN_DIVSD,
13540 IX86_BUILTIN_MULPD,
13541 IX86_BUILTIN_MULSD,
13542 IX86_BUILTIN_SUBPD,
13543 IX86_BUILTIN_SUBSD,
13545 IX86_BUILTIN_CMPEQPD,
13546 IX86_BUILTIN_CMPLTPD,
13547 IX86_BUILTIN_CMPLEPD,
13548 IX86_BUILTIN_CMPGTPD,
13549 IX86_BUILTIN_CMPGEPD,
13550 IX86_BUILTIN_CMPNEQPD,
13551 IX86_BUILTIN_CMPNLTPD,
13552 IX86_BUILTIN_CMPNLEPD,
13553 IX86_BUILTIN_CMPNGTPD,
13554 IX86_BUILTIN_CMPNGEPD,
13555 IX86_BUILTIN_CMPORDPD,
13556 IX86_BUILTIN_CMPUNORDPD,
13557 IX86_BUILTIN_CMPNEPD,
13558 IX86_BUILTIN_CMPEQSD,
13559 IX86_BUILTIN_CMPLTSD,
13560 IX86_BUILTIN_CMPLESD,
13561 IX86_BUILTIN_CMPNEQSD,
13562 IX86_BUILTIN_CMPNLTSD,
13563 IX86_BUILTIN_CMPNLESD,
13564 IX86_BUILTIN_CMPORDSD,
13565 IX86_BUILTIN_CMPUNORDSD,
13566 IX86_BUILTIN_CMPNESD,
13568 IX86_BUILTIN_COMIEQSD,
13569 IX86_BUILTIN_COMILTSD,
13570 IX86_BUILTIN_COMILESD,
13571 IX86_BUILTIN_COMIGTSD,
13572 IX86_BUILTIN_COMIGESD,
13573 IX86_BUILTIN_COMINEQSD,
13574 IX86_BUILTIN_UCOMIEQSD,
13575 IX86_BUILTIN_UCOMILTSD,
13576 IX86_BUILTIN_UCOMILESD,
13577 IX86_BUILTIN_UCOMIGTSD,
13578 IX86_BUILTIN_UCOMIGESD,
13579 IX86_BUILTIN_UCOMINEQSD,
13581 IX86_BUILTIN_MAXPD,
13582 IX86_BUILTIN_MAXSD,
13583 IX86_BUILTIN_MINPD,
13584 IX86_BUILTIN_MINSD,
13586 IX86_BUILTIN_ANDPD,
13587 IX86_BUILTIN_ANDNPD,
13588 IX86_BUILTIN_ORPD,
13589 IX86_BUILTIN_XORPD,
13591 IX86_BUILTIN_SQRTPD,
13592 IX86_BUILTIN_SQRTSD,
13594 IX86_BUILTIN_UNPCKHPD,
13595 IX86_BUILTIN_UNPCKLPD,
13597 IX86_BUILTIN_SHUFPD,
13599 IX86_BUILTIN_LOADUPD,
13600 IX86_BUILTIN_STOREUPD,
13601 IX86_BUILTIN_MOVSD,
13603 IX86_BUILTIN_LOADHPD,
13604 IX86_BUILTIN_LOADLPD,
13606 IX86_BUILTIN_CVTDQ2PD,
13607 IX86_BUILTIN_CVTDQ2PS,
13609 IX86_BUILTIN_CVTPD2DQ,
13610 IX86_BUILTIN_CVTPD2PI,
13611 IX86_BUILTIN_CVTPD2PS,
13612 IX86_BUILTIN_CVTTPD2DQ,
13613 IX86_BUILTIN_CVTTPD2PI,
13615 IX86_BUILTIN_CVTPI2PD,
13616 IX86_BUILTIN_CVTSI2SD,
13617 IX86_BUILTIN_CVTSI642SD,
13619 IX86_BUILTIN_CVTSD2SI,
13620 IX86_BUILTIN_CVTSD2SI64,
13621 IX86_BUILTIN_CVTSD2SS,
13622 IX86_BUILTIN_CVTSS2SD,
13623 IX86_BUILTIN_CVTTSD2SI,
13624 IX86_BUILTIN_CVTTSD2SI64,
13626 IX86_BUILTIN_CVTPS2DQ,
13627 IX86_BUILTIN_CVTPS2PD,
13628 IX86_BUILTIN_CVTTPS2DQ,
13630 IX86_BUILTIN_MOVNTI,
13631 IX86_BUILTIN_MOVNTPD,
13632 IX86_BUILTIN_MOVNTDQ,
13634 /* SSE2 MMX */
13635 IX86_BUILTIN_MASKMOVDQU,
13636 IX86_BUILTIN_MOVMSKPD,
13637 IX86_BUILTIN_PMOVMSKB128,
13639 IX86_BUILTIN_PACKSSWB128,
13640 IX86_BUILTIN_PACKSSDW128,
13641 IX86_BUILTIN_PACKUSWB128,
13643 IX86_BUILTIN_PADDB128,
13644 IX86_BUILTIN_PADDW128,
13645 IX86_BUILTIN_PADDD128,
13646 IX86_BUILTIN_PADDQ128,
13647 IX86_BUILTIN_PADDSB128,
13648 IX86_BUILTIN_PADDSW128,
13649 IX86_BUILTIN_PADDUSB128,
13650 IX86_BUILTIN_PADDUSW128,
13651 IX86_BUILTIN_PSUBB128,
13652 IX86_BUILTIN_PSUBW128,
13653 IX86_BUILTIN_PSUBD128,
13654 IX86_BUILTIN_PSUBQ128,
13655 IX86_BUILTIN_PSUBSB128,
13656 IX86_BUILTIN_PSUBSW128,
13657 IX86_BUILTIN_PSUBUSB128,
13658 IX86_BUILTIN_PSUBUSW128,
13660 IX86_BUILTIN_PAND128,
13661 IX86_BUILTIN_PANDN128,
13662 IX86_BUILTIN_POR128,
13663 IX86_BUILTIN_PXOR128,
13665 IX86_BUILTIN_PAVGB128,
13666 IX86_BUILTIN_PAVGW128,
13668 IX86_BUILTIN_PCMPEQB128,
13669 IX86_BUILTIN_PCMPEQW128,
13670 IX86_BUILTIN_PCMPEQD128,
13671 IX86_BUILTIN_PCMPGTB128,
13672 IX86_BUILTIN_PCMPGTW128,
13673 IX86_BUILTIN_PCMPGTD128,
13675 IX86_BUILTIN_PMADDWD128,
13677 IX86_BUILTIN_PMAXSW128,
13678 IX86_BUILTIN_PMAXUB128,
13679 IX86_BUILTIN_PMINSW128,
13680 IX86_BUILTIN_PMINUB128,
13682 IX86_BUILTIN_PMULUDQ,
13683 IX86_BUILTIN_PMULUDQ128,
13684 IX86_BUILTIN_PMULHUW128,
13685 IX86_BUILTIN_PMULHW128,
13686 IX86_BUILTIN_PMULLW128,
13688 IX86_BUILTIN_PSADBW128,
13689 IX86_BUILTIN_PSHUFHW,
13690 IX86_BUILTIN_PSHUFLW,
13691 IX86_BUILTIN_PSHUFD,
13693 IX86_BUILTIN_PSLLW128,
13694 IX86_BUILTIN_PSLLD128,
13695 IX86_BUILTIN_PSLLQ128,
13696 IX86_BUILTIN_PSRAW128,
13697 IX86_BUILTIN_PSRAD128,
13698 IX86_BUILTIN_PSRLW128,
13699 IX86_BUILTIN_PSRLD128,
13700 IX86_BUILTIN_PSRLQ128,
13701 IX86_BUILTIN_PSLLDQI128,
13702 IX86_BUILTIN_PSLLWI128,
13703 IX86_BUILTIN_PSLLDI128,
13704 IX86_BUILTIN_PSLLQI128,
13705 IX86_BUILTIN_PSRAWI128,
13706 IX86_BUILTIN_PSRADI128,
13707 IX86_BUILTIN_PSRLDQI128,
13708 IX86_BUILTIN_PSRLWI128,
13709 IX86_BUILTIN_PSRLDI128,
13710 IX86_BUILTIN_PSRLQI128,
13712 IX86_BUILTIN_PUNPCKHBW128,
13713 IX86_BUILTIN_PUNPCKHWD128,
13714 IX86_BUILTIN_PUNPCKHDQ128,
13715 IX86_BUILTIN_PUNPCKHQDQ128,
13716 IX86_BUILTIN_PUNPCKLBW128,
13717 IX86_BUILTIN_PUNPCKLWD128,
13718 IX86_BUILTIN_PUNPCKLDQ128,
13719 IX86_BUILTIN_PUNPCKLQDQ128,
13721 IX86_BUILTIN_CLFLUSH,
13722 IX86_BUILTIN_MFENCE,
13723 IX86_BUILTIN_LFENCE,
13725 /* Prescott New Instructions. */
13726 IX86_BUILTIN_ADDSUBPS,
13727 IX86_BUILTIN_HADDPS,
13728 IX86_BUILTIN_HSUBPS,
13729 IX86_BUILTIN_MOVSHDUP,
13730 IX86_BUILTIN_MOVSLDUP,
13731 IX86_BUILTIN_ADDSUBPD,
13732 IX86_BUILTIN_HADDPD,
13733 IX86_BUILTIN_HSUBPD,
13734 IX86_BUILTIN_LDDQU,
13736 IX86_BUILTIN_MONITOR,
13737 IX86_BUILTIN_MWAIT,
13739 IX86_BUILTIN_VEC_INIT_V2SI,
13740 IX86_BUILTIN_VEC_INIT_V4HI,
13741 IX86_BUILTIN_VEC_INIT_V8QI,
13742 IX86_BUILTIN_VEC_EXT_V2DF,
13743 IX86_BUILTIN_VEC_EXT_V2DI,
13744 IX86_BUILTIN_VEC_EXT_V4SF,
13745 IX86_BUILTIN_VEC_EXT_V4SI,
13746 IX86_BUILTIN_VEC_EXT_V8HI,
13747 IX86_BUILTIN_VEC_EXT_V2SI,
13748 IX86_BUILTIN_VEC_EXT_V4HI,
13749 IX86_BUILTIN_VEC_SET_V8HI,
13750 IX86_BUILTIN_VEC_SET_V4HI,
13752 IX86_BUILTIN_MAX
13755 #define def_builtin(MASK, NAME, TYPE, CODE) \
13756 do { \
13757 if ((MASK) & target_flags \
13758 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
13759 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
13760 NULL, NULL_TREE); \
13761 } while (0)
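/* Illustration: a builtin is only created when at least one bit of MASK is
   already set in target_flags, and an entry whose MASK includes MASK_64BIT
   is additionally skipped on 32-bit targets.  For example, the later call

     def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void,
                  IX86_BUILTIN_LFENCE);

   registers __builtin_ia32_lfence only when SSE2 is enabled (-msse2 or an
   -march that implies it).  */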
13763 /* Bits for builtin_description.flag. */
13765 /* Set when we don't support the comparison natively, and should
13766 swap_comparison in order to support it. */
13767 #define BUILTIN_DESC_SWAP_OPERANDS 1
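/* For example, SSE has no native "compare greater" predicate, so the
   __builtin_ia32_cmpgtps entry below is recorded as LT together with
   BUILTIN_DESC_SWAP_OPERANDS: a > b is emitted as b < a with the two
   operands exchanged.  */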
13769 struct builtin_description
13770 {
13771 const unsigned int mask;
13772 const enum insn_code icode;
13773 const char *const name;
13774 const enum ix86_builtins code;
13775 const enum rtx_code comparison;
13776 const unsigned int flag;
13777 };
13779 static const struct builtin_description bdesc_comi[] =
13780 {
13781 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
13782 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
13783 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
13784 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
13785 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
13786 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
13787 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
13788 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
13789 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
13790 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
13791 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
13792 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
13793 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
13794 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
13795 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
13796 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
13797 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
13798 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
13799 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
13800 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
13801 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
13802 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
13803 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
13804 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
13805 };
13807 static const struct builtin_description bdesc_2arg[] =
13808 {
13809 /* SSE */
13810 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
13811 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
13812 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
13813 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
13814 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
13815 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
13816 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
13817 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
13819 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
13820 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
13821 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
13822 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
13823 BUILTIN_DESC_SWAP_OPERANDS },
13824 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
13825 BUILTIN_DESC_SWAP_OPERANDS },
13826 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
13827 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
13828 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
13829 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
13830 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
13831 BUILTIN_DESC_SWAP_OPERANDS },
13832 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
13833 BUILTIN_DESC_SWAP_OPERANDS },
13834 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
13835 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
13836 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
13837 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
13838 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
13839 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
13840 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
13841 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
13842 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
13843 BUILTIN_DESC_SWAP_OPERANDS },
13844 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
13845 BUILTIN_DESC_SWAP_OPERANDS },
13846 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
13848 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
13849 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
13850 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
13851 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
13853 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
13854 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
13855 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
13856 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
13858 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
13859 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
13860 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
13861 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
13862 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
13864 /* MMX */
13865 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
13866 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
13867 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
13868 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
13869 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
13870 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
13871 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
13872 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
13874 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
13875 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
13876 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
13877 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
13878 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
13879 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
13880 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
13881 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
13883 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
13884 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
13885 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
13887 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
13888 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
13889 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
13890 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
13892 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
13893 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
13895 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
13896 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
13897 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
13898 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
13899 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
13900 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
13902 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
13903 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
13904 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
13905 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
13907 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
13908 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
13909 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
13910 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
13911 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
13912 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
13914 /* Special. */
13915 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
13916 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
13917 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
13919 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
13920 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
13921 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
13923 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
13924 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
13925 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
13926 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
13927 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
13928 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
13930 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
13931 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
13932 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
13933 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
13934 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
13935 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
13937 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
13938 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
13939 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
13940 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
13942 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
13943 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
13945 /* SSE2 */
13946 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
13947 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
13948 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
13949 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
13950 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
13951 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
13952 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
13953 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
13955 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
13956 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
13957 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
13958 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
13959 BUILTIN_DESC_SWAP_OPERANDS },
13960 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
13961 BUILTIN_DESC_SWAP_OPERANDS },
13962 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
13963 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
13964 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
13965 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
13966 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
13967 BUILTIN_DESC_SWAP_OPERANDS },
13968 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
13969 BUILTIN_DESC_SWAP_OPERANDS },
13970 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
13971 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
13972 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
13973 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
13974 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
13975 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
13976 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
13977 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
13978 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
13980 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
13981 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
13982 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
13983 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
13985 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
13986 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
13987 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
13988 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
13990 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
13991 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
13992 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
13994 /* SSE2 MMX */
13995 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
13996 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
13997 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
13998 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
13999 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
14000 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
14001 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
14002 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
14004 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
14005 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
14006 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
14007 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
14008 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
14009 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
14010 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
14011 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
14013 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
14014 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
14016 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
14017 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
14018 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
14019 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
14021 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
14022 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
14024 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
14025 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
14026 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
14027 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
14028 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
14029 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
14031 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
14032 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
14033 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
14034 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
14036 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
14037 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
14038 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
14039 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
14040 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
14041 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
14042 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
14043 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
14045 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
14046 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
14047 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
14049 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
14050 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
14052 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
14053 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
14055 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
14056 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
14057 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
14059 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
14060 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
14061 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
14063 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
14064 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
14066 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
14068 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
14069 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
14070 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
14071 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
14073 /* SSE3 MMX */
14074 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
14075 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
14076 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
14077 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
14078 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
14079 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
14080 };
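/* Usage sketch (user code, not part of this file): the two-operand entries
   above are what the <xmmintrin.h>/<emmintrin.h> wrappers expand to.  For
   instance, with -msse:

     typedef float __v4sf __attribute__ ((__vector_size__ (16)));

     __v4sf
     add_ps (__v4sf a, __v4sf b)
     {
       return __builtin_ia32_addps (a, b);   (expands via CODE_FOR_addv4sf3)
     }

   _mm_add_ps is essentially this call.  */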
14082 static const struct builtin_description bdesc_1arg[] =
14083 {
14084 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
14085 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
14087 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
14088 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
14089 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
14091 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
14092 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
14093 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
14094 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
14095 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
14096 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
14098 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
14099 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
14101 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
14103 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
14104 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
14106 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
14107 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
14108 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
14109 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
14110 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
14112 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
14114 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
14115 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
14116 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
14117 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
14119 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
14120 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
14121 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
14123 /* SSE3 */
14124 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
14125 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
14126 };
14128 static void
14129 ix86_init_builtins (void)
14130 {
14131 if (TARGET_MMX)
14132 ix86_init_mmx_sse_builtins ();
14133 }
14135 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
14136 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
14137 builtins. */
14138 static void
14139 ix86_init_mmx_sse_builtins (void)
14140 {
14141 const struct builtin_description * d;
14142 size_t i;
14144 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
14145 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
14146 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
14147 tree V2DI_type_node
14148 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
14149 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
14150 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
14151 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
14152 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
14153 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
14154 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
14156 tree pchar_type_node = build_pointer_type (char_type_node);
14157 tree pcchar_type_node = build_pointer_type (
14158 build_type_variant (char_type_node, 1, 0));
14159 tree pfloat_type_node = build_pointer_type (float_type_node);
14160 tree pcfloat_type_node = build_pointer_type (
14161 build_type_variant (float_type_node, 1, 0));
14162 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
14163 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
14164 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
14166 /* Comparisons. */
14167 tree int_ftype_v4sf_v4sf
14168 = build_function_type_list (integer_type_node,
14169 V4SF_type_node, V4SF_type_node, NULL_TREE);
14170 tree v4si_ftype_v4sf_v4sf
14171 = build_function_type_list (V4SI_type_node,
14172 V4SF_type_node, V4SF_type_node, NULL_TREE);
14173 /* MMX/SSE/integer conversions. */
14174 tree int_ftype_v4sf
14175 = build_function_type_list (integer_type_node,
14176 V4SF_type_node, NULL_TREE);
14177 tree int64_ftype_v4sf
14178 = build_function_type_list (long_long_integer_type_node,
14179 V4SF_type_node, NULL_TREE);
14180 tree int_ftype_v8qi
14181 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
14182 tree v4sf_ftype_v4sf_int
14183 = build_function_type_list (V4SF_type_node,
14184 V4SF_type_node, integer_type_node, NULL_TREE);
14185 tree v4sf_ftype_v4sf_int64
14186 = build_function_type_list (V4SF_type_node,
14187 V4SF_type_node, long_long_integer_type_node,
14188 NULL_TREE);
14189 tree v4sf_ftype_v4sf_v2si
14190 = build_function_type_list (V4SF_type_node,
14191 V4SF_type_node, V2SI_type_node, NULL_TREE);
14193 /* Miscellaneous. */
14194 tree v8qi_ftype_v4hi_v4hi
14195 = build_function_type_list (V8QI_type_node,
14196 V4HI_type_node, V4HI_type_node, NULL_TREE);
14197 tree v4hi_ftype_v2si_v2si
14198 = build_function_type_list (V4HI_type_node,
14199 V2SI_type_node, V2SI_type_node, NULL_TREE);
14200 tree v4sf_ftype_v4sf_v4sf_int
14201 = build_function_type_list (V4SF_type_node,
14202 V4SF_type_node, V4SF_type_node,
14203 integer_type_node, NULL_TREE);
14204 tree v2si_ftype_v4hi_v4hi
14205 = build_function_type_list (V2SI_type_node,
14206 V4HI_type_node, V4HI_type_node, NULL_TREE);
14207 tree v4hi_ftype_v4hi_int
14208 = build_function_type_list (V4HI_type_node,
14209 V4HI_type_node, integer_type_node, NULL_TREE);
14210 tree v4hi_ftype_v4hi_di
14211 = build_function_type_list (V4HI_type_node,
14212 V4HI_type_node, long_long_unsigned_type_node,
14213 NULL_TREE);
14214 tree v2si_ftype_v2si_di
14215 = build_function_type_list (V2SI_type_node,
14216 V2SI_type_node, long_long_unsigned_type_node,
14217 NULL_TREE);
14218 tree void_ftype_void
14219 = build_function_type (void_type_node, void_list_node);
14220 tree void_ftype_unsigned
14221 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
14222 tree void_ftype_unsigned_unsigned
14223 = build_function_type_list (void_type_node, unsigned_type_node,
14224 unsigned_type_node, NULL_TREE);
14225 tree void_ftype_pcvoid_unsigned_unsigned
14226 = build_function_type_list (void_type_node, const_ptr_type_node,
14227 unsigned_type_node, unsigned_type_node,
14228 NULL_TREE);
14229 tree unsigned_ftype_void
14230 = build_function_type (unsigned_type_node, void_list_node);
14231 tree v2si_ftype_v4sf
14232 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
14233 /* Loads/stores. */
14234 tree void_ftype_v8qi_v8qi_pchar
14235 = build_function_type_list (void_type_node,
14236 V8QI_type_node, V8QI_type_node,
14237 pchar_type_node, NULL_TREE);
14238 tree v4sf_ftype_pcfloat
14239 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
14240 /* @@@ the type is bogus */
14241 tree v4sf_ftype_v4sf_pv2si
14242 = build_function_type_list (V4SF_type_node,
14243 V4SF_type_node, pv2si_type_node, NULL_TREE);
14244 tree void_ftype_pv2si_v4sf
14245 = build_function_type_list (void_type_node,
14246 pv2si_type_node, V4SF_type_node, NULL_TREE);
14247 tree void_ftype_pfloat_v4sf
14248 = build_function_type_list (void_type_node,
14249 pfloat_type_node, V4SF_type_node, NULL_TREE);
14250 tree void_ftype_pdi_di
14251 = build_function_type_list (void_type_node,
14252 pdi_type_node, long_long_unsigned_type_node,
14253 NULL_TREE);
14254 tree void_ftype_pv2di_v2di
14255 = build_function_type_list (void_type_node,
14256 pv2di_type_node, V2DI_type_node, NULL_TREE);
14257 /* Normal vector unops. */
14258 tree v4sf_ftype_v4sf
14259 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
14261 /* Normal vector binops. */
14262 tree v4sf_ftype_v4sf_v4sf
14263 = build_function_type_list (V4SF_type_node,
14264 V4SF_type_node, V4SF_type_node, NULL_TREE);
14265 tree v8qi_ftype_v8qi_v8qi
14266 = build_function_type_list (V8QI_type_node,
14267 V8QI_type_node, V8QI_type_node, NULL_TREE);
14268 tree v4hi_ftype_v4hi_v4hi
14269 = build_function_type_list (V4HI_type_node,
14270 V4HI_type_node, V4HI_type_node, NULL_TREE);
14271 tree v2si_ftype_v2si_v2si
14272 = build_function_type_list (V2SI_type_node,
14273 V2SI_type_node, V2SI_type_node, NULL_TREE);
14274 tree di_ftype_di_di
14275 = build_function_type_list (long_long_unsigned_type_node,
14276 long_long_unsigned_type_node,
14277 long_long_unsigned_type_node, NULL_TREE);
14279 tree v2si_ftype_v2sf
14280 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
14281 tree v2sf_ftype_v2si
14282 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
14283 tree v2si_ftype_v2si
14284 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
14285 tree v2sf_ftype_v2sf
14286 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
14287 tree v2sf_ftype_v2sf_v2sf
14288 = build_function_type_list (V2SF_type_node,
14289 V2SF_type_node, V2SF_type_node, NULL_TREE);
14290 tree v2si_ftype_v2sf_v2sf
14291 = build_function_type_list (V2SI_type_node,
14292 V2SF_type_node, V2SF_type_node, NULL_TREE);
14293 tree pint_type_node = build_pointer_type (integer_type_node);
14294 tree pdouble_type_node = build_pointer_type (double_type_node);
14295 tree pcdouble_type_node = build_pointer_type (
14296 build_type_variant (double_type_node, 1, 0));
14297 tree int_ftype_v2df_v2df
14298 = build_function_type_list (integer_type_node,
14299 V2DF_type_node, V2DF_type_node, NULL_TREE);
14301 tree ti_ftype_ti_ti
14302 = build_function_type_list (intTI_type_node,
14303 intTI_type_node, intTI_type_node, NULL_TREE);
14304 tree void_ftype_pcvoid
14305 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
14306 tree v4sf_ftype_v4si
14307 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
14308 tree v4si_ftype_v4sf
14309 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
14310 tree v2df_ftype_v4si
14311 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
14312 tree v4si_ftype_v2df
14313 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
14314 tree v2si_ftype_v2df
14315 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
14316 tree v4sf_ftype_v2df
14317 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
14318 tree v2df_ftype_v2si
14319 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
14320 tree v2df_ftype_v4sf
14321 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
14322 tree int_ftype_v2df
14323 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
14324 tree int64_ftype_v2df
14325 = build_function_type_list (long_long_integer_type_node,
14326 V2DF_type_node, NULL_TREE);
14327 tree v2df_ftype_v2df_int
14328 = build_function_type_list (V2DF_type_node,
14329 V2DF_type_node, integer_type_node, NULL_TREE);
14330 tree v2df_ftype_v2df_int64
14331 = build_function_type_list (V2DF_type_node,
14332 V2DF_type_node, long_long_integer_type_node,
14333 NULL_TREE);
14334 tree v4sf_ftype_v4sf_v2df
14335 = build_function_type_list (V4SF_type_node,
14336 V4SF_type_node, V2DF_type_node, NULL_TREE);
14337 tree v2df_ftype_v2df_v4sf
14338 = build_function_type_list (V2DF_type_node,
14339 V2DF_type_node, V4SF_type_node, NULL_TREE);
14340 tree v2df_ftype_v2df_v2df_int
14341 = build_function_type_list (V2DF_type_node,
14342 V2DF_type_node, V2DF_type_node,
14343 integer_type_node,
14344 NULL_TREE);
14345 tree v2df_ftype_v2df_pcdouble
14346 = build_function_type_list (V2DF_type_node,
14347 V2DF_type_node, pcdouble_type_node, NULL_TREE);
14348 tree void_ftype_pdouble_v2df
14349 = build_function_type_list (void_type_node,
14350 pdouble_type_node, V2DF_type_node, NULL_TREE);
14351 tree void_ftype_pint_int
14352 = build_function_type_list (void_type_node,
14353 pint_type_node, integer_type_node, NULL_TREE);
14354 tree void_ftype_v16qi_v16qi_pchar
14355 = build_function_type_list (void_type_node,
14356 V16QI_type_node, V16QI_type_node,
14357 pchar_type_node, NULL_TREE);
14358 tree v2df_ftype_pcdouble
14359 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
14360 tree v2df_ftype_v2df_v2df
14361 = build_function_type_list (V2DF_type_node,
14362 V2DF_type_node, V2DF_type_node, NULL_TREE);
14363 tree v16qi_ftype_v16qi_v16qi
14364 = build_function_type_list (V16QI_type_node,
14365 V16QI_type_node, V16QI_type_node, NULL_TREE);
14366 tree v8hi_ftype_v8hi_v8hi
14367 = build_function_type_list (V8HI_type_node,
14368 V8HI_type_node, V8HI_type_node, NULL_TREE);
14369 tree v4si_ftype_v4si_v4si
14370 = build_function_type_list (V4SI_type_node,
14371 V4SI_type_node, V4SI_type_node, NULL_TREE);
14372 tree v2di_ftype_v2di_v2di
14373 = build_function_type_list (V2DI_type_node,
14374 V2DI_type_node, V2DI_type_node, NULL_TREE);
14375 tree v2di_ftype_v2df_v2df
14376 = build_function_type_list (V2DI_type_node,
14377 V2DF_type_node, V2DF_type_node, NULL_TREE);
14378 tree v2df_ftype_v2df
14379 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
14380 tree v2di_ftype_v2di_int
14381 = build_function_type_list (V2DI_type_node,
14382 V2DI_type_node, integer_type_node, NULL_TREE);
14383 tree v4si_ftype_v4si_int
14384 = build_function_type_list (V4SI_type_node,
14385 V4SI_type_node, integer_type_node, NULL_TREE);
14386 tree v8hi_ftype_v8hi_int
14387 = build_function_type_list (V8HI_type_node,
14388 V8HI_type_node, integer_type_node, NULL_TREE);
14389 tree v8hi_ftype_v8hi_v2di
14390 = build_function_type_list (V8HI_type_node,
14391 V8HI_type_node, V2DI_type_node, NULL_TREE);
14392 tree v4si_ftype_v4si_v2di
14393 = build_function_type_list (V4SI_type_node,
14394 V4SI_type_node, V2DI_type_node, NULL_TREE);
14395 tree v4si_ftype_v8hi_v8hi
14396 = build_function_type_list (V4SI_type_node,
14397 V8HI_type_node, V8HI_type_node, NULL_TREE);
14398 tree di_ftype_v8qi_v8qi
14399 = build_function_type_list (long_long_unsigned_type_node,
14400 V8QI_type_node, V8QI_type_node, NULL_TREE);
14401 tree di_ftype_v2si_v2si
14402 = build_function_type_list (long_long_unsigned_type_node,
14403 V2SI_type_node, V2SI_type_node, NULL_TREE);
14404 tree v2di_ftype_v16qi_v16qi
14405 = build_function_type_list (V2DI_type_node,
14406 V16QI_type_node, V16QI_type_node, NULL_TREE);
14407 tree v2di_ftype_v4si_v4si
14408 = build_function_type_list (V2DI_type_node,
14409 V4SI_type_node, V4SI_type_node, NULL_TREE);
14410 tree int_ftype_v16qi
14411 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
14412 tree v16qi_ftype_pcchar
14413 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
14414 tree void_ftype_pchar_v16qi
14415 = build_function_type_list (void_type_node,
14416 pchar_type_node, V16QI_type_node, NULL_TREE);
14418 tree float80_type;
14419 tree float128_type;
14420 tree ftype;
14422 /* The __float80 type. */
14423 if (TYPE_MODE (long_double_type_node) == XFmode)
14424 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
14425 "__float80");
14426 else
14427 {
14428 /* The __float80 type. */
14429 float80_type = make_node (REAL_TYPE);
14430 TYPE_PRECISION (float80_type) = 80;
14431 layout_type (float80_type);
14432 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
14433 }
14435 float128_type = make_node (REAL_TYPE);
14436 TYPE_PRECISION (float128_type) = 128;
14437 layout_type (float128_type);
14438 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
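/* Note: register_builtin_type is what makes these names usable directly in
   user code, e.g. "__float80 x;" or "__float128 q;"; __float80 is simply
   long double whenever long double is already XFmode (see above).  */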
14440 /* Add all builtins that are more or less simple operations on two
14441 operands. */
14442 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14443 {
14444 /* Use one of the operands; the target can have a different mode for
14445 mask-generating compares. */
14446 enum machine_mode mode;
14447 tree type;
14449 if (d->name == 0)
14450 continue;
14451 mode = insn_data[d->icode].operand[1].mode;
14453 switch (mode)
14454 {
14455 case V16QImode:
14456 type = v16qi_ftype_v16qi_v16qi;
14457 break;
14458 case V8HImode:
14459 type = v8hi_ftype_v8hi_v8hi;
14460 break;
14461 case V4SImode:
14462 type = v4si_ftype_v4si_v4si;
14463 break;
14464 case V2DImode:
14465 type = v2di_ftype_v2di_v2di;
14466 break;
14467 case V2DFmode:
14468 type = v2df_ftype_v2df_v2df;
14469 break;
14470 case TImode:
14471 type = ti_ftype_ti_ti;
14472 break;
14473 case V4SFmode:
14474 type = v4sf_ftype_v4sf_v4sf;
14475 break;
14476 case V8QImode:
14477 type = v8qi_ftype_v8qi_v8qi;
14478 break;
14479 case V4HImode:
14480 type = v4hi_ftype_v4hi_v4hi;
14481 break;
14482 case V2SImode:
14483 type = v2si_ftype_v2si_v2si;
14484 break;
14485 case DImode:
14486 type = di_ftype_di_di;
14487 break;
14489 default:
14490 gcc_unreachable ();
14491 }
14493 /* Override for comparisons. */
14494 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
14495 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
14496 type = v4si_ftype_v4sf_v4sf;
14498 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
14499 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
14500 type = v2di_ftype_v2df_v2df;
14502 def_builtin (d->mask, d->name, type, d->code);
14503 }
14505 /* Add the remaining MMX insns with somewhat more complicated types. */
14506 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
14507 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
14508 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
14509 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
14511 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
14512 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
14513 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
14515 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
14516 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
14518 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
14519 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
14521 /* comi/ucomi insns. */
14522 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14523 if (d->mask == MASK_SSE2)
14524 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
14525 else
14526 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
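/* Usage sketch (assuming -msse and __v4sf operands a and b): each comi/ucomi
   builtin registered above returns a plain int from comparing the low
   elements, e.g.

     int eq = __builtin_ia32_comieq (a, b);
     int lt = __builtin_ia32_ucomilt (a, b);

   The ucomi variants use UCOMISS, which raises the invalid exception only
   for signaling NaNs.  */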
14528 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
14529 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
14530 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
14532 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
14533 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
14534 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
14535 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
14536 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
14537 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
14538 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
14539 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
14540 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
14541 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
14542 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
14544 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
14546 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
14547 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
14549 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
14550 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
14551 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
14552 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
14554 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
14555 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
14556 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
14557 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
14559 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
14561 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
14563 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
14564 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
14565 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
14566 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
14567 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
14568 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
14570 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
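/* Usage sketch (assuming -msse): the trailing int of v4sf_ftype_v4sf_v4sf_int
   is the 8-bit element selector, e.g.

     __v4sf r = __builtin_ia32_shufps (a, b, 0x1b);

   which is what _mm_shuffle_ps (a, b, _MM_SHUFFLE (0, 1, 2, 3)) boils
   down to.  */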
14572 /* Original 3DNow! */
14573 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
14574 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
14575 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
14576 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
14577 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
14578 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
14579 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
14580 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
14581 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
14582 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
14583 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
14584 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
14585 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
14586 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
14587 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
14588 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
14589 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
14590 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
14591 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
14592 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
14594 /* 3DNow! extension as used in the Athlon CPU. */
14595 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
14596 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
14597 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
14598 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
14599 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
14600 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
14602 /* SSE2 */
14603 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
14605 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
14606 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
14608 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
14609 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
14611 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
14612 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
14613 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
14614 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
14615 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
14617 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
14618 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
14619 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
14620 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
14622 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
14623 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
14625 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
14627 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
14628 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
14630 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
14631 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
14632 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
14633 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
14634 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
14636 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
14638 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
14639 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
14640 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
14641 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
14643 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
14644 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
14645 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
14647 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
14648 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
14649 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
14650 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
14652 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
14653 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
14654 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
14656 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
14657 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
14659 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
14660 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
14662 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
14663 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
14664 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
14666 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
14667 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
14668 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
14670 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
14671 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
14673 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
14674 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
14675 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
14676 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
14678 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
14679 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
14680 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
14681 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
14683 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
14684 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
14686 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
14688 /* Prescott New Instructions. */
14689 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
14690 void_ftype_pcvoid_unsigned_unsigned,
14691 IX86_BUILTIN_MONITOR);
14692 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
14693 void_ftype_unsigned_unsigned,
14694 IX86_BUILTIN_MWAIT);
14695 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
14696 v4sf_ftype_v4sf,
14697 IX86_BUILTIN_MOVSHDUP);
14698 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
14699 v4sf_ftype_v4sf,
14700 IX86_BUILTIN_MOVSLDUP);
14701 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
14702 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
14704 /* Access to the vec_init patterns. */
14705 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
14706 integer_type_node, NULL_TREE);
14707 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
14708 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
14710 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
14711 short_integer_type_node,
14712 short_integer_type_node,
14713 short_integer_type_node, NULL_TREE);
14714 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
14715 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
14717 ftype = build_function_type_list (V8QI_type_node, char_type_node,
14718 char_type_node, char_type_node,
14719 char_type_node, char_type_node,
14720 char_type_node, char_type_node,
14721 char_type_node, NULL_TREE);
14722 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
14723 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
14725 /* Access to the vec_extract patterns. */
14726 ftype = build_function_type_list (double_type_node, V2DF_type_node,
14727 integer_type_node, NULL_TREE);
14728 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
14729 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
14731 ftype = build_function_type_list (long_long_integer_type_node,
14732 V2DI_type_node, integer_type_node,
14733 NULL_TREE);
14734 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
14735 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
14737 ftype = build_function_type_list (float_type_node, V4SF_type_node,
14738 integer_type_node, NULL_TREE);
14739 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
14740 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
14742 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
14743 integer_type_node, NULL_TREE);
14744 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
14745 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
14747 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
14748 integer_type_node, NULL_TREE);
14749 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
14750 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
14752 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
14753 integer_type_node, NULL_TREE);
14754 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
14755 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
14757 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
14758 integer_type_node, NULL_TREE);
14759 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
14760 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
14762 /* Access to the vec_set patterns. */
14763 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
14764 intHI_type_node,
14765 integer_type_node, NULL_TREE);
14766 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
14767 ftype, IX86_BUILTIN_VEC_SET_V8HI);
14769 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
14770 intHI_type_node,
14771 integer_type_node, NULL_TREE);
14772 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
14773 ftype, IX86_BUILTIN_VEC_SET_V4HI);
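/* Taken together, the vec_init/vec_ext/vec_set builtins give the intrinsic
   headers element access without language-level vector syntax.  For example
   (illustrative only):

     float f = __builtin_ia32_vec_ext_v4sf (v, 2);    extracts element 2
     w = __builtin_ia32_vec_set_v8hi (w, 42, 5);      sets element 5 to 42

   The element selector must be a compile-time integer constant; see
   get_element_number below.  */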
14776 /* Errors in the source file can cause expand_expr to return const0_rtx
14777 where we expect a vector. To avoid crashing, use one of the vector
14778 clear instructions. */
14779 static rtx
14780 safe_vector_operand (rtx x, enum machine_mode mode)
14782 if (x == const0_rtx)
14783 x = CONST0_RTX (mode);
14784 return x;
14787 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
14789 static rtx
14790 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
14792 rtx pat, xops[3];
14793 tree arg0 = TREE_VALUE (arglist);
14794 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14795 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14796 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14797 enum machine_mode tmode = insn_data[icode].operand[0].mode;
14798 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
14799 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
14801 if (VECTOR_MODE_P (mode0))
14802 op0 = safe_vector_operand (op0, mode0);
14803 if (VECTOR_MODE_P (mode1))
14804 op1 = safe_vector_operand (op1, mode1);
14806 if (optimize || !target
14807 || GET_MODE (target) != tmode
14808 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14809 target = gen_reg_rtx (tmode);
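/* If the insn wants a TImode operand but the argument was expanded in
   SImode, load the value into the low element of an XMM register with
   sse2_loadd and view that register as TImode.  */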
14811 if (GET_MODE (op1) == SImode && mode1 == TImode)
14813 rtx x = gen_reg_rtx (V4SImode);
14814 emit_insn (gen_sse2_loadd (x, op1));
14815 op1 = gen_lowpart (TImode, x);
14818 /* The insn must want input operands in the same modes as the
14819 result. */
14820 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
14821 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
14823 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
14824 op0 = copy_to_mode_reg (mode0, op0);
14825 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
14826 op1 = copy_to_mode_reg (mode1, op1);
14828 /* ??? Using ix86_fixup_binary_operands is problematic when
14829 we've got mismatched modes. Fake it. */
14831 xops[0] = target;
14832 xops[1] = op0;
14833 xops[2] = op1;
14835 if (tmode == mode0 && tmode == mode1)
14837 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
14838 op0 = xops[1];
14839 op1 = xops[2];
14841 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
14843 op0 = force_reg (mode0, op0);
14844 op1 = force_reg (mode1, op1);
14845 target = gen_reg_rtx (tmode);
14848 pat = GEN_FCN (icode) (target, op0, op1);
14849 if (! pat)
14850 return 0;
14851 emit_insn (pat);
14852 return target;
14855 /* Subroutine of ix86_expand_builtin to take care of stores. */
14857 static rtx
14858 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
14860 rtx pat;
14861 tree arg0 = TREE_VALUE (arglist);
14862 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14863 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14864 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14865 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
14866 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
14868 if (VECTOR_MODE_P (mode1))
14869 op1 = safe_vector_operand (op1, mode1);
14871 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14872 op1 = copy_to_mode_reg (mode1, op1);
14874 pat = GEN_FCN (icode) (op0, op1);
14875 if (pat)
14876 emit_insn (pat);
14877 return 0;
14880 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
14882 static rtx
14883 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
14884 rtx target, int do_load)
14886 rtx pat;
14887 tree arg0 = TREE_VALUE (arglist);
14888 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14889 enum machine_mode tmode = insn_data[icode].operand[0].mode;
14890 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
14892 if (optimize || !target
14893 || GET_MODE (target) != tmode
14894 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14895 target = gen_reg_rtx (tmode);
14896 if (do_load)
14897 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14898 else
14900 if (VECTOR_MODE_P (mode0))
14901 op0 = safe_vector_operand (op0, mode0);
14903 if ((optimize && !register_operand (op0, mode0))
14904 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14905 op0 = copy_to_mode_reg (mode0, op0);
14908 pat = GEN_FCN (icode) (target, op0);
14909 if (! pat)
14910 return 0;
14911 emit_insn (pat);
14912 return target;
14915 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
14916 sqrtss, rsqrtss, rcpss. */
14918 static rtx
14919 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
14921 rtx pat;
14922 tree arg0 = TREE_VALUE (arglist);
14923 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14924 enum machine_mode tmode = insn_data[icode].operand[0].mode;
14925 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
14927 if (optimize || !target
14928 || GET_MODE (target) != tmode
14929 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14930 target = gen_reg_rtx (tmode);
14932 if (VECTOR_MODE_P (mode0))
14933 op0 = safe_vector_operand (op0, mode0);
14935 if ((optimize && !register_operand (op0, mode0))
14936 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14937 op0 = copy_to_mode_reg (mode0, op0);
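/* The sse_vm* scalar patterns take a second vector operand that supplies
   the upper elements of the result via vec_merge; reusing the input here
   means only the low element is replaced by the sqrtss/rsqrtss/rcpss
   result.  */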
14939 op1 = op0;
14940 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
14941 op1 = copy_to_mode_reg (mode0, op1);
14943 pat = GEN_FCN (icode) (target, op0, op1);
14944 if (! pat)
14945 return 0;
14946 emit_insn (pat);
14947 return target;
14950 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
14952 static rtx
14953 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
14954 rtx target)
14956 rtx pat;
14957 tree arg0 = TREE_VALUE (arglist);
14958 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14959 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14960 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14961 rtx op2;
14962 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
14963 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
14964 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
14965 enum rtx_code comparison = d->comparison;
14967 if (VECTOR_MODE_P (mode0))
14968 op0 = safe_vector_operand (op0, mode0);
14969 if (VECTOR_MODE_P (mode1))
14970 op1 = safe_vector_operand (op1, mode1);
14972 /* Swap operands if we have a comparison that isn't available in
14973 hardware. */
14974 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
14976 rtx tmp = gen_reg_rtx (mode1);
14977 emit_move_insn (tmp, op1);
14978 op1 = op0;
14979 op0 = tmp;
14982 if (optimize || !target
14983 || GET_MODE (target) != tmode
14984 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
14985 target = gen_reg_rtx (tmode);
14987 if ((optimize && !register_operand (op0, mode0))
14988 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
14989 op0 = copy_to_mode_reg (mode0, op0);
14990 if ((optimize && !register_operand (op1, mode1))
14991 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
14992 op1 = copy_to_mode_reg (mode1, op1);
14994 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
14995 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
14996 if (! pat)
14997 return 0;
14998 emit_insn (pat);
14999 return target;
15002 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
15004 static rtx
15005 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
15006 rtx target)
15008 rtx pat;
15009 tree arg0 = TREE_VALUE (arglist);
15010 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15011 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15012 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15013 rtx op2;
15014 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
15015 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
15016 enum rtx_code comparison = d->comparison;
15018 if (VECTOR_MODE_P (mode0))
15019 op0 = safe_vector_operand (op0, mode0);
15020 if (VECTOR_MODE_P (mode1))
15021 op1 = safe_vector_operand (op1, mode1);
15023 /* Swap operands if we have a comparison that isn't available in
15024 hardware. */
15025 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15027 rtx tmp = op1;
15028 op1 = op0;
15029 op0 = tmp;
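/* The comi patterns only set the flags, so materialize the result here:
   clear an SImode pseudo, then set its low QImode part from a comparison
   of the flags emitted below.  */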
15032 target = gen_reg_rtx (SImode);
15033 emit_move_insn (target, const0_rtx);
15034 target = gen_rtx_SUBREG (QImode, target, 0);
15036 if ((optimize && !register_operand (op0, mode0))
15037 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
15038 op0 = copy_to_mode_reg (mode0, op0);
15039 if ((optimize && !register_operand (op1, mode1))
15040 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
15041 op1 = copy_to_mode_reg (mode1, op1);
15043 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15044 pat = GEN_FCN (d->icode) (op0, op1);
15045 if (! pat)
15046 return 0;
15047 emit_insn (pat);
15048 emit_insn (gen_rtx_SET (VOIDmode,
15049 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
15050 gen_rtx_fmt_ee (comparison, QImode,
15051 SET_DEST (pat),
15052 const0_rtx)));
15054 return SUBREG_REG (target);
15057 /* Return the integer constant in ARG. Constrain it to be in the range
15058 of the subparts of VEC_TYPE; issue an error if not. */
15060 static int
15061 get_element_number (tree vec_type, tree arg)
15063 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
15065 if (!host_integerp (arg, 1)
15066 || (elt = tree_low_cst (arg, 1), elt > max))
15068 error ("selector must be an integer constant in the range 0..%wi", max);
15069 return 0;
15072 return elt;
15075 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15076 ix86_expand_vector_init. We DO have language-level syntax for this, in
15077 the form of (type){ init-list }. Except that since we can't place emms
15078 instructions from inside the compiler, we can't allow the use of MMX
15079 registers unless the user explicitly asks for it. So we do *not* define
15080 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
15081 we have builtins invoked by mmintrin.h that give us license to emit
15082 these sorts of instructions. */
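/* For example (illustrative, not the literal header code), mmintrin.h can
   build a __m64 from four shorts with

     __builtin_ia32_vec_init_v4hi (w0, w1, w2, w3)

   rather than with a (__v4hi){ ... } initializer, keeping the decision to
   use MMX registers inside the intrinsic headers.  */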
15084 static rtx
15085 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
15087 enum machine_mode tmode = TYPE_MODE (type);
15088 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
15089 int i, n_elt = GET_MODE_NUNITS (tmode);
15090 rtvec v = rtvec_alloc (n_elt);
15092 gcc_assert (VECTOR_MODE_P (tmode));
15094 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
15096 rtx x = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
15097 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
15100 gcc_assert (arglist == NULL);
15102 if (!target || !register_operand (target, tmode))
15103 target = gen_reg_rtx (tmode);
15105 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
15106 return target;
15109 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15110 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
15111 had a language-level syntax for referencing vector elements. */
15113 static rtx
15114 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
15116 enum machine_mode tmode, mode0;
15117 tree arg0, arg1;
15118 int elt;
15119 rtx op0;
15121 arg0 = TREE_VALUE (arglist);
15122 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15124 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15125 elt = get_element_number (TREE_TYPE (arg0), arg1);
15127 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15128 mode0 = TYPE_MODE (TREE_TYPE (arg0));
15129 gcc_assert (VECTOR_MODE_P (mode0));
15131 op0 = force_reg (mode0, op0);
15133 if (optimize || !target || !register_operand (target, tmode))
15134 target = gen_reg_rtx (tmode);
15136 ix86_expand_vector_extract (true, target, op0, elt);
15138 return target;
15141 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15142 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
15143 a language-level syntax for referencing vector elements. */
15145 static rtx
15146 ix86_expand_vec_set_builtin (tree arglist)
15148 enum machine_mode tmode, mode1;
15149 tree arg0, arg1, arg2;
15150 int elt;
15151 rtx op0, op1;
15153 arg0 = TREE_VALUE (arglist);
15154 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15155 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15157 tmode = TYPE_MODE (TREE_TYPE (arg0));
15158 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15159 gcc_assert (VECTOR_MODE_P (tmode));
15161 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
15162 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
15163 elt = get_element_number (TREE_TYPE (arg0), arg2);
15165 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
15166 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
15168 op0 = force_reg (tmode, op0);
15169 op1 = force_reg (mode1, op1);
15171 ix86_expand_vector_set (true, op0, op1, elt);
15173 return op0;
15176 /* Expand an expression EXP that calls a built-in function,
15177 with result going to TARGET if that's convenient
15178 (and in mode MODE if that's convenient).
15179 SUBTARGET may be used as the target for computing one of EXP's operands.
15180 IGNORE is nonzero if the value is to be ignored. */
15182 static rtx
15183 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
15184 enum machine_mode mode ATTRIBUTE_UNUSED,
15185 int ignore ATTRIBUTE_UNUSED)
15187 const struct builtin_description *d;
15188 size_t i;
15189 enum insn_code icode;
15190 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
15191 tree arglist = TREE_OPERAND (exp, 1);
15192 tree arg0, arg1, arg2;
15193 rtx op0, op1, op2, pat;
15194 enum machine_mode tmode, mode0, mode1, mode2;
15195 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15197 switch (fcode)
15199 case IX86_BUILTIN_EMMS:
15200 emit_insn (gen_mmx_emms ());
15201 return 0;
15203 case IX86_BUILTIN_SFENCE:
15204 emit_insn (gen_sse_sfence ());
15205 return 0;
15207 case IX86_BUILTIN_MASKMOVQ:
15208 case IX86_BUILTIN_MASKMOVDQU:
15209 icode = (fcode == IX86_BUILTIN_MASKMOVQ
15210 ? CODE_FOR_mmx_maskmovq
15211 : CODE_FOR_sse2_maskmovdqu);
15212 /* Note the arg order is different from the operand order. */
15213 arg1 = TREE_VALUE (arglist);
15214 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
15215 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15216 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15217 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15218 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
15219 mode0 = insn_data[icode].operand[0].mode;
15220 mode1 = insn_data[icode].operand[1].mode;
15221 mode2 = insn_data[icode].operand[2].mode;
15223 op0 = force_reg (Pmode, op0);
15224 op0 = gen_rtx_MEM (mode1, op0);
15226 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15227 op0 = copy_to_mode_reg (mode0, op0);
15228 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
15229 op1 = copy_to_mode_reg (mode1, op1);
15230 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
15231 op2 = copy_to_mode_reg (mode2, op2);
15232 pat = GEN_FCN (icode) (op0, op1, op2);
15233 if (! pat)
15234 return 0;
15235 emit_insn (pat);
15236 return 0;
15238 case IX86_BUILTIN_SQRTSS:
15239 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
15240 case IX86_BUILTIN_RSQRTSS:
15241 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
15242 case IX86_BUILTIN_RCPSS:
15243 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
15245 case IX86_BUILTIN_LOADUPS:
15246 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
15248 case IX86_BUILTIN_STOREUPS:
15249 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
15251 case IX86_BUILTIN_LOADHPS:
15252 case IX86_BUILTIN_LOADLPS:
15253 case IX86_BUILTIN_LOADHPD:
15254 case IX86_BUILTIN_LOADLPD:
15255 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
15256 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
15257 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
15258 : CODE_FOR_sse2_loadlpd);
15259 arg0 = TREE_VALUE (arglist);
15260 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15261 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15262 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15263 tmode = insn_data[icode].operand[0].mode;
15264 mode0 = insn_data[icode].operand[1].mode;
15265 mode1 = insn_data[icode].operand[2].mode;
15267 op0 = force_reg (mode0, op0);
15268 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
15269 if (optimize || target == 0
15270 || GET_MODE (target) != tmode
15271 || !register_operand (target, tmode))
15272 target = gen_reg_rtx (tmode);
15273 pat = GEN_FCN (icode) (target, op0, op1);
15274 if (! pat)
15275 return 0;
15276 emit_insn (pat);
15277 return target;
15279 case IX86_BUILTIN_STOREHPS:
15280 case IX86_BUILTIN_STORELPS:
15281 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
15282 : CODE_FOR_sse_storelps);
15283 arg0 = TREE_VALUE (arglist);
15284 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15285 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15286 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15287 mode0 = insn_data[icode].operand[0].mode;
15288 mode1 = insn_data[icode].operand[1].mode;
15290 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15291 op1 = force_reg (mode1, op1);
15293 pat = GEN_FCN (icode) (op0, op1);
15294 if (! pat)
15295 return 0;
15296 emit_insn (pat);
15297 return const0_rtx;
15299 case IX86_BUILTIN_MOVNTPS:
15300 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
15301 case IX86_BUILTIN_MOVNTQ:
15302 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
15304 case IX86_BUILTIN_LDMXCSR:
15305 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
15306 target = assign_386_stack_local (SImode, SLOT_TEMP);
15307 emit_move_insn (target, op0);
15308 emit_insn (gen_sse_ldmxcsr (target));
15309 return 0;
15311 case IX86_BUILTIN_STMXCSR:
15312 target = assign_386_stack_local (SImode, SLOT_TEMP);
15313 emit_insn (gen_sse_stmxcsr (target));
15314 return copy_to_mode_reg (SImode, target);
15316 case IX86_BUILTIN_SHUFPS:
15317 case IX86_BUILTIN_SHUFPD:
15318 icode = (fcode == IX86_BUILTIN_SHUFPS
15319 ? CODE_FOR_sse_shufps
15320 : CODE_FOR_sse2_shufpd);
15321 arg0 = TREE_VALUE (arglist);
15322 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15323 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15324 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15325 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15326 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
15327 tmode = insn_data[icode].operand[0].mode;
15328 mode0 = insn_data[icode].operand[1].mode;
15329 mode1 = insn_data[icode].operand[2].mode;
15330 mode2 = insn_data[icode].operand[3].mode;
15332 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15333 op0 = copy_to_mode_reg (mode0, op0);
15334 if ((optimize && !register_operand (op1, mode1))
15335 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
15336 op1 = copy_to_mode_reg (mode1, op1);
15337 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15339 /* @@@ better error message */
15340 error ("mask must be an immediate");
15341 return gen_reg_rtx (tmode);
15343 if (optimize || target == 0
15344 || GET_MODE (target) != tmode
15345 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15346 target = gen_reg_rtx (tmode);
15347 pat = GEN_FCN (icode) (target, op0, op1, op2);
15348 if (! pat)
15349 return 0;
15350 emit_insn (pat);
15351 return target;
15353 case IX86_BUILTIN_PSHUFW:
15354 case IX86_BUILTIN_PSHUFD:
15355 case IX86_BUILTIN_PSHUFHW:
15356 case IX86_BUILTIN_PSHUFLW:
15357 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
15358 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
15359 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
15360 : CODE_FOR_mmx_pshufw);
15361 arg0 = TREE_VALUE (arglist);
15362 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15363 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15364 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15365 tmode = insn_data[icode].operand[0].mode;
15366 mode1 = insn_data[icode].operand[1].mode;
15367 mode2 = insn_data[icode].operand[2].mode;
15369 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
15370 op0 = copy_to_mode_reg (mode1, op0);
15371 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
15373 /* @@@ better error message */
15374 error ("mask must be an immediate");
15375 return const0_rtx;
15377 if (target == 0
15378 || GET_MODE (target) != tmode
15379 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15380 target = gen_reg_rtx (tmode);
15381 pat = GEN_FCN (icode) (target, op0, op1);
15382 if (! pat)
15383 return 0;
15384 emit_insn (pat);
15385 return target;
15387 case IX86_BUILTIN_PSLLDQI128:
15388 case IX86_BUILTIN_PSRLDQI128:
15389 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
15390 : CODE_FOR_sse2_lshrti3);
15391 arg0 = TREE_VALUE (arglist);
15392 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15393 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15394 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15395 tmode = insn_data[icode].operand[0].mode;
15396 mode1 = insn_data[icode].operand[1].mode;
15397 mode2 = insn_data[icode].operand[2].mode;
15399 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
15401 op0 = copy_to_reg (op0);
15402 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
15404 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
15406 error ("shift must be an immediate");
15407 return const0_rtx;
15409 target = gen_reg_rtx (V2DImode);
15410 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
15411 if (! pat)
15412 return 0;
15413 emit_insn (pat);
15414 return target;
15416 case IX86_BUILTIN_FEMMS:
15417 emit_insn (gen_mmx_femms ());
15418 return NULL_RTX;
15420 case IX86_BUILTIN_PAVGUSB:
15421 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
15423 case IX86_BUILTIN_PF2ID:
15424 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
15426 case IX86_BUILTIN_PFACC:
15427 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
15429 case IX86_BUILTIN_PFADD:
15430 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
15432 case IX86_BUILTIN_PFCMPEQ:
15433 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
15435 case IX86_BUILTIN_PFCMPGE:
15436 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
15438 case IX86_BUILTIN_PFCMPGT:
15439 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
15441 case IX86_BUILTIN_PFMAX:
15442 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
15444 case IX86_BUILTIN_PFMIN:
15445 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
15447 case IX86_BUILTIN_PFMUL:
15448 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
15450 case IX86_BUILTIN_PFRCP:
15451 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
15453 case IX86_BUILTIN_PFRCPIT1:
15454 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
15456 case IX86_BUILTIN_PFRCPIT2:
15457 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
15459 case IX86_BUILTIN_PFRSQIT1:
15460 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
15462 case IX86_BUILTIN_PFRSQRT:
15463 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
15465 case IX86_BUILTIN_PFSUB:
15466 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
15468 case IX86_BUILTIN_PFSUBR:
15469 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
15471 case IX86_BUILTIN_PI2FD:
15472 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
15474 case IX86_BUILTIN_PMULHRW:
15475 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
15477 case IX86_BUILTIN_PF2IW:
15478 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
15480 case IX86_BUILTIN_PFNACC:
15481 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
15483 case IX86_BUILTIN_PFPNACC:
15484 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
15486 case IX86_BUILTIN_PI2FW:
15487 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
15489 case IX86_BUILTIN_PSWAPDSI:
15490 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
15492 case IX86_BUILTIN_PSWAPDSF:
15493 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
15495 case IX86_BUILTIN_SQRTSD:
15496 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
15497 case IX86_BUILTIN_LOADUPD:
15498 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
15499 case IX86_BUILTIN_STOREUPD:
15500 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
15502 case IX86_BUILTIN_MFENCE:
15503 emit_insn (gen_sse2_mfence ());
15504 return 0;
15505 case IX86_BUILTIN_LFENCE:
15506 emit_insn (gen_sse2_lfence ());
15507 return 0;
15509 case IX86_BUILTIN_CLFLUSH:
15510 arg0 = TREE_VALUE (arglist);
15511 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15512 icode = CODE_FOR_sse2_clflush;
15513 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
15514 op0 = copy_to_mode_reg (Pmode, op0);
15516 emit_insn (gen_sse2_clflush (op0));
15517 return 0;
15519 case IX86_BUILTIN_MOVNTPD:
15520 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
15521 case IX86_BUILTIN_MOVNTDQ:
15522 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
15523 case IX86_BUILTIN_MOVNTI:
15524 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
15526 case IX86_BUILTIN_LOADDQU:
15527 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
15528 case IX86_BUILTIN_STOREDQU:
15529 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
15531 case IX86_BUILTIN_MONITOR:
15532 arg0 = TREE_VALUE (arglist);
15533 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15534 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15535 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15536 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15537 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
15538 if (!REG_P (op0))
15539 op0 = copy_to_mode_reg (SImode, op0);
15540 if (!REG_P (op1))
15541 op1 = copy_to_mode_reg (SImode, op1);
15542 if (!REG_P (op2))
15543 op2 = copy_to_mode_reg (SImode, op2);
15544 emit_insn (gen_sse3_monitor (op0, op1, op2));
15545 return 0;
15547 case IX86_BUILTIN_MWAIT:
15548 arg0 = TREE_VALUE (arglist);
15549 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15550 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15551 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15552 if (!REG_P (op0))
15553 op0 = copy_to_mode_reg (SImode, op0);
15554 if (!REG_P (op1))
15555 op1 = copy_to_mode_reg (SImode, op1);
15556 emit_insn (gen_sse3_mwait (op0, op1));
15557 return 0;
15559 case IX86_BUILTIN_LDDQU:
15560 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
15561 target, 1);
15563 case IX86_BUILTIN_VEC_INIT_V2SI:
15564 case IX86_BUILTIN_VEC_INIT_V4HI:
15565 case IX86_BUILTIN_VEC_INIT_V8QI:
15566 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
15568 case IX86_BUILTIN_VEC_EXT_V2DF:
15569 case IX86_BUILTIN_VEC_EXT_V2DI:
15570 case IX86_BUILTIN_VEC_EXT_V4SF:
15571 case IX86_BUILTIN_VEC_EXT_V4SI:
15572 case IX86_BUILTIN_VEC_EXT_V8HI:
15573 case IX86_BUILTIN_VEC_EXT_V2SI:
15574 case IX86_BUILTIN_VEC_EXT_V4HI:
15575 return ix86_expand_vec_ext_builtin (arglist, target);
15577 case IX86_BUILTIN_VEC_SET_V8HI:
15578 case IX86_BUILTIN_VEC_SET_V4HI:
15579 return ix86_expand_vec_set_builtin (arglist);
15581 default:
15582 break;
15585 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15586 if (d->code == fcode)
15588 /* Compares are treated specially. */
15589 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15590 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
15591 || d->icode == CODE_FOR_sse2_maskcmpv2df3
15592 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15593 return ix86_expand_sse_compare (d, arglist, target);
15595 return ix86_expand_binop_builtin (d->icode, arglist, target);
15598 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15599 if (d->code == fcode)
15600 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
15602 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15603 if (d->code == fcode)
15604 return ix86_expand_sse_comi (d, arglist, target);
15606 gcc_unreachable ();
15609 /* Store OPERAND to memory after reload is completed. This means
15610 that we can't easily use assign_stack_local. */
15611 rtx
15612 ix86_force_to_memory (enum machine_mode mode, rtx operand)
15614 rtx result;
15616 gcc_assert (reload_completed);
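/* Three cases: with a red zone, store below the stack pointer; on 64-bit
   without a red zone, push the value as DImode; otherwise push it as one
   or two SImode words.  */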
15617 if (TARGET_RED_ZONE)
15619 result = gen_rtx_MEM (mode,
15620 gen_rtx_PLUS (Pmode,
15621 stack_pointer_rtx,
15622 GEN_INT (-RED_ZONE_SIZE)));
15623 emit_move_insn (result, operand);
15625 else if (!TARGET_RED_ZONE && TARGET_64BIT)
15627 switch (mode)
15629 case HImode:
15630 case SImode:
15631 operand = gen_lowpart (DImode, operand);
15632 /* FALLTHRU */
15633 case DImode:
15634 emit_insn (
15635 gen_rtx_SET (VOIDmode,
15636 gen_rtx_MEM (DImode,
15637 gen_rtx_PRE_DEC (DImode,
15638 stack_pointer_rtx)),
15639 operand));
15640 break;
15641 default:
15642 gcc_unreachable ();
15644 result = gen_rtx_MEM (mode, stack_pointer_rtx);
15646 else
15648 switch (mode)
15650 case DImode:
15652 rtx operands[2];
15653 split_di (&operand, 1, operands, operands + 1);
15654 emit_insn (
15655 gen_rtx_SET (VOIDmode,
15656 gen_rtx_MEM (SImode,
15657 gen_rtx_PRE_DEC (Pmode,
15658 stack_pointer_rtx)),
15659 operands[1]));
15660 emit_insn (
15661 gen_rtx_SET (VOIDmode,
15662 gen_rtx_MEM (SImode,
15663 gen_rtx_PRE_DEC (Pmode,
15664 stack_pointer_rtx)),
15665 operands[0]));
15667 break;
15668 case HImode:
15669 /* It is better to store HImodes as SImodes. */
15670 if (!TARGET_PARTIAL_REG_STALL)
15671 operand = gen_lowpart (SImode, operand);
15672 /* FALLTHRU */
15673 case SImode:
15674 emit_insn (
15675 gen_rtx_SET (VOIDmode,
15676 gen_rtx_MEM (GET_MODE (operand),
15677 gen_rtx_PRE_DEC (SImode,
15678 stack_pointer_rtx)),
15679 operand));
15680 break;
15681 default:
15682 gcc_unreachable ();
15684 result = gen_rtx_MEM (mode, stack_pointer_rtx);
15686 return result;
15689 /* Free the stack space allocated by ix86_force_to_memory above. */
15690 void
15691 ix86_free_from_memory (enum machine_mode mode)
15693 if (!TARGET_RED_ZONE)
15695 int size;
15697 if (mode == DImode || TARGET_64BIT)
15698 size = 8;
15699 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
15700 size = 2;
15701 else
15702 size = 4;
15703 /* Use LEA to deallocate stack space. In peephole2 it will be converted
15704 to a pop or add instruction if registers are available. */
15705 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
15706 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
15707 GEN_INT (size))));
15711 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
15712 QImode must go into class Q_REGS.
15713 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
15714 movdf to do mem-to-mem moves through integer regs. */
15715 enum reg_class
15716 ix86_preferred_reload_class (rtx x, enum reg_class class)
15718 /* We're only allowed to return a subclass of CLASS. Many of the
15719 following checks fail for NO_REGS, so eliminate that early. */
15720 if (class == NO_REGS)
15721 return NO_REGS;
15723 /* All classes can load zeros. */
15724 if (x == CONST0_RTX (GET_MODE (x)))
15725 return class;
15727 /* Floating-point constants need more complex checks. */
15728 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
15730 /* General regs can load everything. */
15731 if (reg_class_subset_p (class, GENERAL_REGS))
15732 return class;
15734 /* Floats can load 0 and 1 plus some others. Note that we eliminated
15735 zero above. We only want to wind up preferring 80387 registers if
15736 we plan on doing computation with them. */
15737 if (TARGET_80387
15738 && (TARGET_MIX_SSE_I387
15739 || !(TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (x))))
15740 && standard_80387_constant_p (x))
15742 /* Limit class to non-sse. */
15743 if (class == FLOAT_SSE_REGS)
15744 return FLOAT_REGS;
15745 if (class == FP_TOP_SSE_REGS)
15746 return FP_TOP_REG;
15747 if (class == FP_SECOND_SSE_REGS)
15748 return FP_SECOND_REG;
15749 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
15750 return class;
15753 return NO_REGS;
15755 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
15756 return NO_REGS;
15757 if (MAYBE_SSE_CLASS_P (class) && CONSTANT_P (x))
15758 return NO_REGS;
15760 /* Generally when we see PLUS here, it's the function invariant
15761 (plus soft-fp const_int). Which can only be computed into general
15762 regs. */
15763 if (GET_CODE (x) == PLUS)
15764 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
15766 /* QImode constants are easy to load, but non-constant QImode data
15767 must go into Q_REGS. */
15768 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
15770 if (reg_class_subset_p (class, Q_REGS))
15771 return class;
15772 if (reg_class_subset_p (Q_REGS, class))
15773 return Q_REGS;
15774 return NO_REGS;
15777 return class;
15780 /* If we are copying between general and FP registers, we need a memory
15781 location. The same is true for SSE and MMX registers.
15783 The macro can't work reliably when one of the CLASSES is a class containing
15784 registers from multiple units (SSE, MMX, integer). We avoid this by never
15785 combining those units in a single alternative in the machine description.
15786 Ensure that this constraint holds to avoid unexpected surprises.
15788 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
15789 enforce these sanity checks. */
15791 int
15792 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
15793 enum machine_mode mode, int strict)
15795 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
15796 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
15797 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
15798 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
15799 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
15800 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
15802 gcc_assert (!strict);
15803 return true;
15806 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
15807 return true;
15809 /* ??? This is a lie. We do have moves between mmx/general and between
15810 mmx/sse2. But by saying we need secondary memory we discourage the
15811 register allocator from using the mmx registers unless needed. */
15812 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
15813 return true;
15815 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
15817 /* SSE1 doesn't have any direct moves from other classes. */
15818 if (!TARGET_SSE2)
15819 return true;
15821 /* If the target says that inter-unit moves are more expensive
15822 than moving through memory, then don't generate them. */
15823 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
15824 return true;
15826 /* Between SSE and general, we have moves no larger than word size. */
15827 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
15828 return true;
15830 /* ??? For the cost of one register reformat penalty, we could use
15831 the same instructions to move SFmode and DFmode data, but the
15832 relevant move patterns don't support those alternatives. */
15833 if (mode == SFmode || mode == DFmode)
15834 return true;
15837 return false;
15840 /* Return true if the registers in CLASS cannot represent the change from
15841 modes FROM to TO. */
15843 bool
15844 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
15845 enum reg_class class)
15847 if (from == to)
15848 return false;
15850 /* x87 registers can't do subreg at all, as all values are reformatted
15851 to extended precision. */
15852 if (MAYBE_FLOAT_CLASS_P (class))
15853 return true;
15855 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
15857 /* Vector registers do not support QI or HImode loads. If we don't
15858 disallow a change to these modes, reload will assume it's ok to
15859 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
15860 the vec_dupv4hi pattern. */
15861 if (GET_MODE_SIZE (from) < 4)
15862 return true;
15864 /* Vector registers do not support subreg with nonzero offsets, which
15865 are otherwise valid for integer registers. Since we can't see
15866 whether we have a nonzero offset from here, prohibit all
15867 nonparadoxical subregs changing size. */
15868 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
15869 return true;
15872 return false;
15875 /* Return the cost of moving data from a register in class CLASS1 to
15876 one in class CLASS2.
15878 It is not required that the cost always equal 2 when FROM is the same as TO;
15879 on some machines it is expensive to move between registers if they are not
15880 general registers. */
15882 int
15883 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
15884 enum reg_class class2)
15886 /* In case we require secondary memory, compute the cost of the store followed
15887 by the load. In order to avoid bad register allocation choices, we need
15888 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
15890 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
15892 int cost = 1;
15894 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
15895 MEMORY_MOVE_COST (mode, class1, 1));
15896 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
15897 MEMORY_MOVE_COST (mode, class2, 1));
15899 /* When copying from a general purpose register we may emit multiple
15900 stores followed by a single load, causing a memory size mismatch stall.
15901 Count this as an arbitrarily high cost of 20. */
15902 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
15903 cost += 20;
15905 /* In the case of FP/MMX moves, the registers actually overlap, and we
15906 have to switch modes in order to treat them differently. */
15907 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
15908 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
15909 cost += 20;
15911 return cost;
15914 /* Moves between SSE/MMX and integer unit are expensive. */
15915 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
15916 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
15917 return ix86_cost->mmxsse_to_integer;
15918 if (MAYBE_FLOAT_CLASS_P (class1))
15919 return ix86_cost->fp_move;
15920 if (MAYBE_SSE_CLASS_P (class1))
15921 return ix86_cost->sse_move;
15922 if (MAYBE_MMX_CLASS_P (class1))
15923 return ix86_cost->mmx_move;
15924 return 2;
15927 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
15929 bool
15930 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
15932 /* Flags and only flags can only hold CCmode values. */
15933 if (CC_REGNO_P (regno))
15934 return GET_MODE_CLASS (mode) == MODE_CC;
15935 if (GET_MODE_CLASS (mode) == MODE_CC
15936 || GET_MODE_CLASS (mode) == MODE_RANDOM
15937 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
15938 return 0;
15939 if (FP_REGNO_P (regno))
15940 return VALID_FP_MODE_P (mode);
15941 if (SSE_REGNO_P (regno))
15943 /* We implement the move patterns for all vector modes into and
15944 out of SSE registers, even when no operation instructions
15945 are available. */
15946 return (VALID_SSE_REG_MODE (mode)
15947 || VALID_SSE2_REG_MODE (mode)
15948 || VALID_MMX_REG_MODE (mode)
15949 || VALID_MMX_REG_MODE_3DNOW (mode));
15951 if (MMX_REGNO_P (regno))
15953 /* We implement the move patterns for 3DNOW modes even in MMX mode,
15954 so if the register is available at all, then we can move data of
15955 the given mode into or out of it. */
15956 return (VALID_MMX_REG_MODE (mode)
15957 || VALID_MMX_REG_MODE_3DNOW (mode));
15960 if (mode == QImode)
15962 /* Take care with QImode values - they can be in non-QI regs,
15963 but then they do cause partial register stalls. */
15964 if (regno < 4 || TARGET_64BIT)
15965 return 1;
15966 if (!TARGET_PARTIAL_REG_STALL)
15967 return 1;
15968 return reload_in_progress || reload_completed;
15970 /* We handle both integer and float modes in the general purpose registers. */
15971 else if (VALID_INT_MODE_P (mode))
15972 return 1;
15973 else if (VALID_FP_MODE_P (mode))
15974 return 1;
15975 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
15976 on to use that value in smaller contexts, this can easily force a
15977 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
15978 supporting DImode, allow it. */
15979 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
15980 return 1;
15982 return 0;
15985 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
15986 tieable integer mode. */
15988 static bool
15989 ix86_tieable_integer_mode_p (enum machine_mode mode)
15991 switch (mode)
15993 case HImode:
15994 case SImode:
15995 return true;
15997 case QImode:
15998 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
16000 case DImode:
16001 return TARGET_64BIT;
16003 default:
16004 return false;
16008 /* Return true if MODE1 is accessible in a register that can hold MODE2
16009 without copying. That is, all register classes that can hold MODE2
16010 can also hold MODE1. */
16012 bool
16013 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
16015 if (mode1 == mode2)
16016 return true;
16018 if (ix86_tieable_integer_mode_p (mode1)
16019 && ix86_tieable_integer_mode_p (mode2))
16020 return true;
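/* For example, HImode and SImode always tie; QImode joins them only when
   partial register stalls are not an issue or in 64-bit mode, and DImode
   only in 64-bit mode (see ix86_tieable_integer_mode_p above).  */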
16022 /* MODE2 being XFmode implies fp stack or general regs, which means we
16023 can tie any smaller floating point modes to it. Note that we do not
16024 tie this with TFmode. */
16025 if (mode2 == XFmode)
16026 return mode1 == SFmode || mode1 == DFmode;
16028 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
16029 that we can tie it with SFmode. */
16030 if (mode2 == DFmode)
16031 return mode1 == SFmode;
16033 /* If MODE2 is only appropriate for an SSE register, then tie with
16034 any other mode acceptable to SSE registers. */
16035 if (GET_MODE_SIZE (mode2) >= 8
16036 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
16037 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
16039 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
16040 with any other mode acceptable to MMX registers. */
16041 if (GET_MODE_SIZE (mode2) == 8
16042 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
16043 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
16045 return false;
16048 /* Return the cost of moving data of mode M between a
16049 register and memory. A value of 2 is the default; this cost is
16050 relative to those in `REGISTER_MOVE_COST'.
16052 If moving between registers and memory is more expensive than
16053 between two registers, you should define this macro to express the
16054 relative cost.
16056 Also model the increased cost of moving QImode registers in non
16057 Q_REGS classes.
16058 */
16059 int
16060 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
16062 if (FLOAT_CLASS_P (class))
16064 int index;
16065 switch (mode)
16067 case SFmode:
16068 index = 0;
16069 break;
16070 case DFmode:
16071 index = 1;
16072 break;
16073 case XFmode:
16074 index = 2;
16075 break;
16076 default:
16077 return 100;
16079 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
16081 if (SSE_CLASS_P (class))
16083 int index;
16084 switch (GET_MODE_SIZE (mode))
16086 case 4:
16087 index = 0;
16088 break;
16089 case 8:
16090 index = 1;
16091 break;
16092 case 16:
16093 index = 2;
16094 break;
16095 default:
16096 return 100;
16098 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
16100 if (MMX_CLASS_P (class))
16102 int index;
16103 switch (GET_MODE_SIZE (mode))
16105 case 4:
16106 index = 0;
16107 break;
16108 case 8:
16109 index = 1;
16110 break;
16111 default:
16112 return 100;
16114 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
16116 switch (GET_MODE_SIZE (mode))
16118 case 1:
16119 if (in)
16120 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
16121 : ix86_cost->movzbl_load);
16122 else
16123 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
16124 : ix86_cost->int_store[0] + 4);
16125 break;
16126 case 2:
16127 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
16128 default:
16129 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
16130 if (mode == TFmode)
16131 mode = XFmode;
16132 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
16133 * (((int) GET_MODE_SIZE (mode)
16134 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
16138 /* Compute a (partial) cost for rtx X. Return true if the complete
16139 cost has been computed, and false if subexpressions should be
16140 scanned. In either case, *TOTAL contains the cost result. */
16142 static bool
16143 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
16145 enum machine_mode mode = GET_MODE (x);
16147 switch (code)
16149 case CONST_INT:
16150 case CONST:
16151 case LABEL_REF:
16152 case SYMBOL_REF:
16153 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
16154 *total = 3;
16155 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
16156 *total = 2;
16157 else if (flag_pic && SYMBOLIC_CONST (x)
16158 && (!TARGET_64BIT
16159 || (GET_CODE (x) != LABEL_REF
16160 && (GET_CODE (x) != SYMBOL_REF
16161 || !SYMBOL_REF_LOCAL_P (x)))))
16162 *total = 1;
16163 else
16164 *total = 0;
16165 return true;
16167 case CONST_DOUBLE:
16168 if (mode == VOIDmode)
16169 *total = 0;
16170 else
16171 switch (standard_80387_constant_p (x))
16173 case 1: /* 0.0 */
16174 *total = 1;
16175 break;
16176 default: /* Other constants */
16177 *total = 2;
16178 break;
16179 case 0:
16180 case -1:
16181 /* Start with (MEM (SYMBOL_REF)), since that's where
16182 it'll probably end up. Add a penalty for size. */
16183 *total = (COSTS_N_INSNS (1)
16184 + (flag_pic != 0 && !TARGET_64BIT)
16185 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
16186 break;
16188 return true;
16190 case ZERO_EXTEND:
16191 /* The zero extension is often completely free on x86_64, so make
16192 it as cheap as possible. */
16193 if (TARGET_64BIT && mode == DImode
16194 && GET_MODE (XEXP (x, 0)) == SImode)
16195 *total = 1;
16196 else if (TARGET_ZERO_EXTEND_WITH_AND)
16197 *total = COSTS_N_INSNS (ix86_cost->add);
16198 else
16199 *total = COSTS_N_INSNS (ix86_cost->movzx);
16200 return false;
16202 case SIGN_EXTEND:
16203 *total = COSTS_N_INSNS (ix86_cost->movsx);
16204 return false;
16206 case ASHIFT:
16207 if (GET_CODE (XEXP (x, 1)) == CONST_INT
16208 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
16210 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
16211 if (value == 1)
16213 *total = COSTS_N_INSNS (ix86_cost->add);
16214 return false;
16216 if ((value == 2 || value == 3)
16217 && ix86_cost->lea <= ix86_cost->shift_const)
16219 *total = COSTS_N_INSNS (ix86_cost->lea);
16220 return false;
16223 /* FALLTHRU */
16225 case ROTATE:
16226 case ASHIFTRT:
16227 case LSHIFTRT:
16228 case ROTATERT:
16229 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
16231 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16233 if (INTVAL (XEXP (x, 1)) > 32)
16234 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
16235 else
16236 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
16238 else
16240 if (GET_CODE (XEXP (x, 1)) == AND)
16241 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
16242 else
16243 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
16246 else
16248 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16249 *total = COSTS_N_INSNS (ix86_cost->shift_const);
16250 else
16251 *total = COSTS_N_INSNS (ix86_cost->shift_var);
16253 return false;
16255 case MULT:
16256 if (FLOAT_MODE_P (mode))
16258 *total = COSTS_N_INSNS (ix86_cost->fmul);
16259 return false;
16261 else
16263 rtx op0 = XEXP (x, 0);
16264 rtx op1 = XEXP (x, 1);
16265 int nbits;
16266 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16268 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
16269 for (nbits = 0; value != 0; value &= value - 1)
16270 nbits++;
16272 else
16273 /* This is arbitrary. */
16274 nbits = 7;
16276 /* Compute costs correctly for widening multiplication. */
16277 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
16278 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
16279 == GET_MODE_SIZE (mode))
16281 int is_mulwiden = 0;
16282 enum machine_mode inner_mode = GET_MODE (op0);
16284 if (GET_CODE (op0) == GET_CODE (op1))
16285 is_mulwiden = 1, op1 = XEXP (op1, 0);
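/* A constant operand still counts as widening if it is representable in
   the narrower inner mode: sign-representable for a sign-extending
   multiply, zero-representable for a zero-extending one.  */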
16286 else if (GET_CODE (op1) == CONST_INT)
16288 if (GET_CODE (op0) == SIGN_EXTEND)
16289 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
16290 == INTVAL (op1);
16291 else
16292 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
16295 if (is_mulwiden)
16296 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
16299 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
16300 + nbits * ix86_cost->mult_bit)
16301 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
16303 return true;
16306 case DIV:
16307 case UDIV:
16308 case MOD:
16309 case UMOD:
16310 if (FLOAT_MODE_P (mode))
16311 *total = COSTS_N_INSNS (ix86_cost->fdiv);
16312 else
16313 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
16314 return false;
16316 case PLUS:
16317 if (FLOAT_MODE_P (mode))
16318 *total = COSTS_N_INSNS (ix86_cost->fadd);
16319 else if (GET_MODE_CLASS (mode) == MODE_INT
16320 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
16322 if (GET_CODE (XEXP (x, 0)) == PLUS
16323 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
16324 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
16325 && CONSTANT_P (XEXP (x, 1)))
16327 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
16328 if (val == 2 || val == 4 || val == 8)
16330 *total = COSTS_N_INSNS (ix86_cost->lea);
16331 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
16332 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
16333 outer_code);
16334 *total += rtx_cost (XEXP (x, 1), outer_code);
16335 return true;
16338 else if (GET_CODE (XEXP (x, 0)) == MULT
16339 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
16341 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
16342 if (val == 2 || val == 4 || val == 8)
16344 *total = COSTS_N_INSNS (ix86_cost->lea);
16345 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
16346 *total += rtx_cost (XEXP (x, 1), outer_code);
16347 return true;
16350 else if (GET_CODE (XEXP (x, 0)) == PLUS)
16352 *total = COSTS_N_INSNS (ix86_cost->lea);
16353 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
16354 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
16355 *total += rtx_cost (XEXP (x, 1), outer_code);
16356 return true;
16359 /* FALLTHRU */
16361 case MINUS:
16362 if (FLOAT_MODE_P (mode))
16364 *total = COSTS_N_INSNS (ix86_cost->fadd);
16365 return false;
16367 /* FALLTHRU */
16369 case AND:
16370 case IOR:
16371 case XOR:
16372 if (!TARGET_64BIT && mode == DImode)
16374 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
16375 + (rtx_cost (XEXP (x, 0), outer_code)
16376 << (GET_MODE (XEXP (x, 0)) != DImode))
16377 + (rtx_cost (XEXP (x, 1), outer_code)
16378 << (GET_MODE (XEXP (x, 1)) != DImode)));
16379 return true;
16381 /* FALLTHRU */
16383 case NEG:
16384 if (FLOAT_MODE_P (mode))
16386 *total = COSTS_N_INSNS (ix86_cost->fchs);
16387 return false;
16389 /* FALLTHRU */
16391 case NOT:
16392 if (!TARGET_64BIT && mode == DImode)
16393 *total = COSTS_N_INSNS (ix86_cost->add * 2);
16394 else
16395 *total = COSTS_N_INSNS (ix86_cost->add);
16396 return false;
16398 case COMPARE:
16399 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
16400 && XEXP (XEXP (x, 0), 1) == const1_rtx
16401 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
16402 && XEXP (x, 1) == const0_rtx)
16404 /* This kind of construct is implemented using test[bwl].
16405 Treat it as if we had an AND. */
16406 *total = (COSTS_N_INSNS (ix86_cost->add)
16407 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
16408 + rtx_cost (const1_rtx, outer_code));
16409 return true;
16411 return false;
16413 case FLOAT_EXTEND:
16414 if (!TARGET_SSE_MATH
16415 || mode == XFmode
16416 || (mode == DFmode && !TARGET_SSE2))
16417 *total = 0;
16418 return false;
16420 case ABS:
16421 if (FLOAT_MODE_P (mode))
16422 *total = COSTS_N_INSNS (ix86_cost->fabs);
16423 return false;
16425 case SQRT:
16426 if (FLOAT_MODE_P (mode))
16427 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
16428 return false;
16430 case UNSPEC:
16431 if (XINT (x, 1) == UNSPEC_TP)
16432 *total = 0;
16433 return false;
16435 default:
16436 return false;
16440 #if TARGET_MACHO
16442 static int current_machopic_label_num;
16444 /* Given a symbol name and its associated stub, write out the
16445 definition of the stub. */
16447 void
16448 machopic_output_stub (FILE *file, const char *symb, const char *stub)
16450 unsigned int length;
16451 char *binder_name, *symbol_name, lazy_ptr_name[32];
16452 int label = ++current_machopic_label_num;
16454 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
16455 symb = (*targetm.strip_name_encoding) (symb);
16457 length = strlen (stub);
16458 binder_name = alloca (length + 32);
16459 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
16461 length = strlen (symb);
16462 symbol_name = alloca (length + 32);
16463 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
16465 sprintf (lazy_ptr_name, "L%d$lz", label);
16467 if (MACHOPIC_PURE)
16468 machopic_picsymbol_stub_section ();
16469 else
16470 machopic_symbol_stub_section ();
16472 fprintf (file, "%s:\n", stub);
16473 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
16475 if (MACHOPIC_PURE)
16477 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
16478 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
16479 fprintf (file, "\tjmp %%edx\n");
16481 else
16482 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
16484 fprintf (file, "%s:\n", binder_name);
16486 if (MACHOPIC_PURE)
16488 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
16489 fprintf (file, "\tpushl %%eax\n");
16491 else
16492 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
16494 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
16496 machopic_lazy_symbol_ptr_section ();
16497 fprintf (file, "%s:\n", lazy_ptr_name);
16498 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
16499 fprintf (file, "\t.long %s\n", binder_name);
16501 #endif /* TARGET_MACHO */
16503 /* Order the registers for the register allocator. */
16505 void
16506 x86_order_regs_for_local_alloc (void)
16508 int pos = 0;
16509 int i;
16511 /* First allocate the local general purpose registers. */
16512 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
16513 if (GENERAL_REGNO_P (i) && call_used_regs[i])
16514 reg_alloc_order [pos++] = i;
16516 /* Global general purpose registers. */
16517 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
16518 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
16519 reg_alloc_order [pos++] = i;
16521 /* x87 registers come first in case we are doing FP math
16522 using them. */
16523 if (!TARGET_SSE_MATH)
16524 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
16525 reg_alloc_order [pos++] = i;
16527 /* SSE registers. */
16528 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16529 reg_alloc_order [pos++] = i;
16530 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16531 reg_alloc_order [pos++] = i;
16533 /* x87 registers. */
16534 if (TARGET_SSE_MATH)
16535 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
16536 reg_alloc_order [pos++] = i;
16538 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
16539 reg_alloc_order [pos++] = i;
16541 /* Initialize the rest of the array, as we do not allocate some registers
16542 at all. */
16543 while (pos < FIRST_PSEUDO_REGISTER)
16544 reg_alloc_order [pos++] = 0;
16547 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
16548 struct attribute_spec.handler. */
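/* For example, "struct __attribute__ ((ms_struct)) S { ... };" lays S out
   with the MSVC bit-field rules, while "gcc_struct" selects the native GCC
   layout; specifying both is flagged below as incompatible.  */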
16549 static tree
16550 ix86_handle_struct_attribute (tree *node, tree name,
16551 tree args ATTRIBUTE_UNUSED,
16552 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
16554 tree *type = NULL;
16555 if (DECL_P (*node))
16557 if (TREE_CODE (*node) == TYPE_DECL)
16558 type = &TREE_TYPE (*node);
16560 else
16561 type = node;
16563 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
16564 || TREE_CODE (*type) == UNION_TYPE)))
16566 warning (OPT_Wattributes, "%qs attribute ignored",
16567 IDENTIFIER_POINTER (name));
16568 *no_add_attrs = true;
16571 else if ((is_attribute_p ("ms_struct", name)
16572 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
16573 || ((is_attribute_p ("gcc_struct", name)
16574 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
16576 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
16577 IDENTIFIER_POINTER (name));
16578 *no_add_attrs = true;
16581 return NULL_TREE;
16584 static bool
16585 ix86_ms_bitfield_layout_p (tree record_type)
16587 return (TARGET_MS_BITFIELD_LAYOUT &&
16588 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
16589 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
16592 /* Returns an expression indicating where the this parameter is
16593 located on entry to the FUNCTION. */
16595 static rtx
16596 x86_this_parameter (tree function)
16598 tree type = TREE_TYPE (function);
16600 if (TARGET_64BIT)
16602 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
16603 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
16606 if (ix86_function_regparm (type, function) > 0)
16608 tree parm;
16610 parm = TYPE_ARG_TYPES (type);
16611 /* Figure out whether or not the function has a variable number of
16612 arguments. */
16613 for (; parm; parm = TREE_CHAIN (parm))
16614 if (TREE_VALUE (parm) == void_type_node)
16615 break;
16616 /* If not, the this parameter is in the first argument. */
16617 if (parm)
16619 int regno = 0;
16620 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
16621 regno = 2;
16622 return gen_rtx_REG (SImode, regno);
16626 if (aggregate_value_p (TREE_TYPE (type), type))
16627 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
16628 else
16629 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
16632 /* Determine whether x86_output_mi_thunk can succeed. */
16634 static bool
16635 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
16636 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
16637 HOST_WIDE_INT vcall_offset, tree function)
16639 /* 64-bit can handle anything. */
16640 if (TARGET_64BIT)
16641 return true;
16643 /* For 32-bit, everything's fine if we have one free register. */
16644 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
16645 return true;
16647 /* Need a free register for vcall_offset. */
16648 if (vcall_offset)
16649 return false;
16651 /* Need a free register for GOT references. */
16652 if (flag_pic && !(*targetm.binds_local_p) (function))
16653 return false;
16655 /* Otherwise ok. */
16656 return true;
16659 /* Output the assembler code for a thunk function. THUNK_DECL is the
16660 declaration for the thunk function itself, FUNCTION is the decl for
16661 the target function. DELTA is an immediate constant offset to be
16662 added to THIS. If VCALL_OFFSET is nonzero, the word at
16663 *(*this + vcall_offset) should be added to THIS. */
16665 static void
16666 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
16667 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
16668 HOST_WIDE_INT vcall_offset, tree function)
16670 rtx xops[3];
16671 rtx this = x86_this_parameter (function);
16672 rtx this_reg, tmp;
16674 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
16675 pull it in now and let DELTA benefit. */
16676 if (REG_P (this))
16677 this_reg = this;
16678 else if (vcall_offset)
16680 /* Put the this parameter into %eax. */
16681 xops[0] = this;
16682 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
16683 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
16685 else
16686 this_reg = NULL_RTX;
16688 /* Adjust the this parameter by a fixed constant. */
16689 if (delta)
16691 xops[0] = GEN_INT (delta);
16692 xops[1] = this_reg ? this_reg : this;
16693 if (TARGET_64BIT)
16695 if (!x86_64_general_operand (xops[0], DImode))
16697 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
16698 xops[1] = tmp;
16699 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
16700 xops[0] = tmp;
16701 xops[1] = this;
16703 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
16705 else
16706 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
16709 /* Adjust the this parameter by a value stored in the vtable. */
16710 if (vcall_offset)
16712 if (TARGET_64BIT)
16713 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
16714 else
16716 int tmp_regno = 2 /* ECX */;
16717 if (lookup_attribute ("fastcall",
16718 TYPE_ATTRIBUTES (TREE_TYPE (function))))
16719 tmp_regno = 0 /* EAX */;
16720 tmp = gen_rtx_REG (SImode, tmp_regno);
16723 xops[0] = gen_rtx_MEM (Pmode, this_reg);
16724 xops[1] = tmp;
16725 if (TARGET_64BIT)
16726 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
16727 else
16728 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
16730 /* Adjust the this parameter. */
16731 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
16732 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
16734 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
16735 xops[0] = GEN_INT (vcall_offset);
16736 xops[1] = tmp2;
16737 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
16738 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
16740 xops[1] = this_reg;
16741 if (TARGET_64BIT)
16742 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
16743 else
16744 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
16747 /* If necessary, drop THIS back to its stack slot. */
16748 if (this_reg && this_reg != this)
16750 xops[0] = this_reg;
16751 xops[1] = this;
16752 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
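/* Finally, jump to the target function.  */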
16755 xops[0] = XEXP (DECL_RTL (function), 0);
16756 if (TARGET_64BIT)
16758 if (!flag_pic || (*targetm.binds_local_p) (function))
16759 output_asm_insn ("jmp\t%P0", xops);
16760 else
16762 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
16763 tmp = gen_rtx_CONST (Pmode, tmp);
16764 tmp = gen_rtx_MEM (QImode, tmp);
16765 xops[0] = tmp;
16766 output_asm_insn ("jmp\t%A0", xops);
16769 else
16771 if (!flag_pic || (*targetm.binds_local_p) (function))
16772 output_asm_insn ("jmp\t%P0", xops);
16773 else
16774 #if TARGET_MACHO
16775 if (TARGET_MACHO)
16777 rtx sym_ref = XEXP (DECL_RTL (function), 0);
16778 tmp = (gen_rtx_SYMBOL_REF
16779 (Pmode,
16780 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
16781 tmp = gen_rtx_MEM (QImode, tmp);
16782 xops[0] = tmp;
16783 output_asm_insn ("jmp\t%0", xops);
16785 else
16786 #endif /* TARGET_MACHO */
16788 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
16789 output_set_got (tmp);
16791 xops[1] = tmp;
16792 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
16793 output_asm_insn ("jmp\t{*}%1", xops);
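/* Output assembler directives at the start of the file: an optional
   .version directive, a .global for __fltused, and .intel_syntax when the
   Intel assembler dialect has been selected.  */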
16798 static void
16799 x86_file_start (void)
16801 default_file_start ();
16802 if (X86_FILE_START_VERSION_DIRECTIVE)
16803 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
16804 if (X86_FILE_START_FLTUSED)
16805 fputs ("\t.global\t__fltused\n", asm_out_file);
16806 if (ix86_asm_dialect == ASM_INTEL)
16807 fputs ("\t.intel_syntax\n", asm_out_file);
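/* Return the alignment to be used for FIELD, given the alignment COMPUTED
   so far.  On 32-bit targets without -malign-double, double and integer
   fields are capped at 32-bit alignment.  */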
16810 int
16811 x86_field_alignment (tree field, int computed)
16813 enum machine_mode mode;
16814 tree type = TREE_TYPE (field);
16816 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
16817 return computed;
16818 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
16819 ? get_inner_array_type (type) : type);
16820 if (mode == DFmode || mode == DCmode
16821 || GET_MODE_CLASS (mode) == MODE_INT
16822 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
16823 return MIN (32, computed);
16824 return computed;
16827 /* Output assembler code to FILE to increment profiler label # LABELNO
16828 for profiling a function entry. */
16829 void
16830 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
16832 if (TARGET_64BIT)
16833 if (flag_pic)
16835 #ifndef NO_PROFILE_COUNTERS
16836 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
16837 #endif
16838 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
16840 else
16842 #ifndef NO_PROFILE_COUNTERS
16843 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
16844 #endif
16845 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
16847 else if (flag_pic)
16849 #ifndef NO_PROFILE_COUNTERS
16850 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
16851 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
16852 #endif
16853 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
16855 else
16857 #ifndef NO_PROFILE_COUNTERS
16858 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
16859 PROFILE_COUNT_REGISTER);
16860 #endif
16861 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
16865 /* We don't have exact information about the insn sizes, but we may assume
16866 quite safely that we are informed about all 1-byte insns and memory
16867 address sizes. This is enough to eliminate unnecessary padding in
16868 99% of cases. */
16870 static int
16871 min_insn_size (rtx insn)
16873 int l = 0;
16875 if (!INSN_P (insn) || !active_insn_p (insn))
16876 return 0;
16878 /* Discard alignments we've emitted, and jump tables (ADDR_VEC and ADDR_DIFF_VEC). */
16879 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
16880 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
16881 return 0;
16882 if (GET_CODE (insn) == JUMP_INSN
16883 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
16884 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
16885 return 0;
16887 /* Important case: calls are always 5 bytes.
16888 It is common to have many calls in a row. */
16889 if (GET_CODE (insn) == CALL_INSN
16890 && symbolic_reference_mentioned_p (PATTERN (insn))
16891 && !SIBLING_CALL_P (insn))
16892 return 5;
16893 if (get_attr_length (insn) <= 1)
16894 return 1;
16896 /* For normal instructions we may rely on the sizes of addresses
16897 and the presence of a symbol to require 4 bytes of encoding.
16898 This is not the case for jumps, where references are PC-relative. */
16899 if (GET_CODE (insn) != JUMP_INSN)
16901 l = get_attr_length_address (insn);
16902 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
16903 l = 4;
16905 if (l)
16906 return 1+l;
16907 else
16908 return 2;
16911 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
16912 16-byte window. */
16914 static void
16915 ix86_avoid_jump_misspredicts (void)
16917 rtx insn, start = get_insns ();
16918 int nbytes = 0, njumps = 0;
16919 int isjump = 0;
16921 /* Look for all minimal intervals of instructions containing 4 jumps.
16922 The intervals are bounded by START and INSN. NBYTES is the total
16923 size of the instructions in the interval, including INSN and not including
16924 START. When NBYTES is smaller than 16 bytes, it is possible
16925 that the ends of START and INSN fall within the same 16-byte page.
16927 The smallest offset in the page at which INSN can start is the case where
16928 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
16929 We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN).
16930 */
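/* For example (hypothetical numbers): if the interval ending at INSN holds
   NBYTES = 12 bytes and INSN itself is 2 bytes, the code below emits an
   alignment that may skip up to 15 - 12 + 2 = 5 bytes before INSN, pushing
   the fourth jump out of the current 16-byte window.  */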
16931 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
16934 nbytes += min_insn_size (insn);
16935 if (dump_file)
16936 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
16937 INSN_UID (insn), min_insn_size (insn));
16938 if ((GET_CODE (insn) == JUMP_INSN
16939 && GET_CODE (PATTERN (insn)) != ADDR_VEC
16940 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
16941 || GET_CODE (insn) == CALL_INSN)
16942 njumps++;
16943 else
16944 continue;
16946 while (njumps > 3)
16948 start = NEXT_INSN (start);
16949 if ((GET_CODE (start) == JUMP_INSN
16950 && GET_CODE (PATTERN (start)) != ADDR_VEC
16951 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
16952 || GET_CODE (start) == CALL_INSN)
16953 njumps--, isjump = 1;
16954 else
16955 isjump = 0;
16956 nbytes -= min_insn_size (start);
16958 gcc_assert (njumps >= 0);
16959 if (dump_file)
16960 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
16961 INSN_UID (start), INSN_UID (insn), nbytes);
16963 if (njumps == 3 && isjump && nbytes < 16)
16965 int padsize = 15 - nbytes + min_insn_size (insn);
16967 if (dump_file)
16968 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
16969 INSN_UID (insn), padsize);
16970 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
16975 /* AMD Athlon works faster
16976 when RET is not the destination of a conditional jump and is not directly
16977 preceded by another jump instruction. We avoid the penalty by inserting a
16978 NOP just before the RET instruction in such cases. */
16979 static void
16980 ix86_pad_returns (void)
16982 edge e;
16983 edge_iterator ei;
16985 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
16987 basic_block bb = e->src;
16988 rtx ret = BB_END (bb);
16989 rtx prev;
16990 bool replace = false;
16992 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
16993 || !maybe_hot_bb_p (bb))
16994 continue;
16995 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
16996 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
16997 break;
16998 if (prev && GET_CODE (prev) == CODE_LABEL)
17000 edge e;
17001 edge_iterator ei;
17003 FOR_EACH_EDGE (e, ei, bb->preds)
17004 if (EDGE_FREQUENCY (e) && e->src->index >= 0
17005 && !(e->flags & EDGE_FALLTHRU))
17006 replace = true;
17008 if (!replace)
17010 prev = prev_active_insn (ret);
17011 if (prev
17012 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
17013 || GET_CODE (prev) == CALL_INSN))
17014 replace = true;
17015 /* Empty functions get a branch mispredict even when the jump destination
17016 is not visible to us. */
17017 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
17018 replace = true;
17020 if (replace)
17022 emit_insn_before (gen_return_internal_long (), ret);
17023 delete_insn (ret);
17028 /* Implement machine-specific optimizations. We implement padding of returns
17029 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
17030 static void
17031 ix86_reorg (void)
17033 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
17034 ix86_pad_returns ();
17035 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
17036 ix86_avoid_jump_misspredicts ();
17039 /* Return nonzero when a QImode register that must be represented via a REX
17040 prefix is used. */
17041 bool
17042 x86_extended_QIreg_mentioned_p (rtx insn)
17044 int i;
17045 extract_insn_cached (insn);
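/* In GCC's register numbering, hard registers 0-3 are %ax, %dx, %cx and %bx,
   whose low bytes are addressable without REX; QImode access to any
   higher-numbered register needs a REX prefix in 64-bit mode.  */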
17046 for (i = 0; i < recog_data.n_operands; i++)
17047 if (REG_P (recog_data.operand[i])
17048 && REGNO (recog_data.operand[i]) >= 4)
17049 return true;
17050 return false;
17053 /* Return nonzero when P points to a register encoded via a REX prefix.
17054 Called via for_each_rtx. */
17055 static int
17056 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
17058 unsigned int regno;
17059 if (!REG_P (*p))
17060 return 0;
17061 regno = REGNO (*p);
17062 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
17065 /* Return true when INSN mentions a register that must be encoded using a
17066 REX prefix. */
17067 bool
17068 x86_extended_reg_mentioned_p (rtx insn)
17070 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
17073 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
17074 optabs would emit if we didn't have TFmode patterns. */
17076 void
17077 x86_emit_floatuns (rtx operands[2])
17079 rtx neglab, donelab, i0, i1, f0, in, out;
17080 enum machine_mode mode, inmode;
17082 inmode = GET_MODE (operands[1]);
17083 gcc_assert (inmode == SImode || inmode == DImode);
17085 out = operands[0];
17086 in = force_reg (inmode, operands[1]);
17087 mode = GET_MODE (out);
17088 neglab = gen_label_rtx ();
17089 donelab = gen_label_rtx ();
17090 i1 = gen_reg_rtx (Pmode);
17091 f0 = gen_reg_rtx (mode);
17093 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
17095 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
17096 emit_jump_insn (gen_jump (donelab));
17097 emit_barrier ();
17099 emit_label (neglab);
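/* IN has its high bit set, so a signed conversion would come out negative.
   Halve the value, OR the lost low bit back in so rounding is unaffected,
   convert, and then double the result.  */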
17101 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
17102 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
17103 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
17104 expand_float (f0, i0, 0);
17105 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
17107 emit_label (donelab);
17110 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
17111 with all elements equal to VAR. Return true if successful. */
17113 static bool
17114 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
17115 rtx target, rtx val)
17117 enum machine_mode smode, wsmode, wvmode;
17118 rtx x;
17120 switch (mode)
17122 case V2SImode:
17123 case V2SFmode:
17124 if (!mmx_ok && !TARGET_SSE)
17125 return false;
17126 /* FALLTHRU */
17128 case V2DFmode:
17129 case V2DImode:
17130 case V4SFmode:
17131 case V4SImode:
17132 val = force_reg (GET_MODE_INNER (mode), val);
17133 x = gen_rtx_VEC_DUPLICATE (mode, val);
17134 emit_insn (gen_rtx_SET (VOIDmode, target, x));
17135 return true;
17137 case V4HImode:
17138 if (!mmx_ok)
17139 return false;
17140 if (TARGET_SSE || TARGET_3DNOW_A)
17142 val = gen_lowpart (SImode, val);
17143 x = gen_rtx_TRUNCATE (HImode, val);
17144 x = gen_rtx_VEC_DUPLICATE (mode, x);
17145 emit_insn (gen_rtx_SET (VOIDmode, target, x));
17146 return true;
17148 else
17150 smode = HImode;
17151 wsmode = SImode;
17152 wvmode = V2SImode;
17153 goto widen;
17156 case V8QImode:
17157 if (!mmx_ok)
17158 return false;
17159 smode = QImode;
17160 wsmode = HImode;
17161 wvmode = V4HImode;
17162 goto widen;
17163 case V8HImode:
17164 smode = HImode;
17165 wsmode = SImode;
17166 wvmode = V4SImode;
17167 goto widen;
17168 case V16QImode:
17169 smode = QImode;
17170 wsmode = HImode;
17171 wvmode = V8HImode;
17172 goto widen;
17173 widen:
17174 /* Replicate the value once into the next wider mode and recurse. */
17175 val = convert_modes (wsmode, smode, val, true);
17176 x = expand_simple_binop (wsmode, ASHIFT, val,
17177 GEN_INT (GET_MODE_BITSIZE (smode)),
17178 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17179 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
17181 x = gen_reg_rtx (wvmode);
17182 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
17183 gcc_unreachable ();
17184 emit_move_insn (target, gen_lowpart (mode, x));
17185 return true;
17187 default:
17188 return false;
17192 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
17193 whose low element is VAR, and other elements are zero. Return true
17194 if successful. */
17196 static bool
17197 ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode,
17198 rtx target, rtx var)
17200 enum machine_mode vsimode;
17201 rtx x;
17203 switch (mode)
17205 case V2SFmode:
17206 case V2SImode:
17207 if (!mmx_ok && !TARGET_SSE)
17208 return false;
17209 /* FALLTHRU */
17211 case V2DFmode:
17212 case V2DImode:
17213 var = force_reg (GET_MODE_INNER (mode), var);
17214 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
17215 emit_insn (gen_rtx_SET (VOIDmode, target, x));
17216 return true;
17218 case V4SFmode:
17219 case V4SImode:
17220 var = force_reg (GET_MODE_INNER (mode), var);
17221 x = gen_rtx_VEC_DUPLICATE (mode, var);
17222 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
17223 emit_insn (gen_rtx_SET (VOIDmode, target, x));
17224 return true;
17226 case V8HImode:
17227 case V16QImode:
17228 vsimode = V4SImode;
17229 goto widen;
17230 case V4HImode:
17231 case V8QImode:
17232 if (!mmx_ok)
17233 return false;
17234 vsimode = V2SImode;
17235 goto widen;
17236 widen:
17237 /* Zero extend the variable element to SImode and recurse. */
17238 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
17240 x = gen_reg_rtx (vsimode);
17241 if (!ix86_expand_vector_init_low_nonzero (mmx_ok, vsimode, x, var))
17242 gcc_unreachable ();
17244 emit_move_insn (target, gen_lowpart (mode, x));
17245 return true;
17247 default:
17248 return false;
17252 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
17253 consisting of the values in VALS. It is known that all elements
17254 except ONE_VAR are constants. Return true if successful. */
17256 static bool
17257 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
17258 rtx target, rtx vals, int one_var)
17260 rtx var = XVECEXP (vals, 0, one_var);
17261 enum machine_mode wmode;
17262 rtx const_vec, x;
17264 const_vec = copy_rtx (vals);
17265 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
17266 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
17268 switch (mode)
17270 case V2DFmode:
17271 case V2DImode:
17272 case V2SFmode:
17273 case V2SImode:
17274 /* For the two element vectors, it's just as easy to use
17275 the general case. */
17276 return false;
17278 case V4SFmode:
17279 case V4SImode:
17280 case V8HImode:
17281 case V4HImode:
17282 break;
17284 case V16QImode:
17285 wmode = V8HImode;
17286 goto widen;
17287 case V8QImode:
17288 wmode = V4HImode;
17289 goto widen;
17290 widen:
17291 /* There's no way to set one QImode entry easily. Combine
17292 the variable value with its adjacent constant value, and
17293 promote to an HImode set. */
17294 x = XVECEXP (vals, 0, one_var ^ 1);
17295 if (one_var & 1)
17297 var = convert_modes (HImode, QImode, var, true);
17298 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
17299 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17300 x = GEN_INT (INTVAL (x) & 0xff);
17302 else
17304 var = convert_modes (HImode, QImode, var, true);
17305 x = gen_int_mode (INTVAL (x) << 8, HImode);
17307 if (x != const0_rtx)
17308 var = expand_simple_binop (HImode, IOR, var, x, var,
17309 1, OPTAB_LIB_WIDEN);
17311 x = gen_reg_rtx (wmode);
17312 emit_move_insn (x, gen_lowpart (wmode, const_vec));
17313 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
17315 emit_move_insn (target, gen_lowpart (mode, x));
17316 return true;
17318 default:
17319 return false;
17322 emit_move_insn (target, const_vec);
17323 ix86_expand_vector_set (mmx_ok, target, var, one_var);
17324 return true;
17327 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
17328 all values variable, and none identical. */
17330 static void
17331 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
17332 rtx target, rtx vals)
17334 enum machine_mode half_mode = GET_MODE_INNER (mode);
17335 rtx op0 = NULL, op1 = NULL;
17336 bool use_vec_concat = false;
17338 switch (mode)
17340 case V2SFmode:
17341 case V2SImode:
17342 if (!mmx_ok && !TARGET_SSE)
17343 break;
17344 /* FALLTHRU */
17346 case V2DFmode:
17347 case V2DImode:
17348 /* For the two element vectors, we always implement VEC_CONCAT. */
17349 op0 = XVECEXP (vals, 0, 0);
17350 op1 = XVECEXP (vals, 0, 1);
17351 use_vec_concat = true;
17352 break;
17354 case V4SFmode:
17355 half_mode = V2SFmode;
17356 goto half;
17357 case V4SImode:
17358 half_mode = V2SImode;
17359 goto half;
17360 half:
17362 rtvec v;
17364 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
17365 Recurse to load the two halves. */
17367 op0 = gen_reg_rtx (half_mode);
17368 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
17369 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
17371 op1 = gen_reg_rtx (half_mode);
17372 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
17373 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
17375 use_vec_concat = true;
17377 break;
17379 case V8HImode:
17380 case V16QImode:
17381 case V4HImode:
17382 case V8QImode:
17383 break;
17385 default:
17386 gcc_unreachable ();
17389 if (use_vec_concat)
17391 if (!register_operand (op0, half_mode))
17392 op0 = force_reg (half_mode, op0);
17393 if (!register_operand (op1, half_mode))
17394 op1 = force_reg (half_mode, op1);
17396 emit_insn (gen_rtx_SET (VOIDmode, target,
17397 gen_rtx_VEC_CONCAT (mode, op0, op1)));
17399 else
17401 int i, j, n_elts, n_words, n_elt_per_word;
17402 enum machine_mode inner_mode;
17403 rtx words[4], shift;
17405 inner_mode = GET_MODE_INNER (mode);
17406 n_elts = GET_MODE_NUNITS (mode);
17407 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
17408 n_elt_per_word = n_elts / n_words;
17409 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
17411 for (i = 0; i < n_words; ++i)
17413 rtx word = NULL_RTX;
17415 for (j = 0; j < n_elt_per_word; ++j)
17417 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
17418 elt = convert_modes (word_mode, inner_mode, elt, true);
17420 if (j == 0)
17421 word = elt;
17422 else
17424 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
17425 word, 1, OPTAB_LIB_WIDEN);
17426 word = expand_simple_binop (word_mode, IOR, word, elt,
17427 word, 1, OPTAB_LIB_WIDEN);
17431 words[i] = word;
17434 if (n_words == 1)
17435 emit_move_insn (target, gen_lowpart (mode, words[0]));
17436 else if (n_words == 2)
17438 rtx tmp = gen_reg_rtx (mode);
17439 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
17440 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
17441 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
17442 emit_move_insn (target, tmp);
17444 else if (n_words == 4)
17446 rtx tmp = gen_reg_rtx (V4SImode);
17447 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
17448 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
17449 emit_move_insn (target, gen_lowpart (mode, tmp));
17451 else
17452 gcc_unreachable ();
17456 /* Initialize vector TARGET via VALS. Suppress the use of MMX
17457 instructions unless MMX_OK is true. */
17459 void
17460 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
17462 enum machine_mode mode = GET_MODE (target);
17463 enum machine_mode inner_mode = GET_MODE_INNER (mode);
17464 int n_elts = GET_MODE_NUNITS (mode);
17465 int n_var = 0, one_var = -1;
17466 bool all_same = true, all_const_zero = true;
17467 int i;
17468 rtx x;
17470 for (i = 0; i < n_elts; ++i)
17472 x = XVECEXP (vals, 0, i);
17473 if (!CONSTANT_P (x))
17474 n_var++, one_var = i;
17475 else if (x != CONST0_RTX (inner_mode))
17476 all_const_zero = false;
17477 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
17478 all_same = false;
17481 /* Constants are best loaded from the constant pool. */
17482 if (n_var == 0)
17484 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
17485 return;
17488 /* If all values are identical, broadcast the value. */
17489 if (all_same
17490 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
17491 XVECEXP (vals, 0, 0)))
17492 return;
17494 /* Values where only one field is non-constant are best loaded from
17495 the pool and overwritten via move later. */
17496 if (n_var == 1)
17498 if (all_const_zero && one_var == 0
17499 && ix86_expand_vector_init_low_nonzero (mmx_ok, mode, target,
17500 XVECEXP (vals, 0, 0)))
17501 return;
17503 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
17504 return;
17507 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
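/* Store VAL into element ELT of vector TARGET.  MMX_OK is as for
   ix86_expand_vector_init.  */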
17510 void
17511 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
17513 enum machine_mode mode = GET_MODE (target);
17514 enum machine_mode inner_mode = GET_MODE_INNER (mode);
17515 bool use_vec_merge = false;
17516 rtx tmp;
17518 switch (mode)
17520 case V2SFmode:
17521 case V2SImode:
17522 if (mmx_ok)
17524 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
17525 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
17526 if (elt == 0)
17527 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
17528 else
17529 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
17530 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
17531 return;
17533 break;
17535 case V2DFmode:
17536 case V2DImode:
17538 rtx op0, op1;
17540 /* For the two element vectors, we implement a VEC_CONCAT with
17541 the extraction of the other element. */
17543 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
17544 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
17546 if (elt == 0)
17547 op0 = val, op1 = tmp;
17548 else
17549 op0 = tmp, op1 = val;
17551 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
17552 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
17554 return;
17556 case V4SFmode:
17557 switch (elt)
17559 case 0:
17560 use_vec_merge = true;
17561 break;
17563 case 1:
17564 /* tmp = target = A B C D */
17565 tmp = copy_to_reg (target);
17566 /* target = A A B B */
17567 emit_insn (gen_sse_unpcklps (target, target, target));
17568 /* target = X A B B */
17569 ix86_expand_vector_set (false, target, val, 0);
17570 /* target = A X C D */
17571 emit_insn (gen_sse_shufps_1 (target, target, tmp,
17572 GEN_INT (1), GEN_INT (0),
17573 GEN_INT (2+4), GEN_INT (3+4)));
17574 return;
17576 case 2:
17577 /* tmp = target = A B C D */
17578 tmp = copy_to_reg (target);
17579 /* tmp = X B C D */
17580 ix86_expand_vector_set (false, tmp, val, 0);
17581 /* target = A B X D */
17582 emit_insn (gen_sse_shufps_1 (target, target, tmp,
17583 GEN_INT (0), GEN_INT (1),
17584 GEN_INT (0+4), GEN_INT (3+4)));
17585 return;
17587 case 3:
17588 /* tmp = target = A B C D */
17589 tmp = copy_to_reg (target);
17590 /* tmp = X B C D */
17591 ix86_expand_vector_set (false, tmp, val, 0);
17592 /* target = A B C X */
17593 emit_insn (gen_sse_shufps_1 (target, target, tmp,
17594 GEN_INT (0), GEN_INT (1),
17595 GEN_INT (2+4), GEN_INT (0+4)));
17596 return;
17598 default:
17599 gcc_unreachable ();
17601 break;
17603 case V4SImode:
17604 /* Element 0 handled by vec_merge below. */
17605 if (elt == 0)
17607 use_vec_merge = true;
17608 break;
17611 if (TARGET_SSE2)
17613 /* With SSE2, use integer shuffles to swap element 0 and ELT,
17614 store into element 0, then shuffle them back. */
17616 rtx order[4];
17618 order[0] = GEN_INT (elt);
17619 order[1] = const1_rtx;
17620 order[2] = const2_rtx;
17621 order[3] = GEN_INT (3);
17622 order[elt] = const0_rtx;
17624 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
17625 order[1], order[2], order[3]));
17627 ix86_expand_vector_set (false, target, val, 0);
17629 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
17630 order[1], order[2], order[3]));
17632 else
17634 /* For SSE1, we have to reuse the V4SF code. */
17635 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
17636 gen_lowpart (SFmode, val), elt);
17638 return;
17640 case V8HImode:
17641 use_vec_merge = TARGET_SSE2;
17642 break;
17643 case V4HImode:
17644 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
17645 break;
17647 case V16QImode:
17648 case V8QImode:
17649 default:
17650 break;
17653 if (use_vec_merge)
17655 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
17656 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
17657 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
17659 else
17661 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
17663 emit_move_insn (mem, target);
17665 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
17666 emit_move_insn (tmp, val);
17668 emit_move_insn (target, mem);
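/* Extract element ELT of vector VEC into TARGET.  MMX_OK is as for
   ix86_expand_vector_init.  */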
17672 void
17673 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
17675 enum machine_mode mode = GET_MODE (vec);
17676 enum machine_mode inner_mode = GET_MODE_INNER (mode);
17677 bool use_vec_extr = false;
17678 rtx tmp;
17680 switch (mode)
17682 case V2SImode:
17683 case V2SFmode:
17684 if (!mmx_ok)
17685 break;
17686 /* FALLTHRU */
17688 case V2DFmode:
17689 case V2DImode:
17690 use_vec_extr = true;
17691 break;
17693 case V4SFmode:
17694 switch (elt)
17696 case 0:
17697 tmp = vec;
17698 break;
17700 case 1:
17701 case 3:
17702 tmp = gen_reg_rtx (mode);
17703 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
17704 GEN_INT (elt), GEN_INT (elt),
17705 GEN_INT (elt+4), GEN_INT (elt+4)));
17706 break;
17708 case 2:
17709 tmp = gen_reg_rtx (mode);
17710 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
17711 break;
17713 default:
17714 gcc_unreachable ();
17716 vec = tmp;
17717 use_vec_extr = true;
17718 elt = 0;
17719 break;
17721 case V4SImode:
17722 if (TARGET_SSE2)
17724 switch (elt)
17726 case 0:
17727 tmp = vec;
17728 break;
17730 case 1:
17731 case 3:
17732 tmp = gen_reg_rtx (mode);
17733 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
17734 GEN_INT (elt), GEN_INT (elt),
17735 GEN_INT (elt), GEN_INT (elt)));
17736 break;
17738 case 2:
17739 tmp = gen_reg_rtx (mode);
17740 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
17741 break;
17743 default:
17744 gcc_unreachable ();
17746 vec = tmp;
17747 use_vec_extr = true;
17748 elt = 0;
17750 else
17752 /* For SSE1, we have to reuse the V4SF code. */
17753 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
17754 gen_lowpart (V4SFmode, vec), elt);
17755 return;
17757 break;
17759 case V8HImode:
17760 use_vec_extr = TARGET_SSE2;
17761 break;
17762 case V4HImode:
17763 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
17764 break;
17766 case V16QImode:
17767 case V8QImode:
17768 /* ??? Could extract the appropriate HImode element and shift. */
17769 default:
17770 break;
17773 if (use_vec_extr)
17775 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
17776 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
17778 /* Let the rtl optimizers know about the zero extension performed. */
17779 if (inner_mode == HImode)
17781 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
17782 target = gen_lowpart (SImode, target);
17785 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
17787 else
17789 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
17791 emit_move_insn (mem, vec);
17793 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
17794 emit_move_insn (target, tmp);
17798 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
17799 pattern to reduce; DEST is the destination; IN is the input vector. */
17801 void
17802 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
17804 rtx tmp1, tmp2, tmp3;
17806 tmp1 = gen_reg_rtx (V4SFmode);
17807 tmp2 = gen_reg_rtx (V4SFmode);
17808 tmp3 = gen_reg_rtx (V4SFmode);
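/* Combine the high and low halves of IN, then combine the two surviving
   elements, leaving the scalar reduction result in element 0 of DEST.  */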
17810 emit_insn (gen_sse_movhlps (tmp1, in, in));
17811 emit_insn (fn (tmp2, tmp1, in));
17813 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
17814 GEN_INT (1), GEN_INT (1),
17815 GEN_INT (1+4), GEN_INT (1+4)));
17816 emit_insn (fn (dest, tmp2, tmp3));
17819 /* Implements target hook vector_mode_supported_p. */
17820 static bool
17821 ix86_vector_mode_supported_p (enum machine_mode mode)
17823 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
17824 return true;
17825 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
17826 return true;
17827 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
17828 return true;
17829 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
17830 return true;
17831 return false;
17834 /* Worker function for TARGET_MD_ASM_CLOBBERS.
17836 We do this in the new i386 backend to maintain source compatibility
17837 with the old cc0-based compiler. */
17839 static tree
17840 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
17841 tree inputs ATTRIBUTE_UNUSED,
17842 tree clobbers)
17844 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
17845 clobbers);
17846 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
17847 clobbers);
17848 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
17849 clobbers);
17850 return clobbers;
17853 /* Return true if this goes in large data/bss. */
17855 static bool
17856 ix86_in_large_data_p (tree exp)
17858 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
17859 return false;
17861 /* Functions are never large data. */
17862 if (TREE_CODE (exp) == FUNCTION_DECL)
17863 return false;
17865 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
17867 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
17868 if (strcmp (section, ".ldata") == 0
17869 || strcmp (section, ".lbss") == 0)
17870 return true;
17871 return false;
17873 else
17875 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
17877 /* If this is an incomplete type with size 0, then we can't put it
17878 in data because it might be too big when completed. */
17879 if (!size || size > ix86_section_threshold)
17880 return true;
17883 return false;
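/* Encode section information for DECL.  Beyond the default handling, flag
   static or external variables placed in the large data/bss sections with
   SYMBOL_FLAG_FAR_ADDR.  */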
17885 static void
17886 ix86_encode_section_info (tree decl, rtx rtl, int first)
17888 default_encode_section_info (decl, rtl, first);
17890 if (TREE_CODE (decl) == VAR_DECL
17891 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
17892 && ix86_in_large_data_p (decl))
17893 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
17896 /* Worker function for REVERSE_CONDITION. */
17898 enum rtx_code
17899 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
17901 return (mode != CCFPmode && mode != CCFPUmode
17902 ? reverse_condition (code)
17903 : reverse_condition_maybe_unordered (code));
17906 /* Output code to perform an x87 FP register move, from OPERANDS[1]
17907 to OPERANDS[0]. */
17909 const char *
17910 output_387_reg_move (rtx insn, rtx *operands)
17912 if (REG_P (operands[1])
17913 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
17915 if (REGNO (operands[0]) == FIRST_STACK_REG
17916 && TARGET_USE_FFREEP)
17917 return "ffreep\t%y0";
17918 return "fstp\t%y0";
17920 if (STACK_TOP_P (operands[0]))
17921 return "fld%z1\t%y1";
17922 return "fst\t%y0";
17925 /* Output code to perform a conditional jump to LABEL, if the C2 flag in the
17926 FP status register is set. */
17928 void
17929 ix86_emit_fp_unordered_jump (rtx label)
17931 rtx reg = gen_reg_rtx (HImode);
17932 rtx temp;
17934 emit_insn (gen_x86_fnstsw_1 (reg));
17936 if (TARGET_USE_SAHF)
17938 emit_insn (gen_x86_sahf_1 (reg));
17940 temp = gen_rtx_REG (CCmode, FLAGS_REG);
17941 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
17943 else
17945 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
17947 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
17948 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
17951 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
17952 gen_rtx_LABEL_REF (VOIDmode, label),
17953 pc_rtx);
17954 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
17955 emit_jump_insn (temp);
17958 /* Output code to perform a log1p XFmode calculation. */
17960 void ix86_emit_i387_log1p (rtx op0, rtx op1)
17962 rtx label1 = gen_label_rtx ();
17963 rtx label2 = gen_label_rtx ();
17965 rtx tmp = gen_reg_rtx (XFmode);
17966 rtx tmp2 = gen_reg_rtx (XFmode);
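/* fyl2xp1 is only accurate for |op1| < 1 - sqrt(2)/2 (about 0.2929); for
   larger arguments branch to the fyl2x path below, which computes the
   logarithm from 1 + op1 directly.  */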
17968 emit_insn (gen_absxf2 (tmp, op1));
17969 emit_insn (gen_cmpxf (tmp,
17970 CONST_DOUBLE_FROM_REAL_VALUE (
17971 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
17972 XFmode)));
17973 emit_jump_insn (gen_bge (label1));
17975 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
17976 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
17977 emit_jump (label2);
17979 emit_label (label1);
17980 emit_move_insn (tmp, CONST1_RTX (XFmode));
17981 emit_insn (gen_addxf3 (tmp, op1, tmp));
17982 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
17983 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
17985 emit_label (label2);
17988 /* Solaris named-section hook. Parameters are as for
17989 named_section_real. */
17991 static void
17992 i386_solaris_elf_named_section (const char *name, unsigned int flags,
17993 tree decl)
17995 /* With Binutils 2.15, the "@unwind" marker must be specified on
17996 every occurrence of the ".eh_frame" section, not just the first
17997 one. */
17998 if (TARGET_64BIT
17999 && strcmp (name, ".eh_frame") == 0)
18001 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
18002 flags & SECTION_WRITE ? "aw" : "a");
18003 return;
18005 default_elf_asm_named_section (name, flags, decl);
18008 /* Return the mangling of TYPE if it is an extended fundamental type. */
18010 static const char *
18011 ix86_mangle_fundamental_type (tree type)
18013 switch (TYPE_MODE (type))
18015 case TFmode:
18016 /* __float128 is "g". */
18017 return "g";
18018 case XFmode:
18019 /* "long double" or __float80 is "e". */
18020 return "e";
18021 default:
18022 return NULL;
18026 /* For 32-bit code we can save PIC register setup by using
18027 the __stack_chk_fail_local hidden function instead of calling
18028 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
18029 register, so it is better to call __stack_chk_fail directly. */
18031 static tree
18032 ix86_stack_protect_fail (void)
18034 return TARGET_64BIT
18035 ? default_external_stack_protect_fail ()
18036 : default_hidden_stack_protect_fail ();
18039 /* Select a format to encode pointers in exception handling data. CODE
18040 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
18041 true if the symbol may be affected by dynamic relocations.
18043 ??? All x86 object file formats are capable of representing this.
18044 After all, the relocation needed is the same as for the call insn.
18045 Whether or not a particular assembler allows us to enter such, I
18046 guess we'll have to see. */
18047 int
18048 asm_preferred_eh_data_format (int code, int global)
18050 if (flag_pic)
18052 int type = DW_EH_PE_sdata8;
18053 if (!TARGET_64BIT
18054 || ix86_cmodel == CM_SMALL_PIC
18055 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
18056 type = DW_EH_PE_sdata4;
18057 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
18059 if (ix86_cmodel == CM_SMALL
18060 || (ix86_cmodel == CM_MEDIUM && code))
18061 return DW_EH_PE_udata4;
18062 return DW_EH_PE_absptr;
18065 #include "gt-i386.h"