gcc/config/i386/i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
52 #ifndef CHECK_STACK_LIMIT
53 #define CHECK_STACK_LIMIT (-1)
54 #endif
56 /* Return index of given mode in mult and division cost tables. */
57 #define MODE_INDEX(mode) \
58 ((mode) == QImode ? 0 \
59 : (mode) == HImode ? 1 \
60 : (mode) == SImode ? 2 \
61 : (mode) == DImode ? 3 \
62 : 4)
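/* Illustrative sketch, not in the original i386.c: MODE_INDEX is what
   selects an entry from the five-element cost arrays below.  Assuming
   the field names mult_init and divide from struct processor_costs in
   i386.h, the cost of starting a DImode multiply or divide on the
   selected processor would be looked up roughly as

     int mul_cost = ix86_cost->mult_init[MODE_INDEX (DImode)];
     int div_cost = ix86_cost->divide[MODE_INDEX (DImode)];

   with index 4 acting as the catch-all for TImode and any other mode.  */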
64 /* Processor costs (relative to an add) */
65 static const
 66 struct processor_costs size_cost = { /* costs for tuning for size */
67 2, /* cost of an add instruction */
68 3, /* cost of a lea instruction */
69 2, /* variable shift costs */
70 3, /* constant shift costs */
71 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
72 0, /* cost of multiply per each bit set */
73 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
74 3, /* cost of movsx */
75 3, /* cost of movzx */
76 0, /* "large" insn */
77 2, /* MOVE_RATIO */
78 2, /* cost for loading QImode using movzbl */
79 {2, 2, 2}, /* cost of loading integer registers
80 in QImode, HImode and SImode.
81 Relative to reg-reg move (2). */
82 {2, 2, 2}, /* cost of storing integer registers */
83 2, /* cost of reg,reg fld/fst */
84 {2, 2, 2}, /* cost of loading fp registers
85 in SFmode, DFmode and XFmode */
 86 {2, 2, 2}, /* cost of storing fp registers
 in SFmode, DFmode and XFmode */
87 3, /* cost of moving MMX register */
88 {3, 3}, /* cost of loading MMX registers
89 in SImode and DImode */
90 {3, 3}, /* cost of storing MMX registers
91 in SImode and DImode */
92 3, /* cost of moving SSE register */
93 {3, 3, 3}, /* cost of loading SSE registers
94 in SImode, DImode and TImode */
95 {3, 3, 3}, /* cost of storing SSE registers
96 in SImode, DImode and TImode */
97 3, /* MMX or SSE register to integer */
98 0, /* size of prefetch block */
99 0, /* number of parallel prefetches */
100 1, /* Branch cost */
101 2, /* cost of FADD and FSUB insns. */
102 2, /* cost of FMUL instruction. */
103 2, /* cost of FDIV instruction. */
104 2, /* cost of FABS instruction. */
105 2, /* cost of FCHS instruction. */
106 2, /* cost of FSQRT instruction. */
109 /* Processor costs (relative to an add) */
110 static const
111 struct processor_costs i386_cost = { /* 386 specific costs */
112 1, /* cost of an add instruction */
113 1, /* cost of a lea instruction */
114 3, /* variable shift costs */
115 2, /* constant shift costs */
116 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
117 1, /* cost of multiply per each bit set */
118 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
119 3, /* cost of movsx */
120 2, /* cost of movzx */
121 15, /* "large" insn */
122 3, /* MOVE_RATIO */
123 4, /* cost for loading QImode using movzbl */
124 {2, 4, 2}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {8, 8, 8}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
 131 {8, 8, 8}, /* cost of storing fp registers
 in SFmode, DFmode and XFmode */
132 2, /* cost of moving MMX register */
133 {4, 8}, /* cost of loading MMX registers
134 in SImode and DImode */
135 {4, 8}, /* cost of storing MMX registers
136 in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
139 in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
141 in SImode, DImode and TImode */
142 3, /* MMX or SSE register to integer */
143 0, /* size of prefetch block */
144 0, /* number of parallel prefetches */
145 1, /* Branch cost */
146 23, /* cost of FADD and FSUB insns. */
147 27, /* cost of FMUL instruction. */
148 88, /* cost of FDIV instruction. */
149 22, /* cost of FABS instruction. */
150 24, /* cost of FCHS instruction. */
151 122, /* cost of FSQRT instruction. */
154 static const
155 struct processor_costs i486_cost = { /* 486 specific costs */
156 1, /* cost of an add instruction */
157 1, /* cost of a lea instruction */
158 3, /* variable shift costs */
159 2, /* constant shift costs */
160 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
161 1, /* cost of multiply per each bit set */
162 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
163 3, /* cost of movsx */
164 2, /* cost of movzx */
165 15, /* "large" insn */
166 3, /* MOVE_RATIO */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
 175 {8, 8, 8}, /* cost of storing fp registers
 in SFmode, DFmode and XFmode */
176 2, /* cost of moving MMX register */
177 {4, 8}, /* cost of loading MMX registers
178 in SImode and DImode */
179 {4, 8}, /* cost of storing MMX registers
180 in SImode and DImode */
181 2, /* cost of moving SSE register */
182 {4, 8, 16}, /* cost of loading SSE registers
183 in SImode, DImode and TImode */
184 {4, 8, 16}, /* cost of storing SSE registers
185 in SImode, DImode and TImode */
186 3, /* MMX or SSE register to integer */
187 0, /* size of prefetch block */
188 0, /* number of parallel prefetches */
189 1, /* Branch cost */
190 8, /* cost of FADD and FSUB insns. */
191 16, /* cost of FMUL instruction. */
192 73, /* cost of FDIV instruction. */
193 3, /* cost of FABS instruction. */
194 3, /* cost of FCHS instruction. */
195 83, /* cost of FSQRT instruction. */
198 static const
199 struct processor_costs pentium_cost = {
200 1, /* cost of an add instruction */
201 1, /* cost of a lea instruction */
202 4, /* variable shift costs */
203 1, /* constant shift costs */
204 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
205 0, /* cost of multiply per each bit set */
206 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
207 3, /* cost of movsx */
208 2, /* cost of movzx */
209 8, /* "large" insn */
210 6, /* MOVE_RATIO */
211 6, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {2, 2, 6}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
 219 {4, 4, 6}, /* cost of storing fp registers
 in SFmode, DFmode and XFmode */
220 8, /* cost of moving MMX register */
221 {8, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {8, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
233 2, /* Branch cost */
234 3, /* cost of FADD and FSUB insns. */
235 3, /* cost of FMUL instruction. */
236 39, /* cost of FDIV instruction. */
237 1, /* cost of FABS instruction. */
238 1, /* cost of FCHS instruction. */
239 70, /* cost of FSQRT instruction. */
242 static const
243 struct processor_costs pentiumpro_cost = {
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
246 1, /* variable shift costs */
247 1, /* constant shift costs */
248 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
249 0, /* cost of multiply per each bit set */
250 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
251 1, /* cost of movsx */
252 1, /* cost of movzx */
253 8, /* "large" insn */
254 6, /* MOVE_RATIO */
255 2, /* cost for loading QImode using movzbl */
256 {4, 4, 4}, /* cost of loading integer registers
257 in QImode, HImode and SImode.
258 Relative to reg-reg move (2). */
259 {2, 2, 2}, /* cost of storing integer registers */
260 2, /* cost of reg,reg fld/fst */
261 {2, 2, 6}, /* cost of loading fp registers
262 in SFmode, DFmode and XFmode */
 263 {4, 4, 6}, /* cost of storing fp registers
 in SFmode, DFmode and XFmode */
264 2, /* cost of moving MMX register */
265 {2, 2}, /* cost of loading MMX registers
266 in SImode and DImode */
267 {2, 2}, /* cost of storing MMX registers
268 in SImode and DImode */
269 2, /* cost of moving SSE register */
270 {2, 2, 8}, /* cost of loading SSE registers
271 in SImode, DImode and TImode */
272 {2, 2, 8}, /* cost of storing SSE registers
273 in SImode, DImode and TImode */
274 3, /* MMX or SSE register to integer */
275 32, /* size of prefetch block */
276 6, /* number of parallel prefetches */
277 2, /* Branch cost */
278 3, /* cost of FADD and FSUB insns. */
279 5, /* cost of FMUL instruction. */
280 56, /* cost of FDIV instruction. */
281 2, /* cost of FABS instruction. */
282 2, /* cost of FCHS instruction. */
283 56, /* cost of FSQRT instruction. */
286 static const
287 struct processor_costs k6_cost = {
288 1, /* cost of an add instruction */
289 2, /* cost of a lea instruction */
290 1, /* variable shift costs */
291 1, /* constant shift costs */
292 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
293 0, /* cost of multiply per each bit set */
294 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
295 2, /* cost of movsx */
296 2, /* cost of movzx */
297 8, /* "large" insn */
298 4, /* MOVE_RATIO */
299 3, /* cost for loading QImode using movzbl */
300 {4, 5, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 3, 2}, /* cost of storing integer registers */
304 4, /* cost of reg,reg fld/fst */
305 {6, 6, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
 307 {4, 4, 4}, /* cost of storing fp registers
 in SFmode, DFmode and XFmode */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 6, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 1, /* number of parallel prefetches */
321 1, /* Branch cost */
322 2, /* cost of FADD and FSUB insns. */
323 2, /* cost of FMUL instruction. */
324 56, /* cost of FDIV instruction. */
325 2, /* cost of FABS instruction. */
326 2, /* cost of FCHS instruction. */
327 56, /* cost of FSQRT instruction. */
330 static const
331 struct processor_costs athlon_cost = {
332 1, /* cost of an add instruction */
333 2, /* cost of a lea instruction */
334 1, /* variable shift costs */
335 1, /* constant shift costs */
336 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
337 0, /* cost of multiply per each bit set */
338 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
339 1, /* cost of movsx */
340 1, /* cost of movzx */
341 8, /* "large" insn */
342 9, /* MOVE_RATIO */
343 4, /* cost for loading QImode using movzbl */
344 {3, 4, 3}, /* cost of loading integer registers
345 in QImode, HImode and SImode.
346 Relative to reg-reg move (2). */
347 {3, 4, 3}, /* cost of storing integer registers */
348 4, /* cost of reg,reg fld/fst */
349 {4, 4, 12}, /* cost of loading fp registers
350 in SFmode, DFmode and XFmode */
 351 {6, 6, 8}, /* cost of storing fp registers
 in SFmode, DFmode and XFmode */
352 2, /* cost of moving MMX register */
353 {4, 4}, /* cost of loading MMX registers
354 in SImode and DImode */
355 {4, 4}, /* cost of storing MMX registers
356 in SImode and DImode */
357 2, /* cost of moving SSE register */
358 {4, 4, 6}, /* cost of loading SSE registers
359 in SImode, DImode and TImode */
360 {4, 4, 5}, /* cost of storing SSE registers
361 in SImode, DImode and TImode */
362 5, /* MMX or SSE register to integer */
363 64, /* size of prefetch block */
364 6, /* number of parallel prefetches */
365 5, /* Branch cost */
366 4, /* cost of FADD and FSUB insns. */
367 4, /* cost of FMUL instruction. */
368 24, /* cost of FDIV instruction. */
369 2, /* cost of FABS instruction. */
370 2, /* cost of FCHS instruction. */
371 35, /* cost of FSQRT instruction. */
374 static const
375 struct processor_costs k8_cost = {
376 1, /* cost of an add instruction */
377 2, /* cost of a lea instruction */
378 1, /* variable shift costs */
379 1, /* constant shift costs */
380 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
381 0, /* cost of multiply per each bit set */
382 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
383 1, /* cost of movsx */
384 1, /* cost of movzx */
385 8, /* "large" insn */
386 9, /* MOVE_RATIO */
387 4, /* cost for loading QImode using movzbl */
388 {3, 4, 3}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {3, 4, 3}, /* cost of storing integer registers */
392 4, /* cost of reg,reg fld/fst */
393 {4, 4, 12}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
 395 {6, 6, 8}, /* cost of storing fp registers
 in SFmode, DFmode and XFmode */
396 2, /* cost of moving MMX register */
397 {3, 3}, /* cost of loading MMX registers
398 in SImode and DImode */
399 {4, 4}, /* cost of storing MMX registers
400 in SImode and DImode */
401 2, /* cost of moving SSE register */
402 {4, 3, 6}, /* cost of loading SSE registers
403 in SImode, DImode and TImode */
404 {4, 4, 5}, /* cost of storing SSE registers
405 in SImode, DImode and TImode */
406 5, /* MMX or SSE register to integer */
407 64, /* size of prefetch block */
408 6, /* number of parallel prefetches */
409 5, /* Branch cost */
410 4, /* cost of FADD and FSUB insns. */
411 4, /* cost of FMUL instruction. */
412 19, /* cost of FDIV instruction. */
413 2, /* cost of FABS instruction. */
414 2, /* cost of FCHS instruction. */
415 35, /* cost of FSQRT instruction. */
418 static const
419 struct processor_costs pentium4_cost = {
420 1, /* cost of an add instruction */
421 3, /* cost of a lea instruction */
422 4, /* variable shift costs */
423 4, /* constant shift costs */
424 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
425 0, /* cost of multiply per each bit set */
426 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
427 1, /* cost of movsx */
428 1, /* cost of movzx */
429 16, /* "large" insn */
430 6, /* MOVE_RATIO */
431 2, /* cost for loading QImode using movzbl */
432 {4, 5, 4}, /* cost of loading integer registers
433 in QImode, HImode and SImode.
434 Relative to reg-reg move (2). */
435 {2, 3, 2}, /* cost of storing integer registers */
436 2, /* cost of reg,reg fld/fst */
437 {2, 2, 6}, /* cost of loading fp registers
438 in SFmode, DFmode and XFmode */
 439 {4, 4, 6}, /* cost of storing fp registers
 in SFmode, DFmode and XFmode */
440 2, /* cost of moving MMX register */
441 {2, 2}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {2, 2}, /* cost of storing MMX registers
444 in SImode and DImode */
445 12, /* cost of moving SSE register */
446 {12, 12, 12}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {2, 2, 8}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 10, /* MMX or SSE register to integer */
451 64, /* size of prefetch block */
452 6, /* number of parallel prefetches */
453 2, /* Branch cost */
454 5, /* cost of FADD and FSUB insns. */
455 7, /* cost of FMUL instruction. */
456 43, /* cost of FDIV instruction. */
457 2, /* cost of FABS instruction. */
458 2, /* cost of FCHS instruction. */
459 43, /* cost of FSQRT instruction. */
462 static const
463 struct processor_costs nocona_cost = {
464 1, /* cost of an add instruction */
465 1, /* cost of a lea instruction */
466 1, /* variable shift costs */
467 1, /* constant shift costs */
468 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
469 0, /* cost of multiply per each bit set */
470 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
471 1, /* cost of movsx */
472 1, /* cost of movzx */
473 16, /* "large" insn */
474 9, /* MOVE_RATIO */
475 4, /* cost for loading QImode using movzbl */
476 {4, 4, 4}, /* cost of loading integer registers
477 in QImode, HImode and SImode.
478 Relative to reg-reg move (2). */
479 {4, 4, 4}, /* cost of storing integer registers */
480 3, /* cost of reg,reg fld/fst */
481 {12, 12, 12}, /* cost of loading fp registers
482 in SFmode, DFmode and XFmode */
 483 {4, 4, 4}, /* cost of storing fp registers
 in SFmode, DFmode and XFmode */
484 6, /* cost of moving MMX register */
485 {12, 12}, /* cost of loading MMX registers
486 in SImode and DImode */
487 {12, 12}, /* cost of storing MMX registers
488 in SImode and DImode */
489 6, /* cost of moving SSE register */
490 {12, 12, 12}, /* cost of loading SSE registers
491 in SImode, DImode and TImode */
492 {12, 12, 12}, /* cost of storing SSE registers
493 in SImode, DImode and TImode */
494 8, /* MMX or SSE register to integer */
495 128, /* size of prefetch block */
496 8, /* number of parallel prefetches */
497 1, /* Branch cost */
498 6, /* cost of FADD and FSUB insns. */
499 8, /* cost of FMUL instruction. */
500 40, /* cost of FDIV instruction. */
501 3, /* cost of FABS instruction. */
502 3, /* cost of FCHS instruction. */
503 44, /* cost of FSQRT instruction. */
506 const struct processor_costs *ix86_cost = &pentium_cost;
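/* Illustrative sketch, not in the original i386.c: ix86_cost starts out
   pointing at pentium_cost and is repointed by override_options ()
   further down in this file, essentially

     if (optimize_size)
       ix86_cost = &size_cost;
     else
       ix86_cost = processor_target_table[ix86_tune].cost;

   so every cost query in the backend goes through this one pointer.  */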
508 /* Processor feature/optimization bitmasks. */
509 #define m_386 (1<<PROCESSOR_I386)
510 #define m_486 (1<<PROCESSOR_I486)
511 #define m_PENT (1<<PROCESSOR_PENTIUM)
512 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
513 #define m_K6 (1<<PROCESSOR_K6)
514 #define m_ATHLON (1<<PROCESSOR_ATHLON)
515 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
516 #define m_K8 (1<<PROCESSOR_K8)
517 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
518 #define m_NOCONA (1<<PROCESSOR_NOCONA)
520 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
521 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
522 const int x86_zero_extend_with_and = m_486 | m_PENT;
523 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
524 const int x86_double_with_add = ~m_386;
525 const int x86_use_bit_test = m_386;
526 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
527 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
528 const int x86_fisttp = m_NOCONA;
529 const int x86_3dnow_a = m_ATHLON_K8;
530 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
 531 /* Branch hints were put in P4 based on simulation results, but
 532 after P4 was made, no performance benefit was observed with
 533 branch hints, and they also increase code size. As a result,
 534 icc never generates branch hints. */
535 const int x86_branch_hints = 0;
536 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
537 const int x86_partial_reg_stall = m_PPRO;
538 const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
539 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
540 const int x86_use_mov0 = m_K6;
541 const int x86_use_cltd = ~(m_PENT | m_K6);
542 const int x86_read_modify_write = ~m_PENT;
543 const int x86_read_modify = ~(m_PENT | m_PPRO);
544 const int x86_split_long_moves = m_PPRO;
545 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
546 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
547 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
548 const int x86_qimode_math = ~(0);
549 const int x86_promote_qi_regs = 0;
550 const int x86_himode_math = ~(m_PPRO);
551 const int x86_promote_hi_regs = m_PPRO;
552 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
553 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
554 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
555 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
556 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
557 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
558 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
559 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
560 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
561 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
562 const int x86_decompose_lea = m_PENT4 | m_NOCONA;
563 const int x86_shift1 = ~m_486;
564 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
565 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
 566 /* Set for machines where the type and dependencies are resolved on SSE
 567 register parts instead of whole registers, so we may maintain just the
 568 lower part of scalar values in the proper format and leave the upper
 569 part undefined. */
570 const int x86_sse_split_regs = m_ATHLON_K8;
571 const int x86_sse_typeless_stores = m_ATHLON_K8;
572 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
573 const int x86_use_ffreep = m_ATHLON_K8;
574 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
 576 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
 577 integer data in xmm registers, which results in pretty abysmal code. */
578 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
580 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
581 /* Some CPU cores are not able to predict more than 4 branch instructions in
582 the 16 byte window. */
583 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
584 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT;
585 const int x86_use_bt = m_ATHLON_K8;
586 /* Compare and exchange was added for 80486. */
587 const int x86_cmpxchg = ~m_386;
588 /* Exchange and add was added for 80486. */
589 const int x86_xadd = ~m_386;
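/* Illustrative sketch, not in the original i386.c: each bitmask above is
   tested against the bit of the processor currently being tuned for.
   i386.h is assumed to define TUNEMASK as (1 << ix86_tune) and to wrap
   the tests in TARGET_* macros, roughly

     #define TUNEMASK (1 << ix86_tune)
     #define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)

   and override_options () below uses the same idiom directly, e.g.
   (x86_accumulate_outgoing_args & TUNEMASK).  */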
 591 /* If the average insn count for a single function invocation is
 592 lower than this constant, emit fast (but longer) prologue and
 593 epilogue code. */
594 #define FAST_PROLOGUE_INSN_COUNT 20
 596 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
597 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
598 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
599 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
601 /* Array of the smallest class containing reg number REGNO, indexed by
602 REGNO. Used by REGNO_REG_CLASS in i386.h. */
604 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
606 /* ax, dx, cx, bx */
607 AREG, DREG, CREG, BREG,
608 /* si, di, bp, sp */
609 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
610 /* FP registers */
611 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
612 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
613 /* arg pointer */
614 NON_Q_REGS,
615 /* flags, fpsr, dirflag, frame */
616 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
617 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
618 SSE_REGS, SSE_REGS,
619 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
620 MMX_REGS, MMX_REGS,
621 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
622 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
623 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
624 SSE_REGS, SSE_REGS,
627 /* The "default" register map used in 32bit mode. */
629 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
631 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
632 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
633 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
634 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
635 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
636 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
637 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
640 static int const x86_64_int_parameter_registers[6] =
642 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
643 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
646 static int const x86_64_int_return_registers[4] =
 648 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
651 /* The "default" register map used in 64bit mode. */
652 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
654 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
655 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
656 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
657 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
658 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
659 8,9,10,11,12,13,14,15, /* extended integer registers */
660 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
663 /* Define the register numbers to be used in Dwarf debugging information.
664 The SVR4 reference port C compiler uses the following register numbers
665 in its Dwarf output code:
666 0 for %eax (gcc regno = 0)
667 1 for %ecx (gcc regno = 2)
668 2 for %edx (gcc regno = 1)
669 3 for %ebx (gcc regno = 3)
670 4 for %esp (gcc regno = 7)
671 5 for %ebp (gcc regno = 6)
672 6 for %esi (gcc regno = 4)
673 7 for %edi (gcc regno = 5)
674 The following three DWARF register numbers are never generated by
675 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
676 believes these numbers have these meanings.
677 8 for %eip (no gcc equivalent)
678 9 for %eflags (gcc regno = 17)
679 10 for %trapno (no gcc equivalent)
680 It is not at all clear how we should number the FP stack registers
681 for the x86 architecture. If the version of SDB on x86/svr4 were
682 a bit less brain dead with respect to floating-point then we would
683 have a precedent to follow with respect to DWARF register numbers
684 for x86 FP registers, but the SDB on x86/svr4 is so completely
685 broken with respect to FP registers that it is hardly worth thinking
686 of it as something to strive for compatibility with.
687 The version of x86/svr4 SDB I have at the moment does (partially)
688 seem to believe that DWARF register number 11 is associated with
689 the x86 register %st(0), but that's about all. Higher DWARF
690 register numbers don't seem to be associated with anything in
691 particular, and even for DWARF regno 11, SDB only seems to under-
692 stand that it should say that a variable lives in %st(0) (when
693 asked via an `=' command) if we said it was in DWARF regno 11,
694 but SDB still prints garbage when asked for the value of the
695 variable in question (via a `/' command).
696 (Also note that the labels SDB prints for various FP stack regs
697 when doing an `x' command are all wrong.)
698 Note that these problems generally don't affect the native SVR4
699 C compiler because it doesn't allow the use of -O with -g and
700 because when it is *not* optimizing, it allocates a memory
701 location for each floating-point variable, and the memory
702 location is what gets described in the DWARF AT_location
703 attribute for the variable in question.
704 Regardless of the severe mental illness of the x86/svr4 SDB, we
705 do something sensible here and we use the following DWARF
706 register numbers. Note that these are all stack-top-relative
707 numbers.
708 11 for %st(0) (gcc regno = 8)
709 12 for %st(1) (gcc regno = 9)
710 13 for %st(2) (gcc regno = 10)
711 14 for %st(3) (gcc regno = 11)
712 15 for %st(4) (gcc regno = 12)
713 16 for %st(5) (gcc regno = 13)
714 17 for %st(6) (gcc regno = 14)
715 18 for %st(7) (gcc regno = 15)
717 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
719 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
720 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
721 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
722 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
723 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
724 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
725 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
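/* Illustrative sketch, not in the original i386.c: the maps above are
   indexed by gcc register number and yield the debugger's number, so
   reading svr4_dbx_register_map, gcc regno 1 (%edx) maps to DWARF
   regno 2 and gcc regno 7 (%esp) maps to DWARF regno 4, matching the
   SVR4 numbering listed in the comment.  */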
728 /* Test and compare insns in i386.md store the information needed to
729 generate branch and scc insns here. */
731 rtx ix86_compare_op0 = NULL_RTX;
732 rtx ix86_compare_op1 = NULL_RTX;
733 rtx ix86_compare_emitted = NULL_RTX;
735 /* Size of the register save area. */
736 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
738 /* Define the structure for the machine field in struct function. */
740 struct stack_local_entry GTY(())
742 unsigned short mode;
743 unsigned short n;
744 rtx rtl;
745 struct stack_local_entry *next;
748 /* Structure describing stack frame layout.
749 Stack grows downward:
751 [arguments]
752 <- ARG_POINTER
753 saved pc
755 saved frame pointer if frame_pointer_needed
756 <- HARD_FRAME_POINTER
757 [saved regs]
 759 [padding1]            \
 760                        )
 761 [va_arg registers]    (
 762                        > to_allocate      <- FRAME_POINTER
 763 [frame]               (
 764                        )
 765 [padding2]            /
767 struct ix86_frame
769 int nregs;
770 int padding1;
771 int va_arg_size;
772 HOST_WIDE_INT frame;
773 int padding2;
774 int outgoing_arguments_size;
775 int red_zone_size;
777 HOST_WIDE_INT to_allocate;
778 /* The offsets relative to ARG_POINTER. */
779 HOST_WIDE_INT frame_pointer_offset;
780 HOST_WIDE_INT hard_frame_pointer_offset;
781 HOST_WIDE_INT stack_pointer_offset;
783 /* When save_regs_using_mov is set, emit prologue using
784 move instead of push instructions. */
785 bool save_regs_using_mov;
788 /* Code model option. */
789 enum cmodel ix86_cmodel;
790 /* Asm dialect. */
791 enum asm_dialect ix86_asm_dialect = ASM_ATT;
 792 /* TLS dialect. */
793 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
795 /* Which unit we are generating floating point math for. */
796 enum fpmath_unit ix86_fpmath;
 798 /* Which cpu we are scheduling for. */
799 enum processor_type ix86_tune;
800 /* Which instruction set architecture to use. */
801 enum processor_type ix86_arch;
803 /* true if sse prefetch instruction is not NOOP. */
804 int x86_prefetch_sse;
806 /* ix86_regparm_string as a number */
807 static int ix86_regparm;
809 /* Preferred alignment for stack boundary in bits. */
810 unsigned int ix86_preferred_stack_boundary;
812 /* Values 1-5: see jump.c */
813 int ix86_branch_cost;
815 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
816 char internal_label_prefix[16];
817 int internal_label_prefix_len;
819 static bool ix86_handle_option (size_t, const char *, int);
820 static void output_pic_addr_const (FILE *, rtx, int);
821 static void put_condition_code (enum rtx_code, enum machine_mode,
822 int, int, FILE *);
823 static const char *get_some_local_dynamic_name (void);
824 static int get_some_local_dynamic_name_1 (rtx *, void *);
825 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
826 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
827 rtx *);
828 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
829 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
830 enum machine_mode);
831 static rtx get_thread_pointer (int);
832 static rtx legitimize_tls_address (rtx, enum tls_model, int);
833 static void get_pc_thunk_name (char [32], unsigned int);
834 static rtx gen_push (rtx);
835 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
836 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
837 static struct machine_function * ix86_init_machine_status (void);
838 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
839 static int ix86_nsaved_regs (void);
840 static void ix86_emit_save_regs (void);
841 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
842 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
843 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
844 static HOST_WIDE_INT ix86_GOT_alias_set (void);
845 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
846 static rtx ix86_expand_aligntest (rtx, int);
847 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
848 static int ix86_issue_rate (void);
849 static int ix86_adjust_cost (rtx, rtx, rtx, int);
850 static int ia32_multipass_dfa_lookahead (void);
851 static void ix86_init_mmx_sse_builtins (void);
852 static rtx x86_this_parameter (tree);
853 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
854 HOST_WIDE_INT, tree);
855 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
856 static void x86_file_start (void);
857 static void ix86_reorg (void);
858 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
859 static tree ix86_build_builtin_va_list (void);
860 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
861 tree, int *, int);
862 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
863 static bool ix86_vector_mode_supported_p (enum machine_mode);
865 static int ix86_address_cost (rtx);
866 static bool ix86_cannot_force_const_mem (rtx);
867 static rtx ix86_delegitimize_address (rtx);
869 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
871 struct builtin_description;
872 static rtx ix86_expand_sse_comi (const struct builtin_description *,
873 tree, rtx);
874 static rtx ix86_expand_sse_compare (const struct builtin_description *,
875 tree, rtx);
876 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
877 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
878 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
879 static rtx ix86_expand_store_builtin (enum insn_code, tree);
880 static rtx safe_vector_operand (rtx, enum machine_mode);
881 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
882 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
883 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
884 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
885 static int ix86_fp_comparison_cost (enum rtx_code code);
886 static unsigned int ix86_select_alt_pic_regnum (void);
887 static int ix86_save_reg (unsigned int, int);
888 static void ix86_compute_frame_layout (struct ix86_frame *);
889 static int ix86_comp_type_attributes (tree, tree);
890 static int ix86_function_regparm (tree, tree);
891 const struct attribute_spec ix86_attribute_table[];
892 static bool ix86_function_ok_for_sibcall (tree, tree);
893 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
894 static int ix86_value_regno (enum machine_mode, tree, tree);
895 static bool contains_128bit_aligned_vector_p (tree);
896 static rtx ix86_struct_value_rtx (tree, int);
897 static bool ix86_ms_bitfield_layout_p (tree);
898 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
899 static int extended_reg_mentioned_1 (rtx *, void *);
900 static bool ix86_rtx_costs (rtx, int, int, int *);
901 static int min_insn_size (rtx);
902 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
903 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
904 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
905 tree, bool);
906 static void ix86_init_builtins (void);
907 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
908 static const char *ix86_mangle_fundamental_type (tree);
909 static tree ix86_stack_protect_fail (void);
911 /* This function is only used on Solaris. */
912 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
913 ATTRIBUTE_UNUSED;
 915 /* Register class used for passing a given 64bit part of the argument.
 916 These represent classes as documented by the psABI, with the exception
 917 of the SSESF and SSEDF classes, which are basically the SSE class; gcc
 918 just uses an SF or DFmode move instead of DImode to avoid reformatting penalties.
 920 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
 921 whenever possible (the upper half then contains only padding).
923 enum x86_64_reg_class
925 X86_64_NO_CLASS,
926 X86_64_INTEGER_CLASS,
927 X86_64_INTEGERSI_CLASS,
928 X86_64_SSE_CLASS,
929 X86_64_SSESF_CLASS,
930 X86_64_SSEDF_CLASS,
931 X86_64_SSEUP_CLASS,
932 X86_64_X87_CLASS,
933 X86_64_X87UP_CLASS,
934 X86_64_COMPLEX_X87_CLASS,
935 X86_64_MEMORY_CLASS
937 static const char * const x86_64_reg_class_name[] = {
938 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
939 "sseup", "x87", "x87up", "cplx87", "no"
942 #define MAX_CLASSES 4
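/* Illustrative sketch, not in the original i386.c: as a worked example
   of the classification above, a structure such as

     struct s { double d; int i; };

   occupies two eightbytes under the x86-64 psABI; the first would be
   classified X86_64_SSEDF_CLASS (passed in an SSE register, moved in
   DFmode) and the second X86_64_INTEGERSI_CLASS (passed in an integer
   register, moved in SImode since its upper half is only padding).  */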
944 /* Table of constants used by fldpi, fldln2, etc.... */
945 static REAL_VALUE_TYPE ext_80387_constants_table [5];
946 static bool ext_80387_constants_init = 0;
947 static void init_ext_80387_constants (void);
949 /* Initialize the GCC target structure. */
950 #undef TARGET_ATTRIBUTE_TABLE
951 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
952 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
953 # undef TARGET_MERGE_DECL_ATTRIBUTES
954 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
955 #endif
957 #undef TARGET_COMP_TYPE_ATTRIBUTES
958 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
960 #undef TARGET_INIT_BUILTINS
961 #define TARGET_INIT_BUILTINS ix86_init_builtins
962 #undef TARGET_EXPAND_BUILTIN
963 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
965 #undef TARGET_ASM_FUNCTION_EPILOGUE
966 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
968 #undef TARGET_ASM_OPEN_PAREN
969 #define TARGET_ASM_OPEN_PAREN ""
970 #undef TARGET_ASM_CLOSE_PAREN
971 #define TARGET_ASM_CLOSE_PAREN ""
973 #undef TARGET_ASM_ALIGNED_HI_OP
974 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
975 #undef TARGET_ASM_ALIGNED_SI_OP
976 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
977 #ifdef ASM_QUAD
978 #undef TARGET_ASM_ALIGNED_DI_OP
979 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
980 #endif
982 #undef TARGET_ASM_UNALIGNED_HI_OP
983 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
984 #undef TARGET_ASM_UNALIGNED_SI_OP
985 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
986 #undef TARGET_ASM_UNALIGNED_DI_OP
987 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
989 #undef TARGET_SCHED_ADJUST_COST
990 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
991 #undef TARGET_SCHED_ISSUE_RATE
992 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
993 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
994 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
995 ia32_multipass_dfa_lookahead
997 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
998 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1000 #ifdef HAVE_AS_TLS
1001 #undef TARGET_HAVE_TLS
1002 #define TARGET_HAVE_TLS true
1003 #endif
1004 #undef TARGET_CANNOT_FORCE_CONST_MEM
1005 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1007 #undef TARGET_DELEGITIMIZE_ADDRESS
1008 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1010 #undef TARGET_MS_BITFIELD_LAYOUT_P
1011 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1013 #if TARGET_MACHO
1014 #undef TARGET_BINDS_LOCAL_P
1015 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1016 #endif
1018 #undef TARGET_ASM_OUTPUT_MI_THUNK
1019 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1020 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1021 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1023 #undef TARGET_ASM_FILE_START
1024 #define TARGET_ASM_FILE_START x86_file_start
1026 #undef TARGET_DEFAULT_TARGET_FLAGS
1027 #define TARGET_DEFAULT_TARGET_FLAGS \
1028 (TARGET_DEFAULT \
1029 | TARGET_64BIT_DEFAULT \
1030 | TARGET_SUBTARGET_DEFAULT \
1031 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1033 #undef TARGET_HANDLE_OPTION
1034 #define TARGET_HANDLE_OPTION ix86_handle_option
1036 #undef TARGET_RTX_COSTS
1037 #define TARGET_RTX_COSTS ix86_rtx_costs
1038 #undef TARGET_ADDRESS_COST
1039 #define TARGET_ADDRESS_COST ix86_address_cost
1041 #undef TARGET_FIXED_CONDITION_CODE_REGS
1042 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1043 #undef TARGET_CC_MODES_COMPATIBLE
1044 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1046 #undef TARGET_MACHINE_DEPENDENT_REORG
1047 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1049 #undef TARGET_BUILD_BUILTIN_VA_LIST
1050 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1052 #undef TARGET_MD_ASM_CLOBBERS
1053 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1055 #undef TARGET_PROMOTE_PROTOTYPES
1056 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1057 #undef TARGET_STRUCT_VALUE_RTX
1058 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1059 #undef TARGET_SETUP_INCOMING_VARARGS
1060 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1061 #undef TARGET_MUST_PASS_IN_STACK
1062 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1063 #undef TARGET_PASS_BY_REFERENCE
1064 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1066 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1067 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1069 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1070 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1072 #ifdef HAVE_AS_TLS
1073 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1074 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1075 #endif
1077 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1078 #undef TARGET_INSERT_ATTRIBUTES
1079 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1080 #endif
1082 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1083 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1085 #undef TARGET_STACK_PROTECT_FAIL
1086 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1088 #undef TARGET_FUNCTION_VALUE
1089 #define TARGET_FUNCTION_VALUE ix86_function_value
1091 struct gcc_target targetm = TARGET_INITIALIZER;
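/* Illustrative sketch, not in the original i386.c: each #undef/#define
   pair above overrides one field of the default target hook set, and
   TARGET_INITIALIZER then expands to the aggregate initializer for
   targetm.  Assuming the hook field names from target.h, the effect is
   that, e.g., targetm.rtx_costs points at ix86_rtx_costs and
   targetm.sched.adjust_cost at ix86_adjust_cost.  */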
1094 /* The svr4 ABI for the i386 says that records and unions are returned
1095 in memory. */
1096 #ifndef DEFAULT_PCC_STRUCT_RETURN
1097 #define DEFAULT_PCC_STRUCT_RETURN 1
1098 #endif
1100 /* Implement TARGET_HANDLE_OPTION. */
1102 static bool
1103 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1105 switch (code)
1107 case OPT_m3dnow:
1108 if (!value)
1110 target_flags &= ~MASK_3DNOW_A;
1111 target_flags_explicit |= MASK_3DNOW_A;
1113 return true;
1115 case OPT_mmmx:
1116 if (!value)
1118 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1119 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1121 return true;
1123 case OPT_msse:
1124 if (!value)
1126 target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1127 target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1129 return true;
1131 case OPT_msse2:
1132 if (!value)
1134 target_flags &= ~MASK_SSE3;
1135 target_flags_explicit |= MASK_SSE3;
1137 return true;
1139 default:
1140 return true;
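/* Illustrative sketch, not in the original i386.c: the net effect of the
   switch above is that disabling a base ISA extension also disables
   everything that implies it.  For example -mno-sse clears MASK_SSE2 and
   MASK_SSE3 as well, and records them in target_flags_explicit so that
   override_options () will not re-enable them from the -march= defaults;
   -mno-mmx likewise drops the 3DNow! masks.  */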
1144 /* Sometimes certain combinations of command options do not make
1145 sense on a particular target machine. You can define a macro
1146 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1147 defined, is executed once just after all the command options have
1148 been parsed.
1150 Don't use this macro to turn on various extra optimizations for
1151 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1153 void
1154 override_options (void)
1156 int i;
1157 int ix86_tune_defaulted = 0;
1159 /* Comes from final.c -- no real reason to change it. */
1160 #define MAX_CODE_ALIGN 16
1162 static struct ptt
1164 const struct processor_costs *cost; /* Processor costs */
1165 const int target_enable; /* Target flags to enable. */
1166 const int target_disable; /* Target flags to disable. */
1167 const int align_loop; /* Default alignments. */
1168 const int align_loop_max_skip;
1169 const int align_jump;
1170 const int align_jump_max_skip;
1171 const int align_func;
1173 const processor_target_table[PROCESSOR_max] =
1175 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1176 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1177 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1178 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1179 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1180 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1181 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1182 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1183 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
1186 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1187 static struct pta
1189 const char *const name; /* processor name or nickname. */
1190 const enum processor_type processor;
1191 const enum pta_flags
1193 PTA_SSE = 1,
1194 PTA_SSE2 = 2,
1195 PTA_SSE3 = 4,
1196 PTA_MMX = 8,
1197 PTA_PREFETCH_SSE = 16,
1198 PTA_3DNOW = 32,
1199 PTA_3DNOW_A = 64,
1200 PTA_64BIT = 128
1201 } flags;
1203 const processor_alias_table[] =
1205 {"i386", PROCESSOR_I386, 0},
1206 {"i486", PROCESSOR_I486, 0},
1207 {"i586", PROCESSOR_PENTIUM, 0},
1208 {"pentium", PROCESSOR_PENTIUM, 0},
1209 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1210 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1211 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1212 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1213 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1214 {"i686", PROCESSOR_PENTIUMPRO, 0},
1215 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1216 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1217 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1218 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1219 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1220 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1221 | PTA_MMX | PTA_PREFETCH_SSE},
1222 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1223 | PTA_MMX | PTA_PREFETCH_SSE},
1224 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1225 | PTA_MMX | PTA_PREFETCH_SSE},
1226 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1227 | PTA_MMX | PTA_PREFETCH_SSE},
1228 {"k6", PROCESSOR_K6, PTA_MMX},
1229 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1230 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1231 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1232 | PTA_3DNOW_A},
1233 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1234 | PTA_3DNOW | PTA_3DNOW_A},
1235 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1236 | PTA_3DNOW_A | PTA_SSE},
1237 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1238 | PTA_3DNOW_A | PTA_SSE},
1239 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1240 | PTA_3DNOW_A | PTA_SSE},
1241 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1242 | PTA_SSE | PTA_SSE2 },
1243 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1244 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1245 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1246 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1247 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1248 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1249 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1250 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1253 int const pta_size = ARRAY_SIZE (processor_alias_table);
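/* Illustrative sketch, not in the original i386.c: reading the alias
   table above, -march=k8 selects PROCESSOR_K8 and, unless the user said
   otherwise, the loop below turns on MMX, 3DNow!, 3DNow!-A, SSE and
   SSE2, marks SSE prefetch as usable, and satisfies the PTA_64BIT check
   when compiling in 64-bit mode.  */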
1255 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1256 SUBTARGET_OVERRIDE_OPTIONS;
1257 #endif
1259 /* Set the default values for switches whose default depends on TARGET_64BIT
1260 in case they weren't overwritten by command line options. */
1261 if (TARGET_64BIT)
1263 if (flag_omit_frame_pointer == 2)
1264 flag_omit_frame_pointer = 1;
1265 if (flag_asynchronous_unwind_tables == 2)
1266 flag_asynchronous_unwind_tables = 1;
1267 if (flag_pcc_struct_return == 2)
1268 flag_pcc_struct_return = 0;
1270 else
1272 if (flag_omit_frame_pointer == 2)
1273 flag_omit_frame_pointer = 0;
1274 if (flag_asynchronous_unwind_tables == 2)
1275 flag_asynchronous_unwind_tables = 0;
1276 if (flag_pcc_struct_return == 2)
1277 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1280 if (!ix86_tune_string && ix86_arch_string)
1281 ix86_tune_string = ix86_arch_string;
1282 if (!ix86_tune_string)
1284 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1285 ix86_tune_defaulted = 1;
1287 if (!ix86_arch_string)
1288 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1290 if (ix86_cmodel_string != 0)
1292 if (!strcmp (ix86_cmodel_string, "small"))
1293 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1294 else if (flag_pic)
1295 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1296 else if (!strcmp (ix86_cmodel_string, "32"))
1297 ix86_cmodel = CM_32;
1298 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1299 ix86_cmodel = CM_KERNEL;
1300 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1301 ix86_cmodel = CM_MEDIUM;
1302 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1303 ix86_cmodel = CM_LARGE;
1304 else
1305 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1307 else
1309 ix86_cmodel = CM_32;
1310 if (TARGET_64BIT)
1311 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1313 if (ix86_asm_string != 0)
1315 if (!strcmp (ix86_asm_string, "intel"))
1316 ix86_asm_dialect = ASM_INTEL;
1317 else if (!strcmp (ix86_asm_string, "att"))
1318 ix86_asm_dialect = ASM_ATT;
1319 else
1320 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1322 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1323 error ("code model %qs not supported in the %s bit mode",
1324 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1325 if (ix86_cmodel == CM_LARGE)
1326 sorry ("code model %<large%> not supported yet");
1327 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1328 sorry ("%i-bit mode not compiled in",
1329 (target_flags & MASK_64BIT) ? 64 : 32);
1331 for (i = 0; i < pta_size; i++)
1332 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1334 ix86_arch = processor_alias_table[i].processor;
1335 /* Default cpu tuning to the architecture. */
1336 ix86_tune = ix86_arch;
1337 if (processor_alias_table[i].flags & PTA_MMX
1338 && !(target_flags_explicit & MASK_MMX))
1339 target_flags |= MASK_MMX;
1340 if (processor_alias_table[i].flags & PTA_3DNOW
1341 && !(target_flags_explicit & MASK_3DNOW))
1342 target_flags |= MASK_3DNOW;
1343 if (processor_alias_table[i].flags & PTA_3DNOW_A
1344 && !(target_flags_explicit & MASK_3DNOW_A))
1345 target_flags |= MASK_3DNOW_A;
1346 if (processor_alias_table[i].flags & PTA_SSE
1347 && !(target_flags_explicit & MASK_SSE))
1348 target_flags |= MASK_SSE;
1349 if (processor_alias_table[i].flags & PTA_SSE2
1350 && !(target_flags_explicit & MASK_SSE2))
1351 target_flags |= MASK_SSE2;
1352 if (processor_alias_table[i].flags & PTA_SSE3
1353 && !(target_flags_explicit & MASK_SSE3))
1354 target_flags |= MASK_SSE3;
1355 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1356 x86_prefetch_sse = true;
1357 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1358 error ("CPU you selected does not support x86-64 "
1359 "instruction set");
1360 break;
1363 if (i == pta_size)
1364 error ("bad value (%s) for -march= switch", ix86_arch_string);
1366 for (i = 0; i < pta_size; i++)
1367 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1369 ix86_tune = processor_alias_table[i].processor;
1370 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1372 if (ix86_tune_defaulted)
1374 ix86_tune_string = "x86-64";
1375 for (i = 0; i < pta_size; i++)
1376 if (! strcmp (ix86_tune_string,
1377 processor_alias_table[i].name))
1378 break;
1379 ix86_tune = processor_alias_table[i].processor;
1381 else
1382 error ("CPU you selected does not support x86-64 "
1383 "instruction set");
1385 /* Intel CPUs have always interpreted SSE prefetch instructions as
1386 NOPs; so, we can enable SSE prefetch instructions even when
1387 -mtune (rather than -march) points us to a processor that has them.
1388 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1389 higher processors. */
1390 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1391 x86_prefetch_sse = true;
1392 break;
1394 if (i == pta_size)
1395 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1397 if (optimize_size)
1398 ix86_cost = &size_cost;
1399 else
1400 ix86_cost = processor_target_table[ix86_tune].cost;
1401 target_flags |= processor_target_table[ix86_tune].target_enable;
1402 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1404 /* Arrange to set up i386_stack_locals for all functions. */
1405 init_machine_status = ix86_init_machine_status;
1407 /* Validate -mregparm= value. */
1408 if (ix86_regparm_string)
1410 i = atoi (ix86_regparm_string);
1411 if (i < 0 || i > REGPARM_MAX)
1412 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1413 else
1414 ix86_regparm = i;
1416 else
1417 if (TARGET_64BIT)
1418 ix86_regparm = REGPARM_MAX;
1420 /* If the user has provided any of the -malign-* options,
1421 warn and use that value only if -falign-* is not set.
1422 Remove this code in GCC 3.2 or later. */
1423 if (ix86_align_loops_string)
1425 warning (0, "-malign-loops is obsolete, use -falign-loops");
1426 if (align_loops == 0)
1428 i = atoi (ix86_align_loops_string);
1429 if (i < 0 || i > MAX_CODE_ALIGN)
1430 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1431 else
1432 align_loops = 1 << i;
1436 if (ix86_align_jumps_string)
1438 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1439 if (align_jumps == 0)
1441 i = atoi (ix86_align_jumps_string);
1442 if (i < 0 || i > MAX_CODE_ALIGN)
 1443 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1444 else
1445 align_jumps = 1 << i;
1449 if (ix86_align_funcs_string)
1451 warning (0, "-malign-functions is obsolete, use -falign-functions");
1452 if (align_functions == 0)
1454 i = atoi (ix86_align_funcs_string);
1455 if (i < 0 || i > MAX_CODE_ALIGN)
 1456 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1457 else
1458 align_functions = 1 << i;
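/* Illustrative sketch, not in the original i386.c: the -malign-* values
   are exponents rather than byte counts, so -malign-loops=4 ends up as
   align_loops = 1 << 4 = 16 bytes, and values above MAX_CODE_ALIGN (16)
   are rejected.  */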
1462 /* Default align_* from the processor table. */
1463 if (align_loops == 0)
1465 align_loops = processor_target_table[ix86_tune].align_loop;
1466 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1468 if (align_jumps == 0)
1470 align_jumps = processor_target_table[ix86_tune].align_jump;
1471 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1473 if (align_functions == 0)
1475 align_functions = processor_target_table[ix86_tune].align_func;
1478 /* Validate -mpreferred-stack-boundary= value, or provide default.
1479 The default of 128 bits is for Pentium III's SSE __m128, but we
1480 don't want additional code to keep the stack aligned when
1481 optimizing for code size. */
1482 ix86_preferred_stack_boundary = (optimize_size
1483 ? TARGET_64BIT ? 128 : 32
1484 : 128);
1485 if (ix86_preferred_stack_boundary_string)
1487 i = atoi (ix86_preferred_stack_boundary_string);
1488 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1489 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1490 TARGET_64BIT ? 4 : 2);
1491 else
1492 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
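/* Illustrative sketch, not in the original i386.c: the option value is a
   log2 of bytes, so -mpreferred-stack-boundary=4 yields
   (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. the 16-byte
   alignment that the 128-bit default above corresponds to.  */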
1495 /* Validate -mbranch-cost= value, or provide default. */
1496 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1497 if (ix86_branch_cost_string)
1499 i = atoi (ix86_branch_cost_string);
1500 if (i < 0 || i > 5)
1501 error ("-mbranch-cost=%d is not between 0 and 5", i);
1502 else
1503 ix86_branch_cost = i;
1506 if (ix86_tls_dialect_string)
1508 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1509 ix86_tls_dialect = TLS_DIALECT_GNU;
1510 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1511 ix86_tls_dialect = TLS_DIALECT_SUN;
1512 else
1513 error ("bad value (%s) for -mtls-dialect= switch",
1514 ix86_tls_dialect_string);
1517 /* Keep nonleaf frame pointers. */
1518 if (flag_omit_frame_pointer)
1519 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1520 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1521 flag_omit_frame_pointer = 1;
1523 /* If we're doing fast math, we don't care about comparison order
1524 wrt NaNs. This lets us use a shorter comparison sequence. */
1525 if (flag_unsafe_math_optimizations)
1526 target_flags &= ~MASK_IEEE_FP;
1528 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1529 since the insns won't need emulation. */
1530 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1531 target_flags &= ~MASK_NO_FANCY_MATH_387;
1533 /* Likewise, if the target doesn't have a 387, or we've specified
1534 software floating point, don't use 387 inline intrinsics. */
1535 if (!TARGET_80387)
1536 target_flags |= MASK_NO_FANCY_MATH_387;
1538 /* Turn on SSE2 builtins for -msse3. */
1539 if (TARGET_SSE3)
1540 target_flags |= MASK_SSE2;
1542 /* Turn on SSE builtins for -msse2. */
1543 if (TARGET_SSE2)
1544 target_flags |= MASK_SSE;
1546 /* Turn on MMX builtins for -msse. */
1547 if (TARGET_SSE)
1549 target_flags |= MASK_MMX & ~target_flags_explicit;
1550 x86_prefetch_sse = true;
1553 /* Turn on MMX builtins for 3Dnow. */
1554 if (TARGET_3DNOW)
1555 target_flags |= MASK_MMX;
1557 if (TARGET_64BIT)
1559 if (TARGET_ALIGN_DOUBLE)
1560 error ("-malign-double makes no sense in 64-bit mode");
1561 if (TARGET_RTD)
1562 error ("-mrtd calling convention not supported in 64-bit mode");
1564 /* Enable by default the SSE and MMX builtins. Do allow the user to
1565 explicitly disable any of these. In particular, disabling SSE and
1566 MMX for kernel code is extremely useful. */
1567 target_flags
1568 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1569 & ~target_flags_explicit);
1571 else
1573 /* The i386 ABI does not specify a red zone.  It still makes sense to use it
1574 when the programmer takes care to keep the stack from being destroyed.  */
1575 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1576 target_flags |= MASK_NO_RED_ZONE;
1579 /* Accept -msseregparm only if at least SSE support is enabled. */
1580 if (TARGET_SSEREGPARM
1581 && ! TARGET_SSE)
1582 error ("-msseregparm used without SSE enabled");
1584 ix86_fpmath = TARGET_FPMATH_DEFAULT;
1586 if (ix86_fpmath_string != 0)
1588 if (! strcmp (ix86_fpmath_string, "387"))
1589 ix86_fpmath = FPMATH_387;
1590 else if (! strcmp (ix86_fpmath_string, "sse"))
1592 if (!TARGET_SSE)
1594 warning (0, "SSE instruction set disabled, using 387 arithmetic");
1595 ix86_fpmath = FPMATH_387;
1597 else
1598 ix86_fpmath = FPMATH_SSE;
1600 else if (! strcmp (ix86_fpmath_string, "387,sse")
1601 || ! strcmp (ix86_fpmath_string, "sse,387"))
1603 if (!TARGET_SSE)
1605 warning (0, "SSE instruction set disabled, using 387 arithmetic");
1606 ix86_fpmath = FPMATH_387;
1608 else if (!TARGET_80387)
1610 warning (0, "387 instruction set disabled, using SSE arithmetic");
1611 ix86_fpmath = FPMATH_SSE;
1613 else
1614 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1616 else
1617 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1620 /* If the i387 is disabled, then do not return values in it. */
1621 if (!TARGET_80387)
1622 target_flags &= ~MASK_FLOAT_RETURNS;
1624 if ((x86_accumulate_outgoing_args & TUNEMASK)
1625 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1626 && !optimize_size)
1627 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1629 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1631 char *p;
1632 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1633 p = strchr (internal_label_prefix, 'X');
1634 internal_label_prefix_len = p - internal_label_prefix;
1635 *p = '\0';
1638 /* When no scheduling description is available, disable the scheduler pass
1639 so it won't slow down the compilation and make x87 code slower.  */
1640 if (!TARGET_SCHEDULE)
1641 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
1644 void
1645 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1647 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1648 make the problem with not enough registers even worse. */
1649 #ifdef INSN_SCHEDULING
1650 if (level > 1)
1651 flag_schedule_insns = 0;
1652 #endif
1654 if (TARGET_MACHO)
1655 /* The Darwin libraries never set errno, so we might as well
1656 avoid calling them when that's the only reason we would. */
1657 flag_errno_math = 0;
1659 /* The default values of these switches depend on TARGET_64BIT, which is
1660 not known at this moment.  Mark these values with 2 and let the user
1661 override them.  In case there is no command line option specifying
1662 them, we will set the defaults in override_options.  */
1663 if (optimize >= 1)
1664 flag_omit_frame_pointer = 2;
1665 flag_pcc_struct_return = 2;
1666 flag_asynchronous_unwind_tables = 2;
1667 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
1668 SUBTARGET_OPTIMIZATION_OPTIONS;
1669 #endif
1672 /* Table of valid machine attributes. */
1673 const struct attribute_spec ix86_attribute_table[] =
1675 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1676 /* Stdcall attribute says callee is responsible for popping arguments
1677 if they are not variable. */
1678 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
1679 /* Fastcall attribute says callee is responsible for popping arguments
1680 if they are not variable. */
1681 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
1682 /* Cdecl attribute says the callee is a normal C declaration */
1683 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
1684 /* Regparm attribute specifies how many integer arguments are to be
1685 passed in registers. */
1686 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
1687 /* Sseregparm attribute says we are using x86_64 calling conventions
1688 for FP arguments. */
1689 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
1690 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1691 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
1692 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
1693 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1694 #endif
1695 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1696 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1697 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1698 SUBTARGET_ATTRIBUTE_TABLE,
1699 #endif
1700 { NULL, 0, 0, false, false, false, NULL }
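/* Illustrative example (editorial sketch, not part of the GCC sources):
   user code that exercises the calling-convention attributes registered
   above; the identifiers are made up for illustration only.

     int __attribute__((stdcall))     w32_callback (int a, int b);
     int __attribute__((fastcall))    hot_path (int a, int b);
     int __attribute__((regparm (3))) leaf_helper (int a, int b, int c);
     double __attribute__((sseregparm)) sse_ret (double x, double y);

   Each declaration is routed through ix86_handle_cconv_attribute below,
   which checks argument counts and rejects incompatible combinations.  */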
1703 /* Decide whether we can make a sibling call to a function. DECL is the
1704 declaration of the function being targeted by the call and EXP is the
1705 CALL_EXPR representing the call. */
1707 static bool
1708 ix86_function_ok_for_sibcall (tree decl, tree exp)
1710 tree func;
1711 rtx a, b;
1713 /* If we are generating position-independent code, we cannot sibcall
1714 optimize any indirect call, or a direct call to a global function,
1715 as the PLT requires %ebx be live. */
1716 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1717 return false;
1719 if (decl)
1720 func = decl;
1721 else
1723 func = TREE_TYPE (TREE_OPERAND (exp, 0));
1724 if (POINTER_TYPE_P (func))
1725 func = TREE_TYPE (func);
1728 /* Check that the return value locations are the same.  For example,
1729 if we are returning floats on the 80387 register stack, we cannot
1730 make a sibcall from a function that doesn't return a float to a
1731 function that does or, conversely, from a function that does return
1732 a float to a function that doesn't; the necessary stack adjustment
1733 would not be executed. This is also the place we notice
1734 differences in the return value ABI. */
1735 a = ix86_function_value (TREE_TYPE (exp), func, false);
1736 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
1737 cfun->decl, false);
1738 if (! rtx_equal_p (a, b))
1739 return false;
1741 /* If this call is indirect, we'll need to be able to use a call-clobbered
1742 register for the address of the target function. Make sure that all
1743 such registers are not used for passing parameters. */
1744 if (!decl && !TARGET_64BIT)
1746 tree type;
1748 /* We're looking at the CALL_EXPR, we need the type of the function. */
1749 type = TREE_OPERAND (exp, 0); /* pointer expression */
1750 type = TREE_TYPE (type); /* pointer type */
1751 type = TREE_TYPE (type); /* function type */
1753 if (ix86_function_regparm (type, NULL) >= 3)
1755 /* ??? Need to count the actual number of registers to be used,
1756 not the possible number of registers. Fix later. */
1757 return false;
1761 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1762 /* Dllimport'd functions are also called indirectly. */
1763 if (decl && lookup_attribute ("dllimport", DECL_ATTRIBUTES (decl))
1764 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
1765 return false;
1766 #endif
1768 /* Otherwise okay. That also includes certain types of indirect calls. */
1769 return true;
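/* Illustrative example (editorial sketch, not part of the GCC sources;
   the function names are invented):

     extern int external_fn (int);
     static int local_fn (int x) { return x + 1; }

     int tail  (int x) { return external_fn (x); }   with -fpic on ia32 this
                                                      is not sibcalled, since
                                                      the PLT needs %ebx live
     int tail2 (int x) { return local_fn (x); }      a sibcall remains possible

   mirroring the flag_pic / TREE_PUBLIC check at the top of the function.  */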
1772 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
1773 calling convention attributes;
1774 arguments as in struct attribute_spec.handler. */
1776 static tree
1777 ix86_handle_cconv_attribute (tree *node, tree name,
1778 tree args,
1779 int flags ATTRIBUTE_UNUSED,
1780 bool *no_add_attrs)
1782 if (TREE_CODE (*node) != FUNCTION_TYPE
1783 && TREE_CODE (*node) != METHOD_TYPE
1784 && TREE_CODE (*node) != FIELD_DECL
1785 && TREE_CODE (*node) != TYPE_DECL)
1787 warning (OPT_Wattributes, "%qs attribute only applies to functions",
1788 IDENTIFIER_POINTER (name));
1789 *no_add_attrs = true;
1790 return NULL_TREE;
1793 /* Can combine regparm with all attributes but fastcall. */
1794 if (is_attribute_p ("regparm", name))
1796 tree cst;
1798 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1800 error ("fastcall and regparm attributes are not compatible");
1803 cst = TREE_VALUE (args);
1804 if (TREE_CODE (cst) != INTEGER_CST)
1806 warning (OPT_Wattributes,
1807 "%qs attribute requires an integer constant argument",
1808 IDENTIFIER_POINTER (name));
1809 *no_add_attrs = true;
1811 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1813 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
1814 IDENTIFIER_POINTER (name), REGPARM_MAX);
1815 *no_add_attrs = true;
1818 return NULL_TREE;
1821 if (TARGET_64BIT)
1823 warning (OPT_Wattributes, "%qs attribute ignored",
1824 IDENTIFIER_POINTER (name));
1825 *no_add_attrs = true;
1826 return NULL_TREE;
1829 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
1830 if (is_attribute_p ("fastcall", name))
1832 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
1834 error ("fastcall and cdecl attributes are not compatible");
1836 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1838 error ("fastcall and stdcall attributes are not compatible");
1840 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1842 error ("fastcall and regparm attributes are not compatible");
1846 /* Can combine stdcall with fastcall (redundant), regparm and
1847 sseregparm. */
1848 else if (is_attribute_p ("stdcall", name))
1850 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
1852 error ("stdcall and cdecl attributes are not compatible");
1854 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1856 error ("stdcall and fastcall attributes are not compatible");
1860 /* Can combine cdecl with regparm and sseregparm. */
1861 else if (is_attribute_p ("cdecl", name))
1863 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1865 error ("stdcall and cdecl attributes are not compatible");
1867 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1869 error ("fastcall and cdecl attributes are not compatible");
1873 /* Can combine sseregparm with all attributes. */
1875 return NULL_TREE;
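/* Illustrative example (editorial sketch, not part of the GCC sources):
   a combination the handler above rejects, since fastcall already fixes
   ECX/EDX for the first arguments and regparm would conflict with that:

     int __attribute__((fastcall, regparm (3))) bad_decl (int a, int b);
       -> error: fastcall and regparm attributes are not compatible

   whereas stdcall combined with regparm or sseregparm is accepted.  */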
1878 /* Return 0 if the attributes for two types are incompatible, 1 if they
1879 are compatible, and 2 if they are nearly compatible (which causes a
1880 warning to be generated). */
1882 static int
1883 ix86_comp_type_attributes (tree type1, tree type2)
1885 /* Check for mismatch of non-default calling convention. */
1886 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1888 if (TREE_CODE (type1) != FUNCTION_TYPE)
1889 return 1;
1891 /* Check for mismatched fastcall/regparm types. */
1892 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1893 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1894 || (ix86_function_regparm (type1, NULL)
1895 != ix86_function_regparm (type2, NULL)))
1896 return 0;
1898 /* Check for mismatched sseregparm types. */
1899 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
1900 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
1901 return 0;
1903 /* Check for mismatched return types (cdecl vs stdcall). */
1904 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1905 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1906 return 0;
1908 return 1;
1911 /* Return the regparm value for a function with the indicated TYPE and DECL.
1912 DECL may be NULL when calling function indirectly
1913 or considering a libcall. */
1915 static int
1916 ix86_function_regparm (tree type, tree decl)
1918 tree attr;
1919 int regparm = ix86_regparm;
1920 bool user_convention = false;
1922 if (!TARGET_64BIT)
1924 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1925 if (attr)
1927 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1928 user_convention = true;
1931 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1933 regparm = 2;
1934 user_convention = true;
1937 /* Use register calling convention for local functions when possible. */
1938 if (!TARGET_64BIT && !user_convention && decl
1939 && flag_unit_at_a_time && !profile_flag)
1941 struct cgraph_local_info *i = cgraph_local_info (decl);
1942 if (i && i->local)
1944 /* We can't use regparm(3) for nested functions as these use
1945 static chain pointer in third argument. */
1946 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1947 regparm = 2;
1948 else
1949 regparm = 3;
1953 return regparm;
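/* Illustrative example (editorial sketch, not part of the GCC sources):
   a file-local function that is a candidate for the automatic register
   calling convention chosen above when compiled with -funit-at-a-time
   (default at -O2 in this release) and without -pg:

     static int
     add3 (int a, int b, int c)
     {
       return a + b + c;
     }

   If cgraph proves add3 local (its address never escapes), its three
   arguments are passed in EAX, EDX and ECX instead of on the stack.  */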
1956 /* Return 1 or 2, if we can pass up to 8 SFmode (1) and DFmode (2) arguments
1957 in SSE registers for a function with the indicated TYPE and DECL.
1958 DECL may be NULL when calling function indirectly
1959 or considering a libcall. Otherwise return 0. */
1961 static int
1962 ix86_function_sseregparm (tree type, tree decl)
1964 /* Use SSE registers to pass SFmode and DFmode arguments if requested
1965 by the sseregparm attribute. */
1966 if (TARGET_SSEREGPARM
1967 || (type
1968 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
1970 if (!TARGET_SSE)
1972 if (decl)
1973 error ("Calling %qD with attribute sseregparm without "
1974 "SSE/SSE2 enabled", decl);
1975 else
1976 error ("Calling %qT with attribute sseregparm without "
1977 "SSE/SSE2 enabled", type);
1978 return 0;
1981 return 2;
1984 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
1985 in SSE registers even in 32-bit mode, and allow not just 3 but up to
1986 8 SSE arguments in registers.  */
1987 if (!TARGET_64BIT && decl
1988 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
1990 struct cgraph_local_info *i = cgraph_local_info (decl);
1991 if (i && i->local)
1992 return TARGET_SSE2 ? 2 : 1;
1995 return 0;
1998 /* Return true if EAX is live at the start of the function. Used by
1999 ix86_expand_prologue to determine if we need special help before
2000 calling allocate_stack_worker. */
2002 static bool
2003 ix86_eax_live_at_start_p (void)
2005 /* Cheat. Don't bother working forward from ix86_function_regparm
2006 to the function type to whether an actual argument is located in
2007 eax. Instead just look at cfg info, which is still close enough
2008 to correct at this point. This gives false positives for broken
2009 functions that might use uninitialized data that happens to be
2010 allocated in eax, but who cares? */
2011 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2014 /* Value is the number of bytes of arguments automatically
2015 popped when returning from a subroutine call.
2016 FUNDECL is the declaration node of the function (as a tree),
2017 FUNTYPE is the data type of the function (as a tree),
2018 or for a library call it is an identifier node for the subroutine name.
2019 SIZE is the number of bytes of arguments passed on the stack.
2021 On the 80386, the RTD insn may be used to pop them if the number
2022 of args is fixed, but if the number is variable then the caller
2023 must pop them all. RTD can't be used for library calls now
2024 because the library is compiled with the Unix compiler.
2025 Use of RTD is a selectable option, since it is incompatible with
2026 standard Unix calling sequences. If the option is not selected,
2027 the caller must always pop the args.
2029 The attribute stdcall is equivalent to RTD on a per module basis. */
2032 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2034 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2036 /* Cdecl functions override -mrtd, and never pop the stack. */
2037 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2039 /* Stdcall and fastcall functions will pop the stack if not
2040 variable args. */
2041 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2042 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2043 rtd = 1;
2045 if (rtd
2046 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2047 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2048 == void_type_node)))
2049 return size;
2052 /* Lose any fake structure return argument if it is passed on the stack. */
2053 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2054 && !TARGET_64BIT
2055 && !KEEP_AGGREGATE_RETURN_POINTER)
2057 int nregs = ix86_function_regparm (funtype, fundecl);
2059 if (!nregs)
2060 return GET_MODE_SIZE (Pmode);
2063 return 0;
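/* Illustrative example (editorial sketch, not part of the GCC sources):
   for a fixed-argument stdcall function the callee pops its own arguments,
   so the value returned above equals the size of the argument block:

     int __attribute__((stdcall)) f (int a, int b);   size = 8, callee pops 8
     int __attribute__((stdcall)) g (int a, ...);      variable args, caller pops
     int __attribute__((cdecl))   h (int a, int b);    caller always pops

   which is why a `ret 8' rather than a plain `ret' is emitted for f.  */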
2066 /* Argument support functions. */
2068 /* Return true when register may be used to pass function parameters. */
2069 bool
2070 ix86_function_arg_regno_p (int regno)
2072 int i;
2073 if (!TARGET_64BIT)
2074 return (regno < REGPARM_MAX
2075 || (TARGET_MMX && MMX_REGNO_P (regno)
2076 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2077 || (TARGET_SSE && SSE_REGNO_P (regno)
2078 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2080 if (TARGET_SSE && SSE_REGNO_P (regno)
2081 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2082 return true;
2083 /* RAX is used as hidden argument to va_arg functions. */
2084 if (!regno)
2085 return true;
2086 for (i = 0; i < REGPARM_MAX; i++)
2087 if (regno == x86_64_int_parameter_registers[i])
2088 return true;
2089 return false;
2092 /* Return true if we do not know how to pass TYPE solely in registers. */
2094 static bool
2095 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2097 if (must_pass_in_stack_var_size_or_pad (mode, type))
2098 return true;
2100 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2101 The layout_type routine is crafty and tries to trick us into passing
2102 currently unsupported vector types on the stack by using TImode. */
2103 return (!TARGET_64BIT && mode == TImode
2104 && type && TREE_CODE (type) != VECTOR_TYPE);
2107 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2108 for a call to a function whose data type is FNTYPE.
2109 For a library call, FNTYPE is 0. */
2111 void
2112 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2113 tree fntype, /* tree ptr for function decl */
2114 rtx libname, /* SYMBOL_REF of library name or 0 */
2115 tree fndecl)
2117 static CUMULATIVE_ARGS zero_cum;
2118 tree param, next_param;
2120 if (TARGET_DEBUG_ARG)
2122 fprintf (stderr, "\ninit_cumulative_args (");
2123 if (fntype)
2124 fprintf (stderr, "fntype code = %s, ret code = %s",
2125 tree_code_name[(int) TREE_CODE (fntype)],
2126 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2127 else
2128 fprintf (stderr, "no fntype");
2130 if (libname)
2131 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2134 *cum = zero_cum;
2136 /* Set up the number of registers to use for passing arguments. */
2137 cum->nregs = ix86_regparm;
2138 if (TARGET_SSE)
2139 cum->sse_nregs = SSE_REGPARM_MAX;
2140 if (TARGET_MMX)
2141 cum->mmx_nregs = MMX_REGPARM_MAX;
2142 cum->warn_sse = true;
2143 cum->warn_mmx = true;
2144 cum->maybe_vaarg = false;
2146 /* Use ecx and edx registers if function has fastcall attribute,
2147 else look for regparm information. */
2148 if (fntype && !TARGET_64BIT)
2150 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2152 cum->nregs = 2;
2153 cum->fastcall = 1;
2155 else
2156 cum->nregs = ix86_function_regparm (fntype, fndecl);
2159 /* Set up the number of SSE registers used for passing SFmode
2160 and DFmode arguments. Warn for mismatching ABI. */
2161 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2163 /* Determine if this function has variable arguments. This is
2164 indicated by the last argument being 'void_type_node' if there
2165 are no variable arguments. If there are variable arguments, then
2166 we won't pass anything in registers in 32-bit mode. */
2168 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2170 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2171 param != 0; param = next_param)
2173 next_param = TREE_CHAIN (param);
2174 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2176 if (!TARGET_64BIT)
2178 cum->nregs = 0;
2179 cum->sse_nregs = 0;
2180 cum->mmx_nregs = 0;
2181 cum->warn_sse = 0;
2182 cum->warn_mmx = 0;
2183 cum->fastcall = 0;
2184 cum->float_in_sse = 0;
2186 cum->maybe_vaarg = true;
2190 if ((!fntype && !libname)
2191 || (fntype && !TYPE_ARG_TYPES (fntype)))
2192 cum->maybe_vaarg = true;
2194 if (TARGET_DEBUG_ARG)
2195 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2197 return;
2200 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2201 But in the case of vector types, it is some vector mode.
2203 When we have only some of our vector isa extensions enabled, then there
2204 are some modes for which vector_mode_supported_p is false. For these
2205 modes, the generic vector support in gcc will choose some non-vector mode
2206 in order to implement the type. By computing the natural mode, we'll
2207 select the proper ABI location for the operand and not depend on whatever
2208 the middle-end decides to do with these vector types. */
2210 static enum machine_mode
2211 type_natural_mode (tree type)
2213 enum machine_mode mode = TYPE_MODE (type);
2215 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2217 HOST_WIDE_INT size = int_size_in_bytes (type);
2218 if ((size == 8 || size == 16)
2219 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2220 && TYPE_VECTOR_SUBPARTS (type) > 1)
2222 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2224 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2225 mode = MIN_MODE_VECTOR_FLOAT;
2226 else
2227 mode = MIN_MODE_VECTOR_INT;
2229 /* Get the mode which has this inner mode and number of units. */
2230 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2231 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2232 && GET_MODE_INNER (mode) == innermode)
2233 return mode;
2235 gcc_unreachable ();
2239 return mode;
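/* Illustrative example (editorial sketch, not part of the GCC sources):
   with a generic vector type such as

     typedef float v4sf __attribute__((vector_size (16)));

   TYPE_MODE may be a non-vector mode when SSE is disabled, yet the loop
   above still recovers V4SFmode as the natural mode, so the ABI location
   of the operand does not depend on which ISA extensions are enabled.  */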
2242 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2243 this may not agree with the mode that the type system has chosen for the
2244 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2245 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2247 static rtx
2248 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2249 unsigned int regno)
2251 rtx tmp;
2253 if (orig_mode != BLKmode)
2254 tmp = gen_rtx_REG (orig_mode, regno);
2255 else
2257 tmp = gen_rtx_REG (mode, regno);
2258 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2259 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2262 return tmp;
2265 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2266 of this code is to classify each 8bytes of incoming argument by the register
2267 class and assign registers accordingly. */
2269 /* Return the union class of CLASS1 and CLASS2.
2270 See the x86-64 PS ABI for details. */
2272 static enum x86_64_reg_class
2273 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2275 /* Rule #1: If both classes are equal, this is the resulting class. */
2276 if (class1 == class2)
2277 return class1;
2279 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2280 the other class. */
2281 if (class1 == X86_64_NO_CLASS)
2282 return class2;
2283 if (class2 == X86_64_NO_CLASS)
2284 return class1;
2286 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2287 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2288 return X86_64_MEMORY_CLASS;
2290 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2291 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2292 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2293 return X86_64_INTEGERSI_CLASS;
2294 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2295 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2296 return X86_64_INTEGER_CLASS;
2298 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2299 MEMORY is used. */
2300 if (class1 == X86_64_X87_CLASS
2301 || class1 == X86_64_X87UP_CLASS
2302 || class1 == X86_64_COMPLEX_X87_CLASS
2303 || class2 == X86_64_X87_CLASS
2304 || class2 == X86_64_X87UP_CLASS
2305 || class2 == X86_64_COMPLEX_X87_CLASS)
2306 return X86_64_MEMORY_CLASS;
2308 /* Rule #6: Otherwise class SSE is used. */
2309 return X86_64_SSE_CLASS;
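/* Illustrative worked example (editorial note, not part of the GCC sources):

     merge_classes (X86_64_INTEGERSI_CLASS, X86_64_SSESF_CLASS)
       -> X86_64_INTEGERSI_CLASS              (rule #4)
     merge_classes (X86_64_SSE_CLASS, X86_64_X87_CLASS)
       -> X86_64_MEMORY_CLASS                 (rule #5)
     merge_classes (X86_64_NO_CLASS, X86_64_SSEDF_CLASS)
       -> X86_64_SSEDF_CLASS                  (rule #2)

   This is how e.g. `struct { int i; float f; }' ends up with a single
   INTEGER eightbyte: both fields share the first eightbyte and the
   integer class wins over the SSE class.  */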
2312 /* Classify the argument of type TYPE and mode MODE.
2313 CLASSES will be filled by the register class used to pass each word
2314 of the operand. The number of words is returned. In case the parameter
2315 should be passed in memory, 0 is returned. As a special case for zero
2316 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2318 BIT_OFFSET is used internally for handling records; it specifies the
2319 offset of the argument in bits modulo 256 to avoid overflow cases.
2321 See the x86-64 PS ABI for details.  */
2324 static int
2325 classify_argument (enum machine_mode mode, tree type,
2326 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2328 HOST_WIDE_INT bytes =
2329 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2330 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2332 /* Variable sized entities are always passed/returned in memory. */
2333 if (bytes < 0)
2334 return 0;
2336 if (mode != VOIDmode
2337 && targetm.calls.must_pass_in_stack (mode, type))
2338 return 0;
2340 if (type && AGGREGATE_TYPE_P (type))
2342 int i;
2343 tree field;
2344 enum x86_64_reg_class subclasses[MAX_CLASSES];
2346 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2347 if (bytes > 16)
2348 return 0;
2350 for (i = 0; i < words; i++)
2351 classes[i] = X86_64_NO_CLASS;
2353 /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
2354 signal the memory class, so handle them as a special case.  */
2355 if (!words)
2357 classes[0] = X86_64_NO_CLASS;
2358 return 1;
2361 /* Classify each field of record and merge classes. */
2362 switch (TREE_CODE (type))
2364 case RECORD_TYPE:
2365 /* For classes first merge in the field of the subclasses. */
2366 if (TYPE_BINFO (type))
2368 tree binfo, base_binfo;
2369 int basenum;
2371 for (binfo = TYPE_BINFO (type), basenum = 0;
2372 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2374 int num;
2375 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2376 tree type = BINFO_TYPE (base_binfo);
2378 num = classify_argument (TYPE_MODE (type),
2379 type, subclasses,
2380 (offset + bit_offset) % 256);
2381 if (!num)
2382 return 0;
2383 for (i = 0; i < num; i++)
2385 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2386 classes[i + pos] =
2387 merge_classes (subclasses[i], classes[i + pos]);
2391 /* And now merge the fields of structure. */
2392 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2394 if (TREE_CODE (field) == FIELD_DECL)
2396 int num;
2398 /* Bitfields are always classified as integer. Handle them
2399 early, since later code would consider them to be
2400 misaligned integers. */
2401 if (DECL_BIT_FIELD (field))
2403 for (i = int_bit_position (field) / 8 / 8;
2404 i < (int_bit_position (field)
2405 + tree_low_cst (DECL_SIZE (field), 0)
2406 + 63) / 8 / 8; i++)
2407 classes[i] =
2408 merge_classes (X86_64_INTEGER_CLASS,
2409 classes[i]);
2411 else
2413 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2414 TREE_TYPE (field), subclasses,
2415 (int_bit_position (field)
2416 + bit_offset) % 256);
2417 if (!num)
2418 return 0;
2419 for (i = 0; i < num; i++)
2421 int pos =
2422 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2423 classes[i + pos] =
2424 merge_classes (subclasses[i], classes[i + pos]);
2429 break;
2431 case ARRAY_TYPE:
2432 /* Arrays are handled as small records. */
2434 int num;
2435 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2436 TREE_TYPE (type), subclasses, bit_offset);
2437 if (!num)
2438 return 0;
2440 /* The partial classes are now full classes. */
2441 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2442 subclasses[0] = X86_64_SSE_CLASS;
2443 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2444 subclasses[0] = X86_64_INTEGER_CLASS;
2446 for (i = 0; i < words; i++)
2447 classes[i] = subclasses[i % num];
2449 break;
2451 case UNION_TYPE:
2452 case QUAL_UNION_TYPE:
2453 /* Unions are similar to RECORD_TYPE but offset is always 0.  */
2456 /* Unions are not derived. */
2457 gcc_assert (!TYPE_BINFO (type)
2458 || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
2459 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2461 if (TREE_CODE (field) == FIELD_DECL)
2463 int num;
2464 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2465 TREE_TYPE (field), subclasses,
2466 bit_offset);
2467 if (!num)
2468 return 0;
2469 for (i = 0; i < num; i++)
2470 classes[i] = merge_classes (subclasses[i], classes[i]);
2473 break;
2475 default:
2476 gcc_unreachable ();
2479 /* Final merger cleanup. */
2480 for (i = 0; i < words; i++)
2482 /* If one class is MEMORY, everything should be passed in
2483 memory. */
2484 if (classes[i] == X86_64_MEMORY_CLASS)
2485 return 0;
2487 /* The X86_64_SSEUP_CLASS should be always preceded by
2488 X86_64_SSE_CLASS. */
2489 if (classes[i] == X86_64_SSEUP_CLASS
2490 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2491 classes[i] = X86_64_SSE_CLASS;
2493 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2494 if (classes[i] == X86_64_X87UP_CLASS
2495 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2496 classes[i] = X86_64_SSE_CLASS;
2498 return words;
2501 /* Compute alignment needed. We align all types to natural boundaries with
2502 exception of XFmode that is aligned to 64bits. */
2503 if (mode != VOIDmode && mode != BLKmode)
2505 int mode_alignment = GET_MODE_BITSIZE (mode);
2507 if (mode == XFmode)
2508 mode_alignment = 128;
2509 else if (mode == XCmode)
2510 mode_alignment = 256;
2511 if (COMPLEX_MODE_P (mode))
2512 mode_alignment /= 2;
2513 /* Misaligned fields are always returned in memory. */
2514 if (bit_offset % mode_alignment)
2515 return 0;
2518 /* for V1xx modes, just use the base mode */
2519 if (VECTOR_MODE_P (mode)
2520 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
2521 mode = GET_MODE_INNER (mode);
2523 /* Classification of atomic types. */
2524 switch (mode)
2526 case DImode:
2527 case SImode:
2528 case HImode:
2529 case QImode:
2530 case CSImode:
2531 case CHImode:
2532 case CQImode:
2533 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2534 classes[0] = X86_64_INTEGERSI_CLASS;
2535 else
2536 classes[0] = X86_64_INTEGER_CLASS;
2537 return 1;
2538 case CDImode:
2539 case TImode:
2540 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2541 return 2;
2542 case CTImode:
2543 return 0;
2544 case SFmode:
2545 if (!(bit_offset % 64))
2546 classes[0] = X86_64_SSESF_CLASS;
2547 else
2548 classes[0] = X86_64_SSE_CLASS;
2549 return 1;
2550 case DFmode:
2551 classes[0] = X86_64_SSEDF_CLASS;
2552 return 1;
2553 case XFmode:
2554 classes[0] = X86_64_X87_CLASS;
2555 classes[1] = X86_64_X87UP_CLASS;
2556 return 2;
2557 case TFmode:
2558 classes[0] = X86_64_SSE_CLASS;
2559 classes[1] = X86_64_SSEUP_CLASS;
2560 return 2;
2561 case SCmode:
2562 classes[0] = X86_64_SSE_CLASS;
2563 return 1;
2564 case DCmode:
2565 classes[0] = X86_64_SSEDF_CLASS;
2566 classes[1] = X86_64_SSEDF_CLASS;
2567 return 2;
2568 case XCmode:
2569 classes[0] = X86_64_COMPLEX_X87_CLASS;
2570 return 1;
2571 case TCmode:
2572 /* This mode is larger than 16 bytes.  */
2573 return 0;
2574 case V4SFmode:
2575 case V4SImode:
2576 case V16QImode:
2577 case V8HImode:
2578 case V2DFmode:
2579 case V2DImode:
2580 classes[0] = X86_64_SSE_CLASS;
2581 classes[1] = X86_64_SSEUP_CLASS;
2582 return 2;
2583 case V2SFmode:
2584 case V2SImode:
2585 case V4HImode:
2586 case V8QImode:
2587 classes[0] = X86_64_SSE_CLASS;
2588 return 1;
2589 case BLKmode:
2590 case VOIDmode:
2591 return 0;
2592 default:
2593 gcc_assert (VECTOR_MODE_P (mode));
2595 if (bytes > 16)
2596 return 0;
2598 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
2600 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2601 classes[0] = X86_64_INTEGERSI_CLASS;
2602 else
2603 classes[0] = X86_64_INTEGER_CLASS;
2604 classes[1] = X86_64_INTEGER_CLASS;
2605 return 1 + (bytes > 8);
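/* Illustrative worked example (editorial note, not part of the GCC sources):
   classifying `struct { double d; int a; int b; }' (16 bytes) yields

     classes[0] = X86_64_SSEDF_CLASS     the double, first eightbyte
     classes[1] = X86_64_INTEGER_CLASS   a and b, second eightbyte

   and the function returns 2, so the argument travels in one SSE register
   and one integer register.  A 24-byte struct would return 0 and be passed
   on the stack instead.  */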
2609 /* Examine the argument and return the number of registers required in each
2610 class.  Return 0 iff the parameter should be passed in memory.  */
2611 static int
2612 examine_argument (enum machine_mode mode, tree type, int in_return,
2613 int *int_nregs, int *sse_nregs)
2615 enum x86_64_reg_class class[MAX_CLASSES];
2616 int n = classify_argument (mode, type, class, 0);
2618 *int_nregs = 0;
2619 *sse_nregs = 0;
2620 if (!n)
2621 return 0;
2622 for (n--; n >= 0; n--)
2623 switch (class[n])
2625 case X86_64_INTEGER_CLASS:
2626 case X86_64_INTEGERSI_CLASS:
2627 (*int_nregs)++;
2628 break;
2629 case X86_64_SSE_CLASS:
2630 case X86_64_SSESF_CLASS:
2631 case X86_64_SSEDF_CLASS:
2632 (*sse_nregs)++;
2633 break;
2634 case X86_64_NO_CLASS:
2635 case X86_64_SSEUP_CLASS:
2636 break;
2637 case X86_64_X87_CLASS:
2638 case X86_64_X87UP_CLASS:
2639 if (!in_return)
2640 return 0;
2641 break;
2642 case X86_64_COMPLEX_X87_CLASS:
2643 return in_return ? 2 : 0;
2644 case X86_64_MEMORY_CLASS:
2645 gcc_unreachable ();
2647 return 1;
2650 /* Construct container for the argument used by GCC interface. See
2651 FUNCTION_ARG for the detailed description. */
2653 static rtx
2654 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
2655 tree type, int in_return, int nintregs, int nsseregs,
2656 const int *intreg, int sse_regno)
2658 enum machine_mode tmpmode;
2659 int bytes =
2660 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2661 enum x86_64_reg_class class[MAX_CLASSES];
2662 int n;
2663 int i;
2664 int nexps = 0;
2665 int needed_sseregs, needed_intregs;
2666 rtx exp[MAX_CLASSES];
2667 rtx ret;
2669 n = classify_argument (mode, type, class, 0);
2670 if (TARGET_DEBUG_ARG)
2672 if (!n)
2673 fprintf (stderr, "Memory class\n");
2674 else
2676 fprintf (stderr, "Classes:");
2677 for (i = 0; i < n; i++)
2679 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2681 fprintf (stderr, "\n");
2684 if (!n)
2685 return NULL;
2686 if (!examine_argument (mode, type, in_return, &needed_intregs,
2687 &needed_sseregs))
2688 return NULL;
2689 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2690 return NULL;
2692 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2693 some less clueful developer tries to use floating-point anyway. */
2694 if (needed_sseregs && !TARGET_SSE)
2696 static bool issued_error;
2697 if (!issued_error)
2699 issued_error = true;
2700 if (in_return)
2701 error ("SSE register return with SSE disabled");
2702 else
2703 error ("SSE register argument with SSE disabled");
2705 return NULL;
2708 /* First construct simple cases. Avoid SCmode, since we want to use
2709 single register to pass this type. */
2710 if (n == 1 && mode != SCmode)
2711 switch (class[0])
2713 case X86_64_INTEGER_CLASS:
2714 case X86_64_INTEGERSI_CLASS:
2715 return gen_rtx_REG (mode, intreg[0]);
2716 case X86_64_SSE_CLASS:
2717 case X86_64_SSESF_CLASS:
2718 case X86_64_SSEDF_CLASS:
2719 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
2720 case X86_64_X87_CLASS:
2721 case X86_64_COMPLEX_X87_CLASS:
2722 return gen_rtx_REG (mode, FIRST_STACK_REG);
2723 case X86_64_NO_CLASS:
2724 /* Zero sized array, struct or class. */
2725 return NULL;
2726 default:
2727 gcc_unreachable ();
2729 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2730 && mode != BLKmode)
2731 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2732 if (n == 2
2733 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2734 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2735 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2736 && class[1] == X86_64_INTEGER_CLASS
2737 && (mode == CDImode || mode == TImode || mode == TFmode)
2738 && intreg[0] + 1 == intreg[1])
2739 return gen_rtx_REG (mode, intreg[0]);
2741 /* Otherwise figure out the entries of the PARALLEL. */
2742 for (i = 0; i < n; i++)
2744 switch (class[i])
2746 case X86_64_NO_CLASS:
2747 break;
2748 case X86_64_INTEGER_CLASS:
2749 case X86_64_INTEGERSI_CLASS:
2750 /* Merge TImodes on aligned occasions here too. */
2751 if (i * 8 + 8 > bytes)
2752 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2753 else if (class[i] == X86_64_INTEGERSI_CLASS)
2754 tmpmode = SImode;
2755 else
2756 tmpmode = DImode;
2757 /* We've requested a size (e.g. 24 bits) with no integer mode.  Use DImode.  */
2758 if (tmpmode == BLKmode)
2759 tmpmode = DImode;
2760 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2761 gen_rtx_REG (tmpmode, *intreg),
2762 GEN_INT (i*8));
2763 intreg++;
2764 break;
2765 case X86_64_SSESF_CLASS:
2766 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2767 gen_rtx_REG (SFmode,
2768 SSE_REGNO (sse_regno)),
2769 GEN_INT (i*8));
2770 sse_regno++;
2771 break;
2772 case X86_64_SSEDF_CLASS:
2773 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2774 gen_rtx_REG (DFmode,
2775 SSE_REGNO (sse_regno)),
2776 GEN_INT (i*8));
2777 sse_regno++;
2778 break;
2779 case X86_64_SSE_CLASS:
2780 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2781 tmpmode = TImode;
2782 else
2783 tmpmode = DImode;
2784 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2785 gen_rtx_REG (tmpmode,
2786 SSE_REGNO (sse_regno)),
2787 GEN_INT (i*8));
2788 if (tmpmode == TImode)
2789 i++;
2790 sse_regno++;
2791 break;
2792 default:
2793 gcc_unreachable ();
2797 /* Empty aligned struct, union or class. */
2798 if (nexps == 0)
2799 return NULL;
2801 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2802 for (i = 0; i < nexps; i++)
2803 XVECEXP (ret, 0, i) = exp [i];
2804 return ret;
2807 /* Update the data in CUM to advance over an argument
2808 of mode MODE and data type TYPE.
2809 (TYPE is null for libcalls where that information may not be available.) */
2811 void
2812 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2813 tree type, int named)
2815 int bytes =
2816 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2817 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2819 if (type)
2820 mode = type_natural_mode (type);
2822 if (TARGET_DEBUG_ARG)
2823 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
2824 "mode=%s, named=%d)\n\n",
2825 words, cum->words, cum->nregs, cum->sse_nregs,
2826 GET_MODE_NAME (mode), named);
2828 if (TARGET_64BIT)
2830 int int_nregs, sse_nregs;
2831 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2832 cum->words += words;
2833 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2835 cum->nregs -= int_nregs;
2836 cum->sse_nregs -= sse_nregs;
2837 cum->regno += int_nregs;
2838 cum->sse_regno += sse_nregs;
2840 else
2841 cum->words += words;
2843 else
2845 switch (mode)
2847 default:
2848 break;
2850 case BLKmode:
2851 if (bytes < 0)
2852 break;
2853 /* FALLTHRU */
2855 case DImode:
2856 case SImode:
2857 case HImode:
2858 case QImode:
2859 cum->words += words;
2860 cum->nregs -= words;
2861 cum->regno += words;
2863 if (cum->nregs <= 0)
2865 cum->nregs = 0;
2866 cum->regno = 0;
2868 break;
2870 case DFmode:
2871 if (cum->float_in_sse < 2)
2872 break;
2873 case SFmode:
2874 if (cum->float_in_sse < 1)
2875 break;
2876 /* FALLTHRU */
2878 case TImode:
2879 case V16QImode:
2880 case V8HImode:
2881 case V4SImode:
2882 case V2DImode:
2883 case V4SFmode:
2884 case V2DFmode:
2885 if (!type || !AGGREGATE_TYPE_P (type))
2887 cum->sse_words += words;
2888 cum->sse_nregs -= 1;
2889 cum->sse_regno += 1;
2890 if (cum->sse_nregs <= 0)
2892 cum->sse_nregs = 0;
2893 cum->sse_regno = 0;
2896 break;
2898 case V8QImode:
2899 case V4HImode:
2900 case V2SImode:
2901 case V2SFmode:
2902 if (!type || !AGGREGATE_TYPE_P (type))
2904 cum->mmx_words += words;
2905 cum->mmx_nregs -= 1;
2906 cum->mmx_regno += 1;
2907 if (cum->mmx_nregs <= 0)
2909 cum->mmx_nregs = 0;
2910 cum->mmx_regno = 0;
2913 break;
2918 /* Define where to put the arguments to a function.
2919 Value is zero to push the argument on the stack,
2920 or a hard register in which to store the argument.
2922 MODE is the argument's machine mode.
2923 TYPE is the data type of the argument (as a tree).
2924 This is null for libcalls where that information may
2925 not be available.
2926 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2927 the preceding args and about the function being called.
2928 NAMED is nonzero if this argument is a named parameter
2929 (otherwise it is an extra parameter matching an ellipsis). */
2932 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
2933 tree type, int named)
2935 enum machine_mode mode = orig_mode;
2936 rtx ret = NULL_RTX;
2937 int bytes =
2938 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2939 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2940 static bool warnedsse, warnedmmx;
2942 /* To simplify the code below, represent vector types with a vector mode
2943 even if MMX/SSE are not active. */
2944 if (type && TREE_CODE (type) == VECTOR_TYPE)
2945 mode = type_natural_mode (type);
2947 /* Handle a hidden AL argument containing number of registers for varargs
2948 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2949 any AL settings. */
2950 if (mode == VOIDmode)
2952 if (TARGET_64BIT)
2953 return GEN_INT (cum->maybe_vaarg
2954 ? (cum->sse_nregs < 0
2955 ? SSE_REGPARM_MAX
2956 : cum->sse_regno)
2957 : -1);
2958 else
2959 return constm1_rtx;
2961 if (TARGET_64BIT)
2962 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
2963 cum->sse_nregs,
2964 &x86_64_int_parameter_registers [cum->regno],
2965 cum->sse_regno);
2966 else
2967 switch (mode)
2969 /* For now, pass fp/complex values on the stack. */
2970 default:
2971 break;
2973 case BLKmode:
2974 if (bytes < 0)
2975 break;
2976 /* FALLTHRU */
2977 case DImode:
2978 case SImode:
2979 case HImode:
2980 case QImode:
2981 if (words <= cum->nregs)
2983 int regno = cum->regno;
2985 /* Fastcall allocates the first two DWORD (SImode) or
2986 smaller arguments to ECX and EDX. */
2987 if (cum->fastcall)
2989 if (mode == BLKmode || mode == DImode)
2990 break;
2992 /* ECX not EAX is the first allocated register. */
2993 if (regno == 0)
2994 regno = 2;
2996 ret = gen_rtx_REG (mode, regno);
2998 break;
2999 case DFmode:
3000 if (cum->float_in_sse < 2)
3001 break;
3002 case SFmode:
3003 if (cum->float_in_sse < 1)
3004 break;
3005 /* FALLTHRU */
3006 case TImode:
3007 case V16QImode:
3008 case V8HImode:
3009 case V4SImode:
3010 case V2DImode:
3011 case V4SFmode:
3012 case V2DFmode:
3013 if (!type || !AGGREGATE_TYPE_P (type))
3015 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3017 warnedsse = true;
3018 warning (0, "SSE vector argument without SSE enabled "
3019 "changes the ABI");
3021 if (cum->sse_nregs)
3022 ret = gen_reg_or_parallel (mode, orig_mode,
3023 cum->sse_regno + FIRST_SSE_REG);
3025 break;
3026 case V8QImode:
3027 case V4HImode:
3028 case V2SImode:
3029 case V2SFmode:
3030 if (!type || !AGGREGATE_TYPE_P (type))
3032 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3034 warnedmmx = true;
3035 warning (0, "MMX vector argument without MMX enabled "
3036 "changes the ABI");
3038 if (cum->mmx_nregs)
3039 ret = gen_reg_or_parallel (mode, orig_mode,
3040 cum->mmx_regno + FIRST_MMX_REG);
3042 break;
3045 if (TARGET_DEBUG_ARG)
3047 fprintf (stderr,
3048 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3049 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3051 if (ret)
3052 print_simple_rtl (stderr, ret);
3053 else
3054 fprintf (stderr, ", stack");
3056 fprintf (stderr, " )\n");
3059 return ret;
3062 /* A C expression that indicates when an argument must be passed by
3063 reference. If nonzero for an argument, a copy of that argument is
3064 made in memory and a pointer to the argument is passed instead of
3065 the argument itself. The pointer is passed in whatever way is
3066 appropriate for passing a pointer to that type. */
3068 static bool
3069 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3070 enum machine_mode mode ATTRIBUTE_UNUSED,
3071 tree type, bool named ATTRIBUTE_UNUSED)
3073 if (!TARGET_64BIT)
3074 return 0;
3076 if (type && int_size_in_bytes (type) == -1)
3078 if (TARGET_DEBUG_ARG)
3079 fprintf (stderr, "function_arg_pass_by_reference\n");
3080 return 1;
3083 return 0;
3086 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
3087 ABI. Only called if TARGET_SSE. */
3088 static bool
3089 contains_128bit_aligned_vector_p (tree type)
3091 enum machine_mode mode = TYPE_MODE (type);
3092 if (SSE_REG_MODE_P (mode)
3093 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3094 return true;
3095 if (TYPE_ALIGN (type) < 128)
3096 return false;
3098 if (AGGREGATE_TYPE_P (type))
3100 /* Walk the aggregates recursively. */
3101 switch (TREE_CODE (type))
3103 case RECORD_TYPE:
3104 case UNION_TYPE:
3105 case QUAL_UNION_TYPE:
3107 tree field;
3109 if (TYPE_BINFO (type))
3111 tree binfo, base_binfo;
3112 int i;
3114 for (binfo = TYPE_BINFO (type), i = 0;
3115 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
3116 if (contains_128bit_aligned_vector_p
3117 (BINFO_TYPE (base_binfo)))
3118 return true;
3120 /* And now merge the fields of structure. */
3121 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3123 if (TREE_CODE (field) == FIELD_DECL
3124 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3125 return true;
3127 break;
3130 case ARRAY_TYPE:
3131 /* Just for use if some languages pass arrays by value.  */
3132 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3133 return true;
3135 default:
3136 gcc_unreachable ();
3139 return false;
3142 /* Gives the alignment boundary, in bits, of an argument with the
3143 specified mode and type. */
3146 ix86_function_arg_boundary (enum machine_mode mode, tree type)
3148 int align;
3149 if (type)
3150 align = TYPE_ALIGN (type);
3151 else
3152 align = GET_MODE_ALIGNMENT (mode);
3153 if (align < PARM_BOUNDARY)
3154 align = PARM_BOUNDARY;
3155 if (!TARGET_64BIT)
3157 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3158 make an exception for SSE modes since these require 128bit
3159 alignment.
3161 The handling here differs from field_alignment. ICC aligns MMX
3162 arguments to 4 byte boundaries, while structure fields are aligned
3163 to 8 byte boundaries. */
3164 if (!TARGET_SSE)
3165 align = PARM_BOUNDARY;
3166 else if (!type)
3168 if (!SSE_REG_MODE_P (mode))
3169 align = PARM_BOUNDARY;
3171 else
3173 if (!contains_128bit_aligned_vector_p (type))
3174 align = PARM_BOUNDARY;
3177 if (align > 128)
3178 align = 128;
3179 return align;
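/* Illustrative example (editorial sketch, not part of the GCC sources):
   on ia32 with SSE enabled a 16-byte vector argument keeps its 128-bit
   boundary, while other arguments stay at PARM_BOUNDARY:

     typedef float v4sf __attribute__((vector_size (16)));
     void takes_vec (int a, v4sf v);    a at 32-bit alignment, v at 128-bit

   With -mno-sse every argument falls back to PARM_BOUNDARY, matching the
   plain i386 ABI.  */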
3182 /* Return true if REGNO is a possible register number for a function value. */
3183 bool
3184 ix86_function_value_regno_p (int regno)
3186 if (regno == 0
3187 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3188 || (regno == FIRST_SSE_REG && TARGET_SSE))
3189 return true;
3191 if (!TARGET_64BIT
3192 && (regno == FIRST_MMX_REG && TARGET_MMX))
3193 return true;
3195 return false;
3198 /* Define how to find the value returned by a function.
3199 VALTYPE is the data type of the value (as a tree).
3200 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3201 otherwise, FUNC is 0. */
3203 ix86_function_value (tree valtype, tree fntype_or_decl,
3204 bool outgoing ATTRIBUTE_UNUSED)
3206 enum machine_mode natmode = type_natural_mode (valtype);
3208 if (TARGET_64BIT)
3210 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3211 1, REGPARM_MAX, SSE_REGPARM_MAX,
3212 x86_64_int_return_registers, 0);
3213 /* For zero sized structures, construct_container returns NULL, but we
3214 need to keep the rest of the compiler happy by returning a meaningful value.  */
3215 if (!ret)
3216 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3217 return ret;
3219 else
3221 tree fn = NULL_TREE, fntype;
3222 if (fntype_or_decl
3223 && DECL_P (fntype_or_decl))
3224 fn = fntype_or_decl;
3225 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
3226 return gen_rtx_REG (TYPE_MODE (valtype),
3227 ix86_value_regno (natmode, fn, fntype));
3231 /* Return nonzero iff type is returned in memory.  */
3233 ix86_return_in_memory (tree type)
3235 int needed_intregs, needed_sseregs, size;
3236 enum machine_mode mode = type_natural_mode (type);
3238 if (TARGET_64BIT)
3239 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3241 if (mode == BLKmode)
3242 return 1;
3244 size = int_size_in_bytes (type);
3246 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3247 return 0;
3249 if (VECTOR_MODE_P (mode) || mode == TImode)
3251 /* User-created vectors small enough to fit in EAX. */
3252 if (size < 8)
3253 return 0;
3255 /* MMX/3dNow values are returned in MM0,
3256 except when it doesn't exist.  */
3257 if (size == 8)
3258 return (TARGET_MMX ? 0 : 1);
3260 /* SSE values are returned in XMM0, except when it doesn't exist. */
3261 if (size == 16)
3262 return (TARGET_SSE ? 0 : 1);
3265 if (mode == XFmode)
3266 return 0;
3268 if (size > 12)
3269 return 1;
3270 return 0;
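/* Illustrative example (editorial sketch, not part of the GCC sources) of
   the 32-bit decisions above:

     typedef int v2si __attribute__((vector_size (8)));    in MM0 with MMX,
                                                            in memory otherwise
     typedef int v4si __attribute__((vector_size (16)));   in XMM0 with SSE,
                                                            in memory otherwise
     long double                                            XFmode, never in memory

   On 64-bit targets the x86-64 classification via examine_argument decides
   instead.  */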
3273 /* When returning SSE vector types, we have a choice of either
3274 (1) being abi incompatible with a -march switch, or
3275 (2) generating an error.
3276 Given no good solution, I think the safest thing is one warning.
3277 The user won't be able to use -Werror, but....
3279 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3280 called in response to actually generating a caller or callee that
3281 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3282 via aggregate_value_p for general type probing from tree-ssa. */
3284 static rtx
3285 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3287 static bool warnedsse, warnedmmx;
3289 if (type)
3291 /* Look at the return type of the function, not the function type. */
3292 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3294 if (!TARGET_SSE && !warnedsse)
3296 if (mode == TImode
3297 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3299 warnedsse = true;
3300 warning (0, "SSE vector return without SSE enabled "
3301 "changes the ABI");
3305 if (!TARGET_MMX && !warnedmmx)
3307 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3309 warnedmmx = true;
3310 warning (0, "MMX vector return without MMX enabled "
3311 "changes the ABI");
3316 return NULL;
3319 /* Define how to find the value returned by a library function
3320 assuming the value has mode MODE. */
3322 ix86_libcall_value (enum machine_mode mode)
3324 if (TARGET_64BIT)
3326 switch (mode)
3328 case SFmode:
3329 case SCmode:
3330 case DFmode:
3331 case DCmode:
3332 case TFmode:
3333 return gen_rtx_REG (mode, FIRST_SSE_REG);
3334 case XFmode:
3335 case XCmode:
3336 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3337 case TCmode:
3338 return NULL;
3339 default:
3340 return gen_rtx_REG (mode, 0);
3343 else
3344 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
3347 /* Given a mode, return the register to use for a return value. */
3349 static int
3350 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
3352 gcc_assert (!TARGET_64BIT);
3354 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
3355 we prevent this case when mmx is not available. */
3356 if ((VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8))
3357 return FIRST_MMX_REG;
3359 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3360 we prevent this case when sse is not available. */
3361 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3362 return FIRST_SSE_REG;
3364 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
3365 if (GET_MODE_CLASS (mode) != MODE_FLOAT || !TARGET_FLOAT_RETURNS_IN_80387)
3366 return 0;
3368 /* Floating point return values in %st(0), except for local functions when
3369 SSE math is enabled or for functions with sseregparm attribute. */
3370 if ((func || fntype)
3371 && (mode == SFmode || mode == DFmode))
3373 int sse_level = ix86_function_sseregparm (fntype, func);
3374 if ((sse_level >= 1 && mode == SFmode)
3375 || (sse_level == 2 && mode == DFmode))
3376 return FIRST_SSE_REG;
3379 return FIRST_FLOAT_REG;
3382 /* Create the va_list data type. */
3384 static tree
3385 ix86_build_builtin_va_list (void)
3387 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3389 /* For i386 we use plain pointer to argument area. */
3390 if (!TARGET_64BIT)
3391 return build_pointer_type (char_type_node);
3393 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3394 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3396 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3397 unsigned_type_node);
3398 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3399 unsigned_type_node);
3400 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3401 ptr_type_node);
3402 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3403 ptr_type_node);
3405 va_list_gpr_counter_field = f_gpr;
3406 va_list_fpr_counter_field = f_fpr;
3408 DECL_FIELD_CONTEXT (f_gpr) = record;
3409 DECL_FIELD_CONTEXT (f_fpr) = record;
3410 DECL_FIELD_CONTEXT (f_ovf) = record;
3411 DECL_FIELD_CONTEXT (f_sav) = record;
3413 TREE_CHAIN (record) = type_decl;
3414 TYPE_NAME (record) = type_decl;
3415 TYPE_FIELDS (record) = f_gpr;
3416 TREE_CHAIN (f_gpr) = f_fpr;
3417 TREE_CHAIN (f_fpr) = f_ovf;
3418 TREE_CHAIN (f_ovf) = f_sav;
3420 layout_type (record);
3422 /* The correct type is an array type of one element. */
3423 return build_array_type (record, build_index_type (size_zero_node));
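/* Illustrative example (editorial note, not part of the GCC sources): the
   record built above corresponds to the x86-64 ABI va_list layout, roughly

     typedef struct __va_list_tag
     {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1];

   while 32-bit targets keep va_list a plain `char *'.  */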
3426 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3428 static void
3429 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3430 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3431 int no_rtl)
3433 CUMULATIVE_ARGS next_cum;
3434 rtx save_area = NULL_RTX, mem;
3435 rtx label;
3436 rtx label_ref;
3437 rtx tmp_reg;
3438 rtx nsse_reg;
3439 int set;
3440 tree fntype;
3441 int stdarg_p;
3442 int i;
3444 if (!TARGET_64BIT)
3445 return;
3447 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
3448 return;
3450 /* Indicate to allocate space on the stack for varargs save area. */
3451 ix86_save_varrargs_registers = 1;
3453 cfun->stack_alignment_needed = 128;
3455 fntype = TREE_TYPE (current_function_decl);
3456 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3457 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3458 != void_type_node));
3460 /* For varargs, we do not want to skip the dummy va_dcl argument.
3461 For stdargs, we do want to skip the last named argument. */
3462 next_cum = *cum;
3463 if (stdarg_p)
3464 function_arg_advance (&next_cum, mode, type, 1);
3466 if (!no_rtl)
3467 save_area = frame_pointer_rtx;
3469 set = get_varargs_alias_set ();
3471 for (i = next_cum.regno;
3472 i < ix86_regparm
3473 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
3474 i++)
3476 mem = gen_rtx_MEM (Pmode,
3477 plus_constant (save_area, i * UNITS_PER_WORD));
3478 set_mem_alias_set (mem, set);
3479 emit_move_insn (mem, gen_rtx_REG (Pmode,
3480 x86_64_int_parameter_registers[i]));
3483 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
3485       /* Now emit code to save SSE registers.  The AX parameter contains the
3486 	 number of SSE parameter registers used to call this function.  We use
3487 	 the sse_prologue_save insn template, which produces a computed jump
3488 	 across the SSE saves.  We need some preparation work to get this working.  */
3490 label = gen_label_rtx ();
3491 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3493 /* Compute address to jump to :
3494 label - 5*eax + nnamed_sse_arguments*5 */
3495 tmp_reg = gen_reg_rtx (Pmode);
3496 nsse_reg = gen_reg_rtx (Pmode);
3497 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3498 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3499 gen_rtx_MULT (Pmode, nsse_reg,
3500 GEN_INT (4))));
3501 if (next_cum.sse_regno)
3502 emit_move_insn
3503 (nsse_reg,
3504 gen_rtx_CONST (DImode,
3505 gen_rtx_PLUS (DImode,
3506 label_ref,
3507 GEN_INT (next_cum.sse_regno * 4))));
3508 else
3509 emit_move_insn (nsse_reg, label_ref);
3510 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3512       /* Compute the address of the memory block we save into.  We always use a
3513 	 pointer pointing 127 bytes past the first byte to store - this is needed
3514 	 to keep the instruction size limited to 4 bytes.  */
3515 tmp_reg = gen_reg_rtx (Pmode);
3516 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3517 plus_constant (save_area,
3518 8 * REGPARM_MAX + 127)));
3519 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3520 set_mem_alias_set (mem, set);
3521 set_mem_align (mem, BITS_PER_WORD);
3523 /* And finally do the dirty job! */
3524 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3525 GEN_INT (next_cum.sse_regno), label));
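/* Sketch of the register save area set up above, assuming the usual
   64-bit values REGPARM_MAX == 6 and SSE_REGPARM_MAX == 8 (an assumption
   for illustration only; the code itself only uses the macros):

       save_area +   0 .. +  47   six 8-byte slots for the GP argument
                                  registers rdi, rsi, rdx, rcx, r8, r9
       save_area +  48 .. + 175   eight 16-byte slots for xmm0 - xmm7

   The gp_offset and fp_offset counters of the va_list index into this
   block, which is why ix86_va_start below seeds fp_offset with
   n_fpr * 16 + 8 * REGPARM_MAX.  */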
3530 /* Implement va_start. */
3532 void
3533 ix86_va_start (tree valist, rtx nextarg)
3535 HOST_WIDE_INT words, n_gpr, n_fpr;
3536 tree f_gpr, f_fpr, f_ovf, f_sav;
3537 tree gpr, fpr, ovf, sav, t;
3539 /* Only 64bit target needs something special. */
3540 if (!TARGET_64BIT)
3542 std_expand_builtin_va_start (valist, nextarg);
3543 return;
3546 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3547 f_fpr = TREE_CHAIN (f_gpr);
3548 f_ovf = TREE_CHAIN (f_fpr);
3549 f_sav = TREE_CHAIN (f_ovf);
3551 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3552 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3553 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3554 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3555 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3557 /* Count number of gp and fp argument registers used. */
3558 words = current_function_args_info.words;
3559 n_gpr = current_function_args_info.regno;
3560 n_fpr = current_function_args_info.sse_regno;
3562 if (TARGET_DEBUG_ARG)
3563 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3564 (int) words, (int) n_gpr, (int) n_fpr);
3566 if (cfun->va_list_gpr_size)
3568 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3569 build_int_cst (NULL_TREE, n_gpr * 8));
3570 TREE_SIDE_EFFECTS (t) = 1;
3571 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3574 if (cfun->va_list_fpr_size)
3576 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3577 build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
3578 TREE_SIDE_EFFECTS (t) = 1;
3579 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3582 /* Find the overflow area. */
3583 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3584 if (words != 0)
3585 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3586 build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
3587 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3588 TREE_SIDE_EFFECTS (t) = 1;
3589 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3591 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
3593       /* Find the register save area.
3594 	 The prologue of the function saves it right above the stack frame.  */
3595 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3596 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3597 TREE_SIDE_EFFECTS (t) = 1;
3598 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
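/* Worked example for the expansion above (illustrative only): for
   int f (const char *fmt, ...) with no named FP arguments,
   current_function_args_info gives n_gpr == 1 (just fmt in %rdi) and
   n_fpr == 0, so va_start stores

       gp_offset         = 1 * 8 = 8
       fp_offset         = 0 * 16 + 8 * REGPARM_MAX
       overflow_arg_area = address of the stack-passed arguments
       reg_save_area     = base of the block saved by the prologue.  */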
3602 /* Implement va_arg. */
3604 tree
3605 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3607 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3608 tree f_gpr, f_fpr, f_ovf, f_sav;
3609 tree gpr, fpr, ovf, sav, t;
3610 int size, rsize;
3611 tree lab_false, lab_over = NULL_TREE;
3612 tree addr, t2;
3613 rtx container;
3614 int indirect_p = 0;
3615 tree ptrtype;
3616 enum machine_mode nat_mode;
3618 /* Only 64bit target needs something special. */
3619 if (!TARGET_64BIT)
3620 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3622 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3623 f_fpr = TREE_CHAIN (f_gpr);
3624 f_ovf = TREE_CHAIN (f_fpr);
3625 f_sav = TREE_CHAIN (f_ovf);
3627 valist = build_va_arg_indirect_ref (valist);
3628 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3629 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3630 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3631 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3633 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
3634 if (indirect_p)
3635 type = build_pointer_type (type);
3636 size = int_size_in_bytes (type);
3637 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3639 nat_mode = type_natural_mode (type);
3640 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
3641 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3643 /* Pull the value out of the saved registers. */
3645 addr = create_tmp_var (ptr_type_node, "addr");
3646 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3648 if (container)
3650 int needed_intregs, needed_sseregs;
3651 bool need_temp;
3652 tree int_addr, sse_addr;
3654 lab_false = create_artificial_label ();
3655 lab_over = create_artificial_label ();
3657 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
3659 need_temp = (!REG_P (container)
3660 && ((needed_intregs && TYPE_ALIGN (type) > 64)
3661 || TYPE_ALIGN (type) > 128));
3663       /* In case we are passing a structure, verify that it is a consecutive block
3664          in the register save area.  If not, we need to do moves.  */
3665 if (!need_temp && !REG_P (container))
3667 /* Verify that all registers are strictly consecutive */
3668 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3670 int i;
3672 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3674 rtx slot = XVECEXP (container, 0, i);
3675 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3676 || INTVAL (XEXP (slot, 1)) != i * 16)
3677 need_temp = 1;
3680 else
3682 int i;
3684 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3686 rtx slot = XVECEXP (container, 0, i);
3687 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3688 || INTVAL (XEXP (slot, 1)) != i * 8)
3689 need_temp = 1;
3693 if (!need_temp)
3695 int_addr = addr;
3696 sse_addr = addr;
3698 else
3700 int_addr = create_tmp_var (ptr_type_node, "int_addr");
3701 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3702 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3703 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3706 /* First ensure that we fit completely in registers. */
3707 if (needed_intregs)
3709 t = build_int_cst (TREE_TYPE (gpr),
3710 (REGPARM_MAX - needed_intregs + 1) * 8);
3711 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
3712 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3713 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3714 gimplify_and_add (t, pre_p);
3716 if (needed_sseregs)
3718 t = build_int_cst (TREE_TYPE (fpr),
3719 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
3720 + REGPARM_MAX * 8);
3721 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
3722 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3723 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3724 gimplify_and_add (t, pre_p);
3727 /* Compute index to start of area used for integer regs. */
3728 if (needed_intregs)
3730 /* int_addr = gpr + sav; */
3731 t = fold_convert (ptr_type_node, gpr);
3732 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
3733 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
3734 gimplify_and_add (t, pre_p);
3736 if (needed_sseregs)
3738 /* sse_addr = fpr + sav; */
3739 t = fold_convert (ptr_type_node, fpr);
3740 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
3741 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
3742 gimplify_and_add (t, pre_p);
3744 if (need_temp)
3746 int i;
3747 tree temp = create_tmp_var (type, "va_arg_tmp");
3749 /* addr = &temp; */
3750 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
3751 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
3752 gimplify_and_add (t, pre_p);
3754 for (i = 0; i < XVECLEN (container, 0); i++)
3756 rtx slot = XVECEXP (container, 0, i);
3757 rtx reg = XEXP (slot, 0);
3758 enum machine_mode mode = GET_MODE (reg);
3759 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
3760 tree addr_type = build_pointer_type (piece_type);
3761 tree src_addr, src;
3762 int src_offset;
3763 tree dest_addr, dest;
3765 if (SSE_REGNO_P (REGNO (reg)))
3767 src_addr = sse_addr;
3768 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3770 else
3772 src_addr = int_addr;
3773 src_offset = REGNO (reg) * 8;
3775 src_addr = fold_convert (addr_type, src_addr);
3776 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
3777 size_int (src_offset)));
3778 src = build_va_arg_indirect_ref (src_addr);
3780 dest_addr = fold_convert (addr_type, addr);
3781 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
3782 size_int (INTVAL (XEXP (slot, 1)))));
3783 dest = build_va_arg_indirect_ref (dest_addr);
3785 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
3786 gimplify_and_add (t, pre_p);
3790 if (needed_intregs)
3792 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3793 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
3794 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3795 gimplify_and_add (t, pre_p);
3797 if (needed_sseregs)
3799 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3800 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
3801 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3802 gimplify_and_add (t, pre_p);
3805 t = build1 (GOTO_EXPR, void_type_node, lab_over);
3806 gimplify_and_add (t, pre_p);
3808 t = build1 (LABEL_EXPR, void_type_node, lab_false);
3809 append_to_statement_list (t, pre_p);
3812 /* ... otherwise out of the overflow area. */
3814 /* Care for on-stack alignment if needed. */
3815 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3816 t = ovf;
3817 else
3819 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3820 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf,
3821 build_int_cst (TREE_TYPE (ovf), align - 1));
3822 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3823 build_int_cst (TREE_TYPE (t), -align));
3825 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
3827 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
3828 gimplify_and_add (t2, pre_p);
3830 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
3831 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
3832 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3833 gimplify_and_add (t, pre_p);
3835 if (container)
3837 t = build1 (LABEL_EXPR, void_type_node, lab_over);
3838 append_to_statement_list (t, pre_p);
3841 ptrtype = build_pointer_type (type);
3842 addr = fold_convert (ptrtype, addr);
3844 if (indirect_p)
3845 addr = build_va_arg_indirect_ref (addr);
3846 return build_va_arg_indirect_ref (addr);
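/* For a plain integer argument the expansion above amounts to roughly
   the following pseudo-GIMPLE, assuming REGPARM_MAX == 6 (names and
   exact form are an illustrative sketch only):

       if (gp_offset >= 6 * 8) goto lab_false;
       addr = reg_save_area + gp_offset;
       gp_offset = gp_offset + 8;
       goto lab_over;
     lab_false:
       addr = overflow_arg_area;        after any needed alignment
       overflow_arg_area = addr + 8;
     lab_over:
       result = *(int *) addr;  */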
3849 /* Return nonzero if OPNUM's MEM should be matched
3850 in movabs* patterns. */
3853 ix86_check_movabs (rtx insn, int opnum)
3855 rtx set, mem;
3857 set = PATTERN (insn);
3858 if (GET_CODE (set) == PARALLEL)
3859 set = XVECEXP (set, 0, 0);
3860 gcc_assert (GET_CODE (set) == SET);
3861 mem = XEXP (set, opnum);
3862 while (GET_CODE (mem) == SUBREG)
3863 mem = SUBREG_REG (mem);
3864 gcc_assert (GET_CODE (mem) == MEM);
3865 return (volatile_ok || !MEM_VOLATILE_P (mem));
3868 /* Initialize the table of extra 80387 mathematical constants. */
3870 static void
3871 init_ext_80387_constants (void)
3873 static const char * cst[5] =
3875 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
3876 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
3877 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
3878 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
3879 "3.1415926535897932385128089594061862044", /* 4: fldpi */
3881 int i;
3883 for (i = 0; i < 5; i++)
3885 real_from_string (&ext_80387_constants_table[i], cst[i]);
3886 /* Ensure each constant is rounded to XFmode precision. */
3887 real_convert (&ext_80387_constants_table[i],
3888 XFmode, &ext_80387_constants_table[i]);
3891 ext_80387_constants_init = 1;
3894 /* Return true if the constant is something that can be loaded with
3895 a special instruction. */
3898 standard_80387_constant_p (rtx x)
3900 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3901 return -1;
3903 if (x == CONST0_RTX (GET_MODE (x)))
3904 return 1;
3905 if (x == CONST1_RTX (GET_MODE (x)))
3906 return 2;
3908 /* For XFmode constants, try to find a special 80387 instruction when
3909 optimizing for size or on those CPUs that benefit from them. */
3910 if (GET_MODE (x) == XFmode
3911 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
3913 REAL_VALUE_TYPE r;
3914 int i;
3916 if (! ext_80387_constants_init)
3917 init_ext_80387_constants ();
3919 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3920 for (i = 0; i < 5; i++)
3921 if (real_identical (&r, &ext_80387_constants_table[i]))
3922 return i + 3;
3925 return 0;
3928 /* Return the opcode of the special instruction to be used to load
3929 the constant X. */
3931 const char *
3932 standard_80387_constant_opcode (rtx x)
3934 switch (standard_80387_constant_p (x))
3936 case 1:
3937 return "fldz";
3938 case 2:
3939 return "fld1";
3940 case 3:
3941 return "fldlg2";
3942 case 4:
3943 return "fldln2";
3944 case 5:
3945 return "fldl2e";
3946 case 6:
3947 return "fldl2t";
3948 case 7:
3949 return "fldpi";
3950 default:
3951 gcc_unreachable ();
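/* Summary of the encoding shared by standard_80387_constant_p and
   standard_80387_constant_opcode above:

       return value    constant        opcode
            1           +0.0            fldz
            2           +1.0            fld1
            3           log10(2)        fldlg2
            4           ln(2)           fldln2
            5           log2(e)         fldl2e
            6           log2(10)        fldl2t
            7           pi              fldpi  */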
3955 /* Return the CONST_DOUBLE representing the 80387 constant that is
3956 loaded by the specified special instruction. The argument IDX
3957 matches the return value from standard_80387_constant_p. */
3960 standard_80387_constant_rtx (int idx)
3962 int i;
3964 if (! ext_80387_constants_init)
3965 init_ext_80387_constants ();
3967 switch (idx)
3969 case 3:
3970 case 4:
3971 case 5:
3972 case 6:
3973 case 7:
3974 i = idx - 3;
3975 break;
3977 default:
3978 gcc_unreachable ();
3981 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
3982 XFmode);
3985 /* Return 1 if X is an FP constant we can load into an SSE register without using memory.  */
3988 standard_sse_constant_p (rtx x)
3990 if (x == const0_rtx)
3991 return 1;
3992 return (x == CONST0_RTX (GET_MODE (x)));
3995 /* Returns 1 if OP contains a symbol reference */
3998 symbolic_reference_mentioned_p (rtx op)
4000 const char *fmt;
4001 int i;
4003 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4004 return 1;
4006 fmt = GET_RTX_FORMAT (GET_CODE (op));
4007 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4009 if (fmt[i] == 'E')
4011 int j;
4013 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4014 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4015 return 1;
4018 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4019 return 1;
4022 return 0;
4025 /* Return 1 if it is appropriate to emit `ret' instructions in the
4026 body of a function. Do this only if the epilogue is simple, needing a
4027 couple of insns. Prior to reloading, we can't tell how many registers
4028 must be saved, so return 0 then. Return 0 if there is no frame
4029 marker to de-allocate. */
4032 ix86_can_use_return_insn_p (void)
4034 struct ix86_frame frame;
4036 if (! reload_completed || frame_pointer_needed)
4037 return 0;
4039   /* Don't allow more than 32768 bytes of arguments to be popped, since
4040      that's all we handle with one instruction here.  */
4041 if (current_function_pops_args
4042 && current_function_args_size >= 32768)
4043 return 0;
4045 ix86_compute_frame_layout (&frame);
4046 return frame.to_allocate == 0 && frame.nregs == 0;
4049 /* Value should be nonzero if functions must have frame pointers.
4050 Zero means the frame pointer need not be set up (and parms may
4051 be accessed via the stack pointer) in functions that seem suitable. */
4054 ix86_frame_pointer_required (void)
4056 /* If we accessed previous frames, then the generated code expects
4057 to be able to access the saved ebp value in our frame. */
4058 if (cfun->machine->accesses_prev_frame)
4059 return 1;
4061   /* Several x86 OSes need a frame pointer for other reasons,
4062 usually pertaining to setjmp. */
4063 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4064 return 1;
4066 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4067 the frame pointer by default. Turn it back on now if we've not
4068 got a leaf function. */
4069 if (TARGET_OMIT_LEAF_FRAME_POINTER
4070 && (!current_function_is_leaf))
4071 return 1;
4073 if (current_function_profile)
4074 return 1;
4076 return 0;
4079 /* Record that the current function accesses previous call frames. */
4081 void
4082 ix86_setup_frame_addresses (void)
4084 cfun->machine->accesses_prev_frame = 1;
4087 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4088 # define USE_HIDDEN_LINKONCE 1
4089 #else
4090 # define USE_HIDDEN_LINKONCE 0
4091 #endif
4093 static int pic_labels_used;
4095 /* Fills in the label name that should be used for a pc thunk for
4096 the given register. */
4098 static void
4099 get_pc_thunk_name (char name[32], unsigned int regno)
4101 if (USE_HIDDEN_LINKONCE)
4102 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4103 else
4104 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4108 /* This function generates code for -fpic that loads %ebx with
4109 the return address of the caller and then returns. */
4111 void
4112 ix86_file_end (void)
4114 rtx xops[2];
4115 int regno;
4117 for (regno = 0; regno < 8; ++regno)
4119 char name[32];
4121 if (! ((pic_labels_used >> regno) & 1))
4122 continue;
4124 get_pc_thunk_name (name, regno);
4126 if (USE_HIDDEN_LINKONCE)
4128 tree decl;
4130 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4131 error_mark_node);
4132 TREE_PUBLIC (decl) = 1;
4133 TREE_STATIC (decl) = 1;
4134 DECL_ONE_ONLY (decl) = 1;
4136 (*targetm.asm_out.unique_section) (decl, 0);
4137 named_section (decl, NULL, 0);
4139 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4140 fputs ("\t.hidden\t", asm_out_file);
4141 assemble_name (asm_out_file, name);
4142 fputc ('\n', asm_out_file);
4143 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4145 else
4147 text_section ();
4148 ASM_OUTPUT_LABEL (asm_out_file, name);
4151 xops[0] = gen_rtx_REG (SImode, regno);
4152 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4153 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4154 output_asm_insn ("ret", xops);
4157 if (NEED_INDICATE_EXEC_STACK)
4158 file_end_indicate_exec_stack ();
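/* Each thunk emitted above is just two instructions; for the %ebx
   flavor the assembly output looks like this (an illustrative sketch):

       __i686.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret

   i.e. it copies its own return address - the address of the
   instruction following the call - into the PIC register and
   returns.  */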
4161 /* Emit code for the SET_GOT patterns. */
4163 const char *
4164 output_set_got (rtx dest)
4166 rtx xops[3];
4168 xops[0] = dest;
4169 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4171 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4173 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4175 if (!flag_pic)
4176 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4177 else
4178 output_asm_insn ("call\t%a2", xops);
4180 #if TARGET_MACHO
4181 /* Output the "canonical" label name ("Lxx$pb") here too. This
4182 is what will be referred to by the Mach-O PIC subsystem. */
4183 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4184 #endif
4185 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4186 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4188 if (flag_pic)
4189 output_asm_insn ("pop{l}\t%0", xops);
4191 else
4193 char name[32];
4194 get_pc_thunk_name (name, REGNO (dest));
4195 pic_labels_used |= 1 << REGNO (dest);
4197 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4198 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4199 output_asm_insn ("call\t%X2", xops);
4202 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4203 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4204 else if (!TARGET_MACHO)
4205 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
4207 return "";
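/* The two -fpic sequences produced above look roughly like this when
   %ebx is the destination (illustrative assembly; label names made up):

     with TARGET_DEEP_BRANCH_PREDICTION:

               call    __i686.get_pc_thunk.bx
               addl    $_GLOBAL_OFFSET_TABLE_, %ebx

     without it:

               call    .L1
       .L1:    popl    %ebx
               addl    $_GLOBAL_OFFSET_TABLE_+[.-.L1], %ebx  */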
4210 /* Generate a "push" pattern for input ARG.  */
4212 static rtx
4213 gen_push (rtx arg)
4215 return gen_rtx_SET (VOIDmode,
4216 gen_rtx_MEM (Pmode,
4217 gen_rtx_PRE_DEC (Pmode,
4218 stack_pointer_rtx)),
4219 arg);
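/* In RTL terms gen_push builds, for the 32-bit case where Pmode is
   SImode, a pattern of the shape (a sketch only):

       (set (mem:SI (pre_dec:SI (reg:SI sp)))
            (reg:SI ARG))  */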
4222 /* Return >= 0 if there is an unused call-clobbered register available
4223 for the entire function. */
4225 static unsigned int
4226 ix86_select_alt_pic_regnum (void)
4228 if (current_function_is_leaf && !current_function_profile)
4230 int i;
4231 for (i = 2; i >= 0; --i)
4232 if (!regs_ever_live[i])
4233 return i;
4236 return INVALID_REGNUM;
4239 /* Return 1 if we need to save REGNO. */
4240 static int
4241 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4243 if (pic_offset_table_rtx
4244 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4245 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4246 || current_function_profile
4247 || current_function_calls_eh_return
4248 || current_function_uses_const_pool))
4250 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4251 return 0;
4252 return 1;
4255 if (current_function_calls_eh_return && maybe_eh_return)
4257 unsigned i;
4258 for (i = 0; ; i++)
4260 unsigned test = EH_RETURN_DATA_REGNO (i);
4261 if (test == INVALID_REGNUM)
4262 break;
4263 if (test == regno)
4264 return 1;
4268 return (regs_ever_live[regno]
4269 && !call_used_regs[regno]
4270 && !fixed_regs[regno]
4271 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4274 /* Return number of registers to be saved on the stack. */
4276 static int
4277 ix86_nsaved_regs (void)
4279 int nregs = 0;
4280 int regno;
4282 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4283 if (ix86_save_reg (regno, true))
4284 nregs++;
4285 return nregs;
4288 /* Return the offset between two registers, one to be eliminated, and the other
4289 its replacement, at the start of a routine. */
4291 HOST_WIDE_INT
4292 ix86_initial_elimination_offset (int from, int to)
4294 struct ix86_frame frame;
4295 ix86_compute_frame_layout (&frame);
4297 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4298 return frame.hard_frame_pointer_offset;
4299 else if (from == FRAME_POINTER_REGNUM
4300 && to == HARD_FRAME_POINTER_REGNUM)
4301 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4302 else
4304 gcc_assert (to == STACK_POINTER_REGNUM);
4306 if (from == ARG_POINTER_REGNUM)
4307 return frame.stack_pointer_offset;
4309 gcc_assert (from == FRAME_POINTER_REGNUM);
4310 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4314 /* Fill in the ix86_frame structure describing the frame of the current function.  */
4316 static void
4317 ix86_compute_frame_layout (struct ix86_frame *frame)
4319 HOST_WIDE_INT total_size;
4320 unsigned int stack_alignment_needed;
4321 HOST_WIDE_INT offset;
4322 unsigned int preferred_alignment;
4323 HOST_WIDE_INT size = get_frame_size ();
4325 frame->nregs = ix86_nsaved_regs ();
4326 total_size = size;
4328 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4329 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4331   /* During reload iteration the number of registers saved can change.
4332      Recompute the value as needed.  Do not recompute when the number of registers
4333      didn't change, as reload makes multiple calls to this function and does not
4334      expect the decision to change within a single iteration.  */
4335 if (!optimize_size
4336 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4338 int count = frame->nregs;
4340 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4341       /* The fast prologue uses move instead of push to save registers.  This
4342          is significantly longer, but also executes faster, as modern hardware
4343          can execute the moves in parallel but can't do that for push/pop.
4345 	 Be careful about choosing which prologue to emit: when a function takes
4346 	 many instructions to execute we may as well use the slow version, and
4347 	 likewise when the function is known to be outside a hot spot (this is
4348 	 known with profile feedback only).  Weight the size of the function by the
4349 	 number of registers to save, as it is cheap to use one or two push
4350 	 instructions but very slow to use many of them.  */
4351 if (count)
4352 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4353 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4354 || (flag_branch_probabilities
4355 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4356 cfun->machine->use_fast_prologue_epilogue = false;
4357 else
4358 cfun->machine->use_fast_prologue_epilogue
4359 = !expensive_function_p (count);
4361 if (TARGET_PROLOGUE_USING_MOVE
4362 && cfun->machine->use_fast_prologue_epilogue)
4363 frame->save_regs_using_mov = true;
4364 else
4365 frame->save_regs_using_mov = false;
4368 /* Skip return address and saved base pointer. */
4369 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4371 frame->hard_frame_pointer_offset = offset;
4373   /* Do some sanity checking of stack_alignment_needed and
4374      preferred_alignment, since the i386 port is the only one using these
4375      features, which may break easily.  */
4377 gcc_assert (!size || stack_alignment_needed);
4378 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
4379 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
4380 gcc_assert (stack_alignment_needed
4381 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
4383 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4384 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4386 /* Register save area */
4387 offset += frame->nregs * UNITS_PER_WORD;
4389 /* Va-arg area */
4390 if (ix86_save_varrargs_registers)
4392 offset += X86_64_VARARGS_SIZE;
4393 frame->va_arg_size = X86_64_VARARGS_SIZE;
4395 else
4396 frame->va_arg_size = 0;
4398 /* Align start of frame for local function. */
4399 frame->padding1 = ((offset + stack_alignment_needed - 1)
4400 & -stack_alignment_needed) - offset;
4402 offset += frame->padding1;
4404 /* Frame pointer points here. */
4405 frame->frame_pointer_offset = offset;
4407 offset += size;
4409   /* Add the outgoing arguments area.  It can be skipped if we eliminated
4410      all the function calls as dead code.
4411      Skipping is however impossible when the function calls alloca, as the
4412      alloca expander assumes that the last current_function_outgoing_args_size
4413      bytes of the stack frame are unused.  */
4414 if (ACCUMULATE_OUTGOING_ARGS
4415 && (!current_function_is_leaf || current_function_calls_alloca))
4417 offset += current_function_outgoing_args_size;
4418 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4420 else
4421 frame->outgoing_arguments_size = 0;
4423 /* Align stack boundary. Only needed if we're calling another function
4424 or using alloca. */
4425 if (!current_function_is_leaf || current_function_calls_alloca)
4426 frame->padding2 = ((offset + preferred_alignment - 1)
4427 & -preferred_alignment) - offset;
4428 else
4429 frame->padding2 = 0;
4431 offset += frame->padding2;
4433 /* We've reached end of stack frame. */
4434 frame->stack_pointer_offset = offset;
4436 /* Size prologue needs to allocate. */
4437 frame->to_allocate =
4438 (size + frame->padding1 + frame->padding2
4439 + frame->outgoing_arguments_size + frame->va_arg_size);
4441 if ((!frame->to_allocate && frame->nregs <= 1)
4442 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
4443 frame->save_regs_using_mov = false;
4445 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
4446 && current_function_is_leaf)
4448 frame->red_zone_size = frame->to_allocate;
4449 if (frame->save_regs_using_mov)
4450 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
4451 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4452 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4454 else
4455 frame->red_zone_size = 0;
4456 frame->to_allocate -= frame->red_zone_size;
4457 frame->stack_pointer_offset -= frame->red_zone_size;
4458 #if 0
4459 fprintf (stderr, "nregs: %i\n", frame->nregs);
4460 fprintf (stderr, "size: %i\n", size);
4461 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4462 fprintf (stderr, "padding1: %i\n", frame->padding1);
4463 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4464 fprintf (stderr, "padding2: %i\n", frame->padding2);
4465 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4466 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4467 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4468 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4469 frame->hard_frame_pointer_offset);
4470 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4471 #endif
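/* Rough picture of the layout computed above, from higher toward lower
   addresses (names are the struct ix86_frame fields; the va-arg and
   padding slots are present only when nonzero, and the red-zone
   adjustment at the end is omitted for clarity):

       return address
       saved frame pointer (when frame_pointer_needed)
                                        <- hard_frame_pointer_offset
       nregs saved registers
       va_arg_size bytes of varargs register save area
       padding1
                                        <- frame_pointer_offset
       get_frame_size () bytes of locals
       outgoing_arguments_size
       padding2
                                        <- stack_pointer_offset  */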
4474 /* Emit code to save registers in the prologue. */
4476 static void
4477 ix86_emit_save_regs (void)
4479 int regno;
4480 rtx insn;
4482 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4483 if (ix86_save_reg (regno, true))
4485 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4486 RTX_FRAME_RELATED_P (insn) = 1;
4490 /* Emit code to save registers using MOV insns.  The first register
4491    is saved at POINTER + OFFSET.  */
4492 static void
4493 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
4495 int regno;
4496 rtx insn;
4498 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4499 if (ix86_save_reg (regno, true))
4501 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4502 Pmode, offset),
4503 gen_rtx_REG (Pmode, regno));
4504 RTX_FRAME_RELATED_P (insn) = 1;
4505 offset += UNITS_PER_WORD;
4509 /* Expand prologue or epilogue stack adjustment.
4510    The pattern exists to put a dependency on all ebp-based memory accesses.
4511 STYLE should be negative if instructions should be marked as frame related,
4512 zero if %r11 register is live and cannot be freely used and positive
4513 otherwise. */
4515 static void
4516 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
4518 rtx insn;
4520 if (! TARGET_64BIT)
4521 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
4522 else if (x86_64_immediate_operand (offset, DImode))
4523 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
4524 else
4526 rtx r11;
4527 /* r11 is used by indirect sibcall return as well, set before the
4528 epilogue and used after the epilogue. ATM indirect sibcall
4529 shouldn't be used together with huge frame sizes in one
4530 function because of the frame_size check in sibcall.c. */
4531 gcc_assert (style);
4532 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4533 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
4534 if (style < 0)
4535 RTX_FRAME_RELATED_P (insn) = 1;
4536 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
4537 offset));
4539 if (style < 0)
4540 RTX_FRAME_RELATED_P (insn) = 1;
4543 /* Expand the prologue into a bunch of separate insns. */
4545 void
4546 ix86_expand_prologue (void)
4548 rtx insn;
4549 bool pic_reg_used;
4550 struct ix86_frame frame;
4551 HOST_WIDE_INT allocate;
4553 ix86_compute_frame_layout (&frame);
4555 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4556 slower on all targets. Also sdb doesn't like it. */
4558 if (frame_pointer_needed)
4560 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4561 RTX_FRAME_RELATED_P (insn) = 1;
4563 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4564 RTX_FRAME_RELATED_P (insn) = 1;
4567 allocate = frame.to_allocate;
4569 if (!frame.save_regs_using_mov)
4570 ix86_emit_save_regs ();
4571 else
4572 allocate += frame.nregs * UNITS_PER_WORD;
4574   /* When using the red zone we may start saving registers before allocating
4575      the stack frame, saving one cycle of the prologue.  */
4576 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
4577 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
4578 : stack_pointer_rtx,
4579 -frame.nregs * UNITS_PER_WORD);
4581 if (allocate == 0)
4583 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4584 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4585 GEN_INT (-allocate), -1);
4586 else
4588 /* Only valid for Win32. */
4589 rtx eax = gen_rtx_REG (SImode, 0);
4590 bool eax_live = ix86_eax_live_at_start_p ();
4591 rtx t;
4593 gcc_assert (!TARGET_64BIT);
4595 if (eax_live)
4597 emit_insn (gen_push (eax));
4598 allocate -= 4;
4601 emit_move_insn (eax, GEN_INT (allocate));
4603 insn = emit_insn (gen_allocate_stack_worker (eax));
4604 RTX_FRAME_RELATED_P (insn) = 1;
4605 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
4606 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
4607 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
4608 t, REG_NOTES (insn));
4610 if (eax_live)
4612 if (frame_pointer_needed)
4613 t = plus_constant (hard_frame_pointer_rtx,
4614 allocate
4615 - frame.to_allocate
4616 - frame.nregs * UNITS_PER_WORD);
4617 else
4618 t = plus_constant (stack_pointer_rtx, allocate);
4619 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
4623 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
4625 if (!frame_pointer_needed || !frame.to_allocate)
4626 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4627 else
4628 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4629 -frame.nregs * UNITS_PER_WORD);
4632 pic_reg_used = false;
4633 if (pic_offset_table_rtx
4634 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4635 || current_function_profile))
4637 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4639 if (alt_pic_reg_used != INVALID_REGNUM)
4640 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4642 pic_reg_used = true;
4645 if (pic_reg_used)
4647 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4649 /* Even with accurate pre-reload life analysis, we can wind up
4650 deleting all references to the pic register after reload.
4651 Consider if cross-jumping unifies two sides of a branch
4652 controlled by a comparison vs the only read from a global.
4653 In which case, allow the set_got to be deleted, though we're
4654 too late to do anything about the ebx save in the prologue. */
4655 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4658   /* Prevent function calls from being scheduled before the call to mcount.
4659 In the pic_reg_used case, make sure that the got load isn't deleted. */
4660 if (current_function_profile)
4661 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
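/* For a plain 32-bit function with a frame pointer the expansion above
   boils down to the familiar sequence (illustrative only; register
   saves may instead use moves when save_regs_using_mov is set):

               pushl   %ebp
               movl    %esp, %ebp
               pushl   %ebx            callee-saved registers, if any
               subl    $N, %esp        N == frame.to_allocate  */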
4664 /* Emit code to restore saved registers using MOV insns. First register
4665 is restored from POINTER + OFFSET. */
4666 static void
4667 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
4668 int maybe_eh_return)
4670 int regno;
4671 rtx base_address = gen_rtx_MEM (Pmode, pointer);
4673 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4674 if (ix86_save_reg (regno, maybe_eh_return))
4676 /* Ensure that adjust_address won't be forced to produce pointer
4677 out of range allowed by x86-64 instruction set. */
4678 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
4680 rtx r11;
4682 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4683 emit_move_insn (r11, GEN_INT (offset));
4684 emit_insn (gen_adddi3 (r11, r11, pointer));
4685 base_address = gen_rtx_MEM (Pmode, r11);
4686 offset = 0;
4688 emit_move_insn (gen_rtx_REG (Pmode, regno),
4689 adjust_address (base_address, Pmode, offset));
4690 offset += UNITS_PER_WORD;
4694 /* Restore function stack, frame, and registers. */
4696 void
4697 ix86_expand_epilogue (int style)
4699 int regno;
4700 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4701 struct ix86_frame frame;
4702 HOST_WIDE_INT offset;
4704 ix86_compute_frame_layout (&frame);
4706 /* Calculate start of saved registers relative to ebp. Special care
4707 must be taken for the normal return case of a function using
4708 eh_return: the eax and edx registers are marked as saved, but not
4709 restored along this path. */
4710 offset = frame.nregs;
4711 if (current_function_calls_eh_return && style != 2)
4712 offset -= 2;
4713 offset *= -UNITS_PER_WORD;
4715   /* If we're only restoring one register and sp is not valid then
4716      use a move instruction to restore the register, since it's
4717      less work than reloading sp and popping the register.
4719      The default code results in a stack adjustment using an add/lea instruction,
4720      while this code results in a LEAVE instruction (or discrete equivalent),
4721      so it is profitable in some other cases as well.  Especially when there
4722      are no registers to restore.  We also use this code when TARGET_USE_LEAVE
4723      and there is exactly one register to pop.  This heuristic may need some
4724      tuning in the future.  */
4725 if ((!sp_valid && frame.nregs <= 1)
4726 || (TARGET_EPILOGUE_USING_MOVE
4727 && cfun->machine->use_fast_prologue_epilogue
4728 && (frame.nregs > 1 || frame.to_allocate))
4729 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4730 || (frame_pointer_needed && TARGET_USE_LEAVE
4731 && cfun->machine->use_fast_prologue_epilogue
4732 && frame.nregs == 1)
4733 || current_function_calls_eh_return)
4735       /* Restore registers.  We can use ebp or esp to address the memory
4736 	 locations.  If both are available, default to ebp, since offsets
4737 	 are known to be small.  The only exception is esp pointing directly
4738 	 to the end of the block of saved registers, where we may simplify
4739 	 the addressing mode.  */
4741 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4742 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4743 frame.to_allocate, style == 2);
4744 else
4745 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4746 offset, style == 2);
4748 /* eh_return epilogues need %ecx added to the stack pointer. */
4749 if (style == 2)
4751 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4753 if (frame_pointer_needed)
4755 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4756 tmp = plus_constant (tmp, UNITS_PER_WORD);
4757 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4759 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4760 emit_move_insn (hard_frame_pointer_rtx, tmp);
4762 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
4763 const0_rtx, style);
4765 else
4767 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4768 tmp = plus_constant (tmp, (frame.to_allocate
4769 + frame.nregs * UNITS_PER_WORD));
4770 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4773 else if (!frame_pointer_needed)
4774 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4775 GEN_INT (frame.to_allocate
4776 + frame.nregs * UNITS_PER_WORD),
4777 style);
4778 /* If not an i386, mov & pop is faster than "leave". */
4779 else if (TARGET_USE_LEAVE || optimize_size
4780 || !cfun->machine->use_fast_prologue_epilogue)
4781 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4782 else
4784 pro_epilogue_adjust_stack (stack_pointer_rtx,
4785 hard_frame_pointer_rtx,
4786 const0_rtx, style);
4787 if (TARGET_64BIT)
4788 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4789 else
4790 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4793 else
4795 /* First step is to deallocate the stack frame so that we can
4796 pop the registers. */
4797 if (!sp_valid)
4799 gcc_assert (frame_pointer_needed);
4800 pro_epilogue_adjust_stack (stack_pointer_rtx,
4801 hard_frame_pointer_rtx,
4802 GEN_INT (offset), style);
4804 else if (frame.to_allocate)
4805 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4806 GEN_INT (frame.to_allocate), style);
4808 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4809 if (ix86_save_reg (regno, false))
4811 if (TARGET_64BIT)
4812 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4813 else
4814 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4816 if (frame_pointer_needed)
4818 /* Leave results in shorter dependency chains on CPUs that are
4819 able to grok it fast. */
4820 if (TARGET_USE_LEAVE)
4821 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4822 else if (TARGET_64BIT)
4823 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4824 else
4825 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4829 /* Sibcall epilogues don't want a return instruction. */
4830 if (style == 0)
4831 return;
4833 if (current_function_pops_args && current_function_args_size)
4835 rtx popc = GEN_INT (current_function_pops_args);
4837       /* i386 can only pop 64K bytes.  If asked to pop more, pop the
4838 	 return address, do an explicit add, and jump indirectly to the
4839 	 caller.  */
4841 if (current_function_pops_args >= 65536)
4843 rtx ecx = gen_rtx_REG (SImode, 2);
4845 /* There is no "pascal" calling convention in 64bit ABI. */
4846 gcc_assert (!TARGET_64BIT);
4848 emit_insn (gen_popsi1 (ecx));
4849 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4850 emit_jump_insn (gen_return_indirect_internal (ecx));
4852 else
4853 emit_jump_insn (gen_return_pop_internal (popc));
4855 else
4856 emit_jump_insn (gen_return_internal ());
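/* The callee-pop tail handled above corresponds to assembly such as
   (a 32-bit sketch, N == current_function_pops_args):

       N < 65536:     ret     $N

       N >= 65536:    popl    %ecx
                      addl    $N, %esp
                      jmp     *%ecx  */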
4859 /* Reset from the function's potential modifications. */
4861 static void
4862 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4863 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4865 if (pic_offset_table_rtx)
4866 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4869 /* Extract the parts of an RTL expression that is a valid memory address
4870    for an instruction.  Return 0 if the structure of the address is
4871    grossly off.  Return -1 if the address contains ASHIFT, so it is not
4872    strictly valid, but is still used for computing the length of a lea instruction.  */
4875 ix86_decompose_address (rtx addr, struct ix86_address *out)
4877 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
4878 rtx base_reg, index_reg;
4879 HOST_WIDE_INT scale = 1;
4880 rtx scale_rtx = NULL_RTX;
4881 int retval = 1;
4882 enum ix86_address_seg seg = SEG_DEFAULT;
4884 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
4885 base = addr;
4886 else if (GET_CODE (addr) == PLUS)
4888 rtx addends[4], op;
4889 int n = 0, i;
4891 op = addr;
4894 if (n >= 4)
4895 return 0;
4896 addends[n++] = XEXP (op, 1);
4897 op = XEXP (op, 0);
4899 while (GET_CODE (op) == PLUS);
4900 if (n >= 4)
4901 return 0;
4902 addends[n] = op;
4904 for (i = n; i >= 0; --i)
4906 op = addends[i];
4907 switch (GET_CODE (op))
4909 case MULT:
4910 if (index)
4911 return 0;
4912 index = XEXP (op, 0);
4913 scale_rtx = XEXP (op, 1);
4914 break;
4916 case UNSPEC:
4917 if (XINT (op, 1) == UNSPEC_TP
4918 && TARGET_TLS_DIRECT_SEG_REFS
4919 && seg == SEG_DEFAULT)
4920 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
4921 else
4922 return 0;
4923 break;
4925 case REG:
4926 case SUBREG:
4927 if (!base)
4928 base = op;
4929 else if (!index)
4930 index = op;
4931 else
4932 return 0;
4933 break;
4935 case CONST:
4936 case CONST_INT:
4937 case SYMBOL_REF:
4938 case LABEL_REF:
4939 if (disp)
4940 return 0;
4941 disp = op;
4942 break;
4944 default:
4945 return 0;
4949 else if (GET_CODE (addr) == MULT)
4951 index = XEXP (addr, 0); /* index*scale */
4952 scale_rtx = XEXP (addr, 1);
4954 else if (GET_CODE (addr) == ASHIFT)
4956 rtx tmp;
4958 /* We're called for lea too, which implements ashift on occasion. */
4959 index = XEXP (addr, 0);
4960 tmp = XEXP (addr, 1);
4961 if (GET_CODE (tmp) != CONST_INT)
4962 return 0;
4963 scale = INTVAL (tmp);
4964 if ((unsigned HOST_WIDE_INT) scale > 3)
4965 return 0;
4966 scale = 1 << scale;
4967 retval = -1;
4969 else
4970 disp = addr; /* displacement */
4972 /* Extract the integral value of scale. */
4973 if (scale_rtx)
4975 if (GET_CODE (scale_rtx) != CONST_INT)
4976 return 0;
4977 scale = INTVAL (scale_rtx);
4980 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
4981 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
4983   /* Allow the arg pointer and stack pointer as index if there is no scaling.  */
4984 if (base_reg && index_reg && scale == 1
4985 && (index_reg == arg_pointer_rtx
4986 || index_reg == frame_pointer_rtx
4987 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
4989 rtx tmp;
4990 tmp = base, base = index, index = tmp;
4991 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
4994 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4995 if ((base_reg == hard_frame_pointer_rtx
4996 || base_reg == frame_pointer_rtx
4997 || base_reg == arg_pointer_rtx) && !disp)
4998 disp = const0_rtx;
5000 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5001 Avoid this by transforming to [%esi+0]. */
5002 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5003 && base_reg && !index_reg && !disp
5004 && REG_P (base_reg)
5005 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
5006 disp = const0_rtx;
5008 /* Special case: encode reg+reg instead of reg*2. */
5009 if (!base && index && scale && scale == 2)
5010 base = index, base_reg = index_reg, scale = 1;
5012 /* Special case: scaling cannot be encoded without base or displacement. */
5013 if (!base && !disp && index && scale != 1)
5014 disp = const0_rtx;
5016 out->base = base;
5017 out->index = index;
5018 out->disp = disp;
5019 out->scale = scale;
5020 out->seg = seg;
5022 return retval;
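/* As an example of the decomposition above, the memory operand of
   "movl 12(%ebx,%ecx,4), %eax", which reaches us as

       (plus:SI (plus:SI (mult:SI (reg:SI cx) (const_int 4))
                         (reg:SI bx))
                (const_int 12))

   comes back with base = %ebx, index = %ecx, scale = 4 and disp = 12
   (an illustration only; any of the four parts may be absent).  */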
5025 /* Return the cost of the memory address x.
5026    For i386, it is better to use a complex address than let gcc copy
5027    the address into a reg and make a new pseudo.  But not if the address
5028    requires two regs - that would mean more pseudos with longer
5029    lifetimes.  */
5030 static int
5031 ix86_address_cost (rtx x)
5033 struct ix86_address parts;
5034 int cost = 1;
5035 int ok = ix86_decompose_address (x, &parts);
5037 gcc_assert (ok);
5039 if (parts.base && GET_CODE (parts.base) == SUBREG)
5040 parts.base = SUBREG_REG (parts.base);
5041 if (parts.index && GET_CODE (parts.index) == SUBREG)
5042 parts.index = SUBREG_REG (parts.index);
5044 /* More complex memory references are better. */
5045 if (parts.disp && parts.disp != const0_rtx)
5046 cost--;
5047 if (parts.seg != SEG_DEFAULT)
5048 cost--;
5050 /* Attempt to minimize number of registers in the address. */
5051 if ((parts.base
5052 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5053 || (parts.index
5054 && (!REG_P (parts.index)
5055 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5056 cost++;
5058 if (parts.base
5059 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5060 && parts.index
5061 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5062 && parts.base != parts.index)
5063 cost++;
5065   /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5066      since its predecode logic can't detect the length of instructions
5067      and it degenerates to vector decoding.  Increase the cost of such
5068      addresses here.  The penalty is at least 2 cycles.  It may be worthwhile
5069      to split such addresses or even refuse such addresses at all.
5071      The following addressing modes are affected:
5072 	[base+scale*index]
5073 	[scale*index+disp]
5074 	[base+index]
5076      The first and last cases may be avoidable by explicitly coding the zero
5077      in the memory address, but I don't have an AMD-K6 machine handy to check
5078      this theory.  */
5080 if (TARGET_K6
5081 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5082 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5083 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5084 cost += 10;
5086 return cost;
5089 /* If X is a machine specific address (i.e. a symbol or label being
5090 referenced as a displacement from the GOT implemented using an
5091 UNSPEC), then return the base term. Otherwise return X. */
5094 ix86_find_base_term (rtx x)
5096 rtx term;
5098 if (TARGET_64BIT)
5100 if (GET_CODE (x) != CONST)
5101 return x;
5102 term = XEXP (x, 0);
5103 if (GET_CODE (term) == PLUS
5104 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5105 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5106 term = XEXP (term, 0);
5107 if (GET_CODE (term) != UNSPEC
5108 || XINT (term, 1) != UNSPEC_GOTPCREL)
5109 return x;
5111 term = XVECEXP (term, 0, 0);
5113 if (GET_CODE (term) != SYMBOL_REF
5114 && GET_CODE (term) != LABEL_REF)
5115 return x;
5117 return term;
5120 term = ix86_delegitimize_address (x);
5122 if (GET_CODE (term) != SYMBOL_REF
5123 && GET_CODE (term) != LABEL_REF)
5124 return x;
5126 return term;
5129 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
5130    this is used to form addresses to local data when -fPIC is in
5131    use.  */
5133 static bool
5134 darwin_local_data_pic (rtx disp)
5136 if (GET_CODE (disp) == MINUS)
5138 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5139 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5140 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5142 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5143 if (! strcmp (sym_name, "<pic base>"))
5144 return true;
5148 return false;
5151 /* Determine if a given RTX is a valid constant. We already know this
5152 satisfies CONSTANT_P. */
5154 bool
5155 legitimate_constant_p (rtx x)
5157 switch (GET_CODE (x))
5159 case CONST:
5160 x = XEXP (x, 0);
5162 if (GET_CODE (x) == PLUS)
5164 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5165 return false;
5166 x = XEXP (x, 0);
5169 if (TARGET_MACHO && darwin_local_data_pic (x))
5170 return true;
5172 /* Only some unspecs are valid as "constants". */
5173 if (GET_CODE (x) == UNSPEC)
5174 switch (XINT (x, 1))
5176 case UNSPEC_TPOFF:
5177 case UNSPEC_NTPOFF:
5178 return local_exec_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
5179 case UNSPEC_DTPOFF:
5180 return local_dynamic_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
5181 default:
5182 return false;
5185 /* We must have drilled down to a symbol. */
5186 if (!symbolic_operand (x, Pmode))
5187 return false;
5188 /* FALLTHRU */
5190 case SYMBOL_REF:
5191 /* TLS symbols are never valid. */
5192 if (tls_symbolic_operand (x, Pmode))
5193 return false;
5194 break;
5196 default:
5197 break;
5200 /* Otherwise we handle everything else in the move patterns. */
5201 return true;
5204 /* Determine if it's legal to put X into the constant pool. This
5205 is not possible for the address of thread-local symbols, which
5206 is checked above. */
5208 static bool
5209 ix86_cannot_force_const_mem (rtx x)
5211 return !legitimate_constant_p (x);
5214 /* Determine if a given RTX is a valid constant address. */
5216 bool
5217 constant_address_p (rtx x)
5219 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5222 /* Nonzero if the constant value X is a legitimate general operand
5223 when generating PIC code. It is given that flag_pic is on and
5224 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5226 bool
5227 legitimate_pic_operand_p (rtx x)
5229 rtx inner;
5231 switch (GET_CODE (x))
5233 case CONST:
5234 inner = XEXP (x, 0);
5236 /* Only some unspecs are valid as "constants". */
5237 if (GET_CODE (inner) == UNSPEC)
5238 switch (XINT (inner, 1))
5240 case UNSPEC_TPOFF:
5241 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5242 default:
5243 return false;
5245 /* FALLTHRU */
5247 case SYMBOL_REF:
5248 case LABEL_REF:
5249 return legitimate_pic_address_disp_p (x);
5251 default:
5252 return true;
5256 /* Determine if a given CONST RTX is a valid memory displacement
5257 in PIC mode. */
5260 legitimate_pic_address_disp_p (rtx disp)
5262 bool saw_plus;
5264 /* In 64bit mode we can allow direct addresses of symbols and labels
5265 when they are not dynamic symbols. */
5266 if (TARGET_64BIT)
5268 /* TLS references should always be enclosed in UNSPEC. */
5269 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5270 return 0;
5271 if (GET_CODE (disp) == SYMBOL_REF
5272 && ix86_cmodel == CM_SMALL_PIC
5273 && SYMBOL_REF_LOCAL_P (disp))
5274 return 1;
5275 if (GET_CODE (disp) == LABEL_REF)
5276 return 1;
5277 if (GET_CODE (disp) == CONST
5278 && GET_CODE (XEXP (disp, 0)) == PLUS)
5280 rtx op0 = XEXP (XEXP (disp, 0), 0);
5281 rtx op1 = XEXP (XEXP (disp, 0), 1);
5283 /* TLS references should always be enclosed in UNSPEC. */
5284 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5285 return 0;
5286 if (((GET_CODE (op0) == SYMBOL_REF
5287 && ix86_cmodel == CM_SMALL_PIC
5288 && SYMBOL_REF_LOCAL_P (op0))
5289 || GET_CODE (op0) == LABEL_REF)
5290 && GET_CODE (op1) == CONST_INT
5291 && INTVAL (op1) < 16*1024*1024
5292 && INTVAL (op1) >= -16*1024*1024)
5293 return 1;
5296 if (GET_CODE (disp) != CONST)
5297 return 0;
5298 disp = XEXP (disp, 0);
5300 if (TARGET_64BIT)
5302       /* It is unsafe to allow PLUS expressions.  This limits the allowed
5303 	 distance of GOT references.  We should not need these anyway.  */
5304 if (GET_CODE (disp) != UNSPEC
5305 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5306 return 0;
5308 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5309 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5310 return 0;
5311 return 1;
5314 saw_plus = false;
5315 if (GET_CODE (disp) == PLUS)
5317 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5318 return 0;
5319 disp = XEXP (disp, 0);
5320 saw_plus = true;
5323 if (TARGET_MACHO && darwin_local_data_pic (disp))
5324 return 1;
5326 if (GET_CODE (disp) != UNSPEC)
5327 return 0;
5329 switch (XINT (disp, 1))
5331 case UNSPEC_GOT:
5332 if (saw_plus)
5333 return false;
5334 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5335 case UNSPEC_GOTOFF:
5336 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5337 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5338 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5339 return false;
5340 case UNSPEC_GOTTPOFF:
5341 case UNSPEC_GOTNTPOFF:
5342 case UNSPEC_INDNTPOFF:
5343 if (saw_plus)
5344 return false;
5345 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5346 case UNSPEC_NTPOFF:
5347 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5348 case UNSPEC_DTPOFF:
5349 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5352 return 0;
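/* Typical 32-bit PIC displacements accepted above look like the
   following RTL (an illustrative sketch):

       (const (unspec [(symbol_ref "foo")] UNSPEC_GOT))      foo@GOT
       (const (unspec [(symbol_ref "bar")] UNSPEC_GOTOFF))   bar@GOTOFF

   both used relative to the PIC register that output_set_got loads
   with the address of _GLOBAL_OFFSET_TABLE_.  */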
5355 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5356 memory address for an instruction. The MODE argument is the machine mode
5357 for the MEM expression that wants to use this address.
5359    It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
5360 convert common non-canonical forms to canonical form so that they will
5361 be recognized. */
5364 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5366 struct ix86_address parts;
5367 rtx base, index, disp;
5368 HOST_WIDE_INT scale;
5369 const char *reason = NULL;
5370 rtx reason_rtx = NULL_RTX;
5372 if (TARGET_DEBUG_ADDR)
5374 fprintf (stderr,
5375 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5376 GET_MODE_NAME (mode), strict);
5377 debug_rtx (addr);
5380 if (ix86_decompose_address (addr, &parts) <= 0)
5382 reason = "decomposition failed";
5383 goto report_error;
5386 base = parts.base;
5387 index = parts.index;
5388 disp = parts.disp;
5389 scale = parts.scale;
5391 /* Validate base register.
5393 Don't allow SUBREG's that span more than a word here. It can lead to spill
5394 failures when the base is one word out of a two word structure, which is
5395 represented internally as a DImode int. */
5397 if (base)
5399 rtx reg;
5400 reason_rtx = base;
5402 if (REG_P (base))
5403 reg = base;
5404 else if (GET_CODE (base) == SUBREG
5405 && REG_P (SUBREG_REG (base))
5406 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
5407 <= UNITS_PER_WORD)
5408 reg = SUBREG_REG (base);
5409 else
5411 reason = "base is not a register";
5412 goto report_error;
5415 if (GET_MODE (base) != Pmode)
5417 reason = "base is not in Pmode";
5418 goto report_error;
5421 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5422 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5424 reason = "base is not valid";
5425 goto report_error;
5429 /* Validate index register.
5431 Don't allow SUBREG's that span more than a word here -- same as above. */
5433 if (index)
5435 rtx reg;
5436 reason_rtx = index;
5438 if (REG_P (index))
5439 reg = index;
5440 else if (GET_CODE (index) == SUBREG
5441 && REG_P (SUBREG_REG (index))
5442 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
5443 <= UNITS_PER_WORD)
5444 reg = SUBREG_REG (index);
5445 else
5447 reason = "index is not a register";
5448 goto report_error;
5451 if (GET_MODE (index) != Pmode)
5453 reason = "index is not in Pmode";
5454 goto report_error;
5457 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5458 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5460 reason = "index is not valid";
5461 goto report_error;
5465 /* Validate scale factor. */
5466 if (scale != 1)
5468 reason_rtx = GEN_INT (scale);
5469 if (!index)
5471 reason = "scale without index";
5472 goto report_error;
5475 if (scale != 2 && scale != 4 && scale != 8)
5477 reason = "scale is not a valid multiplier";
5478 goto report_error;
5482 /* Validate displacement. */
5483 if (disp)
5485 reason_rtx = disp;
5487 if (GET_CODE (disp) == CONST
5488 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5489 switch (XINT (XEXP (disp, 0), 1))
5491 case UNSPEC_GOT:
5492 case UNSPEC_GOTOFF:
5493 case UNSPEC_GOTPCREL:
5494 gcc_assert (flag_pic);
5495 goto is_legitimate_pic;
5497 case UNSPEC_GOTTPOFF:
5498 case UNSPEC_GOTNTPOFF:
5499 case UNSPEC_INDNTPOFF:
5500 case UNSPEC_NTPOFF:
5501 case UNSPEC_DTPOFF:
5502 break;
5504 default:
5505 reason = "invalid address unspec";
5506 goto report_error;
5509 else if (flag_pic && (SYMBOLIC_CONST (disp)
5510 #if TARGET_MACHO
5511 && !machopic_operand_p (disp)
5512 #endif
5515 is_legitimate_pic:
5516 if (TARGET_64BIT && (index || base))
5518 /* foo@dtpoff(%rX) is ok. */
5519 if (GET_CODE (disp) != CONST
5520 || GET_CODE (XEXP (disp, 0)) != PLUS
5521 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5522 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5523 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5524 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5526 reason = "non-constant pic memory reference";
5527 goto report_error;
5530 else if (! legitimate_pic_address_disp_p (disp))
5532 reason = "displacement is an invalid pic construct";
5533 goto report_error;
5536 /* This code used to verify that a symbolic pic displacement
5537 includes the pic_offset_table_rtx register.
5539 While this is a good idea, unfortunately these constructs may
5540 be created by the "adds using lea" optimization for incorrect
5541 code like:
5543 int a;
5544 int foo(int i)
5546 return *(&a+i);
5549 This code is nonsensical, but results in addressing the
5550 GOT table with the pic_offset_table_rtx base. We can't
5551 just refuse it easily, since it gets matched by the
5552 "addsi3" pattern, which later gets split to lea when the
5553 output register differs from the input. While this
5554 could be handled by a separate addsi pattern for this case
5555 that never results in lea, disabling this test seems to be
5556 the easier and correct fix for the crash. */
5558 else if (GET_CODE (disp) != LABEL_REF
5559 && GET_CODE (disp) != CONST_INT
5560 && (GET_CODE (disp) != CONST
5561 || !legitimate_constant_p (disp))
5562 && (GET_CODE (disp) != SYMBOL_REF
5563 || !legitimate_constant_p (disp)))
5565 reason = "displacement is not constant";
5566 goto report_error;
5568 else if (TARGET_64BIT
5569 && !x86_64_immediate_operand (disp, VOIDmode))
5571 reason = "displacement is out of range";
5572 goto report_error;
5576 /* Everything looks valid. */
5577 if (TARGET_DEBUG_ADDR)
5578 fprintf (stderr, "Success.\n");
5579 return TRUE;
5581 report_error:
5582 if (TARGET_DEBUG_ADDR)
5584 fprintf (stderr, "Error: %s\n", reason);
5585 debug_rtx (reason_rtx);
5587 return FALSE;
5590 /* Return a unique alias set for the GOT. */
5592 static HOST_WIDE_INT
5593 ix86_GOT_alias_set (void)
5595 static HOST_WIDE_INT set = -1;
5596 if (set == -1)
5597 set = new_alias_set ();
5598 return set;
5601 /* Return a legitimate reference for ORIG (an address) using the
5602 register REG. If REG is 0, a new pseudo is generated.
5604 There are two types of references that must be handled:
5606 1. Global data references must load the address from the GOT, via
5607 the PIC reg. An insn is emitted to do this load, and the reg is
5608 returned.
5610 2. Static data references, constant pool addresses, and code labels
5611 compute the address as an offset from the GOT, whose base is in
5612 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
5613 differentiate them from global data objects. The returned
5614 address is the PIC reg + an unspec constant.
5616 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5617 reg also appears in the address. */
5619 static rtx
5620 legitimize_pic_address (rtx orig, rtx reg)
5622 rtx addr = orig;
5623 rtx new = orig;
5624 rtx base;
5626 #if TARGET_MACHO
5627 if (reg == 0)
5628 reg = gen_reg_rtx (Pmode);
5629 /* Use the generic Mach-O PIC machinery. */
5630 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5631 #endif
5633 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5634 new = addr;
5635 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5637 /* This symbol may be referenced via a displacement from the PIC
5638 base address (@GOTOFF). */
5640 if (reload_in_progress)
5641 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5642 if (GET_CODE (addr) == CONST)
5643 addr = XEXP (addr, 0);
5644 if (GET_CODE (addr) == PLUS)
5646 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
5647 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
5649 else
5650 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5651 new = gen_rtx_CONST (Pmode, new);
5652 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5654 if (reg != 0)
5656 emit_move_insn (reg, new);
5657 new = reg;
5660 else if (GET_CODE (addr) == SYMBOL_REF)
5662 if (TARGET_64BIT)
5664 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5665 new = gen_rtx_CONST (Pmode, new);
5666 new = gen_const_mem (Pmode, new);
5667 set_mem_alias_set (new, ix86_GOT_alias_set ());
5669 if (reg == 0)
5670 reg = gen_reg_rtx (Pmode);
5671 /* Use gen_movsi directly, otherwise the address is loaded
5672 into a register for CSE. We don't want to CSE these addresses;
5673 instead we CSE addresses from the GOT table, so skip this. */
5674 emit_insn (gen_movsi (reg, new));
5675 new = reg;
5677 else
5679 /* This symbol must be referenced via a load from the
5680 Global Offset Table (@GOT). */
5682 if (reload_in_progress)
5683 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5684 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5685 new = gen_rtx_CONST (Pmode, new);
5686 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5687 new = gen_const_mem (Pmode, new);
5688 set_mem_alias_set (new, ix86_GOT_alias_set ());
5690 if (reg == 0)
5691 reg = gen_reg_rtx (Pmode);
5692 emit_move_insn (reg, new);
5693 new = reg;
5696 else
5698 if (GET_CODE (addr) == CONST)
5700 addr = XEXP (addr, 0);
5702 /* We must match stuff we generate before. Assume the only
5703 unspecs that can get here are ours. Not that we could do
5704 anything with them anyway.... */
5705 if (GET_CODE (addr) == UNSPEC
5706 || (GET_CODE (addr) == PLUS
5707 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5708 return orig;
5709 gcc_assert (GET_CODE (addr) == PLUS);
5711 if (GET_CODE (addr) == PLUS)
5713 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5715 /* Check first to see if this is a constant offset from a @GOTOFF
5716 symbol reference. */
5717 if (local_symbolic_operand (op0, Pmode)
5718 && GET_CODE (op1) == CONST_INT)
5720 if (!TARGET_64BIT)
5722 if (reload_in_progress)
5723 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5724 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5725 UNSPEC_GOTOFF);
5726 new = gen_rtx_PLUS (Pmode, new, op1);
5727 new = gen_rtx_CONST (Pmode, new);
5728 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5730 if (reg != 0)
5732 emit_move_insn (reg, new);
5733 new = reg;
5736 else
5738 if (INTVAL (op1) < -16*1024*1024
5739 || INTVAL (op1) >= 16*1024*1024)
5740 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
5743 else
5745 base = legitimize_pic_address (XEXP (addr, 0), reg);
5746 new = legitimize_pic_address (XEXP (addr, 1),
5747 base == reg ? NULL_RTX : reg);
5749 if (GET_CODE (new) == CONST_INT)
5750 new = plus_constant (base, INTVAL (new));
5751 else
5753 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5755 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5756 new = XEXP (new, 1);
5758 new = gen_rtx_PLUS (Pmode, base, new);
5763 return new;
5766 /* Load the thread pointer. If TO_REG is true, force it into a register. */
5768 static rtx
5769 get_thread_pointer (int to_reg)
5771 rtx tp, reg, insn;
5773 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5774 if (!to_reg)
5775 return tp;
5777 reg = gen_reg_rtx (Pmode);
5778 insn = gen_rtx_SET (VOIDmode, reg, tp);
5779 insn = emit_insn (insn);
5781 return reg;
5784 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
5785 false if we expect this to be used for a memory address and true if
5786 we expect to load the address into a register. */
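/* Summary of the cases handled below: GLOBAL_DYNAMIC and LOCAL_DYNAMIC go
   through the tls_global_dynamic / tls_local_dynamic_base patterns (wrapped
   in a libcall block on 64-bit), INITIAL_EXEC loads the thread-pointer
   offset from the GOT and combines it with the thread pointer, and
   LOCAL_EXEC uses a constant @TPOFF/@NTPOFF offset from the thread pointer
   directly.  */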
5788 static rtx
5789 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
5791 rtx dest, base, off, pic;
5792 int type;
5794 switch (model)
5796 case TLS_MODEL_GLOBAL_DYNAMIC:
5797 dest = gen_reg_rtx (Pmode);
5798 if (TARGET_64BIT)
5800 rtx rax = gen_rtx_REG (Pmode, 0), insns;
5802 start_sequence ();
5803 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
5804 insns = get_insns ();
5805 end_sequence ();
5807 emit_libcall_block (insns, dest, rax, x);
5809 else
5810 emit_insn (gen_tls_global_dynamic_32 (dest, x));
5811 break;
5813 case TLS_MODEL_LOCAL_DYNAMIC:
5814 base = gen_reg_rtx (Pmode);
5815 if (TARGET_64BIT)
5817 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
5819 start_sequence ();
5820 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
5821 insns = get_insns ();
5822 end_sequence ();
5824 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
5825 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
5826 emit_libcall_block (insns, base, rax, note);
5828 else
5829 emit_insn (gen_tls_local_dynamic_base_32 (base));
5831 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5832 off = gen_rtx_CONST (Pmode, off);
5834 return gen_rtx_PLUS (Pmode, base, off);
5836 case TLS_MODEL_INITIAL_EXEC:
5837 if (TARGET_64BIT)
5839 pic = NULL;
5840 type = UNSPEC_GOTNTPOFF;
5842 else if (flag_pic)
5844 if (reload_in_progress)
5845 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5846 pic = pic_offset_table_rtx;
5847 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
5849 else if (!TARGET_GNU_TLS)
5851 pic = gen_reg_rtx (Pmode);
5852 emit_insn (gen_set_got (pic));
5853 type = UNSPEC_GOTTPOFF;
5855 else
5857 pic = NULL;
5858 type = UNSPEC_INDNTPOFF;
5861 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
5862 off = gen_rtx_CONST (Pmode, off);
5863 if (pic)
5864 off = gen_rtx_PLUS (Pmode, pic, off);
5865 off = gen_const_mem (Pmode, off);
5866 set_mem_alias_set (off, ix86_GOT_alias_set ());
5868 if (TARGET_64BIT || TARGET_GNU_TLS)
5870 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5871 off = force_reg (Pmode, off);
5872 return gen_rtx_PLUS (Pmode, base, off);
5874 else
5876 base = get_thread_pointer (true);
5877 dest = gen_reg_rtx (Pmode);
5878 emit_insn (gen_subsi3 (dest, base, off));
5880 break;
5882 case TLS_MODEL_LOCAL_EXEC:
5883 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5884 (TARGET_64BIT || TARGET_GNU_TLS)
5885 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5886 off = gen_rtx_CONST (Pmode, off);
5888 if (TARGET_64BIT || TARGET_GNU_TLS)
5890 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5891 return gen_rtx_PLUS (Pmode, base, off);
5893 else
5895 base = get_thread_pointer (true);
5896 dest = gen_reg_rtx (Pmode);
5897 emit_insn (gen_subsi3 (dest, base, off));
5899 break;
5901 default:
5902 gcc_unreachable ();
5905 return dest;
5908 /* Try machine-dependent ways of modifying an illegitimate address
5909 to be legitimate. If we find one, return the new, valid address.
5910 This macro is used in only one place: `memory_address' in explow.c.
5912 OLDX is the address as it was before break_out_memory_refs was called.
5913 In some cases it is useful to look at this to decide what needs to be done.
5915 MODE and WIN are passed so that this macro can use
5916 GO_IF_LEGITIMATE_ADDRESS.
5918 It is always safe for this macro to do nothing. It exists to recognize
5919 opportunities to optimize the output.
5921 For the 80386, we handle X+REG by loading X into a register R and
5922 using R+REG. R will go in a general reg and indexing will be used.
5923 However, if REG is a broken-out memory address or multiplication,
5924 nothing needs to be done because REG can certainly go in a general reg.
5926 When -fpic is used, special handling is needed for symbolic references.
5927 See comments by legitimize_pic_address in i386.c for details. */
5930 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
5932 int changed = 0;
5933 unsigned log;
5935 if (TARGET_DEBUG_ADDR)
5937 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5938 GET_MODE_NAME (mode));
5939 debug_rtx (x);
5942 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
5943 if (log)
5944 return legitimize_tls_address (x, log, false);
5945 if (GET_CODE (x) == CONST
5946 && GET_CODE (XEXP (x, 0)) == PLUS
5947 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
5948 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
5950 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
5951 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
5954 if (flag_pic && SYMBOLIC_CONST (x))
5955 return legitimize_pic_address (x, 0);
5957 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
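/* For example, (ashift (reg) (const_int 2)) becomes
   (mult (reg) (const_int 4)), which can then be matched as the
   index*scale part of an address.  */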
5958 if (GET_CODE (x) == ASHIFT
5959 && GET_CODE (XEXP (x, 1)) == CONST_INT
5960 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
5962 changed = 1;
5963 log = INTVAL (XEXP (x, 1));
5964 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5965 GEN_INT (1 << log));
5968 if (GET_CODE (x) == PLUS)
5970 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5972 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5973 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5974 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
5976 changed = 1;
5977 log = INTVAL (XEXP (XEXP (x, 0), 1));
5978 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5979 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5980 GEN_INT (1 << log));
5983 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5984 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5985 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
5987 changed = 1;
5988 log = INTVAL (XEXP (XEXP (x, 1), 1));
5989 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5990 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5991 GEN_INT (1 << log));
5994 /* Put multiply first if it isn't already. */
5995 if (GET_CODE (XEXP (x, 1)) == MULT)
5997 rtx tmp = XEXP (x, 0);
5998 XEXP (x, 0) = XEXP (x, 1);
5999 XEXP (x, 1) = tmp;
6000 changed = 1;
6003 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6004 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6005 created by virtual register instantiation, register elimination, and
6006 similar optimizations. */
6007 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6009 changed = 1;
6010 x = gen_rtx_PLUS (Pmode,
6011 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6012 XEXP (XEXP (x, 1), 0)),
6013 XEXP (XEXP (x, 1), 1));
6016 /* Canonicalize
6017 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6018 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6019 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6020 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6021 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6022 && CONSTANT_P (XEXP (x, 1)))
6024 rtx constant;
6025 rtx other = NULL_RTX;
6027 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6029 constant = XEXP (x, 1);
6030 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6032 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6034 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6035 other = XEXP (x, 1);
6037 else
6038 constant = 0;
6040 if (constant)
6042 changed = 1;
6043 x = gen_rtx_PLUS (Pmode,
6044 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6045 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6046 plus_constant (other, INTVAL (constant)));
6050 if (changed && legitimate_address_p (mode, x, FALSE))
6051 return x;
6053 if (GET_CODE (XEXP (x, 0)) == MULT)
6055 changed = 1;
6056 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6059 if (GET_CODE (XEXP (x, 1)) == MULT)
6061 changed = 1;
6062 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6065 if (changed
6066 && GET_CODE (XEXP (x, 1)) == REG
6067 && GET_CODE (XEXP (x, 0)) == REG)
6068 return x;
6070 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6072 changed = 1;
6073 x = legitimize_pic_address (x, 0);
6076 if (changed && legitimate_address_p (mode, x, FALSE))
6077 return x;
6079 if (GET_CODE (XEXP (x, 0)) == REG)
6081 rtx temp = gen_reg_rtx (Pmode);
6082 rtx val = force_operand (XEXP (x, 1), temp);
6083 if (val != temp)
6084 emit_move_insn (temp, val);
6086 XEXP (x, 1) = temp;
6087 return x;
6090 else if (GET_CODE (XEXP (x, 1)) == REG)
6092 rtx temp = gen_reg_rtx (Pmode);
6093 rtx val = force_operand (XEXP (x, 0), temp);
6094 if (val != temp)
6095 emit_move_insn (temp, val);
6097 XEXP (x, 0) = temp;
6098 return x;
6102 return x;
6105 /* Print an integer constant expression in assembler syntax. Addition
6106 and subtraction are the only arithmetic that may appear in these
6107 expressions. FILE is the stdio stream to write to, X is the rtx, and
6108 CODE is the operand print code from the output string. */
6110 static void
6111 output_pic_addr_const (FILE *file, rtx x, int code)
6113 char buf[256];
6115 switch (GET_CODE (x))
6117 case PC:
6118 gcc_assert (flag_pic);
6119 putc ('.', file);
6120 break;
6122 case SYMBOL_REF:
6123 assemble_name (file, XSTR (x, 0));
6124 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6125 fputs ("@PLT", file);
6126 break;
6128 case LABEL_REF:
6129 x = XEXP (x, 0);
6130 /* FALLTHRU */
6131 case CODE_LABEL:
6132 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6133 assemble_name (asm_out_file, buf);
6134 break;
6136 case CONST_INT:
6137 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6138 break;
6140 case CONST:
6141 /* This used to output parentheses around the expression,
6142 but that does not work on the 386 (either ATT or BSD assembler). */
6143 output_pic_addr_const (file, XEXP (x, 0), code);
6144 break;
6146 case CONST_DOUBLE:
6147 if (GET_MODE (x) == VOIDmode)
6149 /* We can use %d if the number is <32 bits and positive. */
6150 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6151 fprintf (file, "0x%lx%08lx",
6152 (unsigned long) CONST_DOUBLE_HIGH (x),
6153 (unsigned long) CONST_DOUBLE_LOW (x));
6154 else
6155 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6157 else
6158 /* We can't handle floating point constants;
6159 PRINT_OPERAND must handle them. */
6160 output_operand_lossage ("floating constant misused");
6161 break;
6163 case PLUS:
6164 /* Some assemblers need integer constants to appear first. */
6165 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6167 output_pic_addr_const (file, XEXP (x, 0), code);
6168 putc ('+', file);
6169 output_pic_addr_const (file, XEXP (x, 1), code);
6171 else
6173 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
6174 output_pic_addr_const (file, XEXP (x, 1), code);
6175 putc ('+', file);
6176 output_pic_addr_const (file, XEXP (x, 0), code);
6178 break;
6180 case MINUS:
6181 if (!TARGET_MACHO)
6182 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6183 output_pic_addr_const (file, XEXP (x, 0), code);
6184 putc ('-', file);
6185 output_pic_addr_const (file, XEXP (x, 1), code);
6186 if (!TARGET_MACHO)
6187 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6188 break;
6190 case UNSPEC:
6191 gcc_assert (XVECLEN (x, 0) == 1);
6192 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6193 switch (XINT (x, 1))
6195 case UNSPEC_GOT:
6196 fputs ("@GOT", file);
6197 break;
6198 case UNSPEC_GOTOFF:
6199 fputs ("@GOTOFF", file);
6200 break;
6201 case UNSPEC_GOTPCREL:
6202 fputs ("@GOTPCREL(%rip)", file);
6203 break;
6204 case UNSPEC_GOTTPOFF:
6205 /* FIXME: This might be @TPOFF in Sun ld too. */
6206 fputs ("@GOTTPOFF", file);
6207 break;
6208 case UNSPEC_TPOFF:
6209 fputs ("@TPOFF", file);
6210 break;
6211 case UNSPEC_NTPOFF:
6212 if (TARGET_64BIT)
6213 fputs ("@TPOFF", file);
6214 else
6215 fputs ("@NTPOFF", file);
6216 break;
6217 case UNSPEC_DTPOFF:
6218 fputs ("@DTPOFF", file);
6219 break;
6220 case UNSPEC_GOTNTPOFF:
6221 if (TARGET_64BIT)
6222 fputs ("@GOTTPOFF(%rip)", file);
6223 else
6224 fputs ("@GOTNTPOFF", file);
6225 break;
6226 case UNSPEC_INDNTPOFF:
6227 fputs ("@INDNTPOFF", file);
6228 break;
6229 default:
6230 output_operand_lossage ("invalid UNSPEC as operand");
6231 break;
6233 break;
6235 default:
6236 output_operand_lossage ("invalid expression as operand");
6240 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
6241 We need to emit DTP-relative relocations. */
6243 static void
6244 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6246 fputs (ASM_LONG, file);
6247 output_addr_const (file, x);
6248 fputs ("@DTPOFF", file);
6249 switch (size)
6251 case 4:
6252 break;
6253 case 8:
6254 fputs (", 0", file);
6255 break;
6256 default:
6257 gcc_unreachable ();
6261 /* In the name of slightly smaller debug output, and to cater to
6262 general assembler lossage, recognize PIC+GOTOFF and turn it back
6263 into a direct symbol reference. */
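/* For example, (plus pic_offset_table_rtx
   (const (unspec [symbol_ref] UNSPEC_GOTOFF))) is collapsed back to just
   the symbol_ref.  */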
6265 static rtx
6266 ix86_delegitimize_address (rtx orig_x)
6268 rtx x = orig_x, y;
6270 if (GET_CODE (x) == MEM)
6271 x = XEXP (x, 0);
6273 if (TARGET_64BIT)
6275 if (GET_CODE (x) != CONST
6276 || GET_CODE (XEXP (x, 0)) != UNSPEC
6277 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6278 || GET_CODE (orig_x) != MEM)
6279 return orig_x;
6280 return XVECEXP (XEXP (x, 0), 0, 0);
6283 if (GET_CODE (x) != PLUS
6284 || GET_CODE (XEXP (x, 1)) != CONST)
6285 return orig_x;
6287 if (GET_CODE (XEXP (x, 0)) == REG
6288 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6289 /* %ebx + GOT/GOTOFF */
6290 y = NULL;
6291 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6293 /* %ebx + %reg * scale + GOT/GOTOFF */
6294 y = XEXP (x, 0);
6295 if (GET_CODE (XEXP (y, 0)) == REG
6296 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6297 y = XEXP (y, 1);
6298 else if (GET_CODE (XEXP (y, 1)) == REG
6299 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6300 y = XEXP (y, 0);
6301 else
6302 return orig_x;
6303 if (GET_CODE (y) != REG
6304 && GET_CODE (y) != MULT
6305 && GET_CODE (y) != ASHIFT)
6306 return orig_x;
6308 else
6309 return orig_x;
6311 x = XEXP (XEXP (x, 1), 0);
6312 if (GET_CODE (x) == UNSPEC
6313 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6314 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6316 if (y)
6317 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6318 return XVECEXP (x, 0, 0);
6321 if (GET_CODE (x) == PLUS
6322 && GET_CODE (XEXP (x, 0)) == UNSPEC
6323 && GET_CODE (XEXP (x, 1)) == CONST_INT
6324 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6325 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6326 && GET_CODE (orig_x) != MEM)))
6328 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6329 if (y)
6330 return gen_rtx_PLUS (Pmode, y, x);
6331 return x;
6334 return orig_x;
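/* Print to FILE the condition-code suffix (e.g. "e", "ne", "g", "b")
   appropriate for comparison CODE in mode MODE.  REVERSE reverses the
   condition; FP selects the alternate spellings used with fcmov where they
   differ (e.g. "nbe" instead of "a").  */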
6337 static void
6338 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6339 int fp, FILE *file)
6341 const char *suffix;
6343 if (mode == CCFPmode || mode == CCFPUmode)
6345 enum rtx_code second_code, bypass_code;
6346 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6347 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
6348 code = ix86_fp_compare_code_to_integer (code);
6349 mode = CCmode;
6351 if (reverse)
6352 code = reverse_condition (code);
6354 switch (code)
6356 case EQ:
6357 suffix = "e";
6358 break;
6359 case NE:
6360 suffix = "ne";
6361 break;
6362 case GT:
6363 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
6364 suffix = "g";
6365 break;
6366 case GTU:
6367 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6368 Those same assemblers have the same but opposite lossage on cmov. */
6369 gcc_assert (mode == CCmode);
6370 suffix = fp ? "nbe" : "a";
6371 break;
6372 case LT:
6373 switch (mode)
6375 case CCNOmode:
6376 case CCGOCmode:
6377 suffix = "s";
6378 break;
6380 case CCmode:
6381 case CCGCmode:
6382 suffix = "l";
6383 break;
6385 default:
6386 gcc_unreachable ();
6388 break;
6389 case LTU:
6390 gcc_assert (mode == CCmode);
6391 suffix = "b";
6392 break;
6393 case GE:
6394 switch (mode)
6396 case CCNOmode:
6397 case CCGOCmode:
6398 suffix = "ns";
6399 break;
6401 case CCmode:
6402 case CCGCmode:
6403 suffix = "ge";
6404 break;
6406 default:
6407 gcc_unreachable ();
6409 break;
6410 case GEU:
6411 /* ??? As above. */
6412 gcc_assert (mode == CCmode);
6413 suffix = fp ? "nb" : "ae";
6414 break;
6415 case LE:
6416 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
6417 suffix = "le";
6418 break;
6419 case LEU:
6420 gcc_assert (mode == CCmode);
6421 suffix = "be";
6422 break;
6423 case UNORDERED:
6424 suffix = fp ? "u" : "p";
6425 break;
6426 case ORDERED:
6427 suffix = fp ? "nu" : "np";
6428 break;
6429 default:
6430 gcc_unreachable ();
6432 fputs (suffix, file);
6435 /* Print the name of register X to FILE based on its machine mode and number.
6436 If CODE is 'w', pretend the mode is HImode.
6437 If CODE is 'b', pretend the mode is QImode.
6438 If CODE is 'k', pretend the mode is SImode.
6439 If CODE is 'q', pretend the mode is DImode.
6440 If CODE is 'h', pretend the reg is the 'high' byte register.
6441 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
6443 void
6444 print_reg (rtx x, int code, FILE *file)
6446 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
6447 && REGNO (x) != FRAME_POINTER_REGNUM
6448 && REGNO (x) != FLAGS_REG
6449 && REGNO (x) != FPSR_REG);
6451 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6452 putc ('%', file);
6454 if (code == 'w' || MMX_REG_P (x))
6455 code = 2;
6456 else if (code == 'b')
6457 code = 1;
6458 else if (code == 'k')
6459 code = 4;
6460 else if (code == 'q')
6461 code = 8;
6462 else if (code == 'y')
6463 code = 3;
6464 else if (code == 'h')
6465 code = 0;
6466 else
6467 code = GET_MODE_SIZE (GET_MODE (x));
6469 /* Irritatingly, AMD extended registers use a different naming convention
6470 from the normal registers. */
6471 if (REX_INT_REG_P (x))
6473 gcc_assert (TARGET_64BIT);
6474 switch (code)
6476 case 0:
6477 error ("extended registers have no high halves");
6478 break;
6479 case 1:
6480 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6481 break;
6482 case 2:
6483 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6484 break;
6485 case 4:
6486 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6487 break;
6488 case 8:
6489 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6490 break;
6491 default:
6492 error ("unsupported operand size for extended register");
6493 break;
6495 return;
6497 switch (code)
6499 case 3:
6500 if (STACK_TOP_P (x))
6502 fputs ("st(0)", file);
6503 break;
6505 /* FALLTHRU */
6506 case 8:
6507 case 4:
6508 case 12:
6509 if (! ANY_FP_REG_P (x))
6510 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6511 /* FALLTHRU */
6512 case 16:
6513 case 2:
6514 normal:
6515 fputs (hi_reg_name[REGNO (x)], file);
6516 break;
6517 case 1:
6518 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
6519 goto normal;
6520 fputs (qi_reg_name[REGNO (x)], file);
6521 break;
6522 case 0:
6523 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
6524 goto normal;
6525 fputs (qi_high_reg_name[REGNO (x)], file);
6526 break;
6527 default:
6528 gcc_unreachable ();
6532 /* Locate some local-dynamic symbol still in use by this function
6533 so that we can print its name in some tls_local_dynamic_base
6534 pattern. */
6536 static const char *
6537 get_some_local_dynamic_name (void)
6539 rtx insn;
6541 if (cfun->machine->some_ld_name)
6542 return cfun->machine->some_ld_name;
6544 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6545 if (INSN_P (insn)
6546 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6547 return cfun->machine->some_ld_name;
6549 gcc_unreachable ();
6552 static int
6553 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
6555 rtx x = *px;
6557 if (GET_CODE (x) == SYMBOL_REF
6558 && local_dynamic_symbolic_operand (x, Pmode))
6560 cfun->machine->some_ld_name = XSTR (x, 0);
6561 return 1;
6564 return 0;
6567 /* Meaning of CODE:
6568 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6569 C -- print opcode suffix for set/cmov insn.
6570 c -- like C, but print reversed condition
6571 F,f -- likewise, but for floating-point.
6572 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
6573 otherwise nothing
6574 R -- print the prefix for register names.
6575 z -- print the opcode suffix for the size of the current operand.
6576 * -- print a star (in certain assembler syntax)
6577 A -- print an absolute memory reference.
6578 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6579 s -- print a shift double count, followed by the assembler's argument
6580 delimiter.
6581 b -- print the QImode name of the register for the indicated operand.
6582 %b0 would print %al if operands[0] is reg 0.
6583 w -- likewise, print the HImode name of the register.
6584 k -- likewise, print the SImode name of the register.
6585 q -- likewise, print the DImode name of the register.
6586 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6587 y -- print "st(0)" instead of "st" as a register.
6588 D -- print condition for SSE cmp instruction.
6589 P -- if PIC, print an @PLT suffix.
6590 X -- don't print any sort of PIC '@' suffix for a symbol.
6591 & -- print some in-use local-dynamic symbol name.
6592 H -- print a memory address offset by 8; used for sse high-parts
6595 void
6596 print_operand (FILE *file, rtx x, int code)
6598 if (code)
6600 switch (code)
6602 case '*':
6603 if (ASSEMBLER_DIALECT == ASM_ATT)
6604 putc ('*', file);
6605 return;
6607 case '&':
6608 assemble_name (file, get_some_local_dynamic_name ());
6609 return;
6611 case 'A':
6612 switch (ASSEMBLER_DIALECT)
6614 case ASM_ATT:
6615 putc ('*', file);
6616 break;
6618 case ASM_INTEL:
6619 /* Intel syntax. For absolute addresses, registers should not
6620 be surrounded by brackets. */
6621 if (GET_CODE (x) != REG)
6623 putc ('[', file);
6624 PRINT_OPERAND (file, x, 0);
6625 putc (']', file);
6626 return;
6628 break;
6630 default:
6631 gcc_unreachable ();
6634 PRINT_OPERAND (file, x, 0);
6635 return;
6638 case 'L':
6639 if (ASSEMBLER_DIALECT == ASM_ATT)
6640 putc ('l', file);
6641 return;
6643 case 'W':
6644 if (ASSEMBLER_DIALECT == ASM_ATT)
6645 putc ('w', file);
6646 return;
6648 case 'B':
6649 if (ASSEMBLER_DIALECT == ASM_ATT)
6650 putc ('b', file);
6651 return;
6653 case 'Q':
6654 if (ASSEMBLER_DIALECT == ASM_ATT)
6655 putc ('l', file);
6656 return;
6658 case 'S':
6659 if (ASSEMBLER_DIALECT == ASM_ATT)
6660 putc ('s', file);
6661 return;
6663 case 'T':
6664 if (ASSEMBLER_DIALECT == ASM_ATT)
6665 putc ('t', file);
6666 return;
6668 case 'z':
6669 /* 387 opcodes don't get size suffixes if the operands are
6670 registers. */
6671 if (STACK_REG_P (x))
6672 return;
6674 /* Likewise if using Intel opcodes. */
6675 if (ASSEMBLER_DIALECT == ASM_INTEL)
6676 return;
6678 /* This is the size of op from size of operand. */
6679 switch (GET_MODE_SIZE (GET_MODE (x)))
6681 case 2:
6682 #ifdef HAVE_GAS_FILDS_FISTS
6683 putc ('s', file);
6684 #endif
6685 return;
6687 case 4:
6688 if (GET_MODE (x) == SFmode)
6690 putc ('s', file);
6691 return;
6693 else
6694 putc ('l', file);
6695 return;
6697 case 12:
6698 case 16:
6699 putc ('t', file);
6700 return;
6702 case 8:
6703 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6705 #ifdef GAS_MNEMONICS
6706 putc ('q', file);
6707 #else
6708 putc ('l', file);
6709 putc ('l', file);
6710 #endif
6712 else
6713 putc ('l', file);
6714 return;
6716 default:
6717 gcc_unreachable ();
6720 case 'b':
6721 case 'w':
6722 case 'k':
6723 case 'q':
6724 case 'h':
6725 case 'y':
6726 case 'X':
6727 case 'P':
6728 break;
6730 case 's':
6731 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6733 PRINT_OPERAND (file, x, 0);
6734 putc (',', file);
6736 return;
6738 case 'D':
6739 /* Little bit of braindamage here. The SSE compare instructions
6740 use completely different names for the comparisons than the
6741 fp conditional moves do. */
6742 switch (GET_CODE (x))
6744 case EQ:
6745 case UNEQ:
6746 fputs ("eq", file);
6747 break;
6748 case LT:
6749 case UNLT:
6750 fputs ("lt", file);
6751 break;
6752 case LE:
6753 case UNLE:
6754 fputs ("le", file);
6755 break;
6756 case UNORDERED:
6757 fputs ("unord", file);
6758 break;
6759 case NE:
6760 case LTGT:
6761 fputs ("neq", file);
6762 break;
6763 case UNGE:
6764 case GE:
6765 fputs ("nlt", file);
6766 break;
6767 case UNGT:
6768 case GT:
6769 fputs ("nle", file);
6770 break;
6771 case ORDERED:
6772 fputs ("ord", file);
6773 break;
6774 default:
6775 gcc_unreachable ();
6777 return;
6778 case 'O':
6779 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6780 if (ASSEMBLER_DIALECT == ASM_ATT)
6782 switch (GET_MODE (x))
6784 case HImode: putc ('w', file); break;
6785 case SImode:
6786 case SFmode: putc ('l', file); break;
6787 case DImode:
6788 case DFmode: putc ('q', file); break;
6789 default: gcc_unreachable ();
6791 putc ('.', file);
6793 #endif
6794 return;
6795 case 'C':
6796 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6797 return;
6798 case 'F':
6799 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6800 if (ASSEMBLER_DIALECT == ASM_ATT)
6801 putc ('.', file);
6802 #endif
6803 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6804 return;
6806 /* Like above, but reverse condition */
6807 case 'c':
6808 /* Check to see if argument to %c is really a constant
6809 and not a condition code which needs to be reversed. */
6810 if (!COMPARISON_P (x))
6812 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6813 return;
6815 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6816 return;
6817 case 'f':
6818 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6819 if (ASSEMBLER_DIALECT == ASM_ATT)
6820 putc ('.', file);
6821 #endif
6822 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6823 return;
6825 case 'H':
6826 /* It doesn't actually matter what mode we use here, as we're
6827 only going to use this for printing. */
6828 x = adjust_address_nv (x, DImode, 8);
6829 break;
6831 case '+':
6833 rtx x;
6835 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6836 return;
6838 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6839 if (x)
6841 int pred_val = INTVAL (XEXP (x, 0));
6843 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6844 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6846 int taken = pred_val > REG_BR_PROB_BASE / 2;
6847 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6849 /* Emit hints only when the default branch prediction
6850 heuristics would fail. */
6851 if (taken != cputaken)
6853 /* We use 3e (DS) prefix for taken branches and
6854 2e (CS) prefix for not taken branches. */
6855 if (taken)
6856 fputs ("ds ; ", file);
6857 else
6858 fputs ("cs ; ", file);
6862 return;
6864 default:
6865 output_operand_lossage ("invalid operand code '%c'", code);
6869 if (GET_CODE (x) == REG)
6870 print_reg (x, code, file);
6872 else if (GET_CODE (x) == MEM)
6874 /* No `byte ptr' prefix for call instructions. */
6875 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6877 const char * size;
6878 switch (GET_MODE_SIZE (GET_MODE (x)))
6880 case 1: size = "BYTE"; break;
6881 case 2: size = "WORD"; break;
6882 case 4: size = "DWORD"; break;
6883 case 8: size = "QWORD"; break;
6884 case 12: size = "XWORD"; break;
6885 case 16: size = "XMMWORD"; break;
6886 default:
6887 gcc_unreachable ();
6890 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6891 if (code == 'b')
6892 size = "BYTE";
6893 else if (code == 'w')
6894 size = "WORD";
6895 else if (code == 'k')
6896 size = "DWORD";
6898 fputs (size, file);
6899 fputs (" PTR ", file);
6902 x = XEXP (x, 0);
6903 /* Avoid (%rip) for call operands. */
6904 if (CONSTANT_ADDRESS_P (x) && code == 'P'
6905 && GET_CODE (x) != CONST_INT)
6906 output_addr_const (file, x);
6907 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6908 output_operand_lossage ("invalid constraints for operand");
6909 else
6910 output_address (x);
6913 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6915 REAL_VALUE_TYPE r;
6916 long l;
6918 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6919 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6921 if (ASSEMBLER_DIALECT == ASM_ATT)
6922 putc ('$', file);
6923 fprintf (file, "0x%08lx", l);
6926 /* These float cases don't actually occur as immediate operands. */
6927 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6929 char dstr[30];
6931 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6932 fprintf (file, "%s", dstr);
6935 else if (GET_CODE (x) == CONST_DOUBLE
6936 && GET_MODE (x) == XFmode)
6938 char dstr[30];
6940 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6941 fprintf (file, "%s", dstr);
6944 else
6946 /* We have patterns that allow zero sets of memory, for instance.
6947 In 64-bit mode, we should probably support all 8-byte vectors,
6948 since we can in fact encode that into an immediate. */
6949 if (GET_CODE (x) == CONST_VECTOR)
6951 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
6952 x = const0_rtx;
6955 if (code != 'P')
6957 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6959 if (ASSEMBLER_DIALECT == ASM_ATT)
6960 putc ('$', file);
6962 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6963 || GET_CODE (x) == LABEL_REF)
6965 if (ASSEMBLER_DIALECT == ASM_ATT)
6966 putc ('$', file);
6967 else
6968 fputs ("OFFSET FLAT:", file);
6971 if (GET_CODE (x) == CONST_INT)
6972 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6973 else if (flag_pic)
6974 output_pic_addr_const (file, x, code);
6975 else
6976 output_addr_const (file, x);
6980 /* Print a memory operand whose address is ADDR. */
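/* For example, an address of the form base + disp + index*scale prints as
   "disp(%base,%index,scale)" in AT&T syntax and as
   "[base+disp+index*scale]" in Intel syntax.  */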
6982 void
6983 print_operand_address (FILE *file, rtx addr)
6985 struct ix86_address parts;
6986 rtx base, index, disp;
6987 int scale;
6988 int ok = ix86_decompose_address (addr, &parts);
6990 gcc_assert (ok);
6992 base = parts.base;
6993 index = parts.index;
6994 disp = parts.disp;
6995 scale = parts.scale;
6997 switch (parts.seg)
6999 case SEG_DEFAULT:
7000 break;
7001 case SEG_FS:
7002 case SEG_GS:
7003 if (USER_LABEL_PREFIX[0] == 0)
7004 putc ('%', file);
7005 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7006 break;
7007 default:
7008 gcc_unreachable ();
7011 if (!base && !index)
7013 /* A displacement-only address requires special attention. */
7015 if (GET_CODE (disp) == CONST_INT)
7017 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7019 if (USER_LABEL_PREFIX[0] == 0)
7020 putc ('%', file);
7021 fputs ("ds:", file);
7023 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7025 else if (flag_pic)
7026 output_pic_addr_const (file, disp, 0);
7027 else
7028 output_addr_const (file, disp);
7030 /* Use RIP-relative addressing, which is one byte shorter, in 64-bit mode. */
7031 if (TARGET_64BIT
7032 && ((GET_CODE (disp) == SYMBOL_REF
7033 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7034 || GET_CODE (disp) == LABEL_REF
7035 || (GET_CODE (disp) == CONST
7036 && GET_CODE (XEXP (disp, 0)) == PLUS
7037 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7038 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7039 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7040 fputs ("(%rip)", file);
7042 else
7044 if (ASSEMBLER_DIALECT == ASM_ATT)
7046 if (disp)
7048 if (flag_pic)
7049 output_pic_addr_const (file, disp, 0);
7050 else if (GET_CODE (disp) == LABEL_REF)
7051 output_asm_label (disp);
7052 else
7053 output_addr_const (file, disp);
7056 putc ('(', file);
7057 if (base)
7058 print_reg (base, 0, file);
7059 if (index)
7061 putc (',', file);
7062 print_reg (index, 0, file);
7063 if (scale != 1)
7064 fprintf (file, ",%d", scale);
7066 putc (')', file);
7068 else
7070 rtx offset = NULL_RTX;
7072 if (disp)
7074 /* Pull out the offset of a symbol; print any symbol itself. */
7075 if (GET_CODE (disp) == CONST
7076 && GET_CODE (XEXP (disp, 0)) == PLUS
7077 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7079 offset = XEXP (XEXP (disp, 0), 1);
7080 disp = gen_rtx_CONST (VOIDmode,
7081 XEXP (XEXP (disp, 0), 0));
7084 if (flag_pic)
7085 output_pic_addr_const (file, disp, 0);
7086 else if (GET_CODE (disp) == LABEL_REF)
7087 output_asm_label (disp);
7088 else if (GET_CODE (disp) == CONST_INT)
7089 offset = disp;
7090 else
7091 output_addr_const (file, disp);
7094 putc ('[', file);
7095 if (base)
7097 print_reg (base, 0, file);
7098 if (offset)
7100 if (INTVAL (offset) >= 0)
7101 putc ('+', file);
7102 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7105 else if (offset)
7106 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7107 else
7108 putc ('0', file);
7110 if (index)
7112 putc ('+', file);
7113 print_reg (index, 0, file);
7114 if (scale != 1)
7115 fprintf (file, "*%d", scale);
7117 putc (']', file);
7122 bool
7123 output_addr_const_extra (FILE *file, rtx x)
7125 rtx op;
7127 if (GET_CODE (x) != UNSPEC)
7128 return false;
7130 op = XVECEXP (x, 0, 0);
7131 switch (XINT (x, 1))
7133 case UNSPEC_GOTTPOFF:
7134 output_addr_const (file, op);
7135 /* FIXME: This might be @TPOFF in Sun ld. */
7136 fputs ("@GOTTPOFF", file);
7137 break;
7138 case UNSPEC_TPOFF:
7139 output_addr_const (file, op);
7140 fputs ("@TPOFF", file);
7141 break;
7142 case UNSPEC_NTPOFF:
7143 output_addr_const (file, op);
7144 if (TARGET_64BIT)
7145 fputs ("@TPOFF", file);
7146 else
7147 fputs ("@NTPOFF", file);
7148 break;
7149 case UNSPEC_DTPOFF:
7150 output_addr_const (file, op);
7151 fputs ("@DTPOFF", file);
7152 break;
7153 case UNSPEC_GOTNTPOFF:
7154 output_addr_const (file, op);
7155 if (TARGET_64BIT)
7156 fputs ("@GOTTPOFF(%rip)", file);
7157 else
7158 fputs ("@GOTNTPOFF", file);
7159 break;
7160 case UNSPEC_INDNTPOFF:
7161 output_addr_const (file, op);
7162 fputs ("@INDNTPOFF", file);
7163 break;
7165 default:
7166 return false;
7169 return true;
7172 /* Split one or more DImode RTL references into pairs of SImode
7173 references. The RTL can be REG, offsettable MEM, integer constant, or
7174 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7175 split and "num" is its length. lo_half and hi_half are output arrays
7176 that parallel "operands". */
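/* For example, a DImode MEM at address A is split into SImode MEMs at A and
   A+4, while REGs and constants go through simplify_gen_subreg to extract
   the low and high SImode halves.  */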
7178 void
7179 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7181 while (num--)
7183 rtx op = operands[num];
7185 /* simplify_subreg refuses to split volatile memory addresses,
7186 but we still have to handle them. */
7187 if (GET_CODE (op) == MEM)
7189 lo_half[num] = adjust_address (op, SImode, 0);
7190 hi_half[num] = adjust_address (op, SImode, 4);
7192 else
7194 lo_half[num] = simplify_gen_subreg (SImode, op,
7195 GET_MODE (op) == VOIDmode
7196 ? DImode : GET_MODE (op), 0);
7197 hi_half[num] = simplify_gen_subreg (SImode, op,
7198 GET_MODE (op) == VOIDmode
7199 ? DImode : GET_MODE (op), 4);
7203 /* Split one or more TImode RTL references into pairs of DImode
7204 references. The RTL can be REG, offsettable MEM, integer constant, or
7205 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
7206 split and "num" is its length. lo_half and hi_half are output arrays
7207 that parallel "operands". */
7209 void
7210 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7212 while (num--)
7214 rtx op = operands[num];
7216 /* simplify_subreg refuses to split volatile memory addresses, but we
7217 still have to handle them. */
7218 if (GET_CODE (op) == MEM)
7220 lo_half[num] = adjust_address (op, DImode, 0);
7221 hi_half[num] = adjust_address (op, DImode, 8);
7223 else
7225 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7226 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7231 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7232 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7233 is the expression of the binary operation. The output may either be
7234 emitted here, or returned to the caller, like all output_* functions.
7236 There is no guarantee that the operands are the same mode, as they
7237 might be within FLOAT or FLOAT_EXTEND expressions. */
7239 #ifndef SYSV386_COMPAT
7240 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7241 wants to fix the assemblers because that causes incompatibility
7242 with gcc. No-one wants to fix gcc because that causes
7243 incompatibility with assemblers... You can use the option of
7244 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7245 #define SYSV386_COMPAT 1
7246 #endif
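/* The function below assembles the mnemonic from a base name (fadd, fsub,
   fmul, fdiv, or their fi* forms when an operand is an integer) plus a
   suffix and operand template chosen from the register/memory/stack-death
   cases; the SSE forms simply return "addss"/"addsd"-style templates.  */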
7248 const char *
7249 output_387_binary_op (rtx insn, rtx *operands)
7251 static char buf[30];
7252 const char *p;
7253 const char *ssep;
7254 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
7256 #ifdef ENABLE_CHECKING
7257 /* Even if we do not want to check the inputs, this documents the input
7258 constraints, which helps in understanding the following code. */
7259 if (STACK_REG_P (operands[0])
7260 && ((REG_P (operands[1])
7261 && REGNO (operands[0]) == REGNO (operands[1])
7262 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7263 || (REG_P (operands[2])
7264 && REGNO (operands[0]) == REGNO (operands[2])
7265 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7266 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7267 ; /* ok */
7268 else
7269 gcc_assert (is_sse);
7270 #endif
7272 switch (GET_CODE (operands[3]))
7274 case PLUS:
7275 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7276 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7277 p = "fiadd";
7278 else
7279 p = "fadd";
7280 ssep = "add";
7281 break;
7283 case MINUS:
7284 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7285 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7286 p = "fisub";
7287 else
7288 p = "fsub";
7289 ssep = "sub";
7290 break;
7292 case MULT:
7293 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7294 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7295 p = "fimul";
7296 else
7297 p = "fmul";
7298 ssep = "mul";
7299 break;
7301 case DIV:
7302 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7303 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7304 p = "fidiv";
7305 else
7306 p = "fdiv";
7307 ssep = "div";
7308 break;
7310 default:
7311 gcc_unreachable ();
7314 if (is_sse)
7316 strcpy (buf, ssep);
7317 if (GET_MODE (operands[0]) == SFmode)
7318 strcat (buf, "ss\t{%2, %0|%0, %2}");
7319 else
7320 strcat (buf, "sd\t{%2, %0|%0, %2}");
7321 return buf;
7323 strcpy (buf, p);
7325 switch (GET_CODE (operands[3]))
7327 case MULT:
7328 case PLUS:
7329 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7331 rtx temp = operands[2];
7332 operands[2] = operands[1];
7333 operands[1] = temp;
7336 /* We know operands[0] == operands[1]. */
7338 if (GET_CODE (operands[2]) == MEM)
7340 p = "%z2\t%2";
7341 break;
7344 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7346 if (STACK_TOP_P (operands[0]))
7347 /* How is it that we are storing to a dead operand[2]?
7348 Well, presumably operands[1] is dead too. We can't
7349 store the result to st(0) as st(0) gets popped on this
7350 instruction. Instead store to operands[2] (which I
7351 think has to be st(1)). st(1) will be popped later.
7352 gcc <= 2.8.1 didn't have this check and generated
7353 assembly code that the Unixware assembler rejected. */
7354 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7355 else
7356 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7357 break;
7360 if (STACK_TOP_P (operands[0]))
7361 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7362 else
7363 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7364 break;
7366 case MINUS:
7367 case DIV:
7368 if (GET_CODE (operands[1]) == MEM)
7370 p = "r%z1\t%1";
7371 break;
7374 if (GET_CODE (operands[2]) == MEM)
7376 p = "%z2\t%2";
7377 break;
7380 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7382 #if SYSV386_COMPAT
7383 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7384 derived assemblers, confusingly reverse the direction of
7385 the operation for fsub{r} and fdiv{r} when the
7386 destination register is not st(0). The Intel assembler
7387 doesn't have this brain damage. Read !SYSV386_COMPAT to
7388 figure out what the hardware really does. */
7389 if (STACK_TOP_P (operands[0]))
7390 p = "{p\t%0, %2|rp\t%2, %0}";
7391 else
7392 p = "{rp\t%2, %0|p\t%0, %2}";
7393 #else
7394 if (STACK_TOP_P (operands[0]))
7395 /* As above for fmul/fadd, we can't store to st(0). */
7396 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7397 else
7398 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7399 #endif
7400 break;
7403 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7405 #if SYSV386_COMPAT
7406 if (STACK_TOP_P (operands[0]))
7407 p = "{rp\t%0, %1|p\t%1, %0}";
7408 else
7409 p = "{p\t%1, %0|rp\t%0, %1}";
7410 #else
7411 if (STACK_TOP_P (operands[0]))
7412 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7413 else
7414 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7415 #endif
7416 break;
7419 if (STACK_TOP_P (operands[0]))
7421 if (STACK_TOP_P (operands[1]))
7422 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7423 else
7424 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7425 break;
7427 else if (STACK_TOP_P (operands[1]))
7429 #if SYSV386_COMPAT
7430 p = "{\t%1, %0|r\t%0, %1}";
7431 #else
7432 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7433 #endif
7435 else
7437 #if SYSV386_COMPAT
7438 p = "{r\t%2, %0|\t%0, %2}";
7439 #else
7440 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7441 #endif
7443 break;
7445 default:
7446 gcc_unreachable ();
7449 strcat (buf, p);
7450 return buf;
7453 /* Return the mode needed for ENTITY in the optimize_mode_switching pass. */
7456 ix86_mode_needed (int entity, rtx insn)
7458 enum attr_i387_cw mode;
7460 /* The mode UNINITIALIZED is used to store the control word after a
7461 function call or ASM pattern. The mode ANY specifies that the function
7462 has no requirements on the control word and makes no changes to the
7463 bits we are interested in. */
7465 if (CALL_P (insn)
7466 || (NONJUMP_INSN_P (insn)
7467 && (asm_noperands (PATTERN (insn)) >= 0
7468 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
7469 return I387_CW_UNINITIALIZED;
7471 if (recog_memoized (insn) < 0)
7472 return I387_CW_ANY;
7474 mode = get_attr_i387_cw (insn);
7476 switch (entity)
7478 case I387_TRUNC:
7479 if (mode == I387_CW_TRUNC)
7480 return mode;
7481 break;
7483 case I387_FLOOR:
7484 if (mode == I387_CW_FLOOR)
7485 return mode;
7486 break;
7488 case I387_CEIL:
7489 if (mode == I387_CW_CEIL)
7490 return mode;
7491 break;
7493 case I387_MASK_PM:
7494 if (mode == I387_CW_MASK_PM)
7495 return mode;
7496 break;
7498 default:
7499 gcc_unreachable ();
7502 return I387_CW_ANY;
7505 /* Output code to initialize the control word copies used by trunc?f?i and
7506 rounding patterns. MODE selects which variant (truncation, floor, ceiling
7507 or precision-mask) to compute and store in its stack slot. */
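/* Bits 10 and 11 of the 387 control word select the rounding mode
   (0x0000 nearest, 0x0400 down, 0x0800 up, 0x0c00 toward zero) and bit 5
   (0x0020) masks the precision exception; these are the constants the code
   below ORs into the saved control word.  */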
7509 void
7510 emit_i387_cw_initialization (int mode)
7512 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
7513 rtx new_mode;
7515 int slot;
7517 rtx reg = gen_reg_rtx (HImode);
7519 emit_insn (gen_x86_fnstcw_1 (stored_mode));
7520 emit_move_insn (reg, stored_mode);
7522 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
7524 switch (mode)
7526 case I387_CW_TRUNC:
7527 /* round toward zero (truncate) */
7528 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
7529 slot = SLOT_CW_TRUNC;
7530 break;
7532 case I387_CW_FLOOR:
7533 /* round down toward -oo */
7534 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7535 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
7536 slot = SLOT_CW_FLOOR;
7537 break;
7539 case I387_CW_CEIL:
7540 /* round up toward +oo */
7541 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7542 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
7543 slot = SLOT_CW_CEIL;
7544 break;
7546 case I387_CW_MASK_PM:
7547 /* mask precision exception for nearbyint() */
7548 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7549 slot = SLOT_CW_MASK_PM;
7550 break;
7552 default:
7553 gcc_unreachable ();
7556 else
7558 switch (mode)
7560 case I387_CW_TRUNC:
7561 /* round toward zero (truncate) */
7562 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7563 slot = SLOT_CW_TRUNC;
7564 break;
7566 case I387_CW_FLOOR:
7567 /* round down toward -oo */
7568 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
7569 slot = SLOT_CW_FLOOR;
7570 break;
7572 case I387_CW_CEIL:
7573 /* round up toward +oo */
7574 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
7575 slot = SLOT_CW_CEIL;
7576 break;
7578 case I387_CW_MASK_PM:
7579 /* mask precision exception for nearbyint() */
7580 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7581 slot = SLOT_CW_MASK_PM;
7582 break;
7584 default:
7585 gcc_unreachable ();
7589 gcc_assert (slot < MAX_386_STACK_LOCALS);
7591 new_mode = assign_386_stack_local (HImode, slot);
7592 emit_move_insn (new_mode, reg);
7595 /* Output code for INSN to convert a float to a signed int. OPERANDS
7596 are the insn operands. The output may be [HSD]Imode and the input
7597 operand may be [SDX]Fmode. */
7599 const char *
7600 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
7602 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7603 int dimode_p = GET_MODE (operands[0]) == DImode;
7604 int round_mode = get_attr_i387_cw (insn);
7606 /* Jump through a hoop or two for DImode, since the hardware has no
7607 non-popping instruction. We used to do this a different way, but
7608 that was somewhat fragile and broke with post-reload splitters. */
7609 if ((dimode_p || fisttp) && !stack_top_dies)
7610 output_asm_insn ("fld\t%y1", operands);
7612 gcc_assert (STACK_TOP_P (operands[1]));
7613 gcc_assert (GET_CODE (operands[0]) == MEM);
7615 if (fisttp)
7616 output_asm_insn ("fisttp%z0\t%0", operands);
7617 else
7619 if (round_mode != I387_CW_ANY)
7620 output_asm_insn ("fldcw\t%3", operands);
7621 if (stack_top_dies || dimode_p)
7622 output_asm_insn ("fistp%z0\t%0", operands);
7623 else
7624 output_asm_insn ("fist%z0\t%0", operands);
7625 if (round_mode != I387_CW_ANY)
7626 output_asm_insn ("fldcw\t%2", operands);
7629 return "";
7632 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7633 should be used. UNORDERED_P is true when fucom should be used. */
7635 const char *
7636 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
7638 int stack_top_dies;
7639 rtx cmp_op0, cmp_op1;
7640 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
7642 if (eflags_p)
7644 cmp_op0 = operands[0];
7645 cmp_op1 = operands[1];
7647 else
7649 cmp_op0 = operands[1];
7650 cmp_op1 = operands[2];
7653 if (is_sse)
7655 if (GET_MODE (operands[0]) == SFmode)
7656 if (unordered_p)
7657 return "ucomiss\t{%1, %0|%0, %1}";
7658 else
7659 return "comiss\t{%1, %0|%0, %1}";
7660 else
7661 if (unordered_p)
7662 return "ucomisd\t{%1, %0|%0, %1}";
7663 else
7664 return "comisd\t{%1, %0|%0, %1}";
7667 gcc_assert (STACK_TOP_P (cmp_op0));
7669 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7671 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
7673 if (stack_top_dies)
7675 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
7676 return TARGET_USE_FFREEP ? "ffreep\t%y1" : "fstp\t%y1";
7678 else
7679 return "ftst\n\tfnstsw\t%0";
7682 if (STACK_REG_P (cmp_op1)
7683 && stack_top_dies
7684 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7685 && REGNO (cmp_op1) != FIRST_STACK_REG)
7687 /* If both the top of the 387 stack and the other operand (also a
7688 stack register) die, then this must be a
7689 `fcompp' float compare. */
7691 if (eflags_p)
7693 /* There is no double-popping fcomi variant. Fortunately,
7694 eflags is immune to the fstp's cc clobbering. */
7695 if (unordered_p)
7696 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7697 else
7698 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7699 return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
7701 else
7703 if (unordered_p)
7704 return "fucompp\n\tfnstsw\t%0";
7705 else
7706 return "fcompp\n\tfnstsw\t%0";
7709 else
7711 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
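/* For example, mask 9 (eflags_p and stack_top_dies set) selects
   "fcomip\t{%y1, %0|%0, %y1}"; the NULL entries are combinations that are
   rejected by the assert below.  */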
7713 static const char * const alt[16] =
7715 "fcom%z2\t%y2\n\tfnstsw\t%0",
7716 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7717 "fucom%z2\t%y2\n\tfnstsw\t%0",
7718 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7720 "ficom%z2\t%y2\n\tfnstsw\t%0",
7721 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7722 NULL,
7723 NULL,
7725 "fcomi\t{%y1, %0|%0, %y1}",
7726 "fcomip\t{%y1, %0|%0, %y1}",
7727 "fucomi\t{%y1, %0|%0, %y1}",
7728 "fucomip\t{%y1, %0|%0, %y1}",
7730 NULL,
7731 NULL,
7732 NULL,
7733 NULL
7736 int mask;
7737 const char *ret;
7739 mask = eflags_p << 3;
7740 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
7741 mask |= unordered_p << 1;
7742 mask |= stack_top_dies;
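/* Worked example (illustration only): an eflags (fcomi-style) compare of two
   FP stack registers where the stack top dies has eflags_p = 1, a non-integer
   cmp_op1, unordered_p = 0 and stack_top_dies = 1, giving
   mask = (1 << 3) | 0 | 0 | 1 = 9, and alt[9] is the "fcomip" template.  */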
7744 gcc_assert (mask < 16);
7745 ret = alt[mask];
7746 gcc_assert (ret);
7748 return ret;
7752 void
7753 ix86_output_addr_vec_elt (FILE *file, int value)
7755 const char *directive = ASM_LONG;
7757 #ifdef ASM_QUAD
7758 if (TARGET_64BIT)
7759 directive = ASM_QUAD;
7760 #else
7761 gcc_assert (!TARGET_64BIT);
7762 #endif
7764 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7767 void
7768 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
7770 if (TARGET_64BIT)
7771 fprintf (file, "%s%s%d-%s%d\n",
7772 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7773 else if (HAVE_AS_GOTOFF_IN_DATA)
7774 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7775 #if TARGET_MACHO
7776 else if (TARGET_MACHO)
7778 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
7779 machopic_output_function_base_name (file);
7780 fprintf(file, "\n");
7782 #endif
7783 else
7784 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7785 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
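/* For illustration (assuming ASM_LONG expands to "\t.long\t" and LPREFIX to
   ".L", and using 42/40 as sample label numbers), the non-Darwin cases above
   emit entries such as
       .long .L42-.L40                        (64-bit)
       .long .L42@GOTOFF                      (32-bit, GOTOFF allowed in data)
       .long _GLOBAL_OFFSET_TABLE_+[.-.L42]   (32-bit fallback)  */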
7788 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7789 for the target. */
7791 void
7792 ix86_expand_clear (rtx dest)
7794 rtx tmp;
7796 /* We play register width games, which are only valid after reload. */
7797 gcc_assert (reload_completed);
7799 /* Avoid HImode and its attendant prefix byte. */
7800 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7801 dest = gen_rtx_REG (SImode, REGNO (dest));
7803 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7805 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7806 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7808 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7809 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7812 emit_insn (tmp);
7815 /* X is an unchanging MEM. If it is a constant pool reference, return
7816 the constant pool rtx, else NULL. */
7819 maybe_get_pool_constant (rtx x)
7821 x = ix86_delegitimize_address (XEXP (x, 0));
7823 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7824 return get_pool_constant (x);
7826 return NULL_RTX;
7829 void
7830 ix86_expand_move (enum machine_mode mode, rtx operands[])
7832 int strict = (reload_in_progress || reload_completed);
7833 rtx op0, op1;
7834 enum tls_model model;
7836 op0 = operands[0];
7837 op1 = operands[1];
7839 if (GET_CODE (op1) == SYMBOL_REF)
7841 model = SYMBOL_REF_TLS_MODEL (op1);
7842 if (model)
7844 op1 = legitimize_tls_address (op1, model, true);
7845 op1 = force_operand (op1, op0);
7846 if (op1 == op0)
7847 return;
7850 else if (GET_CODE (op1) == CONST
7851 && GET_CODE (XEXP (op1, 0)) == PLUS
7852 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
7854 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
7855 if (model)
7857 rtx addend = XEXP (XEXP (op1, 0), 1);
7858 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
7859 op1 = force_operand (op1, NULL);
7860 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
7861 op0, 1, OPTAB_DIRECT);
7862 if (op1 == op0)
7863 return;
7867 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7869 #if TARGET_MACHO
7870 if (MACHOPIC_PURE)
7872 rtx temp = ((reload_in_progress
7873 || ((op0 && GET_CODE (op0) == REG)
7874 && mode == Pmode))
7875 ? op0 : gen_reg_rtx (Pmode));
7876 op1 = machopic_indirect_data_reference (op1, temp);
7877 op1 = machopic_legitimize_pic_address (op1, mode,
7878 temp == op1 ? 0 : temp);
7880 else if (MACHOPIC_INDIRECT)
7881 op1 = machopic_indirect_data_reference (op1, 0);
7882 if (op0 == op1)
7883 return;
7884 #else
7885 if (GET_CODE (op0) == MEM)
7886 op1 = force_reg (Pmode, op1);
7887 else
7888 op1 = legitimize_address (op1, op1, Pmode);
7889 #endif /* TARGET_MACHO */
7891 else
7893 if (GET_CODE (op0) == MEM
7894 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7895 || !push_operand (op0, mode))
7896 && GET_CODE (op1) == MEM)
7897 op1 = force_reg (mode, op1);
7899 if (push_operand (op0, mode)
7900 && ! general_no_elim_operand (op1, mode))
7901 op1 = copy_to_mode_reg (mode, op1);
7903 /* When compiling for 64 bits, force large constants into a register
7904 to get them CSEed. */
7905 if (TARGET_64BIT && mode == DImode
7906 && immediate_operand (op1, mode)
7907 && !x86_64_zext_immediate_operand (op1, VOIDmode)
7908 && !register_operand (op0, mode)
7909 && optimize && !reload_completed && !reload_in_progress)
7910 op1 = copy_to_mode_reg (mode, op1);
7912 if (FLOAT_MODE_P (mode))
7914 /* If we are loading a floating point constant to a register,
7915 force the value to memory now, since we'll get better code
7916 out of the back end. */
7918 if (strict)
7920 else if (GET_CODE (op1) == CONST_DOUBLE)
7922 op1 = validize_mem (force_const_mem (mode, op1));
7923 if (!register_operand (op0, mode))
7925 rtx temp = gen_reg_rtx (mode);
7926 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
7927 emit_move_insn (op0, temp);
7928 return;
7934 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
7937 void
7938 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
7940 rtx op0 = operands[0], op1 = operands[1];
7942 /* Force constants other than zero into memory. We do not know how
7943 the instructions used to build constants modify the upper 64 bits
7944 of the register; once we have that information we may be able
7945 to handle some of them more efficiently. */
7946 if ((reload_in_progress | reload_completed) == 0
7947 && register_operand (op0, mode)
7948 && CONSTANT_P (op1) && op1 != CONST0_RTX (mode))
7949 op1 = validize_mem (force_const_mem (mode, op1));
7951 /* Make operand1 a register if it isn't already. */
7952 if (!no_new_pseudos
7953 && !register_operand (op0, mode)
7954 && !register_operand (op1, mode))
7956 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
7957 return;
7960 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
7963 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
7964 straight to ix86_expand_vector_move. */
7966 void
7967 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
7969 rtx op0, op1, m;
7971 op0 = operands[0];
7972 op1 = operands[1];
7974 if (MEM_P (op1))
7976 /* If we're optimizing for size, movups is the smallest. */
7977 if (optimize_size)
7979 op0 = gen_lowpart (V4SFmode, op0);
7980 op1 = gen_lowpart (V4SFmode, op1);
7981 emit_insn (gen_sse_movups (op0, op1));
7982 return;
7985 /* ??? If we have typed data, then it would appear that using
7986 movdqu is the only way to get unaligned data loaded with
7987 integer type. */
7988 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
7990 op0 = gen_lowpart (V16QImode, op0);
7991 op1 = gen_lowpart (V16QImode, op1);
7992 emit_insn (gen_sse2_movdqu (op0, op1));
7993 return;
7996 if (TARGET_SSE2 && mode == V2DFmode)
7998 rtx zero;
8000 /* When SSE registers are split into halves, we can avoid
8001 writing to the top half twice. */
8002 if (TARGET_SSE_SPLIT_REGS)
8004 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
8005 zero = op0;
8007 else
8009 /* ??? Not sure about the best option for the Intel chips.
8010 The following would seem to satisfy; the register is
8011 entirely cleared, breaking the dependency chain. We
8012 then store to the upper half, with a dependency depth
8013 of one. A rumor has it that Intel recommends two movsd
8014 followed by an unpacklpd, but this is unconfirmed. And
8015 given that the dependency depth of the unpacklpd would
8016 still be one, I'm not sure why this would be better. */
8017 zero = CONST0_RTX (V2DFmode);
8020 m = adjust_address (op1, DFmode, 0);
8021 emit_insn (gen_sse2_loadlpd (op0, zero, m));
8022 m = adjust_address (op1, DFmode, 8);
8023 emit_insn (gen_sse2_loadhpd (op0, op0, m));
8025 else
8027 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
8028 emit_move_insn (op0, CONST0_RTX (mode));
8029 else
8030 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
8032 if (mode != V4SFmode)
8033 op0 = gen_lowpart (V4SFmode, op0);
8034 m = adjust_address (op1, V2SFmode, 0);
8035 emit_insn (gen_sse_loadlps (op0, op0, m));
8036 m = adjust_address (op1, V2SFmode, 8);
8037 emit_insn (gen_sse_loadhps (op0, op0, m));
8040 else if (MEM_P (op0))
8042 /* If we're optimizing for size, movups is the smallest. */
8043 if (optimize_size)
8045 op0 = gen_lowpart (V4SFmode, op0);
8046 op1 = gen_lowpart (V4SFmode, op1);
8047 emit_insn (gen_sse_movups (op0, op1));
8048 return;
8051 /* ??? Similar to above, only less clear because of quote
8052 typeless stores unquote. */
8053 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
8054 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
8056 op0 = gen_lowpart (V16QImode, op0);
8057 op1 = gen_lowpart (V16QImode, op1);
8058 emit_insn (gen_sse2_movdqu (op0, op1));
8059 return;
8062 if (TARGET_SSE2 && mode == V2DFmode)
8064 m = adjust_address (op0, DFmode, 0);
8065 emit_insn (gen_sse2_storelpd (m, op1));
8066 m = adjust_address (op0, DFmode, 8);
8067 emit_insn (gen_sse2_storehpd (m, op1));
8069 else
8071 if (mode != V4SFmode)
8072 op1 = gen_lowpart (V4SFmode, op1);
8073 m = adjust_address (op0, V2SFmode, 0);
8074 emit_insn (gen_sse_storelps (m, op1));
8075 m = adjust_address (op0, V2SFmode, 8);
8076 emit_insn (gen_sse_storehps (m, op1));
8079 else
8080 gcc_unreachable ();
8083 /* Expand a push in MODE. This is some mode for which we do not support
8084 proper push instructions, at least from the registers that we expect
8085 the value to live in. */
8087 void
8088 ix86_expand_push (enum machine_mode mode, rtx x)
8090 rtx tmp;
8092 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
8093 GEN_INT (-GET_MODE_SIZE (mode)),
8094 stack_pointer_rtx, 1, OPTAB_DIRECT);
8095 if (tmp != stack_pointer_rtx)
8096 emit_move_insn (stack_pointer_rtx, tmp);
8098 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
8099 emit_move_insn (tmp, x);
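/* Conceptually this is just "sp -= GET_MODE_SIZE (mode); *(mode *) sp = x",
   i.e. an explicit stack-pointer adjustment followed by an ordinary store,
   for modes that have no usable push instruction.  */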
8102 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
8103 destination to use for the operation. If different from the true
8104 destination in operands[0], a copy operation will be required. */
8107 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
8108 rtx operands[])
8110 int matching_memory;
8111 rtx src1, src2, dst;
8113 dst = operands[0];
8114 src1 = operands[1];
8115 src2 = operands[2];
8117 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8118 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8119 && (rtx_equal_p (dst, src2)
8120 || immediate_operand (src1, mode)))
8122 rtx temp = src1;
8123 src1 = src2;
8124 src2 = temp;
8127 /* If the destination is memory, and we do not have matching source
8128 operands, do things in registers. */
8129 matching_memory = 0;
8130 if (GET_CODE (dst) == MEM)
8132 if (rtx_equal_p (dst, src1))
8133 matching_memory = 1;
8134 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8135 && rtx_equal_p (dst, src2))
8136 matching_memory = 2;
8137 else
8138 dst = gen_reg_rtx (mode);
8141 /* Both source operands cannot be in memory. */
8142 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8144 if (matching_memory != 2)
8145 src2 = force_reg (mode, src2);
8146 else
8147 src1 = force_reg (mode, src1);
8150 /* If the operation is not commutable, source 1 cannot be a constant
8151 or non-matching memory. */
8152 if ((CONSTANT_P (src1)
8153 || (!matching_memory && GET_CODE (src1) == MEM))
8154 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8155 src1 = force_reg (mode, src1);
8157 /* If optimizing, copy to regs to improve CSE */
8158 if (optimize && ! no_new_pseudos)
8160 if (GET_CODE (dst) == MEM)
8161 dst = gen_reg_rtx (mode);
8162 if (GET_CODE (src1) == MEM)
8163 src1 = force_reg (mode, src1);
8164 if (GET_CODE (src2) == MEM)
8165 src2 = force_reg (mode, src2);
8168 src1 = operands[1] = src1;
8169 src2 = operands[2] = src2;
8170 return dst;
8173 /* Similarly, but assume that the destination has already been
8174 set up properly. */
8176 void
8177 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
8178 enum machine_mode mode, rtx operands[])
8180 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
8181 gcc_assert (dst == operands[0]);
8184 /* Attempt to expand a binary operator. Make the expansion closer to the
8185 actual machine, than just general_operand, which will allow 3 separate
8186 memory references (one output, two inputs) in a single insn. */
8188 void
8189 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8190 rtx operands[])
8192 rtx src1, src2, dst, op, clob;
8194 dst = ix86_fixup_binary_operands (code, mode, operands);
8195 src1 = operands[1];
8196 src2 = operands[2];
8198 /* Emit the instruction. */
8200 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8201 if (reload_in_progress)
8203 /* Reload doesn't know about the flags register, and doesn't know that
8204 it doesn't want to clobber it. We can only do this with PLUS. */
8205 gcc_assert (code == PLUS);
8206 emit_insn (op);
8208 else
8210 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8211 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8214 /* Fix up the destination if needed. */
8215 if (dst != operands[0])
8216 emit_move_insn (operands[0], dst);
8219 /* Return TRUE or FALSE depending on whether the binary operator meets the
8220 appropriate constraints. */
8223 ix86_binary_operator_ok (enum rtx_code code,
8224 enum machine_mode mode ATTRIBUTE_UNUSED,
8225 rtx operands[3])
8227 /* Both source operands cannot be in memory. */
8228 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8229 return 0;
8230 /* If the operation is not commutable, source 1 cannot be a constant. */
8231 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8232 return 0;
8233 /* If the destination is memory, we must have a matching source operand. */
8234 if (GET_CODE (operands[0]) == MEM
8235 && ! (rtx_equal_p (operands[0], operands[1])
8236 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8237 && rtx_equal_p (operands[0], operands[2]))))
8238 return 0;
8239 /* If the operation is not commutable and the source 1 is memory, we must
8240 have a matching destination. */
8241 if (GET_CODE (operands[1]) == MEM
8242 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
8243 && ! rtx_equal_p (operands[0], operands[1]))
8244 return 0;
8245 return 1;
8248 /* Attempt to expand a unary operator. Make the expansion closer to the
8249 actual machine, than just general_operand, which will allow 2 separate
8250 memory references (one output, one input) in a single insn. */
8252 void
8253 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8254 rtx operands[])
8256 int matching_memory;
8257 rtx src, dst, op, clob;
8259 dst = operands[0];
8260 src = operands[1];
8262 /* If the destination is memory, and we do not have matching source
8263 operands, do things in registers. */
8264 matching_memory = 0;
8265 if (MEM_P (dst))
8267 if (rtx_equal_p (dst, src))
8268 matching_memory = 1;
8269 else
8270 dst = gen_reg_rtx (mode);
8273 /* When source operand is memory, destination must match. */
8274 if (MEM_P (src) && !matching_memory)
8275 src = force_reg (mode, src);
8277 /* If optimizing, copy to regs to improve CSE. */
8278 if (optimize && ! no_new_pseudos)
8280 if (GET_CODE (dst) == MEM)
8281 dst = gen_reg_rtx (mode);
8282 if (GET_CODE (src) == MEM)
8283 src = force_reg (mode, src);
8286 /* Emit the instruction. */
8288 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8289 if (reload_in_progress || code == NOT)
8291 /* Reload doesn't know about the flags register, and doesn't know that
8292 it doesn't want to clobber it. */
8293 gcc_assert (code == NOT);
8294 emit_insn (op);
8296 else
8298 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8299 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8302 /* Fix up the destination if needed. */
8303 if (dst != operands[0])
8304 emit_move_insn (operands[0], dst);
8307 /* Return TRUE or FALSE depending on whether the unary operator meets the
8308 appropriate constraints. */
8311 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8312 enum machine_mode mode ATTRIBUTE_UNUSED,
8313 rtx operands[2] ATTRIBUTE_UNUSED)
8315 /* If one of operands is memory, source and destination must match. */
8316 if ((GET_CODE (operands[0]) == MEM
8317 || GET_CODE (operands[1]) == MEM)
8318 && ! rtx_equal_p (operands[0], operands[1]))
8319 return FALSE;
8320 return TRUE;
8323 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
8324 Create a mask for the sign bit in MODE for an SSE register. If VECT is
8325 true, then replicate the mask for all elements of the vector register.
8326 If INVERT is true, then create a mask excluding the sign bit. */
8329 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
8331 enum machine_mode vec_mode;
8332 HOST_WIDE_INT hi, lo;
8333 int shift = 63;
8334 rtvec v;
8335 rtx mask;
8337 /* Find the sign bit, sign extended to 2*HWI. */
8338 if (mode == SFmode)
8339 lo = 0x80000000, hi = lo < 0;
8340 else if (HOST_BITS_PER_WIDE_INT >= 64)
8341 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
8342 else
8343 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
8345 if (invert)
8346 lo = ~lo, hi = ~hi;
8348 /* Force this value into the low part of a fp vector constant. */
8349 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
8350 mask = gen_lowpart (mode, mask);
8352 if (mode == SFmode)
8354 if (vect)
8355 v = gen_rtvec (4, mask, mask, mask, mask);
8356 else
8357 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
8358 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
8359 vec_mode = V4SFmode;
8361 else
8363 if (vect)
8364 v = gen_rtvec (2, mask, mask);
8365 else
8366 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
8367 vec_mode = V2DFmode;
8370 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
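/* For example (illustration only): for DFmode the low vector element gets
   only the sign bit set (0x8000000000000000), or 0x7fffffffffffffff when
   INVERT; for SFmode it is 0x80000000 or 0x7fffffff.  The remaining elements
   are zero unless VECT, in which case the mask is replicated.  */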
8373 /* Generate code for floating point ABS or NEG. */
8375 void
8376 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
8377 rtx operands[])
8379 rtx mask, set, use, clob, dst, src;
8380 bool matching_memory;
8381 bool use_sse = false;
8382 bool vector_mode = VECTOR_MODE_P (mode);
8383 enum machine_mode elt_mode = mode;
8385 if (vector_mode)
8387 elt_mode = GET_MODE_INNER (mode);
8388 use_sse = true;
8390 else if (TARGET_SSE_MATH)
8391 use_sse = SSE_FLOAT_MODE_P (mode);
8393 /* NEG and ABS performed with SSE use bitwise mask operations.
8394 Create the appropriate mask now. */
8395 if (use_sse)
8396 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
8397 else
8399 /* When not using SSE, we don't use the mask, but prefer to keep the
8400 same general form of the insn pattern to reduce duplication when
8401 it comes time to split. */
8402 mask = const0_rtx;
8405 dst = operands[0];
8406 src = operands[1];
8408 /* If the destination is memory, and we don't have matching source
8409 operands, do things in registers. */
8410 matching_memory = false;
8411 if (MEM_P (dst))
8413 if (rtx_equal_p (dst, src) && (!optimize || no_new_pseudos))
8414 matching_memory = true;
8415 else
8416 dst = gen_reg_rtx (mode);
8418 if (MEM_P (src) && !matching_memory)
8419 src = force_reg (mode, src);
8421 if (vector_mode)
8423 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
8424 set = gen_rtx_SET (VOIDmode, dst, set);
8425 emit_insn (set);
8427 else
8429 set = gen_rtx_fmt_e (code, mode, src);
8430 set = gen_rtx_SET (VOIDmode, dst, set);
8431 use = gen_rtx_USE (VOIDmode, mask);
8432 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8433 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));
8436 if (dst != operands[0])
8437 emit_move_insn (operands[0], dst);
8440 /* Expand a copysign operation. Special case operand 0 being a constant. */
8442 void
8443 ix86_expand_copysign (rtx operands[])
8445 enum machine_mode mode, vmode;
8446 rtx dest, op0, op1, mask, nmask;
8448 dest = operands[0];
8449 op0 = operands[1];
8450 op1 = operands[2];
8452 mode = GET_MODE (dest);
8453 vmode = mode == SFmode ? V4SFmode : V2DFmode;
8455 if (GET_CODE (op0) == CONST_DOUBLE)
8457 rtvec v;
8459 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
8460 op0 = simplify_unary_operation (ABS, mode, op0, mode);
8462 if (op0 == CONST0_RTX (mode))
8463 op0 = CONST0_RTX (vmode);
8464 else
8466 if (mode == SFmode)
8467 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
8468 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
8469 else
8470 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
8471 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
8474 mask = ix86_build_signbit_mask (mode, 0, 0);
8476 if (mode == SFmode)
8477 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
8478 else
8479 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
8481 else
8483 nmask = ix86_build_signbit_mask (mode, 0, 1);
8484 mask = ix86_build_signbit_mask (mode, 0, 0);
8486 if (mode == SFmode)
8487 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
8488 else
8489 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
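/* The underlying bit-level identity (a sketch, not compiler code): with SIGN
   being the sign-bit mask for the mode,
       copysign (x, y) == (x & ~SIGN) | (y & SIGN)
   on the raw IEEE bit patterns; NMASK above supplies ~SIGN and MASK supplies
   SIGN.  */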
8493 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
8494 be a constant, and so has already been expanded into a vector constant. */
8496 void
8497 ix86_split_copysign_const (rtx operands[])
8499 enum machine_mode mode, vmode;
8500 rtx dest, op0, op1, mask, x;
8502 dest = operands[0];
8503 op0 = operands[1];
8504 op1 = operands[2];
8505 mask = operands[3];
8507 mode = GET_MODE (dest);
8508 vmode = GET_MODE (mask);
8510 dest = simplify_gen_subreg (vmode, dest, mode, 0);
8511 x = gen_rtx_AND (vmode, dest, mask);
8512 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8514 if (op0 != CONST0_RTX (vmode))
8516 x = gen_rtx_IOR (vmode, dest, op0);
8517 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8521 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
8522 so we have to do two masks. */
8524 void
8525 ix86_split_copysign_var (rtx operands[])
8527 enum machine_mode mode, vmode;
8528 rtx dest, scratch, op0, op1, mask, nmask, x;
8530 dest = operands[0];
8531 scratch = operands[1];
8532 op0 = operands[2];
8533 op1 = operands[3];
8534 nmask = operands[4];
8535 mask = operands[5];
8537 mode = GET_MODE (dest);
8538 vmode = GET_MODE (mask);
8540 if (rtx_equal_p (op0, op1))
8542 /* Shouldn't happen often (it's useless, obviously), but when it does
8543 we'd generate incorrect code if we continue below. */
8544 emit_move_insn (dest, op0);
8545 return;
8548 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
8550 gcc_assert (REGNO (op1) == REGNO (scratch));
8552 x = gen_rtx_AND (vmode, scratch, mask);
8553 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
8555 dest = mask;
8556 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
8557 x = gen_rtx_NOT (vmode, dest);
8558 x = gen_rtx_AND (vmode, x, op0);
8559 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8561 else
8563 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
8565 x = gen_rtx_AND (vmode, scratch, mask);
8567 else /* alternative 2,4 */
8569 gcc_assert (REGNO (mask) == REGNO (scratch));
8570 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
8571 x = gen_rtx_AND (vmode, scratch, op1);
8573 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
8575 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
8577 dest = simplify_gen_subreg (vmode, op0, mode, 0);
8578 x = gen_rtx_AND (vmode, dest, nmask);
8580 else /* alternative 3,4 */
8582 gcc_assert (REGNO (nmask) == REGNO (dest));
8583 dest = nmask;
8584 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
8585 x = gen_rtx_AND (vmode, dest, op0);
8587 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8590 x = gen_rtx_IOR (vmode, dest, scratch);
8591 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8594 /* Return TRUE or FALSE depending on whether the first SET in INSN
8595 has source and destination with matching CC modes, and that the
8596 CC mode is at least as constrained as REQ_MODE. */
8599 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8601 rtx set;
8602 enum machine_mode set_mode;
8604 set = PATTERN (insn);
8605 if (GET_CODE (set) == PARALLEL)
8606 set = XVECEXP (set, 0, 0);
8607 gcc_assert (GET_CODE (set) == SET);
8608 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
8610 set_mode = GET_MODE (SET_DEST (set));
8611 switch (set_mode)
8613 case CCNOmode:
8614 if (req_mode != CCNOmode
8615 && (req_mode != CCmode
8616 || XEXP (SET_SRC (set), 1) != const0_rtx))
8617 return 0;
8618 break;
8619 case CCmode:
8620 if (req_mode == CCGCmode)
8621 return 0;
8622 /* FALLTHRU */
8623 case CCGCmode:
8624 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8625 return 0;
8626 /* FALLTHRU */
8627 case CCGOCmode:
8628 if (req_mode == CCZmode)
8629 return 0;
8630 /* FALLTHRU */
8631 case CCZmode:
8632 break;
8634 default:
8635 gcc_unreachable ();
8638 return (GET_MODE (SET_SRC (set)) == set_mode);
8641 /* Generate insn patterns to do an integer compare of OPERANDS. */
8643 static rtx
8644 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8646 enum machine_mode cmpmode;
8647 rtx tmp, flags;
8649 cmpmode = SELECT_CC_MODE (code, op0, op1);
8650 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8652 /* This is very simple, but making the interface the same as in the
8653 FP case makes the rest of the code easier. */
8654 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8655 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8657 /* Return the test that should be put into the flags user, i.e.
8658 the bcc, scc, or cmov instruction. */
8659 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8662 /* Figure out whether to use ordered or unordered fp comparisons.
8663 Return the appropriate mode to use. */
8665 enum machine_mode
8666 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8668 /* ??? In order to make all comparisons reversible, we do all comparisons
8669 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8670 between trapping and nontrapping forms of comparisons, we can make inequality
8671 comparisons trapping again, since it results in better code when using
8672 FCOM based compares. */
8673 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8676 enum machine_mode
8677 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8679 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8680 return ix86_fp_compare_mode (code);
8681 switch (code)
8683 /* Only zero flag is needed. */
8684 case EQ: /* ZF=0 */
8685 case NE: /* ZF!=0 */
8686 return CCZmode;
8687 /* Codes needing carry flag. */
8688 case GEU: /* CF=0 */
8689 case GTU: /* CF=0 & ZF=0 */
8690 case LTU: /* CF=1 */
8691 case LEU: /* CF=1 | ZF=1 */
8692 return CCmode;
8693 /* Codes possibly doable only with sign flag when
8694 comparing against zero. */
8695 case GE: /* SF=OF or SF=0 */
8696 case LT: /* SF<>OF or SF=1 */
8697 if (op1 == const0_rtx)
8698 return CCGOCmode;
8699 else
8700 /* For other cases Carry flag is not required. */
8701 return CCGCmode;
8702 /* Codes doable only with sign flag when comparing
8703 against zero, but we lack a jump instruction for it,
8704 so we need to use relational tests against the overflow
8705 flag, which thus needs to be zero. */
8706 case GT: /* ZF=0 & SF=OF */
8707 case LE: /* ZF=1 | SF<>OF */
8708 if (op1 == const0_rtx)
8709 return CCNOmode;
8710 else
8711 return CCGCmode;
8712 /* The strcmp pattern does a (use flags), and combine may ask us for the proper
8713 mode. */
8714 case USE:
8715 return CCmode;
8716 default:
8717 gcc_unreachable ();
8721 /* Return the fixed registers used for condition codes. */
8723 static bool
8724 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8726 *p1 = FLAGS_REG;
8727 *p2 = FPSR_REG;
8728 return true;
8731 /* If two condition code modes are compatible, return a condition code
8732 mode which is compatible with both. Otherwise, return
8733 VOIDmode. */
8735 static enum machine_mode
8736 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8738 if (m1 == m2)
8739 return m1;
8741 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8742 return VOIDmode;
8744 if ((m1 == CCGCmode && m2 == CCGOCmode)
8745 || (m1 == CCGOCmode && m2 == CCGCmode))
8746 return CCGCmode;
8748 switch (m1)
8750 default:
8751 gcc_unreachable ();
8753 case CCmode:
8754 case CCGCmode:
8755 case CCGOCmode:
8756 case CCNOmode:
8757 case CCZmode:
8758 switch (m2)
8760 default:
8761 return VOIDmode;
8763 case CCmode:
8764 case CCGCmode:
8765 case CCGOCmode:
8766 case CCNOmode:
8767 case CCZmode:
8768 return CCmode;
8771 case CCFPmode:
8772 case CCFPUmode:
8773 /* These are only compatible with themselves, which we already
8774 checked above. */
8775 return VOIDmode;
8779 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8782 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8784 enum rtx_code swapped_code = swap_condition (code);
8785 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8786 || (ix86_fp_comparison_cost (swapped_code)
8787 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8790 /* Swap, force into registers, or otherwise massage the two operands
8791 to a fp comparison. The operands are updated in place; the new
8792 comparison code is returned. */
8794 static enum rtx_code
8795 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8797 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8798 rtx op0 = *pop0, op1 = *pop1;
8799 enum machine_mode op_mode = GET_MODE (op0);
8800 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
8802 /* All of the unordered compare instructions only work on registers.
8803 The same is true of the fcomi compare instructions. The same is
8804 true of the XFmode compare instructions if not comparing with
8805 zero (ftst insn is used in this case). */
8807 if (!is_sse
8808 && (fpcmp_mode == CCFPUmode
8809 || (op_mode == XFmode
8810 && ! (standard_80387_constant_p (op0) == 1
8811 || standard_80387_constant_p (op1) == 1))
8812 || ix86_use_fcomi_compare (code)))
8814 op0 = force_reg (op_mode, op0);
8815 op1 = force_reg (op_mode, op1);
8817 else
8819 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8820 things around if they appear profitable, otherwise force op0
8821 into a register. */
8823 if (standard_80387_constant_p (op0) == 0
8824 || (GET_CODE (op0) == MEM
8825 && ! (standard_80387_constant_p (op1) == 0
8826 || GET_CODE (op1) == MEM)))
8828 rtx tmp;
8829 tmp = op0, op0 = op1, op1 = tmp;
8830 code = swap_condition (code);
8833 if (GET_CODE (op0) != REG)
8834 op0 = force_reg (op_mode, op0);
8836 if (CONSTANT_P (op1))
8838 int tmp = standard_80387_constant_p (op1);
8839 if (tmp == 0)
8840 op1 = validize_mem (force_const_mem (op_mode, op1));
8841 else if (tmp == 1)
8843 if (TARGET_CMOVE)
8844 op1 = force_reg (op_mode, op1);
8846 else
8847 op1 = force_reg (op_mode, op1);
8851 /* Try to rearrange the comparison to make it cheaper. */
8852 if (ix86_fp_comparison_cost (code)
8853 > ix86_fp_comparison_cost (swap_condition (code))
8854 && (GET_CODE (op1) == REG || !no_new_pseudos))
8856 rtx tmp;
8857 tmp = op0, op0 = op1, op1 = tmp;
8858 code = swap_condition (code);
8859 if (GET_CODE (op0) != REG)
8860 op0 = force_reg (op_mode, op0);
8863 *pop0 = op0;
8864 *pop1 = op1;
8865 return code;
8868 /* Convert the comparison codes we use to represent FP comparisons to the integer
8869 code that will result in a proper branch. Return UNKNOWN if no such code
8870 is available. */
8872 enum rtx_code
8873 ix86_fp_compare_code_to_integer (enum rtx_code code)
8875 switch (code)
8877 case GT:
8878 return GTU;
8879 case GE:
8880 return GEU;
8881 case ORDERED:
8882 case UNORDERED:
8883 return code;
8884 break;
8885 case UNEQ:
8886 return EQ;
8887 break;
8888 case UNLT:
8889 return LTU;
8890 break;
8891 case UNLE:
8892 return LEU;
8893 break;
8894 case LTGT:
8895 return NE;
8896 break;
8897 default:
8898 return UNKNOWN;
8902 /* Split comparison code CODE into comparisons we can do using branch
8903 instructions. BYPASS_CODE is the comparison code for a branch that will
8904 branch around FIRST_CODE and SECOND_CODE. If one of the branches
8905 is not required, its value is set to UNKNOWN.
8906 We never require more than two branches. */
8908 void
8909 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8910 enum rtx_code *first_code,
8911 enum rtx_code *second_code)
8913 *first_code = code;
8914 *bypass_code = UNKNOWN;
8915 *second_code = UNKNOWN;
8917 /* The fcomi comparison sets flags as follows:
8919 cmp ZF PF CF
8920 > 0 0 0
8921 < 0 0 1
8922 = 1 0 0
8923 un 1 1 1 */
8925 switch (code)
8927 case GT: /* GTU - CF=0 & ZF=0 */
8928 case GE: /* GEU - CF=0 */
8929 case ORDERED: /* PF=0 */
8930 case UNORDERED: /* PF=1 */
8931 case UNEQ: /* EQ - ZF=1 */
8932 case UNLT: /* LTU - CF=1 */
8933 case UNLE: /* LEU - CF=1 | ZF=1 */
8934 case LTGT: /* EQ - ZF=0 */
8935 break;
8936 case LT: /* LTU - CF=1 - fails on unordered */
8937 *first_code = UNLT;
8938 *bypass_code = UNORDERED;
8939 break;
8940 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8941 *first_code = UNLE;
8942 *bypass_code = UNORDERED;
8943 break;
8944 case EQ: /* EQ - ZF=1 - fails on unordered */
8945 *first_code = UNEQ;
8946 *bypass_code = UNORDERED;
8947 break;
8948 case NE: /* NE - ZF=0 - fails on unordered */
8949 *first_code = LTGT;
8950 *second_code = UNORDERED;
8951 break;
8952 case UNGE: /* GEU - CF=0 - fails on unordered */
8953 *first_code = GE;
8954 *second_code = UNORDERED;
8955 break;
8956 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8957 *first_code = GT;
8958 *second_code = UNORDERED;
8959 break;
8960 default:
8961 gcc_unreachable ();
8963 if (!TARGET_IEEE_FP)
8965 *second_code = UNKNOWN;
8966 *bypass_code = UNKNOWN;
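/* Worked example (illustrative): with TARGET_IEEE_FP, code == LT yields
   *first_code = UNLT and *bypass_code = UNORDERED, i.e. branch around the
   real test when PF is set (unordered) and otherwise branch on CF (below).
   Without TARGET_IEEE_FP the bypass and second branches are dropped and a
   single branch remains.  */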
8970 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
8971 All following functions use the number of instructions as a cost metric.
8972 In the future this should be tweaked to compute bytes for optimize_size and
8973 take into account the performance of various instructions on various CPUs. */
8974 static int
8975 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8977 if (!TARGET_IEEE_FP)
8978 return 4;
8979 /* The cost of code output by ix86_expand_fp_compare. */
8980 switch (code)
8982 case UNLE:
8983 case UNLT:
8984 case LTGT:
8985 case GT:
8986 case GE:
8987 case UNORDERED:
8988 case ORDERED:
8989 case UNEQ:
8990 return 4;
8991 break;
8992 case LT:
8993 case NE:
8994 case EQ:
8995 case UNGE:
8996 return 5;
8997 break;
8998 case LE:
8999 case UNGT:
9000 return 6;
9001 break;
9002 default:
9003 gcc_unreachable ();
9007 /* Return cost of comparison done using fcomi operation.
9008 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9009 static int
9010 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9012 enum rtx_code bypass_code, first_code, second_code;
9013 /* Return arbitrarily high cost when instruction is not supported - this
9014 prevents gcc from using it. */
9015 if (!TARGET_CMOVE)
9016 return 1024;
9017 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9018 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
9021 /* Return cost of comparison done using sahf operation.
9022 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9023 static int
9024 ix86_fp_comparison_sahf_cost (enum rtx_code code)
9026 enum rtx_code bypass_code, first_code, second_code;
9027 /* Return arbitrarily high cost when instruction is not preferred - this
9028 prevents gcc from using it. */
9029 if (!TARGET_USE_SAHF && !optimize_size)
9030 return 1024;
9031 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9032 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
9035 /* Compute cost of the comparison done using any method.
9036 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9037 static int
9038 ix86_fp_comparison_cost (enum rtx_code code)
9040 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9041 int min;
9043 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9044 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9046 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9047 if (min > sahf_cost)
9048 min = sahf_cost;
9049 if (min > fcomi_cost)
9050 min = fcomi_cost;
9051 return min;
9054 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9056 static rtx
9057 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9058 rtx *second_test, rtx *bypass_test)
9060 enum machine_mode fpcmp_mode, intcmp_mode;
9061 rtx tmp, tmp2;
9062 int cost = ix86_fp_comparison_cost (code);
9063 enum rtx_code bypass_code, first_code, second_code;
9065 fpcmp_mode = ix86_fp_compare_mode (code);
9066 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9068 if (second_test)
9069 *second_test = NULL_RTX;
9070 if (bypass_test)
9071 *bypass_test = NULL_RTX;
9073 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9075 /* Do fcomi/sahf based test when profitable. */
9076 if ((bypass_code == UNKNOWN || bypass_test)
9077 && (second_code == UNKNOWN || second_test)
9078 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9080 if (TARGET_CMOVE)
9082 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9083 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9084 tmp);
9085 emit_insn (tmp);
9087 else
9089 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9090 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9091 if (!scratch)
9092 scratch = gen_reg_rtx (HImode);
9093 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9094 emit_insn (gen_x86_sahf_1 (scratch));
9097 /* The FP codes work out to act like unsigned. */
9098 intcmp_mode = fpcmp_mode;
9099 code = first_code;
9100 if (bypass_code != UNKNOWN)
9101 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9102 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9103 const0_rtx);
9104 if (second_code != UNKNOWN)
9105 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9106 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9107 const0_rtx);
9109 else
9111 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9112 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9113 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9114 if (!scratch)
9115 scratch = gen_reg_rtx (HImode);
9116 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9118 /* In the unordered case, we have to check C2 for NaN's, which
9119 doesn't happen to work out to anything nice combination-wise.
9120 So do some bit twiddling on the value we've got in AH to come
9121 up with an appropriate set of condition codes. */
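/* Background note (standard x87 behavior, not derived from the code above):
   after fnstsw the condition bits land in AH as C0 = 0x01, C2 = 0x04 and
   C3 = 0x40, so the 0x45 constants below test C3|C2|C0, 0x40 tests C3 alone,
   and 0x04 tests C2, which is set when the compare is unordered.  */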
9123 intcmp_mode = CCNOmode;
9124 switch (code)
9126 case GT:
9127 case UNGT:
9128 if (code == GT || !TARGET_IEEE_FP)
9130 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9131 code = EQ;
9133 else
9135 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9136 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9137 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9138 intcmp_mode = CCmode;
9139 code = GEU;
9141 break;
9142 case LT:
9143 case UNLT:
9144 if (code == LT && TARGET_IEEE_FP)
9146 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9147 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9148 intcmp_mode = CCmode;
9149 code = EQ;
9151 else
9153 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9154 code = NE;
9156 break;
9157 case GE:
9158 case UNGE:
9159 if (code == GE || !TARGET_IEEE_FP)
9161 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9162 code = EQ;
9164 else
9166 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9167 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9168 GEN_INT (0x01)));
9169 code = NE;
9171 break;
9172 case LE:
9173 case UNLE:
9174 if (code == LE && TARGET_IEEE_FP)
9176 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9177 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9178 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9179 intcmp_mode = CCmode;
9180 code = LTU;
9182 else
9184 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9185 code = NE;
9187 break;
9188 case EQ:
9189 case UNEQ:
9190 if (code == EQ && TARGET_IEEE_FP)
9192 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9193 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9194 intcmp_mode = CCmode;
9195 code = EQ;
9197 else
9199 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9200 code = NE;
9201 break;
9203 break;
9204 case NE:
9205 case LTGT:
9206 if (code == NE && TARGET_IEEE_FP)
9208 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9209 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9210 GEN_INT (0x40)));
9211 code = NE;
9213 else
9215 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9216 code = EQ;
9218 break;
9220 case UNORDERED:
9221 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9222 code = NE;
9223 break;
9224 case ORDERED:
9225 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9226 code = EQ;
9227 break;
9229 default:
9230 gcc_unreachable ();
9234 /* Return the test that should be put into the flags user, i.e.
9235 the bcc, scc, or cmov instruction. */
9236 return gen_rtx_fmt_ee (code, VOIDmode,
9237 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9238 const0_rtx);
9242 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9244 rtx op0, op1, ret;
9245 op0 = ix86_compare_op0;
9246 op1 = ix86_compare_op1;
9248 if (second_test)
9249 *second_test = NULL_RTX;
9250 if (bypass_test)
9251 *bypass_test = NULL_RTX;
9253 if (ix86_compare_emitted)
9255 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
9256 ix86_compare_emitted = NULL_RTX;
9258 else if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9259 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9260 second_test, bypass_test);
9261 else
9262 ret = ix86_expand_int_compare (code, op0, op1);
9264 return ret;
9267 /* Return true if the CODE will result in nontrivial jump sequence. */
9268 bool
9269 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9271 enum rtx_code bypass_code, first_code, second_code;
9272 if (!TARGET_CMOVE)
9273 return true;
9274 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9275 return bypass_code != UNKNOWN || second_code != UNKNOWN;
9278 void
9279 ix86_expand_branch (enum rtx_code code, rtx label)
9281 rtx tmp;
9283 switch (GET_MODE (ix86_compare_op0))
9285 case QImode:
9286 case HImode:
9287 case SImode:
9288 simple:
9289 tmp = ix86_expand_compare (code, NULL, NULL);
9290 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9291 gen_rtx_LABEL_REF (VOIDmode, label),
9292 pc_rtx);
9293 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9294 return;
9296 case SFmode:
9297 case DFmode:
9298 case XFmode:
9300 rtvec vec;
9301 int use_fcomi;
9302 enum rtx_code bypass_code, first_code, second_code;
9304 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9305 &ix86_compare_op1);
9307 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9309 /* Check whether we will use the natural sequence with one jump. If
9310 so, we can expand the jump early. Otherwise delay expansion by
9311 creating a compound insn so as not to confuse the optimizers. */
9312 if (bypass_code == UNKNOWN && second_code == UNKNOWN
9313 && TARGET_CMOVE)
9315 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9316 gen_rtx_LABEL_REF (VOIDmode, label),
9317 pc_rtx, NULL_RTX, NULL_RTX);
9319 else
9321 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9322 ix86_compare_op0, ix86_compare_op1);
9323 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9324 gen_rtx_LABEL_REF (VOIDmode, label),
9325 pc_rtx);
9326 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9328 use_fcomi = ix86_use_fcomi_compare (code);
9329 vec = rtvec_alloc (3 + !use_fcomi);
9330 RTVEC_ELT (vec, 0) = tmp;
9331 RTVEC_ELT (vec, 1)
9332 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9333 RTVEC_ELT (vec, 2)
9334 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9335 if (! use_fcomi)
9336 RTVEC_ELT (vec, 3)
9337 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9339 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9341 return;
9344 case DImode:
9345 if (TARGET_64BIT)
9346 goto simple;
9347 case TImode:
9348 /* Expand DImode branch into multiple compare+branch. */
9350 rtx lo[2], hi[2], label2;
9351 enum rtx_code code1, code2, code3;
9352 enum machine_mode submode;
9354 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9356 tmp = ix86_compare_op0;
9357 ix86_compare_op0 = ix86_compare_op1;
9358 ix86_compare_op1 = tmp;
9359 code = swap_condition (code);
9361 if (GET_MODE (ix86_compare_op0) == DImode)
9363 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9364 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9365 submode = SImode;
9367 else
9369 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
9370 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
9371 submode = DImode;
9374 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9375 avoid two branches. This costs one extra insn, so disable when
9376 optimizing for size. */
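/* A plain-C sketch of the same trick (illustration only):
       equal = (((a_hi ^ b_hi) | (a_lo ^ b_lo)) == 0);
   i.e. the double-word operands are equal exactly when both the high and the
   low halves XOR to zero.  */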
9378 if ((code == EQ || code == NE)
9379 && (!optimize_size
9380 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9382 rtx xor0, xor1;
9384 xor1 = hi[0];
9385 if (hi[1] != const0_rtx)
9386 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
9387 NULL_RTX, 0, OPTAB_WIDEN);
9389 xor0 = lo[0];
9390 if (lo[1] != const0_rtx)
9391 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
9392 NULL_RTX, 0, OPTAB_WIDEN);
9394 tmp = expand_binop (submode, ior_optab, xor1, xor0,
9395 NULL_RTX, 0, OPTAB_WIDEN);
9397 ix86_compare_op0 = tmp;
9398 ix86_compare_op1 = const0_rtx;
9399 ix86_expand_branch (code, label);
9400 return;
9403 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9404 op1 is a constant and the low word is zero, then we can just
9405 examine the high word. */
9407 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9408 switch (code)
9410 case LT: case LTU: case GE: case GEU:
9411 ix86_compare_op0 = hi[0];
9412 ix86_compare_op1 = hi[1];
9413 ix86_expand_branch (code, label);
9414 return;
9415 default:
9416 break;
9419 /* Otherwise, we need two or three jumps. */
9421 label2 = gen_label_rtx ();
9423 code1 = code;
9424 code2 = swap_condition (code);
9425 code3 = unsigned_condition (code);
9427 switch (code)
9429 case LT: case GT: case LTU: case GTU:
9430 break;
9432 case LE: code1 = LT; code2 = GT; break;
9433 case GE: code1 = GT; code2 = LT; break;
9434 case LEU: code1 = LTU; code2 = GTU; break;
9435 case GEU: code1 = GTU; code2 = LTU; break;
9437 case EQ: code1 = UNKNOWN; code2 = NE; break;
9438 case NE: code2 = UNKNOWN; break;
9440 default:
9441 gcc_unreachable ();
9445 * a < b =>
9446 * if (hi(a) < hi(b)) goto true;
9447 * if (hi(a) > hi(b)) goto false;
9448 * if (lo(a) < lo(b)) goto true;
9449 * false:
9452 ix86_compare_op0 = hi[0];
9453 ix86_compare_op1 = hi[1];
9455 if (code1 != UNKNOWN)
9456 ix86_expand_branch (code1, label);
9457 if (code2 != UNKNOWN)
9458 ix86_expand_branch (code2, label2);
9460 ix86_compare_op0 = lo[0];
9461 ix86_compare_op1 = lo[1];
9462 ix86_expand_branch (code3, label);
9464 if (code2 != UNKNOWN)
9465 emit_label (label2);
9466 return;
9469 default:
9470 gcc_unreachable ();
9474 /* Split branch based on floating point condition. */
9475 void
9476 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9477 rtx target1, rtx target2, rtx tmp, rtx pushed)
9479 rtx second, bypass;
9480 rtx label = NULL_RTX;
9481 rtx condition;
9482 int bypass_probability = -1, second_probability = -1, probability = -1;
9483 rtx i;
9485 if (target2 != pc_rtx)
9487 rtx tmp = target2;
9488 code = reverse_condition_maybe_unordered (code);
9489 target2 = target1;
9490 target1 = tmp;
9493 condition = ix86_expand_fp_compare (code, op1, op2,
9494 tmp, &second, &bypass);
9496 /* Remove pushed operand from stack. */
9497 if (pushed)
9498 ix86_free_from_memory (GET_MODE (pushed));
9500 if (split_branch_probability >= 0)
9502 /* Distribute the probabilities across the jumps.
9503 Assume that BYPASS and SECOND always test
9504 for UNORDERED. */
9505 probability = split_branch_probability;
9507 /* A value of 1 is low enough that there is no need for the probability
9508 to be updated. Later we may run some experiments and see
9509 if unordered values are more frequent in practice. */
9510 if (bypass)
9511 bypass_probability = 1;
9512 if (second)
9513 second_probability = 1;
9515 if (bypass != NULL_RTX)
9517 label = gen_label_rtx ();
9518 i = emit_jump_insn (gen_rtx_SET
9519 (VOIDmode, pc_rtx,
9520 gen_rtx_IF_THEN_ELSE (VOIDmode,
9521 bypass,
9522 gen_rtx_LABEL_REF (VOIDmode,
9523 label),
9524 pc_rtx)));
9525 if (bypass_probability >= 0)
9526 REG_NOTES (i)
9527 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9528 GEN_INT (bypass_probability),
9529 REG_NOTES (i));
9531 i = emit_jump_insn (gen_rtx_SET
9532 (VOIDmode, pc_rtx,
9533 gen_rtx_IF_THEN_ELSE (VOIDmode,
9534 condition, target1, target2)));
9535 if (probability >= 0)
9536 REG_NOTES (i)
9537 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9538 GEN_INT (probability),
9539 REG_NOTES (i));
9540 if (second != NULL_RTX)
9542 i = emit_jump_insn (gen_rtx_SET
9543 (VOIDmode, pc_rtx,
9544 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9545 target2)));
9546 if (second_probability >= 0)
9547 REG_NOTES (i)
9548 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9549 GEN_INT (second_probability),
9550 REG_NOTES (i));
9552 if (label != NULL_RTX)
9553 emit_label (label);
9557 ix86_expand_setcc (enum rtx_code code, rtx dest)
9559 rtx ret, tmp, tmpreg, equiv;
9560 rtx second_test, bypass_test;
9562 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
9563 return 0; /* FAIL */
9565 gcc_assert (GET_MODE (dest) == QImode);
9567 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9568 PUT_MODE (ret, QImode);
9570 tmp = dest;
9571 tmpreg = dest;
9573 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9574 if (bypass_test || second_test)
9576 rtx test = second_test;
9577 int bypass = 0;
9578 rtx tmp2 = gen_reg_rtx (QImode);
9579 if (bypass_test)
9581 gcc_assert (!second_test);
9582 test = bypass_test;
9583 bypass = 1;
9584 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9586 PUT_MODE (test, QImode);
9587 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9589 if (bypass)
9590 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9591 else
9592 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9595 /* Attach a REG_EQUAL note describing the comparison result. */
9596 if (ix86_compare_op0 && ix86_compare_op1)
9598 equiv = simplify_gen_relational (code, QImode,
9599 GET_MODE (ix86_compare_op0),
9600 ix86_compare_op0, ix86_compare_op1);
9601 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9604 return 1; /* DONE */
9607 /* Expand comparison setting or clearing carry flag. Return true when
9608 successful and set pop for the operation. */
9609 static bool
9610 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9612 enum machine_mode mode =
9613 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9615 /* Do not handle DImode compares that go through the special path. Also we can't
9616 deal with FP compares yet. This would be possible to add. */
9617 if (mode == (TARGET_64BIT ? TImode : DImode))
9618 return false;
9619 if (FLOAT_MODE_P (mode))
9621 rtx second_test = NULL, bypass_test = NULL;
9622 rtx compare_op, compare_seq;
9624 /* Shortcut: the following common codes never translate into carry flag compares. */
9625 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9626 || code == ORDERED || code == UNORDERED)
9627 return false;
9629 /* These comparisons require zero flag; swap operands so they won't. */
9630 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9631 && !TARGET_IEEE_FP)
9633 rtx tmp = op0;
9634 op0 = op1;
9635 op1 = tmp;
9636 code = swap_condition (code);
9639 /* Try to expand the comparison and verify that we end up with carry flag
9640 based comparison. This fails to be true only when we decide to expand the
9641 comparison using arithmetic, which is not a common scenario. */
9642 start_sequence ();
9643 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9644 &second_test, &bypass_test);
9645 compare_seq = get_insns ();
9646 end_sequence ();
9648 if (second_test || bypass_test)
9649 return false;
9650 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9651 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9652 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9653 else
9654 code = GET_CODE (compare_op);
9655 if (code != LTU && code != GEU)
9656 return false;
9657 emit_insn (compare_seq);
9658 *pop = compare_op;
9659 return true;
9661 if (!INTEGRAL_MODE_P (mode))
9662 return false;
9663 switch (code)
9665 case LTU:
9666 case GEU:
9667 break;
9669 /* Convert a==0 into (unsigned)a<1. */
9670 case EQ:
9671 case NE:
9672 if (op1 != const0_rtx)
9673 return false;
9674 op1 = const1_rtx;
9675 code = (code == EQ ? LTU : GEU);
9676 break;
9678 /* Convert a>b into b<a or a>=b-1. */
9679 case GTU:
9680 case LEU:
9681 if (GET_CODE (op1) == CONST_INT)
9683 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9684 /* Bail out on overflow. We could still swap the operands, but that
9685 would force loading of the constant into a register. */
9686 if (op1 == const0_rtx
9687 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9688 return false;
9689 code = (code == GTU ? GEU : LTU);
9691 else
9693 rtx tmp = op1;
9694 op1 = op0;
9695 op0 = tmp;
9696 code = (code == GTU ? LTU : GEU);
9698 break;
9700 /* Convert a>=0 into (unsigned)a<0x80000000. */
9701 case LT:
9702 case GE:
9703 if (mode == DImode || op1 != const0_rtx)
9704 return false;
9705 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9706 code = (code == LT ? GEU : LTU);
9707 break;
9708 case LE:
9709 case GT:
9710 if (mode == DImode || op1 != constm1_rtx)
9711 return false;
9712 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9713 code = (code == LE ? GEU : LTU);
9714 break;
9716 default:
9717 return false;
9719 /* Swapping operands may cause constant to appear as first operand. */
9720 if (!nonimmediate_operand (op0, VOIDmode))
9722 if (no_new_pseudos)
9723 return false;
9724 op0 = force_reg (mode, op0);
9726 ix86_compare_op0 = op0;
9727 ix86_compare_op1 = op1;
9728 *pop = ix86_expand_compare (code, NULL, NULL);
9729 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
9730 return true;
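/* Worked example (illustrative): for "(unsigned) a > 5" the GTU case above
   rewrites the test as "(unsigned) a >= 6" (op1 becomes INTVAL + 1 and the
   code becomes GEU), which can then be implemented as a plain carry-flag
   test.  */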
9734 ix86_expand_int_movcc (rtx operands[])
9736 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9737 rtx compare_seq, compare_op;
9738 rtx second_test, bypass_test;
9739 enum machine_mode mode = GET_MODE (operands[0]);
9740 bool sign_bit_compare_p = false;
9742 start_sequence ();
9743 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9744 compare_seq = get_insns ();
9745 end_sequence ();
9747 compare_code = GET_CODE (compare_op);
9749 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9750 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9751 sign_bit_compare_p = true;
9753 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9754 HImode insns, we'd be swallowed in word prefix ops. */
9756 if ((mode != HImode || TARGET_FAST_PREFIX)
9757 && (mode != (TARGET_64BIT ? TImode : DImode))
9758 && GET_CODE (operands[2]) == CONST_INT
9759 && GET_CODE (operands[3]) == CONST_INT)
9761 rtx out = operands[0];
9762 HOST_WIDE_INT ct = INTVAL (operands[2]);
9763 HOST_WIDE_INT cf = INTVAL (operands[3]);
9764 HOST_WIDE_INT diff;
9766 diff = ct - cf;
9767 /* Sign bit compares are better done using shifts than by using
9768 sbb. */
9769 if (sign_bit_compare_p
9770 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9771 ix86_compare_op1, &compare_op))
9773 /* Detect overlap between destination and compare sources. */
9774 rtx tmp = out;
9776 if (!sign_bit_compare_p)
9778 bool fpcmp = false;
9780 compare_code = GET_CODE (compare_op);
9782 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9783 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9785 fpcmp = true;
9786 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9789 /* To simplify rest of code, restrict to the GEU case. */
9790 if (compare_code == LTU)
9792 HOST_WIDE_INT tmp = ct;
9793 ct = cf;
9794 cf = tmp;
9795 compare_code = reverse_condition (compare_code);
9796 code = reverse_condition (code);
9798 else
9800 if (fpcmp)
9801 PUT_CODE (compare_op,
9802 reverse_condition_maybe_unordered
9803 (GET_CODE (compare_op)));
9804 else
9805 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9807 diff = ct - cf;
9809 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9810 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9811 tmp = gen_reg_rtx (mode);
9813 if (mode == DImode)
9814 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9815 else
9816 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9818 else
9820 if (code == GT || code == GE)
9821 code = reverse_condition (code);
9822 else
9824 HOST_WIDE_INT tmp = ct;
9825 ct = cf;
9826 cf = tmp;
9827 diff = ct - cf;
9829 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9830 ix86_compare_op1, VOIDmode, 0, -1);
9833 if (diff == 1)
9836 * cmpl op0,op1
9837 * sbbl dest,dest
9838 * [addl dest, ct]
9840 * Size 5 - 8.
9842 if (ct)
9843 tmp = expand_simple_binop (mode, PLUS,
9844 tmp, GEN_INT (ct),
9845 copy_rtx (tmp), 1, OPTAB_DIRECT);
9847 else if (cf == -1)
9850 * cmpl op0,op1
9851 * sbbl dest,dest
9852 * orl $ct, dest
9854 * Size 8.
9856 tmp = expand_simple_binop (mode, IOR,
9857 tmp, GEN_INT (ct),
9858 copy_rtx (tmp), 1, OPTAB_DIRECT);
9860 else if (diff == -1 && ct)
9863 * cmpl op0,op1
9864 * sbbl dest,dest
9865 * notl dest
9866 * [addl dest, cf]
9868 * Size 8 - 11.
9870 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9871 if (cf)
9872 tmp = expand_simple_binop (mode, PLUS,
9873 copy_rtx (tmp), GEN_INT (cf),
9874 copy_rtx (tmp), 1, OPTAB_DIRECT);
9876 else
9879 * cmpl op0,op1
9880 * sbbl dest,dest
9881 * [notl dest]
9882 * andl cf - ct, dest
9883 * [addl dest, ct]
9885 * Size 8 - 11.
9888 if (cf == 0)
9890 cf = ct;
9891 ct = 0;
9892 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9895 tmp = expand_simple_binop (mode, AND,
9896 copy_rtx (tmp),
9897 gen_int_mode (cf - ct, mode),
9898 copy_rtx (tmp), 1, OPTAB_DIRECT);
9899 if (ct)
9900 tmp = expand_simple_binop (mode, PLUS,
9901 copy_rtx (tmp), GEN_INT (ct),
9902 copy_rtx (tmp), 1, OPTAB_DIRECT);
9905 if (!rtx_equal_p (tmp, out))
9906 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9908 return 1; /* DONE */
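          /* In all of the sbb-based sequences above, TMP starts out as 0 or
             -1 depending on the carry flag, so both arms of the conditional
             are produced with at most a NOT, an AND with (cf - ct) and an
             ADD of ct -- no branches and no cmov.  */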
9911 if (diff < 0)
9913 HOST_WIDE_INT tmp;
9914 tmp = ct, ct = cf, cf = tmp;
9915 diff = -diff;
9916 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9918 /* We may be reversing an unordered compare to a normal compare, which
9919 is not valid in general (we may convert a non-trapping condition
9920 to a trapping one); however, on i386 we currently emit all
9921 comparisons unordered. */
9922 compare_code = reverse_condition_maybe_unordered (compare_code);
9923 code = reverse_condition_maybe_unordered (code);
9925 else
9927 compare_code = reverse_condition (compare_code);
9928 code = reverse_condition (code);
9932 compare_code = UNKNOWN;
9933 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9934 && GET_CODE (ix86_compare_op1) == CONST_INT)
9936 if (ix86_compare_op1 == const0_rtx
9937 && (code == LT || code == GE))
9938 compare_code = code;
9939 else if (ix86_compare_op1 == constm1_rtx)
9941 if (code == LE)
9942 compare_code = LT;
9943 else if (code == GT)
9944 compare_code = GE;
9948 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9949 if (compare_code != UNKNOWN
9950 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9951 && (cf == -1 || ct == -1))
9953 /* If lea code below could be used, only optimize
9954 if it results in a 2 insn sequence. */
9956 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9957 || diff == 3 || diff == 5 || diff == 9)
9958 || (compare_code == LT && ct == -1)
9959 || (compare_code == GE && cf == -1))
9962 * notl op1 (if necessary)
9963 * sarl $31, op1
9964 * orl cf, op1
9966 if (ct != -1)
9968 cf = ct;
9969 ct = -1;
9970 code = reverse_condition (code);
9973 out = emit_store_flag (out, code, ix86_compare_op0,
9974 ix86_compare_op1, VOIDmode, 0, -1);
9976 out = expand_simple_binop (mode, IOR,
9977 out, GEN_INT (cf),
9978 out, 1, OPTAB_DIRECT);
9979 if (out != operands[0])
9980 emit_move_insn (operands[0], out);
9982 return 1; /* DONE */
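          /* Example: "x = (a < 0) ? -1 : 5" becomes a store-flag that fills
             X with the sign of A (effectively "sarl $31") followed by
             "orl $5, x", selecting -1 or 5 without a branch.  */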
9987 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9988 || diff == 3 || diff == 5 || diff == 9)
9989 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9990 && (mode != DImode
9991 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
9994 * xorl dest,dest
9995 * cmpl op1,op2
9996 * setcc dest
9997 * lea cf(dest*(ct-cf)),dest
9999 * Size 14.
10001 * This also catches the degenerate setcc-only case.
10004 rtx tmp;
10005 int nops;
10007 out = emit_store_flag (out, code, ix86_compare_op0,
10008 ix86_compare_op1, VOIDmode, 0, 1);
10010 nops = 0;
10011 /* On x86_64 the lea instruction operates on Pmode, so we need
10012 to do the arithmetic in the proper mode to match. */
10013 if (diff == 1)
10014 tmp = copy_rtx (out);
10015 else
10017 rtx out1;
10018 out1 = copy_rtx (out);
10019 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10020 nops++;
10021 if (diff & 1)
10023 tmp = gen_rtx_PLUS (mode, tmp, out1);
10024 nops++;
10027 if (cf != 0)
10029 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10030 nops++;
10032 if (!rtx_equal_p (tmp, out))
10034 if (nops == 1)
10035 out = force_operand (tmp, copy_rtx (out));
10036 else
10037 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10039 if (!rtx_equal_p (out, operands[0]))
10040 emit_move_insn (operands[0], copy_rtx (out));
10042 return 1; /* DONE */
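          /* Example: "x = (a == b) ? 7 : 3" has diff == 4, so after setcc
             leaves 0 or 1 in X the result is formed with a single
             "leal 3(,%x,4), %x".  */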
10046 * General case: Jumpful:
10047 * xorl dest,dest cmpl op1, op2
10048 * cmpl op1, op2 movl ct, dest
10049 * setcc dest jcc 1f
10050 * decl dest movl cf, dest
10051 * andl (cf-ct),dest 1:
10052 * addl ct,dest
10054 * Size 20. Size 14.
10056 * This is reasonably steep, but branch mispredict costs are
10057 * high on modern cpus, so consider failing only if optimizing
10058 * for space.
10061 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10062 && BRANCH_COST >= 2)
10064 if (cf == 0)
10066 cf = ct;
10067 ct = 0;
10068 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10069 /* We may be reversing an unordered compare to a normal compare,
10070 which is not valid in general (we may convert a non-trapping
10071 condition to a trapping one); however, on i386 we currently
10072 emit all comparisons unordered. */
10073 code = reverse_condition_maybe_unordered (code);
10074 else
10076 code = reverse_condition (code);
10077 if (compare_code != UNKNOWN)
10078 compare_code = reverse_condition (compare_code);
10082 if (compare_code != UNKNOWN)
10084 /* notl op1 (if needed)
10085 sarl $31, op1
10086 andl (cf-ct), op1
10087 addl ct, op1
10089 For x < 0 (resp. x <= -1) there will be no notl,
10090 so if possible swap the constants to get rid of the
10091 complement.
10092 True/false will be -1/0 while code below (store flag
10093 followed by decrement) is 0/-1, so the constants need
10094 to be exchanged once more. */
10096 if (compare_code == GE || !cf)
10098 code = reverse_condition (code);
10099 compare_code = LT;
10101 else
10103 HOST_WIDE_INT tmp = cf;
10104 cf = ct;
10105 ct = tmp;
10108 out = emit_store_flag (out, code, ix86_compare_op0,
10109 ix86_compare_op1, VOIDmode, 0, -1);
10111 else
10113 out = emit_store_flag (out, code, ix86_compare_op0,
10114 ix86_compare_op1, VOIDmode, 0, 1);
10116 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10117 copy_rtx (out), 1, OPTAB_DIRECT);
10120 out = expand_simple_binop (mode, AND, copy_rtx (out),
10121 gen_int_mode (cf - ct, mode),
10122 copy_rtx (out), 1, OPTAB_DIRECT);
10123 if (ct)
10124 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10125 copy_rtx (out), 1, OPTAB_DIRECT);
10126 if (!rtx_equal_p (out, operands[0]))
10127 emit_move_insn (operands[0], copy_rtx (out));
10129 return 1; /* DONE */
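          /* The branch-free sequence computes, up to the constant swaps done
             above, ((condition ? 0 : -1) & (cf - ct)) + ct, which evaluates
             to CT when the condition holds and to CF otherwise.  */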
10133 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10135 /* Try a few things more with specific constants and a variable. */
10137 optab op;
10138 rtx var, orig_out, out, tmp;
10140 if (BRANCH_COST <= 2)
10141 return 0; /* FAIL */
10143 /* If one of the two operands is an interesting constant, load a
10144 constant with the above and mask it in with a logical operation. */
10146 if (GET_CODE (operands[2]) == CONST_INT)
10148 var = operands[3];
10149 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10150 operands[3] = constm1_rtx, op = and_optab;
10151 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10152 operands[3] = const0_rtx, op = ior_optab;
10153 else
10154 return 0; /* FAIL */
10156 else if (GET_CODE (operands[3]) == CONST_INT)
10158 var = operands[2];
10159 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10160 operands[2] = constm1_rtx, op = and_optab;
10161 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
10162 operands[2] = const0_rtx, op = ior_optab;
10163 else
10164 return 0; /* FAIL */
10166 else
10167 return 0; /* FAIL */
10169 orig_out = operands[0];
10170 tmp = gen_reg_rtx (mode);
10171 operands[0] = tmp;
10173 /* Recurse to get the constant loaded. */
10174 if (ix86_expand_int_movcc (operands) == 0)
10175 return 0; /* FAIL */
10177 /* Mask in the interesting variable. */
10178 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10179 OPTAB_WIDEN);
10180 if (!rtx_equal_p (out, orig_out))
10181 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10183 return 1; /* DONE */
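          /* For instance, "x = cond ? 0 : y" is handled by recursively
             materializing the mask "cond ? 0 : -1" and then ANDing Y into
             it; "x = cond ? -1 : y" uses IOR with the opposite mask.  */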
10187 * For comparison with above,
10189 * movl cf,dest
10190 * movl ct,tmp
10191 * cmpl op1,op2
10192 * cmovcc tmp,dest
10194 * Size 15.
10197 if (! nonimmediate_operand (operands[2], mode))
10198 operands[2] = force_reg (mode, operands[2]);
10199 if (! nonimmediate_operand (operands[3], mode))
10200 operands[3] = force_reg (mode, operands[3]);
10202 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10204 rtx tmp = gen_reg_rtx (mode);
10205 emit_move_insn (tmp, operands[3]);
10206 operands[3] = tmp;
10208 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10210 rtx tmp = gen_reg_rtx (mode);
10211 emit_move_insn (tmp, operands[2]);
10212 operands[2] = tmp;
10215 if (! register_operand (operands[2], VOIDmode)
10216 && (mode == QImode
10217 || ! register_operand (operands[3], VOIDmode)))
10218 operands[2] = force_reg (mode, operands[2]);
10220 if (mode == QImode
10221 && ! register_operand (operands[3], VOIDmode))
10222 operands[3] = force_reg (mode, operands[3]);
10224 emit_insn (compare_seq);
10225 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10226 gen_rtx_IF_THEN_ELSE (mode,
10227 compare_op, operands[2],
10228 operands[3])));
10229 if (bypass_test)
10230 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10231 gen_rtx_IF_THEN_ELSE (mode,
10232 bypass_test,
10233 copy_rtx (operands[3]),
10234 copy_rtx (operands[0]))));
10235 if (second_test)
10236 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10237 gen_rtx_IF_THEN_ELSE (mode,
10238 second_test,
10239 copy_rtx (operands[2]),
10240 copy_rtx (operands[0]))));
10242 return 1; /* DONE */
10245 /* Swap, force into registers, or otherwise massage the two operands
10246 to an sse comparison with a mask result. Thus we differ a bit from
10247 ix86_prepare_fp_compare_args which expects to produce a flags result.
10249 The DEST operand exists to help determine whether to commute commutative
10250 operators. The POP0/POP1 operands are updated in place. The new
10251 comparison code is returned, or UNKNOWN if not implementable. */
10253 static enum rtx_code
10254 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
10255 rtx *pop0, rtx *pop1)
10257 rtx tmp;
10259 switch (code)
10261 case LTGT:
10262 case UNEQ:
10263 /* We have no LTGT as an operator. We could implement it with
10264 NE & ORDERED, but this requires an extra temporary. It's
10265 not clear that it's worth it. */
10266 return UNKNOWN;
10268 case LT:
10269 case LE:
10270 case UNGT:
10271 case UNGE:
10272 /* These are supported directly. */
10273 break;
10275 case EQ:
10276 case NE:
10277 case UNORDERED:
10278 case ORDERED:
10279 /* For commutative operators, try to canonicalize the destination
10280 operand to be first in the comparison - this helps reload to
10281 avoid extra moves. */
10282 if (!dest || !rtx_equal_p (dest, *pop1))
10283 break;
10284 /* FALLTHRU */
10286 case GE:
10287 case GT:
10288 case UNLE:
10289 case UNLT:
10290 /* These are not supported directly. Swap the comparison operands
10291 to transform into something that is supported. */
10292 tmp = *pop0;
10293 *pop0 = *pop1;
10294 *pop1 = tmp;
10295 code = swap_condition (code);
10296 break;
10298 default:
10299 gcc_unreachable ();
10302 return code;
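/* For example, GT is handled by swapping the operands into LT, which maps
   directly onto cmpltss/cmpltps; only LTGT and UNEQ have no single
   instruction encoding here and are rejected.  */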
10305 /* Detect conditional moves that exactly match min/max operational
10306 semantics. Note that this is IEEE safe, as long as we don't
10307 interchange the operands.
10309 Returns FALSE if this conditional move doesn't match a MIN/MAX,
10310 and TRUE if the operation is successful and instructions are emitted. */
10312 static bool
10313 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
10314 rtx cmp_op1, rtx if_true, rtx if_false)
10316 enum machine_mode mode;
10317 bool is_min;
10318 rtx tmp;
10320 if (code == LT)
10322 else if (code == UNGE)
10324 tmp = if_true;
10325 if_true = if_false;
10326 if_false = tmp;
10328 else
10329 return false;
10331 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
10332 is_min = true;
10333 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
10334 is_min = false;
10335 else
10336 return false;
10338 mode = GET_MODE (dest);
10340 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
10341 but MODE may be a vector mode and thus not appropriate. */
10342 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
10344 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
10345 rtvec v;
10347 if_true = force_reg (mode, if_true);
10348 v = gen_rtvec (2, if_true, if_false);
10349 tmp = gen_rtx_UNSPEC (mode, v, u);
10351 else
10353 code = is_min ? SMIN : SMAX;
10354 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
10357 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
10358 return true;
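/* The SSE min/max instructions return the second operand when the inputs
   are unordered or compare equal, so for NaNs and for -0.0 vs +0.0 the
   result depends on the operand order fixed above; that is why the operands
   must not be interchanged, and why an UNSPEC is used when those cases may
   matter, so later passes do not treat this as a commutative SMIN/SMAX.  */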
10361 /* Expand an sse vector comparison. Return the register with the result. */
10363 static rtx
10364 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
10365 rtx op_true, rtx op_false)
10367 enum machine_mode mode = GET_MODE (dest);
10368 rtx x;
10370 cmp_op0 = force_reg (mode, cmp_op0);
10371 if (!nonimmediate_operand (cmp_op1, mode))
10372 cmp_op1 = force_reg (mode, cmp_op1);
10374 if (optimize
10375 || reg_overlap_mentioned_p (dest, op_true)
10376 || reg_overlap_mentioned_p (dest, op_false))
10377 dest = gen_reg_rtx (mode);
10379 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
10380 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10382 return dest;
10385 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
10386 operations. This is used for both scalar and vector conditional moves. */
10388 static void
10389 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
10391 enum machine_mode mode = GET_MODE (dest);
10392 rtx t2, t3, x;
10394 if (op_false == CONST0_RTX (mode))
10396 op_true = force_reg (mode, op_true);
10397 x = gen_rtx_AND (mode, cmp, op_true);
10398 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10400 else if (op_true == CONST0_RTX (mode))
10402 op_false = force_reg (mode, op_false);
10403 x = gen_rtx_NOT (mode, cmp);
10404 x = gen_rtx_AND (mode, x, op_false);
10405 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10407 else
10409 op_true = force_reg (mode, op_true);
10410 op_false = force_reg (mode, op_false);
10412 t2 = gen_reg_rtx (mode);
10413 if (optimize)
10414 t3 = gen_reg_rtx (mode);
10415 else
10416 t3 = dest;
10418 x = gen_rtx_AND (mode, op_true, cmp);
10419 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
10421 x = gen_rtx_NOT (mode, cmp);
10422 x = gen_rtx_AND (mode, x, op_false);
10423 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
10425 x = gen_rtx_IOR (mode, t3, t2);
10426 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
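/* This is the classic mask-and-blend idiom:
       dest = (cmp & op_true) | (~cmp & op_false)
   where CMP is the all-ones/all-zeros mask produced by the SSE compare;
   the two branches above are the shortcuts used when one arm is zero.  */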
10430 /* Expand a floating-point conditional move. Return true if successful. */
10433 ix86_expand_fp_movcc (rtx operands[])
10435 enum machine_mode mode = GET_MODE (operands[0]);
10436 enum rtx_code code = GET_CODE (operands[1]);
10437 rtx tmp, compare_op, second_test, bypass_test;
10439 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
10441 enum machine_mode cmode;
10443 /* Since we've no cmove for sse registers, don't force bad register
10444 allocation just to gain access to it. Deny movcc when the
10445 comparison mode doesn't match the move mode. */
10446 cmode = GET_MODE (ix86_compare_op0);
10447 if (cmode == VOIDmode)
10448 cmode = GET_MODE (ix86_compare_op1);
10449 if (cmode != mode)
10450 return 0;
10452 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
10453 &ix86_compare_op0,
10454 &ix86_compare_op1);
10455 if (code == UNKNOWN)
10456 return 0;
10458 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
10459 ix86_compare_op1, operands[2],
10460 operands[3]))
10461 return 1;
10463 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
10464 ix86_compare_op1, operands[2], operands[3]);
10465 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
10466 return 1;
10469 /* The floating point conditional move instructions don't directly
10470 support conditions resulting from a signed integer comparison. */
10472 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10474 /* The floating point conditional move instructions don't directly
10475 support signed integer comparisons. */
10477 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10479 gcc_assert (!second_test && !bypass_test);
10480 tmp = gen_reg_rtx (QImode);
10481 ix86_expand_setcc (code, tmp);
10482 code = NE;
10483 ix86_compare_op0 = tmp;
10484 ix86_compare_op1 = const0_rtx;
10485 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10487 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10489 tmp = gen_reg_rtx (mode);
10490 emit_move_insn (tmp, operands[3]);
10491 operands[3] = tmp;
10493 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10495 tmp = gen_reg_rtx (mode);
10496 emit_move_insn (tmp, operands[2]);
10497 operands[2] = tmp;
10500 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10501 gen_rtx_IF_THEN_ELSE (mode, compare_op,
10502 operands[2], operands[3])));
10503 if (bypass_test)
10504 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10505 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
10506 operands[3], operands[0])));
10507 if (second_test)
10508 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10509 gen_rtx_IF_THEN_ELSE (mode, second_test,
10510 operands[2], operands[0])));
10512 return 1;
10515 /* Expand a floating-point vector conditional move; a vcond operation
10516 rather than a movcc operation. */
10518 bool
10519 ix86_expand_fp_vcond (rtx operands[])
10521 enum rtx_code code = GET_CODE (operands[3]);
10522 rtx cmp;
10524 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
10525 &operands[4], &operands[5]);
10526 if (code == UNKNOWN)
10527 return false;
10529 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
10530 operands[5], operands[1], operands[2]))
10531 return true;
10533 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
10534 operands[1], operands[2]);
10535 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
10536 return true;
10539 /* Expand a signed integral vector conditional move. */
10541 bool
10542 ix86_expand_int_vcond (rtx operands[])
10544 enum machine_mode mode = GET_MODE (operands[0]);
10545 enum rtx_code code = GET_CODE (operands[3]);
10546 bool negate = false;
10547 rtx x, cop0, cop1;
10549 cop0 = operands[4];
10550 cop1 = operands[5];
10552 /* Canonicalize the comparison to EQ, GT, GTU. */
10553 switch (code)
10555 case EQ:
10556 case GT:
10557 case GTU:
10558 break;
10560 case NE:
10561 case LE:
10562 case LEU:
10563 code = reverse_condition (code);
10564 negate = true;
10565 break;
10567 case GE:
10568 case GEU:
10569 code = reverse_condition (code);
10570 negate = true;
10571 /* FALLTHRU */
10573 case LT:
10574 case LTU:
10575 code = swap_condition (code);
10576 x = cop0, cop0 = cop1, cop1 = x;
10577 break;
10579 default:
10580 gcc_unreachable ();
10583 /* Unsigned parallel compare is not supported by the hardware. Play some
10584 tricks to turn this into a signed comparison against 0. */
10585 if (code == GTU)
10587 switch (mode)
10589 case V4SImode:
10591 rtx t1, t2, mask;
10593 /* Perform a parallel modulo subtraction. */
10594 t1 = gen_reg_rtx (mode);
10595 emit_insn (gen_subv4si3 (t1, cop0, cop1));
10597 /* Extract the original sign bit of op0. */
10598 mask = GEN_INT (-0x80000000);
10599 mask = gen_rtx_CONST_VECTOR (mode,
10600 gen_rtvec (4, mask, mask, mask, mask));
10601 mask = force_reg (mode, mask);
10602 t2 = gen_reg_rtx (mode);
10603 emit_insn (gen_andv4si3 (t2, cop0, mask));
10605 /* XOR it back into the result of the subtraction. This results
10606 in the sign bit set iff we saw unsigned underflow. */
10607 x = gen_reg_rtx (mode);
10608 emit_insn (gen_xorv4si3 (x, t1, t2));
10610 code = GT;
10612 break;
10614 case V16QImode:
10615 case V8HImode:
10616 /* Perform a parallel unsigned saturating subtraction. */
10617 x = gen_reg_rtx (mode);
10618 emit_insn (gen_rtx_SET (VOIDmode, x,
10619 gen_rtx_US_MINUS (mode, cop0, cop1)));
10621 code = EQ;
10622 negate = !negate;
10623 break;
10625 default:
10626 gcc_unreachable ();
10629 cop0 = x;
10630 cop1 = CONST0_RTX (mode);
10633 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
10634 operands[1+negate], operands[2-negate]);
10636 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
10637 operands[2-negate]);
10638 return true;
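/* Note on the V16QImode/V8HImode path above: the unsigned saturating
   subtraction "a -us b" is zero exactly when a <= b (unsigned), so testing
   the result for equality with zero and flipping NEGATE yields the
   required GTU selection.  */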
10641 /* Expand conditional increment or decrement using adc/sbb instructions.
10642 The default case using setcc followed by the conditional move can be
10643 done by generic code. */
10645 ix86_expand_int_addcc (rtx operands[])
10647 enum rtx_code code = GET_CODE (operands[1]);
10648 rtx compare_op;
10649 rtx val = const0_rtx;
10650 bool fpcmp = false;
10651 enum machine_mode mode = GET_MODE (operands[0]);
10653 if (operands[3] != const1_rtx
10654 && operands[3] != constm1_rtx)
10655 return 0;
10656 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10657 ix86_compare_op1, &compare_op))
10658 return 0;
10659 code = GET_CODE (compare_op);
10661 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10662 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10664 fpcmp = true;
10665 code = ix86_fp_compare_code_to_integer (code);
10668 if (code != LTU)
10670 val = constm1_rtx;
10671 if (fpcmp)
10672 PUT_CODE (compare_op,
10673 reverse_condition_maybe_unordered
10674 (GET_CODE (compare_op)));
10675 else
10676 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10678 PUT_MODE (compare_op, mode);
10680 /* Construct either adc or sbb insn. */
10681 if ((code == LTU) == (operands[3] == constm1_rtx))
10683 switch (GET_MODE (operands[0]))
10685 case QImode:
10686 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10687 break;
10688 case HImode:
10689 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10690 break;
10691 case SImode:
10692 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10693 break;
10694 case DImode:
10695 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10696 break;
10697 default:
10698 gcc_unreachable ();
10701 else
10703 switch (GET_MODE (operands[0]))
10705 case QImode:
10706 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10707 break;
10708 case HImode:
10709 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10710 break;
10711 case SImode:
10712 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10713 break;
10714 case DImode:
10715 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10716 break;
10717 default:
10718 gcc_unreachable ();
10721 return 1; /* DONE */
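/* For example, "if (a < b) x++;" with unsigned operands compiles to a
   compare that sets the carry flag followed by an adc with a 0 immediate;
   the decrement forms use sbb, and reversed conditions are handled by the
   -1 immediate chosen above.  */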
10725 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10726 works for floating point parameters and non-offsettable memories.
10727 For pushes, it returns just stack offsets; the values will be saved
10728 in the right order. Maximally three parts are generated. */
10730 static int
10731 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10733 int size;
10735 if (!TARGET_64BIT)
10736 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10737 else
10738 size = (GET_MODE_SIZE (mode) + 4) / 8;
10740 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
10741 gcc_assert (size >= 2 && size <= 3);
10743 /* Optimize constant pool references to immediates. This is used by fp
10744 moves, which force all constants to memory to allow combining. */
10745 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
10747 rtx tmp = maybe_get_pool_constant (operand);
10748 if (tmp)
10749 operand = tmp;
10752 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10754 /* The only non-offsettable memories we handle are pushes. */
10755 int ok = push_operand (operand, VOIDmode);
10757 gcc_assert (ok);
10759 operand = copy_rtx (operand);
10760 PUT_MODE (operand, Pmode);
10761 parts[0] = parts[1] = parts[2] = operand;
10762 return size;
10765 if (GET_CODE (operand) == CONST_VECTOR)
10767 enum machine_mode imode = int_mode_for_mode (mode);
10768 /* Caution: if we looked through a constant pool memory above,
10769 the operand may actually have a different mode now. That's
10770 ok, since we want to pun this all the way back to an integer. */
10771 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
10772 gcc_assert (operand != NULL);
10773 mode = imode;
10776 if (!TARGET_64BIT)
10778 if (mode == DImode)
10779 split_di (&operand, 1, &parts[0], &parts[1]);
10780 else
10782 if (REG_P (operand))
10784 gcc_assert (reload_completed);
10785 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10786 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10787 if (size == 3)
10788 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10790 else if (offsettable_memref_p (operand))
10792 operand = adjust_address (operand, SImode, 0);
10793 parts[0] = operand;
10794 parts[1] = adjust_address (operand, SImode, 4);
10795 if (size == 3)
10796 parts[2] = adjust_address (operand, SImode, 8);
10798 else if (GET_CODE (operand) == CONST_DOUBLE)
10800 REAL_VALUE_TYPE r;
10801 long l[4];
10803 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10804 switch (mode)
10806 case XFmode:
10807 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10808 parts[2] = gen_int_mode (l[2], SImode);
10809 break;
10810 case DFmode:
10811 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10812 break;
10813 default:
10814 gcc_unreachable ();
10816 parts[1] = gen_int_mode (l[1], SImode);
10817 parts[0] = gen_int_mode (l[0], SImode);
10819 else
10820 gcc_unreachable ();
10823 else
10825 if (mode == TImode)
10826 split_ti (&operand, 1, &parts[0], &parts[1]);
10827 if (mode == XFmode || mode == TFmode)
10829 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
10830 if (REG_P (operand))
10832 gcc_assert (reload_completed);
10833 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10834 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10836 else if (offsettable_memref_p (operand))
10838 operand = adjust_address (operand, DImode, 0);
10839 parts[0] = operand;
10840 parts[1] = adjust_address (operand, upper_mode, 8);
10842 else if (GET_CODE (operand) == CONST_DOUBLE)
10844 REAL_VALUE_TYPE r;
10845 long l[4];
10847 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10848 real_to_target (l, &r, mode);
10850 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10851 if (HOST_BITS_PER_WIDE_INT >= 64)
10852 parts[0]
10853 = gen_int_mode
10854 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10855 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10856 DImode);
10857 else
10858 parts[0] = immed_double_const (l[0], l[1], DImode);
10860 if (upper_mode == SImode)
10861 parts[1] = gen_int_mode (l[2], SImode);
10862 else if (HOST_BITS_PER_WIDE_INT >= 64)
10863 parts[1]
10864 = gen_int_mode
10865 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10866 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10867 DImode);
10868 else
10869 parts[1] = immed_double_const (l[2], l[3], DImode);
10871 else
10872 gcc_unreachable ();
10876 return size;
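/* In effect, on a 32-bit target DImode and DFmode values split into two
   SImode words and XFmode into three, while on a 64-bit target the 16-byte
   modes (TImode, XFmode, TFmode) split into two 8-byte parts.  */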
10879 /* Emit insns to perform a move or push of DI, DF, and XF values.
10880 Return false when normal moves are needed; true when all required
10881 insns have been emitted. Operands 2-4 contain the input values
10882 in the correct order; operands 5-7 contain the output values. */
10884 void
10885 ix86_split_long_move (rtx operands[])
10887 rtx part[2][3];
10888 int nparts;
10889 int push = 0;
10890 int collisions = 0;
10891 enum machine_mode mode = GET_MODE (operands[0]);
10893 /* The DFmode expanders may ask us to move a double.
10894 For a 64-bit target this is a single move. By hiding the fact
10895 here we simplify i386.md splitters. */
10896 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10898 /* Optimize constant pool references to immediates. This is used by
10899 fp moves, which force all constants to memory to allow combining. */
10901 if (GET_CODE (operands[1]) == MEM
10902 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10903 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10904 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10905 if (push_operand (operands[0], VOIDmode))
10907 operands[0] = copy_rtx (operands[0]);
10908 PUT_MODE (operands[0], Pmode);
10910 else
10911 operands[0] = gen_lowpart (DImode, operands[0]);
10912 operands[1] = gen_lowpart (DImode, operands[1]);
10913 emit_move_insn (operands[0], operands[1]);
10914 return;
10917 /* The only non-offsettable memory we handle is push. */
10918 if (push_operand (operands[0], VOIDmode))
10919 push = 1;
10920 else
10921 gcc_assert (GET_CODE (operands[0]) != MEM
10922 || offsettable_memref_p (operands[0]));
10924 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10925 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10927 /* When emitting push, take care for source operands on the stack. */
10928 if (push && GET_CODE (operands[1]) == MEM
10929 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10931 if (nparts == 3)
10932 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10933 XEXP (part[1][2], 0));
10934 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10935 XEXP (part[1][1], 0));
10938 /* We need to do the copy in the right order in case an address register
10939 of the source overlaps the destination. */
10940 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10942 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10943 collisions++;
10944 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10945 collisions++;
10946 if (nparts == 3
10947 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10948 collisions++;
10950 /* Collision in the middle part can be handled by reordering. */
10951 if (collisions == 1 && nparts == 3
10952 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10954 rtx tmp;
10955 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10956 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10959 /* If there are more collisions, we can't handle it by reordering.
10960 Do an lea to the last part and use only one colliding move. */
10961 else if (collisions > 1)
10963 rtx base;
10965 collisions = 1;
10967 base = part[0][nparts - 1];
10969 /* Handle the case when the last part isn't valid for lea.
10970 Happens in 64-bit mode storing the 12-byte XFmode. */
10971 if (GET_MODE (base) != Pmode)
10972 base = gen_rtx_REG (Pmode, REGNO (base));
10974 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10975 part[1][0] = replace_equiv_address (part[1][0], base);
10976 part[1][1] = replace_equiv_address (part[1][1],
10977 plus_constant (base, UNITS_PER_WORD));
10978 if (nparts == 3)
10979 part[1][2] = replace_equiv_address (part[1][2],
10980 plus_constant (base, 8));
10984 if (push)
10986 if (!TARGET_64BIT)
10988 if (nparts == 3)
10990 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10991 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10992 emit_move_insn (part[0][2], part[1][2]);
10995 else
10997 /* In 64-bit mode we don't have a 32-bit push available. If this is a
10998 register, that is OK - we will just use the larger counterpart. We also
10999 retype memory - this comes from an attempt to avoid the REX prefix on
11000 moving the second half of a TFmode value. */
11001 if (GET_MODE (part[1][1]) == SImode)
11003 switch (GET_CODE (part[1][1]))
11005 case MEM:
11006 part[1][1] = adjust_address (part[1][1], DImode, 0);
11007 break;
11009 case REG:
11010 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
11011 break;
11013 default:
11014 gcc_unreachable ();
11017 if (GET_MODE (part[1][0]) == SImode)
11018 part[1][0] = part[1][1];
11021 emit_move_insn (part[0][1], part[1][1]);
11022 emit_move_insn (part[0][0], part[1][0]);
11023 return;
11026 /* Choose correct order to not overwrite the source before it is copied. */
11027 if ((REG_P (part[0][0])
11028 && REG_P (part[1][1])
11029 && (REGNO (part[0][0]) == REGNO (part[1][1])
11030 || (nparts == 3
11031 && REGNO (part[0][0]) == REGNO (part[1][2]))))
11032 || (collisions > 0
11033 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
11035 if (nparts == 3)
11037 operands[2] = part[0][2];
11038 operands[3] = part[0][1];
11039 operands[4] = part[0][0];
11040 operands[5] = part[1][2];
11041 operands[6] = part[1][1];
11042 operands[7] = part[1][0];
11044 else
11046 operands[2] = part[0][1];
11047 operands[3] = part[0][0];
11048 operands[5] = part[1][1];
11049 operands[6] = part[1][0];
11052 else
11054 if (nparts == 3)
11056 operands[2] = part[0][0];
11057 operands[3] = part[0][1];
11058 operands[4] = part[0][2];
11059 operands[5] = part[1][0];
11060 operands[6] = part[1][1];
11061 operands[7] = part[1][2];
11063 else
11065 operands[2] = part[0][0];
11066 operands[3] = part[0][1];
11067 operands[5] = part[1][0];
11068 operands[6] = part[1][1];
11072 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
11073 if (optimize_size)
11075 if (GET_CODE (operands[5]) == CONST_INT
11076 && operands[5] != const0_rtx
11077 && REG_P (operands[2]))
11079 if (GET_CODE (operands[6]) == CONST_INT
11080 && INTVAL (operands[6]) == INTVAL (operands[5]))
11081 operands[6] = operands[2];
11083 if (nparts == 3
11084 && GET_CODE (operands[7]) == CONST_INT
11085 && INTVAL (operands[7]) == INTVAL (operands[5]))
11086 operands[7] = operands[2];
11089 if (nparts == 3
11090 && GET_CODE (operands[6]) == CONST_INT
11091 && operands[6] != const0_rtx
11092 && REG_P (operands[3])
11093 && GET_CODE (operands[7]) == CONST_INT
11094 && INTVAL (operands[7]) == INTVAL (operands[6]))
11095 operands[7] = operands[3];
11098 emit_move_insn (operands[2], operands[5]);
11099 emit_move_insn (operands[3], operands[6]);
11100 if (nparts == 3)
11101 emit_move_insn (operands[4], operands[7]);
11103 return;
11106 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
11107 left shift by a constant, either using a single shift or
11108 a sequence of add instructions. */
11110 static void
11111 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
11113 if (count == 1)
11115 emit_insn ((mode == DImode
11116 ? gen_addsi3
11117 : gen_adddi3) (operand, operand, operand));
11119 else if (!optimize_size
11120 && count * ix86_cost->add <= ix86_cost->shift_const)
11122 int i;
11123 for (i=0; i<count; i++)
11125 emit_insn ((mode == DImode
11126 ? gen_addsi3
11127 : gen_adddi3) (operand, operand, operand));
11130 else
11131 emit_insn ((mode == DImode
11132 ? gen_ashlsi3
11133 : gen_ashldi3) (operand, operand, GEN_INT (count)));
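/* For instance, with hypothetical cost-table entries add = 1 and
   shift_const = 4, a constant left shift by 3 is emitted as three adds when
   not optimizing for size; otherwise a single shift instruction is used.  */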
11136 void
11137 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
11139 rtx low[2], high[2];
11140 int count;
11141 const int single_width = mode == DImode ? 32 : 64;
11143 if (GET_CODE (operands[2]) == CONST_INT)
11145 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
11146 count = INTVAL (operands[2]) & (single_width * 2 - 1);
11148 if (count >= single_width)
11150 emit_move_insn (high[0], low[1]);
11151 emit_move_insn (low[0], const0_rtx);
11153 if (count > single_width)
11154 ix86_expand_ashl_const (high[0], count - single_width, mode);
11156 else
11158 if (!rtx_equal_p (operands[0], operands[1]))
11159 emit_move_insn (operands[0], operands[1]);
11160 emit_insn ((mode == DImode
11161 ? gen_x86_shld_1
11162 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
11163 ix86_expand_ashl_const (low[0], count, mode);
11165 return;
11168 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
11170 if (operands[1] == const1_rtx)
11172 /* Assuming we've chosen QImode-capable registers, 1 << N
11173 can be done with two 32/64-bit shifts, no branches, no cmoves. */
11174 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
11176 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
11178 ix86_expand_clear (low[0]);
11179 ix86_expand_clear (high[0]);
11180 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
11182 d = gen_lowpart (QImode, low[0]);
11183 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
11184 s = gen_rtx_EQ (QImode, flags, const0_rtx);
11185 emit_insn (gen_rtx_SET (VOIDmode, d, s));
11187 d = gen_lowpart (QImode, high[0]);
11188 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
11189 s = gen_rtx_NE (QImode, flags, const0_rtx);
11190 emit_insn (gen_rtx_SET (VOIDmode, d, s));
11193 /* Otherwise, we can get the same results by manually performing
11194 a bit extract operation on bit 5/6, and then performing the two
11195 shifts. The two methods of getting 0/1 into low/high are exactly
11196 the same size. Avoiding the shift in the bit extract case helps
11197 pentium4 a bit; no one else seems to care much either way. */
11198 else
11200 rtx x;
11202 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
11203 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
11204 else
11205 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
11206 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
11208 emit_insn ((mode == DImode
11209 ? gen_lshrsi3
11210 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
11211 emit_insn ((mode == DImode
11212 ? gen_andsi3
11213 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
11214 emit_move_insn (low[0], high[0]);
11215 emit_insn ((mode == DImode
11216 ? gen_xorsi3
11217 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
11220 emit_insn ((mode == DImode
11221 ? gen_ashlsi3
11222 : gen_ashldi3) (low[0], low[0], operands[2]));
11223 emit_insn ((mode == DImode
11224 ? gen_ashlsi3
11225 : gen_ashldi3) (high[0], high[0], operands[2]));
11226 return;
11229 if (operands[1] == constm1_rtx)
11231 /* For -1 << N, we can avoid the shld instruction, because we
11232 know that we're shifting 0...31/63 ones into a -1. */
11233 emit_move_insn (low[0], constm1_rtx);
11234 if (optimize_size)
11235 emit_move_insn (high[0], low[0]);
11236 else
11237 emit_move_insn (high[0], constm1_rtx);
11239 else
11241 if (!rtx_equal_p (operands[0], operands[1]))
11242 emit_move_insn (operands[0], operands[1]);
11244 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
11245 emit_insn ((mode == DImode
11246 ? gen_x86_shld_1
11247 : gen_x86_64_shld) (high[0], low[0], operands[2]));
11250 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
11252 if (TARGET_CMOVE && scratch)
11254 ix86_expand_clear (scratch);
11255 emit_insn ((mode == DImode
11256 ? gen_x86_shift_adj_1
11257 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
11259 else
11260 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
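/* For a variable shift count the double-word value is first shifted with
   SHLD plus a plain shift, and the adjustment pattern then handles counts of
   32/64 or more by moving the low word into the high word and clearing the
   low word -- with CMOV when a scratch register is available, with a branch
   otherwise.  */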
11263 void
11264 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
11266 rtx low[2], high[2];
11267 int count;
11268 const int single_width = mode == DImode ? 32 : 64;
11270 if (GET_CODE (operands[2]) == CONST_INT)
11272 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
11273 count = INTVAL (operands[2]) & (single_width * 2 - 1);
11275 if (count == single_width * 2 - 1)
11277 emit_move_insn (high[0], high[1]);
11278 emit_insn ((mode == DImode
11279 ? gen_ashrsi3
11280 : gen_ashrdi3) (high[0], high[0],
11281 GEN_INT (single_width - 1)));
11282 emit_move_insn (low[0], high[0]);
11285 else if (count >= single_width)
11287 emit_move_insn (low[0], high[1]);
11288 emit_move_insn (high[0], low[0]);
11289 emit_insn ((mode == DImode
11290 ? gen_ashrsi3
11291 : gen_ashrdi3) (high[0], high[0],
11292 GEN_INT (single_width - 1)));
11293 if (count > single_width)
11294 emit_insn ((mode == DImode
11295 ? gen_ashrsi3
11296 : gen_ashrdi3) (low[0], low[0],
11297 GEN_INT (count - single_width)));
11299 else
11301 if (!rtx_equal_p (operands[0], operands[1]))
11302 emit_move_insn (operands[0], operands[1]);
11303 emit_insn ((mode == DImode
11304 ? gen_x86_shrd_1
11305 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
11306 emit_insn ((mode == DImode
11307 ? gen_ashrsi3
11308 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
11311 else
11313 if (!rtx_equal_p (operands[0], operands[1]))
11314 emit_move_insn (operands[0], operands[1]);
11316 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
11318 emit_insn ((mode == DImode
11319 ? gen_x86_shrd_1
11320 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
11321 emit_insn ((mode == DImode
11322 ? gen_ashrsi3
11323 : gen_ashrdi3) (high[0], high[0], operands[2]));
11325 if (TARGET_CMOVE && scratch)
11327 emit_move_insn (scratch, high[0]);
11328 emit_insn ((mode == DImode
11329 ? gen_ashrsi3
11330 : gen_ashrdi3) (scratch, scratch,
11331 GEN_INT (single_width - 1)));
11332 emit_insn ((mode == DImode
11333 ? gen_x86_shift_adj_1
11334 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
11335 scratch));
11337 else
11338 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
11342 void
11343 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
11345 rtx low[2], high[2];
11346 int count;
11347 const int single_width = mode == DImode ? 32 : 64;
11349 if (GET_CODE (operands[2]) == CONST_INT)
11351 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
11352 count = INTVAL (operands[2]) & (single_width * 2 - 1);
11354 if (count >= single_width)
11356 emit_move_insn (low[0], high[1]);
11357 ix86_expand_clear (high[0]);
11359 if (count > single_width)
11360 emit_insn ((mode == DImode
11361 ? gen_lshrsi3
11362 : gen_lshrdi3) (low[0], low[0],
11363 GEN_INT (count - single_width)));
11365 else
11367 if (!rtx_equal_p (operands[0], operands[1]))
11368 emit_move_insn (operands[0], operands[1]);
11369 emit_insn ((mode == DImode
11370 ? gen_x86_shrd_1
11371 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
11372 emit_insn ((mode == DImode
11373 ? gen_lshrsi3
11374 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
11377 else
11379 if (!rtx_equal_p (operands[0], operands[1]))
11380 emit_move_insn (operands[0], operands[1]);
11382 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
11384 emit_insn ((mode == DImode
11385 ? gen_x86_shrd_1
11386 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
11387 emit_insn ((mode == DImode
11388 ? gen_lshrsi3
11389 : gen_lshrdi3) (high[0], high[0], operands[2]));
11391 /* Heh. By reversing the arguments, we can reuse this pattern. */
11392 if (TARGET_CMOVE && scratch)
11394 ix86_expand_clear (scratch);
11395 emit_insn ((mode == DImode
11396 ? gen_x86_shift_adj_1
11397 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
11398 scratch));
11400 else
11401 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
11405 /* Helper function for the string operations below. Test whether the low
11406 VALUE bits of VARIABLE are zero and, if so, jump to the returned label. */
11407 static rtx
11408 ix86_expand_aligntest (rtx variable, int value)
11410 rtx label = gen_label_rtx ();
11411 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
11412 if (GET_MODE (variable) == DImode)
11413 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
11414 else
11415 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
11416 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
11417 1, label);
11418 return label;
11421 /* Adjust COUNTER by the VALUE. */
11422 static void
11423 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
11425 if (GET_MODE (countreg) == DImode)
11426 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
11427 else
11428 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
11431 /* Zero extend possibly SImode EXP to Pmode register. */
11433 ix86_zero_extend_to_Pmode (rtx exp)
11435 rtx r;
11436 if (GET_MODE (exp) == VOIDmode)
11437 return force_reg (Pmode, exp);
11438 if (GET_MODE (exp) == Pmode)
11439 return copy_to_mode_reg (Pmode, exp);
11440 r = gen_reg_rtx (Pmode);
11441 emit_insn (gen_zero_extendsidi2 (r, exp));
11442 return r;
11445 /* Expand string move (memcpy) operation. Use i386 string operations when
11446 profitable. expand_clrmem contains similar code. */
11448 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
11450 rtx srcreg, destreg, countreg, srcexp, destexp;
11451 enum machine_mode counter_mode;
11452 HOST_WIDE_INT align = 0;
11453 unsigned HOST_WIDE_INT count = 0;
11455 if (GET_CODE (align_exp) == CONST_INT)
11456 align = INTVAL (align_exp);
11458 /* Can't use any of this if the user has appropriated esi or edi. */
11459 if (global_regs[4] || global_regs[5])
11460 return 0;
11462 /* This simple hack avoids all inlining code and simplifies code below. */
11463 if (!TARGET_ALIGN_STRINGOPS)
11464 align = 64;
11466 if (GET_CODE (count_exp) == CONST_INT)
11468 count = INTVAL (count_exp);
11469 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11470 return 0;
11473 /* Figure out the proper mode for the counter. For 32 bits it is always
11474 SImode; for 64 bits use SImode when possible, otherwise DImode.
11475 Set COUNT to the number of bytes copied when known at compile time. */
11476 if (!TARGET_64BIT
11477 || GET_MODE (count_exp) == SImode
11478 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
11479 counter_mode = SImode;
11480 else
11481 counter_mode = DImode;
11483 gcc_assert (counter_mode == SImode || counter_mode == DImode);
11485 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11486 if (destreg != XEXP (dst, 0))
11487 dst = replace_equiv_address_nv (dst, destreg);
11488 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11489 if (srcreg != XEXP (src, 0))
11490 src = replace_equiv_address_nv (src, srcreg);
11492 /* When optimizing for size emit simple rep ; movsb instruction for
11493 counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)?
11494 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
11495 Size of the (movsl;)*(movsw;)?(movsb;)? sequence is
11496 count / 4 + (count & 3), the other sequence is either 4 or 7 bytes,
11497 but we don't know whether the upper 24 (resp. 56) bits of %ecx are
11498 known to be zero or not. The rep; movsb sequence causes higher
11499 register pressure though, so take that into account. */
11501 if ((!optimize || optimize_size)
11502 && (count == 0
11503 || ((count & 0x03)
11504 && (!optimize_size
11505 || count > 5 * 4
11506 || (count & 3) + count / 4 > 6))))
11508 emit_insn (gen_cld ());
11509 countreg = ix86_zero_extend_to_Pmode (count_exp);
11510 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11511 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11512 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11513 destexp, srcexp));
11516 /* For constant aligned (or small unaligned) copies use rep movsl
11517 followed by code copying the rest. For PentiumPro ensure 8 byte
11518 alignment to allow rep movsl acceleration. */
11520 else if (count != 0
11521 && (align >= 8
11522 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11523 || optimize_size || count < (unsigned int) 64))
11525 unsigned HOST_WIDE_INT offset = 0;
11526 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11527 rtx srcmem, dstmem;
11529 emit_insn (gen_cld ());
11530 if (count & ~(size - 1))
11532 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
11534 enum machine_mode movs_mode = size == 4 ? SImode : DImode;
11536 while (offset < (count & ~(size - 1)))
11538 srcmem = adjust_automodify_address_nv (src, movs_mode,
11539 srcreg, offset);
11540 dstmem = adjust_automodify_address_nv (dst, movs_mode,
11541 destreg, offset);
11542 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11543 offset += size;
11546 else
11548 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
11549 & (TARGET_64BIT ? -1 : 0x3fffffff));
11550 countreg = copy_to_mode_reg (counter_mode, countreg);
11551 countreg = ix86_zero_extend_to_Pmode (countreg);
11553 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11554 GEN_INT (size == 4 ? 2 : 3));
11555 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11556 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11558 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11559 countreg, destexp, srcexp));
11560 offset = count & ~(size - 1);
11563 if (size == 8 && (count & 0x04))
11565 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11566 offset);
11567 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11568 offset);
11569 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11570 offset += 4;
11572 if (count & 0x02)
11574 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11575 offset);
11576 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11577 offset);
11578 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11579 offset += 2;
11581 if (count & 0x01)
11583 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11584 offset);
11585 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11586 offset);
11587 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11590 /* The generic code based on the glibc implementation:
11591 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11592 allowing accelerated copying there)
11593 - copy the data using rep movsl
11594 - copy the rest. */
11595 else
11597 rtx countreg2;
11598 rtx label = NULL;
11599 rtx srcmem, dstmem;
11600 int desired_alignment = (TARGET_PENTIUMPRO
11601 && (count == 0 || count >= (unsigned int) 260)
11602 ? 8 : UNITS_PER_WORD);
11603 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11604 dst = change_address (dst, BLKmode, destreg);
11605 src = change_address (src, BLKmode, srcreg);
11607 /* In case we don't know anything about the alignment, default to the
11608 library version, since it is usually equally fast and results in
11609 shorter code.
11611 Also emit call when we know that the count is large and call overhead
11612 will not be important. */
11613 if (!TARGET_INLINE_ALL_STRINGOPS
11614 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11615 return 0;
11617 if (TARGET_SINGLE_STRINGOP)
11618 emit_insn (gen_cld ());
11620 countreg2 = gen_reg_rtx (Pmode);
11621 countreg = copy_to_mode_reg (counter_mode, count_exp);
11623 /* We don't use loops to align destination and to copy parts smaller
11624 than 4 bytes, because gcc is able to optimize such code better (in
11625 the case the destination or the count really is aligned, gcc is often
11626 able to predict the branches) and also it is friendlier to the
11627 hardware branch prediction.
11629 Using loops is beneficial for the generic case, because we can
11630 handle small counts using the loops. Many CPUs (such as Athlon)
11631 have large REP prefix setup costs.
11633 This is quite costly. Maybe we can revisit this decision later or
11634 add some customizability to this code. */
11636 if (count == 0 && align < desired_alignment)
11638 label = gen_label_rtx ();
11639 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11640 LEU, 0, counter_mode, 1, label);
11642 if (align <= 1)
11644 rtx label = ix86_expand_aligntest (destreg, 1);
11645 srcmem = change_address (src, QImode, srcreg);
11646 dstmem = change_address (dst, QImode, destreg);
11647 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11648 ix86_adjust_counter (countreg, 1);
11649 emit_label (label);
11650 LABEL_NUSES (label) = 1;
11652 if (align <= 2)
11654 rtx label = ix86_expand_aligntest (destreg, 2);
11655 srcmem = change_address (src, HImode, srcreg);
11656 dstmem = change_address (dst, HImode, destreg);
11657 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11658 ix86_adjust_counter (countreg, 2);
11659 emit_label (label);
11660 LABEL_NUSES (label) = 1;
11662 if (align <= 4 && desired_alignment > 4)
11664 rtx label = ix86_expand_aligntest (destreg, 4);
11665 srcmem = change_address (src, SImode, srcreg);
11666 dstmem = change_address (dst, SImode, destreg);
11667 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11668 ix86_adjust_counter (countreg, 4);
11669 emit_label (label);
11670 LABEL_NUSES (label) = 1;
11673 if (label && desired_alignment > 4 && !TARGET_64BIT)
11675 emit_label (label);
11676 LABEL_NUSES (label) = 1;
11677 label = NULL_RTX;
11679 if (!TARGET_SINGLE_STRINGOP)
11680 emit_insn (gen_cld ());
11681 if (TARGET_64BIT)
11683 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11684 GEN_INT (3)));
11685 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11687 else
11689 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11690 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11692 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11693 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11694 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11695 countreg2, destexp, srcexp));
11697 if (label)
11699 emit_label (label);
11700 LABEL_NUSES (label) = 1;
11702 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11704 srcmem = change_address (src, SImode, srcreg);
11705 dstmem = change_address (dst, SImode, destreg);
11706 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11708 if ((align <= 4 || count == 0) && TARGET_64BIT)
11710 rtx label = ix86_expand_aligntest (countreg, 4);
11711 srcmem = change_address (src, SImode, srcreg);
11712 dstmem = change_address (dst, SImode, destreg);
11713 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11714 emit_label (label);
11715 LABEL_NUSES (label) = 1;
11717 if (align > 2 && count != 0 && (count & 2))
11719 srcmem = change_address (src, HImode, srcreg);
11720 dstmem = change_address (dst, HImode, destreg);
11721 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11723 if (align <= 2 || count == 0)
11725 rtx label = ix86_expand_aligntest (countreg, 2);
11726 srcmem = change_address (src, HImode, srcreg);
11727 dstmem = change_address (dst, HImode, destreg);
11728 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11729 emit_label (label);
11730 LABEL_NUSES (label) = 1;
11732 if (align > 1 && count != 0 && (count & 1))
11734 srcmem = change_address (src, QImode, srcreg);
11735 dstmem = change_address (dst, QImode, destreg);
11736 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11738 if (align <= 1 || count == 0)
11740 rtx label = ix86_expand_aligntest (countreg, 1);
11741 srcmem = change_address (src, QImode, srcreg);
11742 dstmem = change_address (dst, QImode, destreg);
11743 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11744 emit_label (label);
11745 LABEL_NUSES (label) = 1;
11749 return 1;
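/* Summary of the strategy above: a bare "rep movsb" when optimizing for size
   with an unknown or misaligned count; "rep movs{l,q}" (or a fully unrolled
   sequence) plus a short tail of mov instructions for constant counts that
   are small or sufficiently aligned; and the general path that aligns the
   destination first, copies the bulk with "rep movsl" and then copies the
   remainder.  */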
11752 /* Expand string clear operation (bzero). Use i386 string operations when
11753 profitable. expand_movmem contains similar code. */
11755 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
11757 rtx destreg, zeroreg, countreg, destexp;
11758 enum machine_mode counter_mode;
11759 HOST_WIDE_INT align = 0;
11760 unsigned HOST_WIDE_INT count = 0;
11762 if (GET_CODE (align_exp) == CONST_INT)
11763 align = INTVAL (align_exp);
11765 /* Can't use any of this if the user has appropriated esi. */
11766 if (global_regs[4])
11767 return 0;
11769 /* This simple hack avoids all inlining code and simplifies code below. */
11770 if (!TARGET_ALIGN_STRINGOPS)
11771 align = 32;
11773 if (GET_CODE (count_exp) == CONST_INT)
11775 count = INTVAL (count_exp);
11776 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11777 return 0;
11779 /* Figure out the proper mode for the counter. For 32 bits it is always
11780 SImode; for 64 bits use SImode when possible, otherwise DImode.
11781 Set COUNT to the number of bytes cleared when known at compile time. */
11782 if (!TARGET_64BIT
11783 || GET_MODE (count_exp) == SImode
11784 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
11785 counter_mode = SImode;
11786 else
11787 counter_mode = DImode;
11789 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11790 if (destreg != XEXP (dst, 0))
11791 dst = replace_equiv_address_nv (dst, destreg);
11794 /* When optimizing for size, emit a simple rep ; stosb instruction for
11795 counts not divisible by 4. The movl $N, %ecx; rep; stosb
11796 sequence is 7 bytes long, so if optimizing for size and the count is
11797 small enough that a few stosl, stosw and stosb instructions without
11798 rep are shorter, fall through into the next if. */
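/* Editor's note (illustrative, not in the original source): the size test
   below approximates the inline cost as (count >> 2) one-byte stosl insns
   plus (count & 3) bytes for the stosw/stosb tail, and compares it with the
   7-byte movl $count, %ecx; rep; stosb sequence.  For example, count = 11
   costs 2 + 3 = 5 bytes inline, so the rep form is skipped, while count = 39
   costs 9 + 3 = 12 bytes, so rep; stosb wins when optimizing for size.  */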
11800 if ((!optimize || optimize_size)
11801 && (count == 0
11802 || ((count & 0x03)
11803 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
11805 emit_insn (gen_cld ());
11807 countreg = ix86_zero_extend_to_Pmode (count_exp);
11808 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11809 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11810 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11812 else if (count != 0
11813 && (align >= 8
11814 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11815 || optimize_size || count < (unsigned int) 64))
11817 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11818 unsigned HOST_WIDE_INT offset = 0;
11820 emit_insn (gen_cld ());
11822 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11823 if (count & ~(size - 1))
11825 unsigned HOST_WIDE_INT repcount;
11826 unsigned int max_nonrep;
11828 repcount = count >> (size == 4 ? 2 : 3);
11829 if (!TARGET_64BIT)
11830 repcount &= 0x3fffffff;
11832 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
11833 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
11834 bytes. In both cases the latter seems to be faster for small
11835 values of N. */
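/* Editor's note (illustrative, not in the original source): stosl is a
   single 0xAB opcode byte and stosq adds a REX.W prefix, so the unrolled
   sequence costs repcount or 2*repcount bytes versus 7 or 8 bytes for the
   movl $N, %ecx; rep; stos form.  E.g. with size == 4 and count == 20,
   repcount == 5 <= 7, so five discrete stosl (5 bytes) are emitted; with
   count == 64, repcount == 16 exceeds the threshold and rep; stosl is used.  */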
11836 max_nonrep = size == 4 ? 7 : 4;
11837 if (!optimize_size)
11838 switch (ix86_tune)
11840 case PROCESSOR_PENTIUM4:
11841 case PROCESSOR_NOCONA:
11842 max_nonrep = 3;
11843 break;
11844 default:
11845 break;
11848 if (repcount <= max_nonrep)
11849 while (repcount-- > 0)
11851 rtx mem = adjust_automodify_address_nv (dst,
11852 GET_MODE (zeroreg),
11853 destreg, offset);
11854 emit_insn (gen_strset (destreg, mem, zeroreg));
11855 offset += size;
11857 else
11859 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
11860 countreg = ix86_zero_extend_to_Pmode (countreg);
11861 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11862 GEN_INT (size == 4 ? 2 : 3));
11863 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11864 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
11865 destexp));
11866 offset = count & ~(size - 1);
11869 if (size == 8 && (count & 0x04))
11871 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11872 offset);
11873 emit_insn (gen_strset (destreg, mem,
11874 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11875 offset += 4;
11877 if (count & 0x02)
11879 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11880 offset);
11881 emit_insn (gen_strset (destreg, mem,
11882 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11883 offset += 2;
11885 if (count & 0x01)
11887 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11888 offset);
11889 emit_insn (gen_strset (destreg, mem,
11890 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11893 else
11895 rtx countreg2;
11896 rtx label = NULL;
11897 /* Compute desired alignment of the string operation. */
11898 int desired_alignment = (TARGET_PENTIUMPRO
11899 && (count == 0 || count >= (unsigned int) 260)
11900 ? 8 : UNITS_PER_WORD);
11902 /* In case we don't know anything about the alignment, default to the
11903 library version, since it is usually equally fast and results in
11904 shorter code.
11906 Also emit a call when we know that the count is large and the call
11907 overhead will not be important. */
11908 if (!TARGET_INLINE_ALL_STRINGOPS
11909 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11910 return 0;
11912 if (TARGET_SINGLE_STRINGOP)
11913 emit_insn (gen_cld ());
11915 countreg2 = gen_reg_rtx (Pmode);
11916 countreg = copy_to_mode_reg (counter_mode, count_exp);
11917 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11918 /* Get rid of MEM_OFFSET, it won't be accurate. */
11919 dst = change_address (dst, BLKmode, destreg);
11921 if (count == 0 && align < desired_alignment)
11923 label = gen_label_rtx ();
11924 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11925 LEU, 0, counter_mode, 1, label);
11927 if (align <= 1)
11929 rtx label = ix86_expand_aligntest (destreg, 1);
11930 emit_insn (gen_strset (destreg, dst,
11931 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11932 ix86_adjust_counter (countreg, 1);
11933 emit_label (label);
11934 LABEL_NUSES (label) = 1;
11936 if (align <= 2)
11938 rtx label = ix86_expand_aligntest (destreg, 2);
11939 emit_insn (gen_strset (destreg, dst,
11940 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11941 ix86_adjust_counter (countreg, 2);
11942 emit_label (label);
11943 LABEL_NUSES (label) = 1;
11945 if (align <= 4 && desired_alignment > 4)
11947 rtx label = ix86_expand_aligntest (destreg, 4);
11948 emit_insn (gen_strset (destreg, dst,
11949 (TARGET_64BIT
11950 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11951 : zeroreg)));
11952 ix86_adjust_counter (countreg, 4);
11953 emit_label (label);
11954 LABEL_NUSES (label) = 1;
11957 if (label && desired_alignment > 4 && !TARGET_64BIT)
11959 emit_label (label);
11960 LABEL_NUSES (label) = 1;
11961 label = NULL_RTX;
11964 if (!TARGET_SINGLE_STRINGOP)
11965 emit_insn (gen_cld ());
11966 if (TARGET_64BIT)
11968 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11969 GEN_INT (3)));
11970 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11972 else
11974 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11975 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11977 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11978 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11980 if (label)
11982 emit_label (label);
11983 LABEL_NUSES (label) = 1;
11986 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11987 emit_insn (gen_strset (destreg, dst,
11988 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11989 if (TARGET_64BIT && (align <= 4 || count == 0))
11991 rtx label = ix86_expand_aligntest (countreg, 4);
11992 emit_insn (gen_strset (destreg, dst,
11993 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11994 emit_label (label);
11995 LABEL_NUSES (label) = 1;
11997 if (align > 2 && count != 0 && (count & 2))
11998 emit_insn (gen_strset (destreg, dst,
11999 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12000 if (align <= 2 || count == 0)
12002 rtx label = ix86_expand_aligntest (countreg, 2);
12003 emit_insn (gen_strset (destreg, dst,
12004 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12005 emit_label (label);
12006 LABEL_NUSES (label) = 1;
12008 if (align > 1 && count != 0 && (count & 1))
12009 emit_insn (gen_strset (destreg, dst,
12010 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12011 if (align <= 1 || count == 0)
12013 rtx label = ix86_expand_aligntest (countreg, 1);
12014 emit_insn (gen_strset (destreg, dst,
12015 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12016 emit_label (label);
12017 LABEL_NUSES (label) = 1;
12020 return 1;
12023 /* Expand strlen. */
12025 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
12027 rtx addr, scratch1, scratch2, scratch3, scratch4;
12029 /* The generic case of the strlen expander is long. Avoid
12030 expanding it unless TARGET_INLINE_ALL_STRINGOPS. */
12032 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
12033 && !TARGET_INLINE_ALL_STRINGOPS
12034 && !optimize_size
12035 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
12036 return 0;
12038 addr = force_reg (Pmode, XEXP (src, 0));
12039 scratch1 = gen_reg_rtx (Pmode);
12041 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
12042 && !optimize_size)
12044 /* It seems that some optimizers do not combine a call like
12045 foo(strlen(bar), strlen(bar));
12046 when the move and the subtraction are done here. They do calculate
12047 the length just once when these instructions are emitted inside
12048 output_strlen_unroll(). But since &bar[strlen(bar)] is
12049 often used, and this uses one fewer register for the lifetime of
12050 output_strlen_unroll(), this approach is better. */
12052 emit_move_insn (out, addr);
12054 ix86_expand_strlensi_unroll_1 (out, src, align);
12056 /* strlensi_unroll_1 returns the address of the zero at the end of
12057 the string, like memchr(), so compute the length by subtracting
12058 the start address. */
12059 if (TARGET_64BIT)
12060 emit_insn (gen_subdi3 (out, out, addr));
12061 else
12062 emit_insn (gen_subsi3 (out, out, addr));
12064 else
12066 rtx unspec;
12067 scratch2 = gen_reg_rtx (Pmode);
12068 scratch3 = gen_reg_rtx (Pmode);
12069 scratch4 = force_reg (Pmode, constm1_rtx);
12071 emit_move_insn (scratch3, addr);
12072 eoschar = force_reg (QImode, eoschar);
12074 emit_insn (gen_cld ());
12075 src = replace_equiv_address_nv (src, scratch3);
12077 /* If .md starts supporting :P, this can be done in .md. */
12078 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
12079 scratch4), UNSPEC_SCAS);
12080 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
12081 if (TARGET_64BIT)
12083 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
12084 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
12086 else
12088 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
12089 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
12092 return 1;
12095 /* Expand the appropriate insns for doing strlen if not just doing
12096 repnz; scasb
12098 out = result, initialized with the start address
12099 align_rtx = alignment of the address.
12100 scratch = scratch register, initialized with the start address when
12101 not aligned, otherwise undefined
12103 This is just the body. It needs the initializations mentioned above and
12104 some address computing at the end. These things are done in i386.md. */
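/* Editor's summary of the expansion below (illustrative, not in the
   original source): (1) while the pointer is not 4-byte aligned, test up to
   three leading bytes individually and jump to end_0_label on a NUL;
   (2) loop over the string a word at a time, using the
   (x - 0x01010101) & ~x & 0x80808080 test to detect a zero byte; (3) once
   the word containing the NUL is found, use the 0x8080 mask (with cmov or a
   branch) to pick the halfword, then a carry trick to locate the exact byte.  */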
12106 static void
12107 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
12109 int align;
12110 rtx tmp;
12111 rtx align_2_label = NULL_RTX;
12112 rtx align_3_label = NULL_RTX;
12113 rtx align_4_label = gen_label_rtx ();
12114 rtx end_0_label = gen_label_rtx ();
12115 rtx mem;
12116 rtx tmpreg = gen_reg_rtx (SImode);
12117 rtx scratch = gen_reg_rtx (SImode);
12118 rtx cmp;
12120 align = 0;
12121 if (GET_CODE (align_rtx) == CONST_INT)
12122 align = INTVAL (align_rtx);
12124 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
12126 /* Is there a known alignment and is it less than 4? */
12127 if (align < 4)
12129 rtx scratch1 = gen_reg_rtx (Pmode);
12130 emit_move_insn (scratch1, out);
12131 /* Is there a known alignment and is it not 2? */
12132 if (align != 2)
12134 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
12135 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
12137 /* Keep just the two low bits (the address modulo 4). */
12138 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
12139 NULL_RTX, 0, OPTAB_WIDEN);
12141 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
12142 Pmode, 1, align_4_label);
12143 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
12144 Pmode, 1, align_2_label);
12145 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
12146 Pmode, 1, align_3_label);
12148 else
12150 /* Since the alignment is 2, we have to check 2 or 0 bytes;
12151 check whether the pointer is aligned to a 4-byte boundary. */
12153 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
12154 NULL_RTX, 0, OPTAB_WIDEN);
12156 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
12157 Pmode, 1, align_4_label);
12160 mem = change_address (src, QImode, out);
12162 /* Now compare the bytes. */
12164 /* Compare the first n unaligned bytes one byte at a time. */
12165 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
12166 QImode, 1, end_0_label);
12168 /* Increment the address. */
12169 if (TARGET_64BIT)
12170 emit_insn (gen_adddi3 (out, out, const1_rtx));
12171 else
12172 emit_insn (gen_addsi3 (out, out, const1_rtx));
12174 /* Not needed with an alignment of 2 */
12175 if (align != 2)
12177 emit_label (align_2_label);
12179 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
12180 end_0_label);
12182 if (TARGET_64BIT)
12183 emit_insn (gen_adddi3 (out, out, const1_rtx));
12184 else
12185 emit_insn (gen_addsi3 (out, out, const1_rtx));
12187 emit_label (align_3_label);
12190 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
12191 end_0_label);
12193 if (TARGET_64BIT)
12194 emit_insn (gen_adddi3 (out, out, const1_rtx));
12195 else
12196 emit_insn (gen_addsi3 (out, out, const1_rtx));
12199 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
12200 align this loop; doing so only makes the program larger and does not
12201 speed it up. */
12202 emit_label (align_4_label);
12204 mem = change_address (src, SImode, out);
12205 emit_move_insn (scratch, mem);
12206 if (TARGET_64BIT)
12207 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
12208 else
12209 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
12211 /* This formula yields a nonzero result iff one of the bytes is zero.
12212 This saves three branches inside the loop and many cycles. */
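/* Editor's note (illustrative, not in the original source): written as
   plain C for a 32-bit word, the test emitted below is

     has_zero_byte = ((x - 0x01010101u) & ~x & 0x80808080u) != 0;

   Worked example: x = 0x11003344 (a zero in the second-highest byte) gives
   x - 0x01010101 = 0x0FFF3243, ~x = 0xEEFFCCBB, and the conjunction masked
   with 0x80808080 is 0x00800000, i.e. nonzero; for x = 0x11223344, which has
   no zero byte, the result is 0.  */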
12214 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
12215 emit_insn (gen_one_cmplsi2 (scratch, scratch));
12216 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
12217 emit_insn (gen_andsi3 (tmpreg, tmpreg,
12218 gen_int_mode (0x80808080, SImode)));
12219 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
12220 align_4_label);
12222 if (TARGET_CMOVE)
12224 rtx reg = gen_reg_rtx (SImode);
12225 rtx reg2 = gen_reg_rtx (Pmode);
12226 emit_move_insn (reg, tmpreg);
12227 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
12229 /* If zero is not in the first two bytes, move two bytes forward. */
12230 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
12231 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
12232 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
12233 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
12234 gen_rtx_IF_THEN_ELSE (SImode, tmp,
12235 reg,
12236 tmpreg)));
12237 /* Emit lea manually to avoid clobbering of flags. */
12238 emit_insn (gen_rtx_SET (SImode, reg2,
12239 gen_rtx_PLUS (Pmode, out, const2_rtx)));
12241 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
12242 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
12243 emit_insn (gen_rtx_SET (VOIDmode, out,
12244 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
12245 reg2,
12246 out)));
12249 else
12251 rtx end_2_label = gen_label_rtx ();
12252 /* Is zero in the first two bytes? */
12254 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
12255 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
12256 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
12257 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12258 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
12259 pc_rtx);
12260 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
12261 JUMP_LABEL (tmp) = end_2_label;
12263 /* Not in the first two. Move two bytes forward. */
12264 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
12265 if (TARGET_64BIT)
12266 emit_insn (gen_adddi3 (out, out, const2_rtx));
12267 else
12268 emit_insn (gen_addsi3 (out, out, const2_rtx));
12270 emit_label (end_2_label);
12274 /* Avoid a branch in fixing up the final byte. */
12275 tmpreg = gen_lowpart (QImode, tmpreg);
12276 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
12277 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
12278 if (TARGET_64BIT)
12279 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
12280 else
12281 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
12283 emit_label (end_0_label);
12286 void
12287 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
12288 rtx callarg2 ATTRIBUTE_UNUSED,
12289 rtx pop, int sibcall)
12291 rtx use = NULL, call;
12293 if (pop == const0_rtx)
12294 pop = NULL;
12295 gcc_assert (!TARGET_64BIT || !pop);
12297 #if TARGET_MACHO
12298 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
12299 fnaddr = machopic_indirect_call_target (fnaddr);
12300 #else
12301 /* Static functions and indirect calls don't need the pic register. */
12302 if (! TARGET_64BIT && flag_pic
12303 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
12304 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
12305 use_reg (&use, pic_offset_table_rtx);
12307 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
12309 rtx al = gen_rtx_REG (QImode, 0);
12310 emit_move_insn (al, callarg2);
12311 use_reg (&use, al);
12313 #endif /* TARGET_MACHO */
12315 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
12317 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
12318 fnaddr = gen_rtx_MEM (QImode, fnaddr);
12320 if (sibcall && TARGET_64BIT
12321 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
12323 rtx addr;
12324 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
12325 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
12326 emit_move_insn (fnaddr, addr);
12327 fnaddr = gen_rtx_MEM (QImode, fnaddr);
12330 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
12331 if (retval)
12332 call = gen_rtx_SET (VOIDmode, retval, call);
12333 if (pop)
12335 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
12336 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
12337 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
12340 call = emit_call_insn (call);
12341 if (use)
12342 CALL_INSN_FUNCTION_USAGE (call) = use;
12346 /* Clear stack slot assignments remembered from previous functions.
12347 This is called from INIT_EXPANDERS once before RTL is emitted for each
12348 function. */
12350 static struct machine_function *
12351 ix86_init_machine_status (void)
12353 struct machine_function *f;
12355 f = ggc_alloc_cleared (sizeof (struct machine_function));
12356 f->use_fast_prologue_epilogue_nregs = -1;
12358 return f;
12361 /* Return a MEM corresponding to a stack slot with mode MODE.
12362 Allocate a new slot if necessary.
12364 The RTL for a function can have several slots available: N is
12365 which slot to use. */
12368 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
12370 struct stack_local_entry *s;
12372 gcc_assert (n < MAX_386_STACK_LOCALS);
12374 for (s = ix86_stack_locals; s; s = s->next)
12375 if (s->mode == mode && s->n == n)
12376 return s->rtl;
12378 s = (struct stack_local_entry *)
12379 ggc_alloc (sizeof (struct stack_local_entry));
12380 s->n = n;
12381 s->mode = mode;
12382 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
12384 s->next = ix86_stack_locals;
12385 ix86_stack_locals = s;
12386 return s->rtl;
12389 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12391 static GTY(()) rtx ix86_tls_symbol;
12393 ix86_tls_get_addr (void)
12396 if (!ix86_tls_symbol)
12398 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
12399 (TARGET_GNU_TLS && !TARGET_64BIT)
12400 ? "___tls_get_addr"
12401 : "__tls_get_addr");
12404 return ix86_tls_symbol;
12407 /* Calculate the length of the memory address in the instruction
12408 encoding. Does not include the one-byte modrm, opcode, or prefix. */
12411 memory_address_length (rtx addr)
12413 struct ix86_address parts;
12414 rtx base, index, disp;
12415 int len;
12416 int ok;
12418 if (GET_CODE (addr) == PRE_DEC
12419 || GET_CODE (addr) == POST_INC
12420 || GET_CODE (addr) == PRE_MODIFY
12421 || GET_CODE (addr) == POST_MODIFY)
12422 return 0;
12424 ok = ix86_decompose_address (addr, &parts);
12425 gcc_assert (ok);
12427 if (parts.base && GET_CODE (parts.base) == SUBREG)
12428 parts.base = SUBREG_REG (parts.base);
12429 if (parts.index && GET_CODE (parts.index) == SUBREG)
12430 parts.index = SUBREG_REG (parts.index);
12432 base = parts.base;
12433 index = parts.index;
12434 disp = parts.disp;
12435 len = 0;
12437 /* Rule of thumb:
12438 - esp as the base always wants an index,
12439 - ebp as the base always wants a displacement. */
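/* Editor's note (illustrative, not in the original source): in 32-bit
   encoding terms, (%ebx) needs only the one-byte modrm (extra length 0);
   (%esp) additionally needs a SIB byte (extra length 1); (%ebp) has no
   displacement-free form, so a zero disp8 is used (extra length 1);
   8(%ebx) adds a disp8 (1) whereas 0x12345678(%ebx) or a bare absolute
   address adds a disp32 (4).  The code below reproduces these cases.  */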
12441 /* Register Indirect. */
12442 if (base && !index && !disp)
12444 /* esp (for its index) and ebp (for its displacement) need
12445 the two-byte modrm form. */
12446 if (addr == stack_pointer_rtx
12447 || addr == arg_pointer_rtx
12448 || addr == frame_pointer_rtx
12449 || addr == hard_frame_pointer_rtx)
12450 len = 1;
12453 /* Direct Addressing. */
12454 else if (disp && !base && !index)
12455 len = 4;
12457 else
12459 /* Find the length of the displacement constant. */
12460 if (disp)
12462 if (GET_CODE (disp) == CONST_INT
12463 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
12464 && base)
12465 len = 1;
12466 else
12467 len = 4;
12469 /* ebp always wants a displacement. */
12470 else if (base == hard_frame_pointer_rtx)
12471 len = 1;
12473 /* An index requires the two-byte modrm form.... */
12474 if (index
12475 /* ...like esp, which always wants an index. */
12476 || base == stack_pointer_rtx
12477 || base == arg_pointer_rtx
12478 || base == frame_pointer_rtx)
12479 len += 1;
12482 return len;
12485 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
12486 is set, expect that the insn has an 8-bit immediate alternative. */
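/* Editor's note (illustrative, not in the original source): the 'K'
   constraint accepts signed 8-bit constants, so e.g. addl $3, %eax can use
   the sign-extended imm8 form (immediate length 1), while addl $300, %eax
   needs a full imm32 (length 4).  DImode immediates are likewise at most
   32 bits, sign-extended, which is why MODE_DI adds 4 below.  */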
12488 ix86_attr_length_immediate_default (rtx insn, int shortform)
12490 int len = 0;
12491 int i;
12492 extract_insn_cached (insn);
12493 for (i = recog_data.n_operands - 1; i >= 0; --i)
12494 if (CONSTANT_P (recog_data.operand[i]))
12496 gcc_assert (!len);
12497 if (shortform
12498 && GET_CODE (recog_data.operand[i]) == CONST_INT
12499 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
12500 len = 1;
12501 else
12503 switch (get_attr_mode (insn))
12505 case MODE_QI:
12506 len+=1;
12507 break;
12508 case MODE_HI:
12509 len+=2;
12510 break;
12511 case MODE_SI:
12512 len+=4;
12513 break;
12514 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
12515 case MODE_DI:
12516 len+=4;
12517 break;
12518 default:
12519 fatal_insn ("unknown insn mode", insn);
12523 return len;
12525 /* Compute the default value for the "length_address" attribute. */
12527 ix86_attr_length_address_default (rtx insn)
12529 int i;
12531 if (get_attr_type (insn) == TYPE_LEA)
12533 rtx set = PATTERN (insn);
12535 if (GET_CODE (set) == PARALLEL)
12536 set = XVECEXP (set, 0, 0);
12538 gcc_assert (GET_CODE (set) == SET);
12540 return memory_address_length (SET_SRC (set));
12543 extract_insn_cached (insn);
12544 for (i = recog_data.n_operands - 1; i >= 0; --i)
12545 if (GET_CODE (recog_data.operand[i]) == MEM)
12547 return memory_address_length (XEXP (recog_data.operand[i], 0));
12548 break;
12550 return 0;
12553 /* Return the maximum number of instructions a cpu can issue. */
12555 static int
12556 ix86_issue_rate (void)
12558 switch (ix86_tune)
12560 case PROCESSOR_PENTIUM:
12561 case PROCESSOR_K6:
12562 return 2;
12564 case PROCESSOR_PENTIUMPRO:
12565 case PROCESSOR_PENTIUM4:
12566 case PROCESSOR_ATHLON:
12567 case PROCESSOR_K8:
12568 case PROCESSOR_NOCONA:
12569 return 3;
12571 default:
12572 return 1;
12576 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags set
12577 by DEP_INSN and nothing else set by DEP_INSN. */
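/* Editor's note (illustrative, not in the original source): the typical
   case is cmpl %eax, %ebx as DEP_INSN, which sets only the flags, followed
   by jne or sete as INSN, which reads only those flags; on the Pentium such
   a pair issues together, so ix86_adjust_cost sets the dependence cost to 0.  */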
12579 static int
12580 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12582 rtx set, set2;
12584 /* Simplify the test for uninteresting insns. */
12585 if (insn_type != TYPE_SETCC
12586 && insn_type != TYPE_ICMOV
12587 && insn_type != TYPE_FCMOV
12588 && insn_type != TYPE_IBR)
12589 return 0;
12591 if ((set = single_set (dep_insn)) != 0)
12593 set = SET_DEST (set);
12594 set2 = NULL_RTX;
12596 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12597 && XVECLEN (PATTERN (dep_insn), 0) == 2
12598 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12599 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12601 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12602 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
12604 else
12605 return 0;
12607 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12608 return 0;
12610 /* This test is true if the dependent insn reads the flags but
12611 not any other potentially set register. */
12612 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12613 return 0;
12615 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12616 return 0;
12618 return 1;
12621 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12622 address with operands set by DEP_INSN. */
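/* Editor's note (illustrative, not in the original source): the classic
   AGI case on the original Pentium is addl $4, %esi immediately followed by
   movl (%esi), %eax; the load's address generation needs the freshly
   written %esi and costs an extra cycle.  For TYPE_LEA the address to check
   is the SET_SRC of the pattern rather than a MEM operand.  */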
12624 static int
12625 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12627 rtx addr;
12629 if (insn_type == TYPE_LEA
12630 && TARGET_PENTIUM)
12632 addr = PATTERN (insn);
12634 if (GET_CODE (addr) == PARALLEL)
12635 addr = XVECEXP (addr, 0, 0);
12637 gcc_assert (GET_CODE (addr) == SET);
12639 addr = SET_SRC (addr);
12641 else
12643 int i;
12644 extract_insn_cached (insn);
12645 for (i = recog_data.n_operands - 1; i >= 0; --i)
12646 if (GET_CODE (recog_data.operand[i]) == MEM)
12648 addr = XEXP (recog_data.operand[i], 0);
12649 goto found;
12651 return 0;
12652 found:;
12655 return modified_in_p (addr, dep_insn);
12658 static int
12659 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12661 enum attr_type insn_type, dep_insn_type;
12662 enum attr_memory memory;
12663 rtx set, set2;
12664 int dep_insn_code_number;
12666 /* Anti and output dependencies have zero cost on all CPUs. */
12667 if (REG_NOTE_KIND (link) != 0)
12668 return 0;
12670 dep_insn_code_number = recog_memoized (dep_insn);
12672 /* If we can't recognize the insns, we can't really do anything. */
12673 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12674 return cost;
12676 insn_type = get_attr_type (insn);
12677 dep_insn_type = get_attr_type (dep_insn);
12679 switch (ix86_tune)
12681 case PROCESSOR_PENTIUM:
12682 /* Address Generation Interlock adds a cycle of latency. */
12683 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12684 cost += 1;
12686 /* ??? Compares pair with jump/setcc. */
12687 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12688 cost = 0;
12690 /* Floating point stores require the value to be ready one cycle earlier. */
12691 if (insn_type == TYPE_FMOV
12692 && get_attr_memory (insn) == MEMORY_STORE
12693 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12694 cost += 1;
12695 break;
12697 case PROCESSOR_PENTIUMPRO:
12698 memory = get_attr_memory (insn);
12700 /* INT->FP conversion is expensive. */
12701 if (get_attr_fp_int_src (dep_insn))
12702 cost += 5;
12704 /* There is one extra cycle of latency between an FP op and a store. */
12705 if (insn_type == TYPE_FMOV
12706 && (set = single_set (dep_insn)) != NULL_RTX
12707 && (set2 = single_set (insn)) != NULL_RTX
12708 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12709 && GET_CODE (SET_DEST (set2)) == MEM)
12710 cost += 1;
12712 /* Model the ability of the reorder buffer to hide the latency of a load
12713 by executing it in parallel with the previous instruction when that
12714 instruction is not needed to compute the address. */
12715 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12716 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12718 /* Claim that moves take one cycle, as the core can issue one load
12719 at a time and the next load can start a cycle later. */
12720 if (dep_insn_type == TYPE_IMOV
12721 || dep_insn_type == TYPE_FMOV)
12722 cost = 1;
12723 else if (cost > 1)
12724 cost--;
12726 break;
12728 case PROCESSOR_K6:
12729 memory = get_attr_memory (insn);
12731 /* The esp dependency is resolved before the instruction is really
12732 finished. */
12733 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12734 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12735 return 1;
12737 /* INT->FP conversion is expensive. */
12738 if (get_attr_fp_int_src (dep_insn))
12739 cost += 5;
12741 /* Model the ability of the reorder buffer to hide the latency of a load
12742 by executing it in parallel with the previous instruction when that
12743 instruction is not needed to compute the address. */
12744 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12745 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12747 /* Claim that moves take one cycle, as the core can issue one load
12748 at a time and the next load can start a cycle later. */
12749 if (dep_insn_type == TYPE_IMOV
12750 || dep_insn_type == TYPE_FMOV)
12751 cost = 1;
12752 else if (cost > 2)
12753 cost -= 2;
12754 else
12755 cost = 1;
12757 break;
12759 case PROCESSOR_ATHLON:
12760 case PROCESSOR_K8:
12761 memory = get_attr_memory (insn);
12763 /* Model the ability of the reorder buffer to hide the latency of a load
12764 by executing it in parallel with the previous instruction when that
12765 instruction is not needed to compute the address. */
12766 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12767 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12769 enum attr_unit unit = get_attr_unit (insn);
12770 int loadcost = 3;
12772 /* Because of the difference between the lengths of the integer and
12773 floating-point unit pipeline preparation stages, the memory operands
12774 for floating point are cheaper.
12776 ??? For Athlon the difference is most probably 2. */
12777 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12778 loadcost = 3;
12779 else
12780 loadcost = TARGET_ATHLON ? 2 : 0;
12782 if (cost >= loadcost)
12783 cost -= loadcost;
12784 else
12785 cost = 0;
12788 default:
12789 break;
12792 return cost;
12795 /* How many alternative schedules to try. This should be as wide as the
12796 scheduling freedom in the DFA, but no wider. Making this value too
12797 large results in extra work for the scheduler. */
12799 static int
12800 ia32_multipass_dfa_lookahead (void)
12802 if (ix86_tune == PROCESSOR_PENTIUM)
12803 return 2;
12805 if (ix86_tune == PROCESSOR_PENTIUMPRO
12806 || ix86_tune == PROCESSOR_K6)
12807 return 1;
12809 else
12810 return 0;
12814 /* Compute the alignment given to a constant that is being placed in memory.
12815 EXP is the constant and ALIGN is the alignment that the object would
12816 ordinarily have.
12817 The value of this function is used instead of that alignment to align
12818 the object. */
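/* Editor's note (illustrative, not in the original source): for example a
   DFmode REAL_CST is given 64-bit alignment even though the ia32 ABI only
   requires 32 bits for double, and string constants of length 31 or more
   are word-aligned, both of which make subsequent accesses and block
   copies cheaper.  */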
12821 ix86_constant_alignment (tree exp, int align)
12823 if (TREE_CODE (exp) == REAL_CST)
12825 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12826 return 64;
12827 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12828 return 128;
12830 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12831 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12832 return BITS_PER_WORD;
12834 return align;
12837 /* Compute the alignment for a static variable.
12838 TYPE is the data type, and ALIGN is the alignment that
12839 the object would ordinarily have. The value of this function is used
12840 instead of that alignment to align the object. */
12843 ix86_data_alignment (tree type, int align)
12845 if (AGGREGATE_TYPE_P (type)
12846 && TYPE_SIZE (type)
12847 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12848 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12849 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12850 return 256;
12852 /* The x86-64 ABI requires arrays larger than 16 bytes to be aligned
12853 to a 16-byte boundary. */
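/* Editor's note (illustrative, not in the original source): under the
   64-bit rule below, e.g. a file-scope char buf[20] (160 bits) is raised to
   128-bit alignment, while char buf[8] keeps its natural alignment; arrays
   of 32 bytes or more are already handled by the 256-bit rule above.  */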
12854 if (TARGET_64BIT)
12856 if (AGGREGATE_TYPE_P (type)
12857 && TYPE_SIZE (type)
12858 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12859 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12860 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12861 return 128;
12864 if (TREE_CODE (type) == ARRAY_TYPE)
12866 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12867 return 64;
12868 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12869 return 128;
12871 else if (TREE_CODE (type) == COMPLEX_TYPE)
12874 if (TYPE_MODE (type) == DCmode && align < 64)
12875 return 64;
12876 if (TYPE_MODE (type) == XCmode && align < 128)
12877 return 128;
12879 else if ((TREE_CODE (type) == RECORD_TYPE
12880 || TREE_CODE (type) == UNION_TYPE
12881 || TREE_CODE (type) == QUAL_UNION_TYPE)
12882 && TYPE_FIELDS (type))
12884 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12885 return 64;
12886 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12887 return 128;
12889 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12890 || TREE_CODE (type) == INTEGER_TYPE)
12892 if (TYPE_MODE (type) == DFmode && align < 64)
12893 return 64;
12894 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12895 return 128;
12898 return align;
12901 /* Compute the alignment for a local variable.
12902 TYPE is the data type, and ALIGN is the alignment that
12903 the object would ordinarily have. The value of this macro is used
12904 instead of that alignment to align the object. */
12907 ix86_local_alignment (tree type, int align)
12909 /* The x86-64 ABI requires arrays larger than 16 bytes to be aligned
12910 to a 16-byte boundary. */
12911 if (TARGET_64BIT)
12913 if (AGGREGATE_TYPE_P (type)
12914 && TYPE_SIZE (type)
12915 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12916 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12917 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12918 return 128;
12920 if (TREE_CODE (type) == ARRAY_TYPE)
12922 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12923 return 64;
12924 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12925 return 128;
12927 else if (TREE_CODE (type) == COMPLEX_TYPE)
12929 if (TYPE_MODE (type) == DCmode && align < 64)
12930 return 64;
12931 if (TYPE_MODE (type) == XCmode && align < 128)
12932 return 128;
12934 else if ((TREE_CODE (type) == RECORD_TYPE
12935 || TREE_CODE (type) == UNION_TYPE
12936 || TREE_CODE (type) == QUAL_UNION_TYPE)
12937 && TYPE_FIELDS (type))
12939 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12940 return 64;
12941 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12942 return 128;
12944 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12945 || TREE_CODE (type) == INTEGER_TYPE)
12948 if (TYPE_MODE (type) == DFmode && align < 64)
12949 return 64;
12950 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12951 return 128;
12953 return align;
12956 /* Emit RTL insns to initialize the variable parts of a trampoline.
12957 FNADDR is an RTX for the address of the function's pure code.
12958 CXT is an RTX for the static chain value for the function. */
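/* Editor's note (illustrative, not in the original source): the 32-bit case
   below lays out a 10-byte trampoline:

     offset 0:  B9 imm32     movl  $CXT, %ecx
     offset 5:  E9 rel32     jmp   FNADDR

   where rel32 = FNADDR - (TRAMP + 10) because the jump is relative to the
   end of the instruction.  The 64-bit case loads %r11 with FNADDR (movl or
   movabs), %r10 with CXT (movabs), and finishes with jmp *%r11 (49 FF E3).  */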
12959 void
12960 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12962 if (!TARGET_64BIT)
12964 /* Compute offset from the end of the jmp to the target function. */
12965 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12966 plus_constant (tramp, 10),
12967 NULL_RTX, 1, OPTAB_DIRECT);
12968 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12969 gen_int_mode (0xb9, QImode));
12970 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12971 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12972 gen_int_mode (0xe9, QImode));
12973 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12975 else
12977 int offset = 0;
12978 /* Try to load the address using the shorter movl instead of movabs.
12979 We may want to support movq for kernel mode, but the kernel does not use
12980 trampolines at the moment. */
12981 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
12983 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12984 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12985 gen_int_mode (0xbb41, HImode));
12986 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12987 gen_lowpart (SImode, fnaddr));
12988 offset += 6;
12990 else
12992 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12993 gen_int_mode (0xbb49, HImode));
12994 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12995 fnaddr);
12996 offset += 10;
12998 /* Load static chain using movabs to r10. */
12999 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13000 gen_int_mode (0xba49, HImode));
13001 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
13002 cxt);
13003 offset += 10;
13004 /* Jump to r11. */
13005 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13006 gen_int_mode (0xff49, HImode));
13007 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
13008 gen_int_mode (0xe3, QImode));
13009 offset += 3;
13010 gcc_assert (offset <= TRAMPOLINE_SIZE);
13013 #ifdef ENABLE_EXECUTE_STACK
13014 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
13015 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
13016 #endif
13019 /* Codes for all the SSE/MMX builtins. */
13020 enum ix86_builtins
13022 IX86_BUILTIN_ADDPS,
13023 IX86_BUILTIN_ADDSS,
13024 IX86_BUILTIN_DIVPS,
13025 IX86_BUILTIN_DIVSS,
13026 IX86_BUILTIN_MULPS,
13027 IX86_BUILTIN_MULSS,
13028 IX86_BUILTIN_SUBPS,
13029 IX86_BUILTIN_SUBSS,
13031 IX86_BUILTIN_CMPEQPS,
13032 IX86_BUILTIN_CMPLTPS,
13033 IX86_BUILTIN_CMPLEPS,
13034 IX86_BUILTIN_CMPGTPS,
13035 IX86_BUILTIN_CMPGEPS,
13036 IX86_BUILTIN_CMPNEQPS,
13037 IX86_BUILTIN_CMPNLTPS,
13038 IX86_BUILTIN_CMPNLEPS,
13039 IX86_BUILTIN_CMPNGTPS,
13040 IX86_BUILTIN_CMPNGEPS,
13041 IX86_BUILTIN_CMPORDPS,
13042 IX86_BUILTIN_CMPUNORDPS,
13043 IX86_BUILTIN_CMPNEPS,
13044 IX86_BUILTIN_CMPEQSS,
13045 IX86_BUILTIN_CMPLTSS,
13046 IX86_BUILTIN_CMPLESS,
13047 IX86_BUILTIN_CMPNEQSS,
13048 IX86_BUILTIN_CMPNLTSS,
13049 IX86_BUILTIN_CMPNLESS,
13050 IX86_BUILTIN_CMPNGTSS,
13051 IX86_BUILTIN_CMPNGESS,
13052 IX86_BUILTIN_CMPORDSS,
13053 IX86_BUILTIN_CMPUNORDSS,
13054 IX86_BUILTIN_CMPNESS,
13056 IX86_BUILTIN_COMIEQSS,
13057 IX86_BUILTIN_COMILTSS,
13058 IX86_BUILTIN_COMILESS,
13059 IX86_BUILTIN_COMIGTSS,
13060 IX86_BUILTIN_COMIGESS,
13061 IX86_BUILTIN_COMINEQSS,
13062 IX86_BUILTIN_UCOMIEQSS,
13063 IX86_BUILTIN_UCOMILTSS,
13064 IX86_BUILTIN_UCOMILESS,
13065 IX86_BUILTIN_UCOMIGTSS,
13066 IX86_BUILTIN_UCOMIGESS,
13067 IX86_BUILTIN_UCOMINEQSS,
13069 IX86_BUILTIN_CVTPI2PS,
13070 IX86_BUILTIN_CVTPS2PI,
13071 IX86_BUILTIN_CVTSI2SS,
13072 IX86_BUILTIN_CVTSI642SS,
13073 IX86_BUILTIN_CVTSS2SI,
13074 IX86_BUILTIN_CVTSS2SI64,
13075 IX86_BUILTIN_CVTTPS2PI,
13076 IX86_BUILTIN_CVTTSS2SI,
13077 IX86_BUILTIN_CVTTSS2SI64,
13079 IX86_BUILTIN_MAXPS,
13080 IX86_BUILTIN_MAXSS,
13081 IX86_BUILTIN_MINPS,
13082 IX86_BUILTIN_MINSS,
13084 IX86_BUILTIN_LOADUPS,
13085 IX86_BUILTIN_STOREUPS,
13086 IX86_BUILTIN_MOVSS,
13088 IX86_BUILTIN_MOVHLPS,
13089 IX86_BUILTIN_MOVLHPS,
13090 IX86_BUILTIN_LOADHPS,
13091 IX86_BUILTIN_LOADLPS,
13092 IX86_BUILTIN_STOREHPS,
13093 IX86_BUILTIN_STORELPS,
13095 IX86_BUILTIN_MASKMOVQ,
13096 IX86_BUILTIN_MOVMSKPS,
13097 IX86_BUILTIN_PMOVMSKB,
13099 IX86_BUILTIN_MOVNTPS,
13100 IX86_BUILTIN_MOVNTQ,
13102 IX86_BUILTIN_LOADDQU,
13103 IX86_BUILTIN_STOREDQU,
13105 IX86_BUILTIN_PACKSSWB,
13106 IX86_BUILTIN_PACKSSDW,
13107 IX86_BUILTIN_PACKUSWB,
13109 IX86_BUILTIN_PADDB,
13110 IX86_BUILTIN_PADDW,
13111 IX86_BUILTIN_PADDD,
13112 IX86_BUILTIN_PADDQ,
13113 IX86_BUILTIN_PADDSB,
13114 IX86_BUILTIN_PADDSW,
13115 IX86_BUILTIN_PADDUSB,
13116 IX86_BUILTIN_PADDUSW,
13117 IX86_BUILTIN_PSUBB,
13118 IX86_BUILTIN_PSUBW,
13119 IX86_BUILTIN_PSUBD,
13120 IX86_BUILTIN_PSUBQ,
13121 IX86_BUILTIN_PSUBSB,
13122 IX86_BUILTIN_PSUBSW,
13123 IX86_BUILTIN_PSUBUSB,
13124 IX86_BUILTIN_PSUBUSW,
13126 IX86_BUILTIN_PAND,
13127 IX86_BUILTIN_PANDN,
13128 IX86_BUILTIN_POR,
13129 IX86_BUILTIN_PXOR,
13131 IX86_BUILTIN_PAVGB,
13132 IX86_BUILTIN_PAVGW,
13134 IX86_BUILTIN_PCMPEQB,
13135 IX86_BUILTIN_PCMPEQW,
13136 IX86_BUILTIN_PCMPEQD,
13137 IX86_BUILTIN_PCMPGTB,
13138 IX86_BUILTIN_PCMPGTW,
13139 IX86_BUILTIN_PCMPGTD,
13141 IX86_BUILTIN_PMADDWD,
13143 IX86_BUILTIN_PMAXSW,
13144 IX86_BUILTIN_PMAXUB,
13145 IX86_BUILTIN_PMINSW,
13146 IX86_BUILTIN_PMINUB,
13148 IX86_BUILTIN_PMULHUW,
13149 IX86_BUILTIN_PMULHW,
13150 IX86_BUILTIN_PMULLW,
13152 IX86_BUILTIN_PSADBW,
13153 IX86_BUILTIN_PSHUFW,
13155 IX86_BUILTIN_PSLLW,
13156 IX86_BUILTIN_PSLLD,
13157 IX86_BUILTIN_PSLLQ,
13158 IX86_BUILTIN_PSRAW,
13159 IX86_BUILTIN_PSRAD,
13160 IX86_BUILTIN_PSRLW,
13161 IX86_BUILTIN_PSRLD,
13162 IX86_BUILTIN_PSRLQ,
13163 IX86_BUILTIN_PSLLWI,
13164 IX86_BUILTIN_PSLLDI,
13165 IX86_BUILTIN_PSLLQI,
13166 IX86_BUILTIN_PSRAWI,
13167 IX86_BUILTIN_PSRADI,
13168 IX86_BUILTIN_PSRLWI,
13169 IX86_BUILTIN_PSRLDI,
13170 IX86_BUILTIN_PSRLQI,
13172 IX86_BUILTIN_PUNPCKHBW,
13173 IX86_BUILTIN_PUNPCKHWD,
13174 IX86_BUILTIN_PUNPCKHDQ,
13175 IX86_BUILTIN_PUNPCKLBW,
13176 IX86_BUILTIN_PUNPCKLWD,
13177 IX86_BUILTIN_PUNPCKLDQ,
13179 IX86_BUILTIN_SHUFPS,
13181 IX86_BUILTIN_RCPPS,
13182 IX86_BUILTIN_RCPSS,
13183 IX86_BUILTIN_RSQRTPS,
13184 IX86_BUILTIN_RSQRTSS,
13185 IX86_BUILTIN_SQRTPS,
13186 IX86_BUILTIN_SQRTSS,
13188 IX86_BUILTIN_UNPCKHPS,
13189 IX86_BUILTIN_UNPCKLPS,
13191 IX86_BUILTIN_ANDPS,
13192 IX86_BUILTIN_ANDNPS,
13193 IX86_BUILTIN_ORPS,
13194 IX86_BUILTIN_XORPS,
13196 IX86_BUILTIN_EMMS,
13197 IX86_BUILTIN_LDMXCSR,
13198 IX86_BUILTIN_STMXCSR,
13199 IX86_BUILTIN_SFENCE,
13201 /* 3DNow! Original */
13202 IX86_BUILTIN_FEMMS,
13203 IX86_BUILTIN_PAVGUSB,
13204 IX86_BUILTIN_PF2ID,
13205 IX86_BUILTIN_PFACC,
13206 IX86_BUILTIN_PFADD,
13207 IX86_BUILTIN_PFCMPEQ,
13208 IX86_BUILTIN_PFCMPGE,
13209 IX86_BUILTIN_PFCMPGT,
13210 IX86_BUILTIN_PFMAX,
13211 IX86_BUILTIN_PFMIN,
13212 IX86_BUILTIN_PFMUL,
13213 IX86_BUILTIN_PFRCP,
13214 IX86_BUILTIN_PFRCPIT1,
13215 IX86_BUILTIN_PFRCPIT2,
13216 IX86_BUILTIN_PFRSQIT1,
13217 IX86_BUILTIN_PFRSQRT,
13218 IX86_BUILTIN_PFSUB,
13219 IX86_BUILTIN_PFSUBR,
13220 IX86_BUILTIN_PI2FD,
13221 IX86_BUILTIN_PMULHRW,
13223 /* 3DNow! Athlon Extensions */
13224 IX86_BUILTIN_PF2IW,
13225 IX86_BUILTIN_PFNACC,
13226 IX86_BUILTIN_PFPNACC,
13227 IX86_BUILTIN_PI2FW,
13228 IX86_BUILTIN_PSWAPDSI,
13229 IX86_BUILTIN_PSWAPDSF,
13231 /* SSE2 */
13232 IX86_BUILTIN_ADDPD,
13233 IX86_BUILTIN_ADDSD,
13234 IX86_BUILTIN_DIVPD,
13235 IX86_BUILTIN_DIVSD,
13236 IX86_BUILTIN_MULPD,
13237 IX86_BUILTIN_MULSD,
13238 IX86_BUILTIN_SUBPD,
13239 IX86_BUILTIN_SUBSD,
13241 IX86_BUILTIN_CMPEQPD,
13242 IX86_BUILTIN_CMPLTPD,
13243 IX86_BUILTIN_CMPLEPD,
13244 IX86_BUILTIN_CMPGTPD,
13245 IX86_BUILTIN_CMPGEPD,
13246 IX86_BUILTIN_CMPNEQPD,
13247 IX86_BUILTIN_CMPNLTPD,
13248 IX86_BUILTIN_CMPNLEPD,
13249 IX86_BUILTIN_CMPNGTPD,
13250 IX86_BUILTIN_CMPNGEPD,
13251 IX86_BUILTIN_CMPORDPD,
13252 IX86_BUILTIN_CMPUNORDPD,
13253 IX86_BUILTIN_CMPNEPD,
13254 IX86_BUILTIN_CMPEQSD,
13255 IX86_BUILTIN_CMPLTSD,
13256 IX86_BUILTIN_CMPLESD,
13257 IX86_BUILTIN_CMPNEQSD,
13258 IX86_BUILTIN_CMPNLTSD,
13259 IX86_BUILTIN_CMPNLESD,
13260 IX86_BUILTIN_CMPORDSD,
13261 IX86_BUILTIN_CMPUNORDSD,
13262 IX86_BUILTIN_CMPNESD,
13264 IX86_BUILTIN_COMIEQSD,
13265 IX86_BUILTIN_COMILTSD,
13266 IX86_BUILTIN_COMILESD,
13267 IX86_BUILTIN_COMIGTSD,
13268 IX86_BUILTIN_COMIGESD,
13269 IX86_BUILTIN_COMINEQSD,
13270 IX86_BUILTIN_UCOMIEQSD,
13271 IX86_BUILTIN_UCOMILTSD,
13272 IX86_BUILTIN_UCOMILESD,
13273 IX86_BUILTIN_UCOMIGTSD,
13274 IX86_BUILTIN_UCOMIGESD,
13275 IX86_BUILTIN_UCOMINEQSD,
13277 IX86_BUILTIN_MAXPD,
13278 IX86_BUILTIN_MAXSD,
13279 IX86_BUILTIN_MINPD,
13280 IX86_BUILTIN_MINSD,
13282 IX86_BUILTIN_ANDPD,
13283 IX86_BUILTIN_ANDNPD,
13284 IX86_BUILTIN_ORPD,
13285 IX86_BUILTIN_XORPD,
13287 IX86_BUILTIN_SQRTPD,
13288 IX86_BUILTIN_SQRTSD,
13290 IX86_BUILTIN_UNPCKHPD,
13291 IX86_BUILTIN_UNPCKLPD,
13293 IX86_BUILTIN_SHUFPD,
13295 IX86_BUILTIN_LOADUPD,
13296 IX86_BUILTIN_STOREUPD,
13297 IX86_BUILTIN_MOVSD,
13299 IX86_BUILTIN_LOADHPD,
13300 IX86_BUILTIN_LOADLPD,
13302 IX86_BUILTIN_CVTDQ2PD,
13303 IX86_BUILTIN_CVTDQ2PS,
13305 IX86_BUILTIN_CVTPD2DQ,
13306 IX86_BUILTIN_CVTPD2PI,
13307 IX86_BUILTIN_CVTPD2PS,
13308 IX86_BUILTIN_CVTTPD2DQ,
13309 IX86_BUILTIN_CVTTPD2PI,
13311 IX86_BUILTIN_CVTPI2PD,
13312 IX86_BUILTIN_CVTSI2SD,
13313 IX86_BUILTIN_CVTSI642SD,
13315 IX86_BUILTIN_CVTSD2SI,
13316 IX86_BUILTIN_CVTSD2SI64,
13317 IX86_BUILTIN_CVTSD2SS,
13318 IX86_BUILTIN_CVTSS2SD,
13319 IX86_BUILTIN_CVTTSD2SI,
13320 IX86_BUILTIN_CVTTSD2SI64,
13322 IX86_BUILTIN_CVTPS2DQ,
13323 IX86_BUILTIN_CVTPS2PD,
13324 IX86_BUILTIN_CVTTPS2DQ,
13326 IX86_BUILTIN_MOVNTI,
13327 IX86_BUILTIN_MOVNTPD,
13328 IX86_BUILTIN_MOVNTDQ,
13330 /* SSE2 MMX */
13331 IX86_BUILTIN_MASKMOVDQU,
13332 IX86_BUILTIN_MOVMSKPD,
13333 IX86_BUILTIN_PMOVMSKB128,
13335 IX86_BUILTIN_PACKSSWB128,
13336 IX86_BUILTIN_PACKSSDW128,
13337 IX86_BUILTIN_PACKUSWB128,
13339 IX86_BUILTIN_PADDB128,
13340 IX86_BUILTIN_PADDW128,
13341 IX86_BUILTIN_PADDD128,
13342 IX86_BUILTIN_PADDQ128,
13343 IX86_BUILTIN_PADDSB128,
13344 IX86_BUILTIN_PADDSW128,
13345 IX86_BUILTIN_PADDUSB128,
13346 IX86_BUILTIN_PADDUSW128,
13347 IX86_BUILTIN_PSUBB128,
13348 IX86_BUILTIN_PSUBW128,
13349 IX86_BUILTIN_PSUBD128,
13350 IX86_BUILTIN_PSUBQ128,
13351 IX86_BUILTIN_PSUBSB128,
13352 IX86_BUILTIN_PSUBSW128,
13353 IX86_BUILTIN_PSUBUSB128,
13354 IX86_BUILTIN_PSUBUSW128,
13356 IX86_BUILTIN_PAND128,
13357 IX86_BUILTIN_PANDN128,
13358 IX86_BUILTIN_POR128,
13359 IX86_BUILTIN_PXOR128,
13361 IX86_BUILTIN_PAVGB128,
13362 IX86_BUILTIN_PAVGW128,
13364 IX86_BUILTIN_PCMPEQB128,
13365 IX86_BUILTIN_PCMPEQW128,
13366 IX86_BUILTIN_PCMPEQD128,
13367 IX86_BUILTIN_PCMPGTB128,
13368 IX86_BUILTIN_PCMPGTW128,
13369 IX86_BUILTIN_PCMPGTD128,
13371 IX86_BUILTIN_PMADDWD128,
13373 IX86_BUILTIN_PMAXSW128,
13374 IX86_BUILTIN_PMAXUB128,
13375 IX86_BUILTIN_PMINSW128,
13376 IX86_BUILTIN_PMINUB128,
13378 IX86_BUILTIN_PMULUDQ,
13379 IX86_BUILTIN_PMULUDQ128,
13380 IX86_BUILTIN_PMULHUW128,
13381 IX86_BUILTIN_PMULHW128,
13382 IX86_BUILTIN_PMULLW128,
13384 IX86_BUILTIN_PSADBW128,
13385 IX86_BUILTIN_PSHUFHW,
13386 IX86_BUILTIN_PSHUFLW,
13387 IX86_BUILTIN_PSHUFD,
13389 IX86_BUILTIN_PSLLW128,
13390 IX86_BUILTIN_PSLLD128,
13391 IX86_BUILTIN_PSLLQ128,
13392 IX86_BUILTIN_PSRAW128,
13393 IX86_BUILTIN_PSRAD128,
13394 IX86_BUILTIN_PSRLW128,
13395 IX86_BUILTIN_PSRLD128,
13396 IX86_BUILTIN_PSRLQ128,
13397 IX86_BUILTIN_PSLLDQI128,
13398 IX86_BUILTIN_PSLLWI128,
13399 IX86_BUILTIN_PSLLDI128,
13400 IX86_BUILTIN_PSLLQI128,
13401 IX86_BUILTIN_PSRAWI128,
13402 IX86_BUILTIN_PSRADI128,
13403 IX86_BUILTIN_PSRLDQI128,
13404 IX86_BUILTIN_PSRLWI128,
13405 IX86_BUILTIN_PSRLDI128,
13406 IX86_BUILTIN_PSRLQI128,
13408 IX86_BUILTIN_PUNPCKHBW128,
13409 IX86_BUILTIN_PUNPCKHWD128,
13410 IX86_BUILTIN_PUNPCKHDQ128,
13411 IX86_BUILTIN_PUNPCKHQDQ128,
13412 IX86_BUILTIN_PUNPCKLBW128,
13413 IX86_BUILTIN_PUNPCKLWD128,
13414 IX86_BUILTIN_PUNPCKLDQ128,
13415 IX86_BUILTIN_PUNPCKLQDQ128,
13417 IX86_BUILTIN_CLFLUSH,
13418 IX86_BUILTIN_MFENCE,
13419 IX86_BUILTIN_LFENCE,
13421 /* Prescott New Instructions. */
13422 IX86_BUILTIN_ADDSUBPS,
13423 IX86_BUILTIN_HADDPS,
13424 IX86_BUILTIN_HSUBPS,
13425 IX86_BUILTIN_MOVSHDUP,
13426 IX86_BUILTIN_MOVSLDUP,
13427 IX86_BUILTIN_ADDSUBPD,
13428 IX86_BUILTIN_HADDPD,
13429 IX86_BUILTIN_HSUBPD,
13430 IX86_BUILTIN_LDDQU,
13432 IX86_BUILTIN_MONITOR,
13433 IX86_BUILTIN_MWAIT,
13435 IX86_BUILTIN_VEC_INIT_V2SI,
13436 IX86_BUILTIN_VEC_INIT_V4HI,
13437 IX86_BUILTIN_VEC_INIT_V8QI,
13438 IX86_BUILTIN_VEC_EXT_V2DF,
13439 IX86_BUILTIN_VEC_EXT_V2DI,
13440 IX86_BUILTIN_VEC_EXT_V4SF,
13441 IX86_BUILTIN_VEC_EXT_V4SI,
13442 IX86_BUILTIN_VEC_EXT_V8HI,
13443 IX86_BUILTIN_VEC_EXT_V2SI,
13444 IX86_BUILTIN_VEC_EXT_V4HI,
13445 IX86_BUILTIN_VEC_SET_V8HI,
13446 IX86_BUILTIN_VEC_SET_V4HI,
13448 IX86_BUILTIN_MAX
13451 #define def_builtin(MASK, NAME, TYPE, CODE) \
13452 do { \
13453 if ((MASK) & target_flags \
13454 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
13455 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
13456 NULL, NULL_TREE); \
13457 } while (0)
13459 /* Bits for builtin_description.flag. */
13461 /* Set when we don't support the comparison natively, and should
13462 swap the comparison operands in order to support it. */
13463 #define BUILTIN_DESC_SWAP_OPERANDS 1
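/* Editor's note (illustrative, not in the original source): SSE has no
   native "compare greater than" instruction, so __builtin_ia32_cmpgtps is
   listed in bdesc_2arg below with comparison LT and
   BUILTIN_DESC_SWAP_OPERANDS, i.e. a > b is emitted as cmpltps with the
   operands exchanged.  */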
13465 struct builtin_description
13467 const unsigned int mask;
13468 const enum insn_code icode;
13469 const char *const name;
13470 const enum ix86_builtins code;
13471 const enum rtx_code comparison;
13472 const unsigned int flag;
13475 static const struct builtin_description bdesc_comi[] =
13477 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
13478 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
13479 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
13480 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
13481 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
13482 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
13483 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
13484 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
13485 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
13486 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
13487 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
13488 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
13489 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
13490 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
13491 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
13492 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
13493 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
13494 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
13495 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
13496 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
13497 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
13498 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
13499 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
13500 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
13503 static const struct builtin_description bdesc_2arg[] =
13505 /* SSE */
13506 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
13507 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
13508 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
13509 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
13510 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
13511 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
13512 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
13513 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
13515 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
13516 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
13517 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
13518 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
13519 BUILTIN_DESC_SWAP_OPERANDS },
13520 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
13521 BUILTIN_DESC_SWAP_OPERANDS },
13522 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
13523 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
13524 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
13525 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
13526 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
13527 BUILTIN_DESC_SWAP_OPERANDS },
13528 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
13529 BUILTIN_DESC_SWAP_OPERANDS },
13530 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
13531 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
13532 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
13533 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
13534 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
13535 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
13536 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
13537 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
13538 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
13539 BUILTIN_DESC_SWAP_OPERANDS },
13540 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
13541 BUILTIN_DESC_SWAP_OPERANDS },
13542 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
13544 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
13545 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
13546 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
13547 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
13549 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
13550 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
13551 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
13552 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
13554 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
13555 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
13556 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
13557 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
13558 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
13560 /* MMX */
13561 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
13562 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
13563 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
13564 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
13565 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
13566 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
13567 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
13568 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
13570 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
13571 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
13572 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
13573 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
13574 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
13575 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
13576 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
13577 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
13579 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
13580 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
13581 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
13583 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
13584 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
13585 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
13586 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
13588 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
13589 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
13591 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
13592 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
13593 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
13594 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
13595 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
13596 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
13598 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
13599 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
13600 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
13601 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
13603 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
13604 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
13605 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
13606 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
13607 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
13608 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
13610 /* Special. */
13611 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
13612 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
13613 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
13615 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
13616 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
13617 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
13619 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
13620 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
13621 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
13622 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
13623 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
13624 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
13626 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
13627 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
13628 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
13629 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
13630 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
13631 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
13633 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
13634 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
13635 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
13636 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
13638 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
13639 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
13641 /* SSE2 */
13642 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
13643 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
13644 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
13645 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
13646 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
13647 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
13648 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
13649 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
13651 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
13652 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
13653 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
13654 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
13655 BUILTIN_DESC_SWAP_OPERANDS },
13656 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
13657 BUILTIN_DESC_SWAP_OPERANDS },
13658 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
13659 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
13660 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
13661 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
13662 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
13663 BUILTIN_DESC_SWAP_OPERANDS },
13664 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
13665 BUILTIN_DESC_SWAP_OPERANDS },
13666 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
13667 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
13668 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
13669 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
13670 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
13671 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
13672 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
13673 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
13674 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
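/* Note how the "greater" compares above are represented: the SSE compare
   instructions only encode eq/lt/le/unord and their negations, so e.g.
   __builtin_ia32_cmpgtpd is listed as LT with BUILTIN_DESC_SWAP_OPERANDS
   and ix86_expand_sse_compare below emits it with the operands reversed.  */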
13676 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
13677 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
13678 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
13679 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
13681 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
13682 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
13683 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
13684 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
13686 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
13687 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
13688 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
13690 /* SSE2 MMX */
13691 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
13692 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
13693 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
13694 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
13695 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
13696 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
13697 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
13698 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
13700   { MASK_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
13701   { MASK_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
13702   { MASK_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
13703   { MASK_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
13704   { MASK_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
13705   { MASK_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
13706   { MASK_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
13707   { MASK_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
13709 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
13710 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
13712 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
13713 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
13714 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
13715 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
13717 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
13718 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
13720 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
13721 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
13722 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
13723 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
13724 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
13725 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
13727 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
13728 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
13729 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
13730 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
13732 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
13733 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
13734 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
13735 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
13736 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
13737 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
13738 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
13739 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
13741 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
13742 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
13743 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
13745 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
13746 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
13748 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
13749 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
13751 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
13752 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
13753 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
13755 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
13756 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
13757 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
13759 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
13760 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
13762 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
13764 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
13765 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
13766 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
13767 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
13769 /* SSE3 MMX */
13770 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
13771 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
13772 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
13773 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
13774 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
13775 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
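/* bdesc_1arg below lists the one-operand builtins.  Each entry has the
   same layout as in bdesc_2arg: target mask, insn code, builtin name (or
   zero for builtins that get hand-written prototypes in
   ix86_init_mmx_sse_builtins), IX86_BUILTIN code, and the comparison and
   flag fields, which are unused here.  */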
13778 static const struct builtin_description bdesc_1arg[] =
13780 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
13781 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
13783 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
13784 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
13785 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
13787 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
13788 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
13789 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
13790 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
13791 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
13792 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
13794 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13795 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13797 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13799 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13800 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
13802 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13803 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13804 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13805 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13806 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
13808 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
13810 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13811 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
13812 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13813 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
13815 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13816 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13817 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13819 /* SSE3 */
13820 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
13821 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
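/* Set up all the target-specific builtins.  This is the
   TARGET_INIT_BUILTINS hook for i386; at the moment every builtin it
   registers is an MMX/SSE one, so there is nothing to do unless MMX is
   enabled.  */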
13824 static void
13825 ix86_init_builtins (void)
13827 if (TARGET_MMX)
13828 ix86_init_mmx_sse_builtins ();
13831 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13832 is zero. Otherwise, if TARGET_SSE is not set, only the MMX
13833 builtins are defined. */
13834 static void
13835 ix86_init_mmx_sse_builtins (void)
13837 const struct builtin_description * d;
13838 size_t i;
13840 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
13841 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
13842 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
13843 tree V2DI_type_node
13844 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
13845 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
13846 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
13847 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
13848 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
13849 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
13850 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
13852 tree pchar_type_node = build_pointer_type (char_type_node);
13853 tree pcchar_type_node = build_pointer_type (
13854 build_type_variant (char_type_node, 1, 0));
13855 tree pfloat_type_node = build_pointer_type (float_type_node);
13856 tree pcfloat_type_node = build_pointer_type (
13857 build_type_variant (float_type_node, 1, 0));
13858 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13859 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13860 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13862 /* Comparisons. */
13863 tree int_ftype_v4sf_v4sf
13864 = build_function_type_list (integer_type_node,
13865 V4SF_type_node, V4SF_type_node, NULL_TREE);
13866 tree v4si_ftype_v4sf_v4sf
13867 = build_function_type_list (V4SI_type_node,
13868 V4SF_type_node, V4SF_type_node, NULL_TREE);
13869 /* MMX/SSE/integer conversions. */
13870 tree int_ftype_v4sf
13871 = build_function_type_list (integer_type_node,
13872 V4SF_type_node, NULL_TREE);
13873 tree int64_ftype_v4sf
13874 = build_function_type_list (long_long_integer_type_node,
13875 V4SF_type_node, NULL_TREE);
13876 tree int_ftype_v8qi
13877 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13878 tree v4sf_ftype_v4sf_int
13879 = build_function_type_list (V4SF_type_node,
13880 V4SF_type_node, integer_type_node, NULL_TREE);
13881 tree v4sf_ftype_v4sf_int64
13882 = build_function_type_list (V4SF_type_node,
13883 V4SF_type_node, long_long_integer_type_node,
13884 NULL_TREE);
13885 tree v4sf_ftype_v4sf_v2si
13886 = build_function_type_list (V4SF_type_node,
13887 V4SF_type_node, V2SI_type_node, NULL_TREE);
13889 /* Miscellaneous. */
13890 tree v8qi_ftype_v4hi_v4hi
13891 = build_function_type_list (V8QI_type_node,
13892 V4HI_type_node, V4HI_type_node, NULL_TREE);
13893 tree v4hi_ftype_v2si_v2si
13894 = build_function_type_list (V4HI_type_node,
13895 V2SI_type_node, V2SI_type_node, NULL_TREE);
13896 tree v4sf_ftype_v4sf_v4sf_int
13897 = build_function_type_list (V4SF_type_node,
13898 V4SF_type_node, V4SF_type_node,
13899 integer_type_node, NULL_TREE);
13900 tree v2si_ftype_v4hi_v4hi
13901 = build_function_type_list (V2SI_type_node,
13902 V4HI_type_node, V4HI_type_node, NULL_TREE);
13903 tree v4hi_ftype_v4hi_int
13904 = build_function_type_list (V4HI_type_node,
13905 V4HI_type_node, integer_type_node, NULL_TREE);
13906 tree v4hi_ftype_v4hi_di
13907 = build_function_type_list (V4HI_type_node,
13908 V4HI_type_node, long_long_unsigned_type_node,
13909 NULL_TREE);
13910 tree v2si_ftype_v2si_di
13911 = build_function_type_list (V2SI_type_node,
13912 V2SI_type_node, long_long_unsigned_type_node,
13913 NULL_TREE);
13914 tree void_ftype_void
13915 = build_function_type (void_type_node, void_list_node);
13916 tree void_ftype_unsigned
13917 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13918 tree void_ftype_unsigned_unsigned
13919 = build_function_type_list (void_type_node, unsigned_type_node,
13920 unsigned_type_node, NULL_TREE);
13921 tree void_ftype_pcvoid_unsigned_unsigned
13922 = build_function_type_list (void_type_node, const_ptr_type_node,
13923 unsigned_type_node, unsigned_type_node,
13924 NULL_TREE);
13925 tree unsigned_ftype_void
13926 = build_function_type (unsigned_type_node, void_list_node);
13927 tree v2si_ftype_v4sf
13928 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13929 /* Loads/stores. */
13930 tree void_ftype_v8qi_v8qi_pchar
13931 = build_function_type_list (void_type_node,
13932 V8QI_type_node, V8QI_type_node,
13933 pchar_type_node, NULL_TREE);
13934 tree v4sf_ftype_pcfloat
13935 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13936 /* @@@ the type is bogus */
13937 tree v4sf_ftype_v4sf_pv2si
13938 = build_function_type_list (V4SF_type_node,
13939 V4SF_type_node, pv2si_type_node, NULL_TREE);
13940 tree void_ftype_pv2si_v4sf
13941 = build_function_type_list (void_type_node,
13942 pv2si_type_node, V4SF_type_node, NULL_TREE);
13943 tree void_ftype_pfloat_v4sf
13944 = build_function_type_list (void_type_node,
13945 pfloat_type_node, V4SF_type_node, NULL_TREE);
13946 tree void_ftype_pdi_di
13947 = build_function_type_list (void_type_node,
13948 pdi_type_node, long_long_unsigned_type_node,
13949 NULL_TREE);
13950 tree void_ftype_pv2di_v2di
13951 = build_function_type_list (void_type_node,
13952 pv2di_type_node, V2DI_type_node, NULL_TREE);
13953 /* Normal vector unops. */
13954 tree v4sf_ftype_v4sf
13955 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13957 /* Normal vector binops. */
13958 tree v4sf_ftype_v4sf_v4sf
13959 = build_function_type_list (V4SF_type_node,
13960 V4SF_type_node, V4SF_type_node, NULL_TREE);
13961 tree v8qi_ftype_v8qi_v8qi
13962 = build_function_type_list (V8QI_type_node,
13963 V8QI_type_node, V8QI_type_node, NULL_TREE);
13964 tree v4hi_ftype_v4hi_v4hi
13965 = build_function_type_list (V4HI_type_node,
13966 V4HI_type_node, V4HI_type_node, NULL_TREE);
13967 tree v2si_ftype_v2si_v2si
13968 = build_function_type_list (V2SI_type_node,
13969 V2SI_type_node, V2SI_type_node, NULL_TREE);
13970 tree di_ftype_di_di
13971 = build_function_type_list (long_long_unsigned_type_node,
13972 long_long_unsigned_type_node,
13973 long_long_unsigned_type_node, NULL_TREE);
13975 tree v2si_ftype_v2sf
13976 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13977 tree v2sf_ftype_v2si
13978 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13979 tree v2si_ftype_v2si
13980 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13981 tree v2sf_ftype_v2sf
13982 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13983 tree v2sf_ftype_v2sf_v2sf
13984 = build_function_type_list (V2SF_type_node,
13985 V2SF_type_node, V2SF_type_node, NULL_TREE);
13986 tree v2si_ftype_v2sf_v2sf
13987 = build_function_type_list (V2SI_type_node,
13988 V2SF_type_node, V2SF_type_node, NULL_TREE);
13989 tree pint_type_node = build_pointer_type (integer_type_node);
13990 tree pdouble_type_node = build_pointer_type (double_type_node);
13991 tree pcdouble_type_node = build_pointer_type (
13992 build_type_variant (double_type_node, 1, 0));
13993 tree int_ftype_v2df_v2df
13994 = build_function_type_list (integer_type_node,
13995 V2DF_type_node, V2DF_type_node, NULL_TREE);
13997 tree ti_ftype_ti_ti
13998 = build_function_type_list (intTI_type_node,
13999 intTI_type_node, intTI_type_node, NULL_TREE);
14000 tree void_ftype_pcvoid
14001 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
14002 tree v4sf_ftype_v4si
14003 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
14004 tree v4si_ftype_v4sf
14005 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
14006 tree v2df_ftype_v4si
14007 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
14008 tree v4si_ftype_v2df
14009 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
14010 tree v2si_ftype_v2df
14011 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
14012 tree v4sf_ftype_v2df
14013 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
14014 tree v2df_ftype_v2si
14015 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
14016 tree v2df_ftype_v4sf
14017 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
14018 tree int_ftype_v2df
14019 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
14020 tree int64_ftype_v2df
14021 = build_function_type_list (long_long_integer_type_node,
14022 V2DF_type_node, NULL_TREE);
14023 tree v2df_ftype_v2df_int
14024 = build_function_type_list (V2DF_type_node,
14025 V2DF_type_node, integer_type_node, NULL_TREE);
14026 tree v2df_ftype_v2df_int64
14027 = build_function_type_list (V2DF_type_node,
14028 V2DF_type_node, long_long_integer_type_node,
14029 NULL_TREE);
14030 tree v4sf_ftype_v4sf_v2df
14031 = build_function_type_list (V4SF_type_node,
14032 V4SF_type_node, V2DF_type_node, NULL_TREE);
14033 tree v2df_ftype_v2df_v4sf
14034 = build_function_type_list (V2DF_type_node,
14035 V2DF_type_node, V4SF_type_node, NULL_TREE);
14036 tree v2df_ftype_v2df_v2df_int
14037 = build_function_type_list (V2DF_type_node,
14038 V2DF_type_node, V2DF_type_node,
14039 integer_type_node,
14040 NULL_TREE);
14041 tree v2df_ftype_v2df_pcdouble
14042 = build_function_type_list (V2DF_type_node,
14043 V2DF_type_node, pcdouble_type_node, NULL_TREE);
14044 tree void_ftype_pdouble_v2df
14045 = build_function_type_list (void_type_node,
14046 pdouble_type_node, V2DF_type_node, NULL_TREE);
14047 tree void_ftype_pint_int
14048 = build_function_type_list (void_type_node,
14049 pint_type_node, integer_type_node, NULL_TREE);
14050 tree void_ftype_v16qi_v16qi_pchar
14051 = build_function_type_list (void_type_node,
14052 V16QI_type_node, V16QI_type_node,
14053 pchar_type_node, NULL_TREE);
14054 tree v2df_ftype_pcdouble
14055 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
14056 tree v2df_ftype_v2df_v2df
14057 = build_function_type_list (V2DF_type_node,
14058 V2DF_type_node, V2DF_type_node, NULL_TREE);
14059 tree v16qi_ftype_v16qi_v16qi
14060 = build_function_type_list (V16QI_type_node,
14061 V16QI_type_node, V16QI_type_node, NULL_TREE);
14062 tree v8hi_ftype_v8hi_v8hi
14063 = build_function_type_list (V8HI_type_node,
14064 V8HI_type_node, V8HI_type_node, NULL_TREE);
14065 tree v4si_ftype_v4si_v4si
14066 = build_function_type_list (V4SI_type_node,
14067 V4SI_type_node, V4SI_type_node, NULL_TREE);
14068 tree v2di_ftype_v2di_v2di
14069 = build_function_type_list (V2DI_type_node,
14070 V2DI_type_node, V2DI_type_node, NULL_TREE);
14071 tree v2di_ftype_v2df_v2df
14072 = build_function_type_list (V2DI_type_node,
14073 V2DF_type_node, V2DF_type_node, NULL_TREE);
14074 tree v2df_ftype_v2df
14075 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
14076 tree v2di_ftype_v2di_int
14077 = build_function_type_list (V2DI_type_node,
14078 V2DI_type_node, integer_type_node, NULL_TREE);
14079 tree v4si_ftype_v4si_int
14080 = build_function_type_list (V4SI_type_node,
14081 V4SI_type_node, integer_type_node, NULL_TREE);
14082 tree v8hi_ftype_v8hi_int
14083 = build_function_type_list (V8HI_type_node,
14084 V8HI_type_node, integer_type_node, NULL_TREE);
14085 tree v8hi_ftype_v8hi_v2di
14086 = build_function_type_list (V8HI_type_node,
14087 V8HI_type_node, V2DI_type_node, NULL_TREE);
14088 tree v4si_ftype_v4si_v2di
14089 = build_function_type_list (V4SI_type_node,
14090 V4SI_type_node, V2DI_type_node, NULL_TREE);
14091 tree v4si_ftype_v8hi_v8hi
14092 = build_function_type_list (V4SI_type_node,
14093 V8HI_type_node, V8HI_type_node, NULL_TREE);
14094 tree di_ftype_v8qi_v8qi
14095 = build_function_type_list (long_long_unsigned_type_node,
14096 V8QI_type_node, V8QI_type_node, NULL_TREE);
14097 tree di_ftype_v2si_v2si
14098 = build_function_type_list (long_long_unsigned_type_node,
14099 V2SI_type_node, V2SI_type_node, NULL_TREE);
14100 tree v2di_ftype_v16qi_v16qi
14101 = build_function_type_list (V2DI_type_node,
14102 V16QI_type_node, V16QI_type_node, NULL_TREE);
14103 tree v2di_ftype_v4si_v4si
14104 = build_function_type_list (V2DI_type_node,
14105 V4SI_type_node, V4SI_type_node, NULL_TREE);
14106 tree int_ftype_v16qi
14107 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
14108 tree v16qi_ftype_pcchar
14109 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
14110 tree void_ftype_pchar_v16qi
14111 = build_function_type_list (void_type_node,
14112 pchar_type_node, V16QI_type_node, NULL_TREE);
14114 tree float80_type;
14115 tree float128_type;
14116 tree ftype;
14118 /* The __float80 type. */
14119 if (TYPE_MODE (long_double_type_node) == XFmode)
14120 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
14121 "__float80");
14122 else
14124 /* The __float80 type. */
14125 float80_type = make_node (REAL_TYPE);
14126 TYPE_PRECISION (float80_type) = 80;
14127 layout_type (float80_type);
14128 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
14131 float128_type = make_node (REAL_TYPE);
14132 TYPE_PRECISION (float128_type) = 128;
14133 layout_type (float128_type);
14134 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
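/* Once registered, the names act as type specifiers in user code; a
   minimal (purely illustrative) use would be

       __float80 e = 1.0L;
       __float128 q;

   When long double already has the 80-bit XFmode representation,
   __float80 is simply another name for long double.  */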
14136 /* Add all builtins that are more or less simple operations on two
14137 operands. */
14138 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14140 /* Use one of the operands; the target can have a different mode for
14141 mask-generating compares. */
14142 enum machine_mode mode;
14143 tree type;
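/* Entries whose name field is zero (the pack, shift, conversion, psadbw
   and pmaddwd builtins above) are skipped here; they are given
   hand-written prototypes by the def_builtin calls further down, and
   only their icode entry is used when the builtin is expanded.  */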
14145 if (d->name == 0)
14146 continue;
14147 mode = insn_data[d->icode].operand[1].mode;
14149 switch (mode)
14151 case V16QImode:
14152 type = v16qi_ftype_v16qi_v16qi;
14153 break;
14154 case V8HImode:
14155 type = v8hi_ftype_v8hi_v8hi;
14156 break;
14157 case V4SImode:
14158 type = v4si_ftype_v4si_v4si;
14159 break;
14160 case V2DImode:
14161 type = v2di_ftype_v2di_v2di;
14162 break;
14163 case V2DFmode:
14164 type = v2df_ftype_v2df_v2df;
14165 break;
14166 case TImode:
14167 type = ti_ftype_ti_ti;
14168 break;
14169 case V4SFmode:
14170 type = v4sf_ftype_v4sf_v4sf;
14171 break;
14172 case V8QImode:
14173 type = v8qi_ftype_v8qi_v8qi;
14174 break;
14175 case V4HImode:
14176 type = v4hi_ftype_v4hi_v4hi;
14177 break;
14178 case V2SImode:
14179 type = v2si_ftype_v2si_v2si;
14180 break;
14181 case DImode:
14182 type = di_ftype_di_di;
14183 break;
14185 default:
14186 gcc_unreachable ();
14189 /* Override for comparisons. */
14190 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
14191 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
14192 type = v4si_ftype_v4sf_v4sf;
14194 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
14195 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
14196 type = v2di_ftype_v2df_v2df;
14198 def_builtin (d->mask, d->name, type, d->code);
14201 /* Add the remaining MMX insns with somewhat more complicated types. */
14202 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
14203 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
14204 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
14205 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
14207 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
14208 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
14209 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
14211 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
14212 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
14214 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
14215 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
14217 /* comi/ucomi insns. */
14218 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14219 if (d->mask == MASK_SSE2)
14220 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
14221 else
14222 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
14224 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
14225 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
14226 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
14228 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
14229 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
14230 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
14231 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
14232 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
14233 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
14234 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
14235 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
14236 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
14237 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
14238 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
14240 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
14242 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
14243 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
14245 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
14246 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
14247 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
14248 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
14250 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
14251 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
14252 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
14253 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
14255 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
14257 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
14259 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
14260 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
14261 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
14262 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
14263 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
14264 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
14266 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
14268 /* Original 3DNow! */
14269 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
14270 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
14271 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
14272 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
14273 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
14274 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
14275 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
14276 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
14277 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
14278 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
14279 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
14280 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
14281 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
14282 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
14283 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
14284 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
14285 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
14286 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
14287 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
14288 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
14290 /* 3DNow! extension as used in the Athlon CPU. */
14291 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
14292 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
14293 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
14294 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
14295 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
14296 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
14298 /* SSE2 */
14299 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
14301 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
14302 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
14304 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
14305 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
14307 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
14308 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
14309 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
14310 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
14311 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
14313 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
14314 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
14315 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
14316 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
14318 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
14319 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
14321 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
14323 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
14324 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
14326 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
14327 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
14328 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
14329 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
14330 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
14332 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
14334 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
14335 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
14336 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
14337 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
14339 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
14340 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
14341 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
14343 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
14344 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
14345 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
14346 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
14348 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
14349 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
14350 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
14352 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
14353 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
14355 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
14356 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
14358 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
14359 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
14360 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
14362 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
14363 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
14364 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
14366 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
14367 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
14369 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
14370 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
14371 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
14372 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
14374 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
14375 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
14376 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
14377 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
14379 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
14380 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
14382 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
14384 /* Prescott New Instructions. */
14385 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
14386 void_ftype_pcvoid_unsigned_unsigned,
14387 IX86_BUILTIN_MONITOR);
14388 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
14389 void_ftype_unsigned_unsigned,
14390 IX86_BUILTIN_MWAIT);
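/* These are the builtins behind _mm_monitor and _mm_mwait in
   pmmintrin.h.  A direct call (purely illustrative; "addr" is a
   hypothetical pointer) would look like

       __builtin_ia32_monitor (addr, 0, 0);
       __builtin_ia32_mwait (0, 0);

   with the extension and hint arguments passed as unsigned ints.  */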
14391 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
14392 v4sf_ftype_v4sf,
14393 IX86_BUILTIN_MOVSHDUP);
14394 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
14395 v4sf_ftype_v4sf,
14396 IX86_BUILTIN_MOVSLDUP);
14397 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
14398 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
14400 /* Access to the vec_init patterns. */
14401 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
14402 integer_type_node, NULL_TREE);
14403 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
14404 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
14406 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
14407 short_integer_type_node,
14408 short_integer_type_node,
14409 short_integer_type_node, NULL_TREE);
14410 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
14411 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
14413 ftype = build_function_type_list (V8QI_type_node, char_type_node,
14414 char_type_node, char_type_node,
14415 char_type_node, char_type_node,
14416 char_type_node, char_type_node,
14417 char_type_node, NULL_TREE);
14418 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
14419 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
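/* The vec_init builtins let mmintrin.h build an __m64 out of scalars
   (the _mm_set_pi32/_mm_set_pi16/_mm_set_pi8 family) without the
   compiler exposing MMX vec_init patterns directly; see the comment
   above ix86_expand_vec_init_builtin below.  */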
14421 /* Access to the vec_extract patterns. */
14422 ftype = build_function_type_list (double_type_node, V2DF_type_node,
14423 integer_type_node, NULL_TREE);
14424 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
14425 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
14427 ftype = build_function_type_list (long_long_integer_type_node,
14428 V2DI_type_node, integer_type_node,
14429 NULL_TREE);
14430 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
14431 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
14433 ftype = build_function_type_list (float_type_node, V4SF_type_node,
14434 integer_type_node, NULL_TREE);
14435 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
14436 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
14438 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
14439 integer_type_node, NULL_TREE);
14440 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
14441 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
14443 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
14444 integer_type_node, NULL_TREE);
14445 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
14446 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
14448 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
14449 integer_type_node, NULL_TREE);
14450 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
14451 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
14453 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
14454 integer_type_node, NULL_TREE);
14455 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
14456 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
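/* The vec_ext builtins read a single element out of a vector and are
   what intrinsics such as _mm_extract_epi16 are built on.  The selector
   argument must be an integer constant in range, which
   get_element_number below is responsible for checking.  */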
14458 /* Access to the vec_set patterns. */
14459 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
14460 intHI_type_node,
14461 integer_type_node, NULL_TREE);
14462 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
14463 ftype, IX86_BUILTIN_VEC_SET_V8HI);
14465 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
14466 intHI_type_node,
14467 integer_type_node, NULL_TREE);
14468 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
14469 ftype, IX86_BUILTIN_VEC_SET_V4HI);
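/* Likewise, the vec_set builtins replace one element of a vector
   (pinsrw in both the V8HI and V4HI cases); the _mm_insert_epi16 and
   _mm_insert_pi16 intrinsics are expected to map onto these.  */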
14472 /* Errors in the source file can cause expand_expr to return const0_rtx
14473 where we expect a vector. To avoid crashing, use one of the vector
14474 clear instructions. */
14475 static rtx
14476 safe_vector_operand (rtx x, enum machine_mode mode)
14478 if (x == const0_rtx)
14479 x = CONST0_RTX (mode);
14480 return x;
14483 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
14485 static rtx
14486 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
14488 rtx pat, xops[3];
14489 tree arg0 = TREE_VALUE (arglist);
14490 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14491 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14492 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14493 enum machine_mode tmode = insn_data[icode].operand[0].mode;
14494 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
14495 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
14497 if (VECTOR_MODE_P (mode0))
14498 op0 = safe_vector_operand (op0, mode0);
14499 if (VECTOR_MODE_P (mode1))
14500 op1 = safe_vector_operand (op1, mode1);
14502 if (optimize || !target
14503 || GET_MODE (target) != tmode
14504 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14505 target = gen_reg_rtx (tmode);
14507 if (GET_MODE (op1) == SImode && mode1 == TImode)
14509 rtx x = gen_reg_rtx (V4SImode);
14510 emit_insn (gen_sse2_loadd (x, op1));
14511 op1 = gen_lowpart (TImode, x);
14514 /* The insn must want input operands in the same modes as the
14515 result. */
14516 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
14517 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
14519 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
14520 op0 = copy_to_mode_reg (mode0, op0);
14521 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
14522 op1 = copy_to_mode_reg (mode1, op1);
14524 /* ??? Using ix86_fixup_binary_operands is problematic when
14525 we've got mismatched modes. Fake it. */
14527 xops[0] = target;
14528 xops[1] = op0;
14529 xops[2] = op1;
14531 if (tmode == mode0 && tmode == mode1)
14533 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
14534 op0 = xops[1];
14535 op1 = xops[2];
14537 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
14539 op0 = force_reg (mode0, op0);
14540 op1 = force_reg (mode1, op1);
14541 target = gen_reg_rtx (tmode);
14544 pat = GEN_FCN (icode) (target, op0, op1);
14545 if (! pat)
14546 return 0;
14547 emit_insn (pat);
14548 return target;
14551 /* Subroutine of ix86_expand_builtin to take care of stores. */
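/* The argument list carries a pointer, which becomes the destination
   MEM, followed by the value to store; this matches store builtins such
   as __builtin_ia32_storeups and __builtin_ia32_movntps, declared above
   with void_ftype_pfloat_v4sf.  */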
14553 static rtx
14554 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
14556 rtx pat;
14557 tree arg0 = TREE_VALUE (arglist);
14558 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14559 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14560 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14561 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
14562 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
14564 if (VECTOR_MODE_P (mode1))
14565 op1 = safe_vector_operand (op1, mode1);
14567 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14568 op1 = copy_to_mode_reg (mode1, op1);
14570 pat = GEN_FCN (icode) (op0, op1);
14571 if (pat)
14572 emit_insn (pat);
14573 return 0;
14576 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
14578 static rtx
14579 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
14580 rtx target, int do_load)
14582 rtx pat;
14583 tree arg0 = TREE_VALUE (arglist);
14584 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14585 enum machine_mode tmode = insn_data[icode].operand[0].mode;
14586 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
14588 if (optimize || !target
14589 || GET_MODE (target) != tmode
14590 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14591 target = gen_reg_rtx (tmode);
14592 if (do_load)
14593 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14594 else
14596 if (VECTOR_MODE_P (mode0))
14597 op0 = safe_vector_operand (op0, mode0);
14599 if ((optimize && !register_operand (op0, mode0))
14600 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14601 op0 = copy_to_mode_reg (mode0, op0);
14604 pat = GEN_FCN (icode) (target, op0);
14605 if (! pat)
14606 return 0;
14607 emit_insn (pat);
14608 return target;
14611 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
14612 sqrtss, rsqrtss, rcpss. */
14614 static rtx
14615 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
14617 rtx pat;
14618 tree arg0 = TREE_VALUE (arglist);
14619 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14620 enum machine_mode tmode = insn_data[icode].operand[0].mode;
14621 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
14623 if (optimize || !target
14624 || GET_MODE (target) != tmode
14625 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14626 target = gen_reg_rtx (tmode);
14628 if (VECTOR_MODE_P (mode0))
14629 op0 = safe_vector_operand (op0, mode0);
14631 if ((optimize && !register_operand (op0, mode0))
14632 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14633 op0 = copy_to_mode_reg (mode0, op0);
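/* The scalar insns behind these builtins take two vector inputs: the
   operation is applied to the low element of one and the remaining
   elements are copied from the other.  The builtin has a single
   argument, so the same value is used for both operands.  */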
14635 op1 = op0;
14636 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
14637 op1 = copy_to_mode_reg (mode0, op1);
14639 pat = GEN_FCN (icode) (target, op0, op1);
14640 if (! pat)
14641 return 0;
14642 emit_insn (pat);
14643 return target;
14646 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
14648 static rtx
14649 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
14650 rtx target)
14652 rtx pat;
14653 tree arg0 = TREE_VALUE (arglist);
14654 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14655 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14656 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14657 rtx op2;
14658 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
14659 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
14660 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
14661 enum rtx_code comparison = d->comparison;
14663 if (VECTOR_MODE_P (mode0))
14664 op0 = safe_vector_operand (op0, mode0);
14665 if (VECTOR_MODE_P (mode1))
14666 op1 = safe_vector_operand (op1, mode1);
14668 /* Swap operands if we have a comparison that isn't available in
14669 hardware. */
14670 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
14672 rtx tmp = gen_reg_rtx (mode1);
14673 emit_move_insn (tmp, op1);
14674 op1 = op0;
14675 op0 = tmp;
14678 if (optimize || !target
14679 || GET_MODE (target) != tmode
14680 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
14681 target = gen_reg_rtx (tmode);
14683 if ((optimize && !register_operand (op0, mode0))
14684 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
14685 op0 = copy_to_mode_reg (mode0, op0);
14686 if ((optimize && !register_operand (op1, mode1))
14687 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
14688 op1 = copy_to_mode_reg (mode1, op1);
14690 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
14691 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
14692 if (! pat)
14693 return 0;
14694 emit_insn (pat);
14695 return target;
14698 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
14700 static rtx
14701 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
14702 rtx target)
14704 rtx pat;
14705 tree arg0 = TREE_VALUE (arglist);
14706 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14707 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14708 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14709 rtx op2;
14710 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
14711 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
14712 enum rtx_code comparison = d->comparison;
14714 if (VECTOR_MODE_P (mode0))
14715 op0 = safe_vector_operand (op0, mode0);
14716 if (VECTOR_MODE_P (mode1))
14717 op1 = safe_vector_operand (op1, mode1);
14719 /* Swap operands if we have a comparison that isn't available in
14720 hardware. */
14721 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
14723 rtx tmp = op1;
14724 op1 = op0;
14725 op0 = tmp;
14728 target = gen_reg_rtx (SImode);
14729 emit_move_insn (target, const0_rtx);
14730 target = gen_rtx_SUBREG (QImode, target, 0);
14732 if ((optimize && !register_operand (op0, mode0))
14733 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14734 op0 = copy_to_mode_reg (mode0, op0);
14735 if ((optimize && !register_operand (op1, mode1))
14736 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14737 op1 = copy_to_mode_reg (mode1, op1);
14739 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
14740 pat = GEN_FCN (d->icode) (op0, op1);
14741 if (! pat)
14742 return 0;
14743 emit_insn (pat);
14744 emit_insn (gen_rtx_SET (VOIDmode,
14745 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
14746 gen_rtx_fmt_ee (comparison, QImode,
14747 SET_DEST (pat),
14748 const0_rtx)));
14750 return SUBREG_REG (target);
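/* Rough sketch of the result (illustrative; the precise condition and any
   operand swap come from the builtin_description entry):
     xor     %eax, %eax        # the zeroed SImode TARGET
     ucomiss %xmm1, %xmm0      # the comi/ucomi pattern, sets EFLAGS only
     set<cc> %al               # STRICT_LOW_PART set from d->comparison
   and SUBREG_REG (target), i.e. the whole SImode register, is returned.  */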
14753 /* Return the integer constant in ARG. Constrain it to be in the range
14754 of the subparts of VEC_TYPE; issue an error if not. */
14756 static int
14757 get_element_number (tree vec_type, tree arg)
14759 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
14761 if (!host_integerp (arg, 1)
14762 || (elt = tree_low_cst (arg, 1), elt > max))
14764 error ("selector must be an integer constant in the range 0..%wi", max);
14765 return 0;
14768 return elt;
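/* Example: for a 4-element vector type TYPE_VECTOR_SUBPARTS is 4, so MAX
   is 3; a selector of 4 (or a non-constant selector) triggers the
   "selector must be an integer constant in the range 0..3" error and
   element 0 is used instead.  */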
14771 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
14772 ix86_expand_vector_init. We DO have language-level syntax for this, in
14773 the form of (type){ init-list }. Except that since we can't place emms
14774 instructions from inside the compiler, we can't allow the use of MMX
14775 registers unless the user explicitly asks for it. So we do *not* define
14776 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
14777 we have builtins invoked by mmintrin.h that give us license to emit
14778 these sorts of instructions. */
14780 static rtx
14781 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
14783 enum machine_mode tmode = TYPE_MODE (type);
14784 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
14785 int i, n_elt = GET_MODE_NUNITS (tmode);
14786 rtvec v = rtvec_alloc (n_elt);
14788 gcc_assert (VECTOR_MODE_P (tmode));
14790 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
14792 rtx x = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14793 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
14796 gcc_assert (arglist == NULL);
14798 if (!target || !register_operand (target, tmode))
14799 target = gen_reg_rtx (tmode);
14801 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
14802 return target;
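/* Illustrative example (hypothetical user-level call):
     __builtin_ia32_vec_init_v2si (1, 2)
   gathers its two scalar arguments into a PARALLEL and lets
   ix86_expand_vector_init choose the instruction sequence; the mmintrin.h
   _mm_set* intrinsics are expected to be built on these builtins.  */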
14805 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
14806 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
14807 had a language-level syntax for referencing vector elements. */
14809 static rtx
14810 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
14812 enum machine_mode tmode, mode0;
14813 tree arg0, arg1;
14814 int elt;
14815 rtx op0;
14817 arg0 = TREE_VALUE (arglist);
14818 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14820 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14821 elt = get_element_number (TREE_TYPE (arg0), arg1);
14823 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14824 mode0 = TYPE_MODE (TREE_TYPE (arg0));
14825 gcc_assert (VECTOR_MODE_P (mode0));
14827 op0 = force_reg (mode0, op0);
14829 if (optimize || !target || !register_operand (target, tmode))
14830 target = gen_reg_rtx (tmode);
14832 ix86_expand_vector_extract (true, target, op0, elt);
14834 return target;
14837 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
14838 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
14839 a language-level syntax for referencing vector elements. */
14841 static rtx
14842 ix86_expand_vec_set_builtin (tree arglist)
14844 enum machine_mode tmode, mode1;
14845 tree arg0, arg1, arg2;
14846 int elt;
14847 rtx op0, op1;
14849 arg0 = TREE_VALUE (arglist);
14850 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14851 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14853 tmode = TYPE_MODE (TREE_TYPE (arg0));
14854 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14855 gcc_assert (VECTOR_MODE_P (tmode));
14857 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
14858 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
14859 elt = get_element_number (TREE_TYPE (arg0), arg2);
14861 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
14862 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
14864 op0 = force_reg (tmode, op0);
14865 op1 = force_reg (mode1, op1);
14867 ix86_expand_vector_set (true, op0, op1, elt);
14869 return op0;
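/* Illustrative example (hypothetical user-level call):
     v = __builtin_ia32_vec_set_v4hi (v, x, 2);
   replaces element 2 of the V4HI value with X via ix86_expand_vector_set;
   mmintrin.h's _mm_insert_pi16 is expected to sit on top of this.  */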
14872 /* Expand an expression EXP that calls a built-in function,
14873 with result going to TARGET if that's convenient
14874 (and in mode MODE if that's convenient).
14875 SUBTARGET may be used as the target for computing one of EXP's operands.
14876 IGNORE is nonzero if the value is to be ignored. */
14878 static rtx
14879 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14880 enum machine_mode mode ATTRIBUTE_UNUSED,
14881 int ignore ATTRIBUTE_UNUSED)
14883 const struct builtin_description *d;
14884 size_t i;
14885 enum insn_code icode;
14886 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
14887 tree arglist = TREE_OPERAND (exp, 1);
14888 tree arg0, arg1, arg2;
14889 rtx op0, op1, op2, pat;
14890 enum machine_mode tmode, mode0, mode1, mode2;
14891 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
14893 switch (fcode)
14895 case IX86_BUILTIN_EMMS:
14896 emit_insn (gen_mmx_emms ());
14897 return 0;
14899 case IX86_BUILTIN_SFENCE:
14900 emit_insn (gen_sse_sfence ());
14901 return 0;
14903 case IX86_BUILTIN_MASKMOVQ:
14904 case IX86_BUILTIN_MASKMOVDQU:
14905 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14906 ? CODE_FOR_mmx_maskmovq
14907 : CODE_FOR_sse2_maskmovdqu);
14908 /* Note the arg order is different from the operand order. */
14909 arg1 = TREE_VALUE (arglist);
14910 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14911 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14912 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14913 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14914 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14915 mode0 = insn_data[icode].operand[0].mode;
14916 mode1 = insn_data[icode].operand[1].mode;
14917 mode2 = insn_data[icode].operand[2].mode;
14919 op0 = force_reg (Pmode, op0);
14920 op0 = gen_rtx_MEM (mode1, op0);
14922 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14923 op0 = copy_to_mode_reg (mode0, op0);
14924 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14925 op1 = copy_to_mode_reg (mode1, op1);
14926 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14927 op2 = copy_to_mode_reg (mode2, op2);
14928 pat = GEN_FCN (icode) (op0, op1, op2);
14929 if (! pat)
14930 return 0;
14931 emit_insn (pat);
14932 return 0;
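/* Illustrative note: the builtin is called as (data, mask, addr), but the
   insn pattern wants (mem, data, mask); the third argument therefore
   becomes operand 0 (the destination MEM) while the first two become
   operands 1 and 2, which is the reshuffling done above.  */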
14934 case IX86_BUILTIN_SQRTSS:
14935 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
14936 case IX86_BUILTIN_RSQRTSS:
14937 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
14938 case IX86_BUILTIN_RCPSS:
14939 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
14941 case IX86_BUILTIN_LOADUPS:
14942 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14944 case IX86_BUILTIN_STOREUPS:
14945 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14947 case IX86_BUILTIN_LOADHPS:
14948 case IX86_BUILTIN_LOADLPS:
14949 case IX86_BUILTIN_LOADHPD:
14950 case IX86_BUILTIN_LOADLPD:
14951 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
14952 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
14953 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
14954 : CODE_FOR_sse2_loadlpd);
14955 arg0 = TREE_VALUE (arglist);
14956 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14957 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14958 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14959 tmode = insn_data[icode].operand[0].mode;
14960 mode0 = insn_data[icode].operand[1].mode;
14961 mode1 = insn_data[icode].operand[2].mode;
14963 op0 = force_reg (mode0, op0);
14964 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14965 if (optimize || target == 0
14966 || GET_MODE (target) != tmode
14967 || !register_operand (target, tmode))
14968 target = gen_reg_rtx (tmode);
14969 pat = GEN_FCN (icode) (target, op0, op1);
14970 if (! pat)
14971 return 0;
14972 emit_insn (pat);
14973 return target;
14975 case IX86_BUILTIN_STOREHPS:
14976 case IX86_BUILTIN_STORELPS:
14977 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
14978 : CODE_FOR_sse_storelps);
14979 arg0 = TREE_VALUE (arglist);
14980 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14981 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14982 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14983 mode0 = insn_data[icode].operand[0].mode;
14984 mode1 = insn_data[icode].operand[1].mode;
14986 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14987 op1 = force_reg (mode1, op1);
14989 pat = GEN_FCN (icode) (op0, op1);
14990 if (! pat)
14991 return 0;
14992 emit_insn (pat);
14993 return const0_rtx;
14995 case IX86_BUILTIN_MOVNTPS:
14996 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14997 case IX86_BUILTIN_MOVNTQ:
14998 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
15000 case IX86_BUILTIN_LDMXCSR:
15001 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
15002 target = assign_386_stack_local (SImode, SLOT_TEMP);
15003 emit_move_insn (target, op0);
15004 emit_insn (gen_sse_ldmxcsr (target));
15005 return 0;
15007 case IX86_BUILTIN_STMXCSR:
15008 target = assign_386_stack_local (SImode, SLOT_TEMP);
15009 emit_insn (gen_sse_stmxcsr (target));
15010 return copy_to_mode_reg (SImode, target);
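/* Illustrative note: ldmxcsr and stmxcsr only take a memory operand, so
   the value is bounced through the SLOT_TEMP stack slot in both cases;
   e.g. _mm_getcsr () is expected to store the control/status word with
   stmxcsr and then load it into a general register from that slot.  */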
15012 case IX86_BUILTIN_SHUFPS:
15013 case IX86_BUILTIN_SHUFPD:
15014 icode = (fcode == IX86_BUILTIN_SHUFPS
15015 ? CODE_FOR_sse_shufps
15016 : CODE_FOR_sse2_shufpd);
15017 arg0 = TREE_VALUE (arglist);
15018 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15019 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15020 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15021 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15022 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
15023 tmode = insn_data[icode].operand[0].mode;
15024 mode0 = insn_data[icode].operand[1].mode;
15025 mode1 = insn_data[icode].operand[2].mode;
15026 mode2 = insn_data[icode].operand[3].mode;
15028 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15029 op0 = copy_to_mode_reg (mode0, op0);
15030 if ((optimize && !register_operand (op1, mode1))
15031 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
15032 op1 = copy_to_mode_reg (mode1, op1);
15033 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15035 /* @@@ better error message */
15036 error ("mask must be an immediate");
15037 return gen_reg_rtx (tmode);
15039 if (optimize || target == 0
15040 || GET_MODE (target) != tmode
15041 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15042 target = gen_reg_rtx (tmode);
15043 pat = GEN_FCN (icode) (target, op0, op1, op2);
15044 if (! pat)
15045 return 0;
15046 emit_insn (pat);
15047 return target;
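/* Illustrative example (hypothetical user code): the mask has to fold to
   an immediate accepted by operand 3, so _mm_shuffle_ps (a, b, 0xb1)
   expands here, while a run-time variable mask falls into the
   "mask must be an immediate" error above.  */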
15049 case IX86_BUILTIN_PSHUFW:
15050 case IX86_BUILTIN_PSHUFD:
15051 case IX86_BUILTIN_PSHUFHW:
15052 case IX86_BUILTIN_PSHUFLW:
15053 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
15054 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
15055 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
15056 : CODE_FOR_mmx_pshufw);
15057 arg0 = TREE_VALUE (arglist);
15058 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15059 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15060 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15061 tmode = insn_data[icode].operand[0].mode;
15062 mode1 = insn_data[icode].operand[1].mode;
15063 mode2 = insn_data[icode].operand[2].mode;
15065 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
15066 op0 = copy_to_mode_reg (mode1, op0);
15067 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
15069 /* @@@ better error message */
15070 error ("mask must be an immediate");
15071 return const0_rtx;
15073 if (target == 0
15074 || GET_MODE (target) != tmode
15075 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15076 target = gen_reg_rtx (tmode);
15077 pat = GEN_FCN (icode) (target, op0, op1);
15078 if (! pat)
15079 return 0;
15080 emit_insn (pat);
15081 return target;
15083 case IX86_BUILTIN_PSLLDQI128:
15084 case IX86_BUILTIN_PSRLDQI128:
15085 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
15086 : CODE_FOR_sse2_lshrti3);
15087 arg0 = TREE_VALUE (arglist);
15088 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15089 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15090 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15091 tmode = insn_data[icode].operand[0].mode;
15092 mode1 = insn_data[icode].operand[1].mode;
15093 mode2 = insn_data[icode].operand[2].mode;
15095 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
15097 op0 = copy_to_reg (op0);
15098 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
15100 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
15102 error ("shift must be an immediate");
15103 return const0_rtx;
15105 target = gen_reg_rtx (V2DImode);
15106 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
15107 if (! pat)
15108 return 0;
15109 emit_insn (pat);
15110 return target;
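/* Illustrative note: the ashlti3/lshrti3 patterns shift the whole 128-bit
   value as a TImode integer, which is why the V2DI operands are rewrapped
   as TImode subregs above, and why the shift count must be a compile-time
   immediate.  */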
15112 case IX86_BUILTIN_FEMMS:
15113 emit_insn (gen_mmx_femms ());
15114 return NULL_RTX;
15116 case IX86_BUILTIN_PAVGUSB:
15117 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
15119 case IX86_BUILTIN_PF2ID:
15120 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
15122 case IX86_BUILTIN_PFACC:
15123 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
15125 case IX86_BUILTIN_PFADD:
15126 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
15128 case IX86_BUILTIN_PFCMPEQ:
15129 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
15131 case IX86_BUILTIN_PFCMPGE:
15132 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
15134 case IX86_BUILTIN_PFCMPGT:
15135 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
15137 case IX86_BUILTIN_PFMAX:
15138 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
15140 case IX86_BUILTIN_PFMIN:
15141 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
15143 case IX86_BUILTIN_PFMUL:
15144 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
15146 case IX86_BUILTIN_PFRCP:
15147 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
15149 case IX86_BUILTIN_PFRCPIT1:
15150 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
15152 case IX86_BUILTIN_PFRCPIT2:
15153 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
15155 case IX86_BUILTIN_PFRSQIT1:
15156 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
15158 case IX86_BUILTIN_PFRSQRT:
15159 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
15161 case IX86_BUILTIN_PFSUB:
15162 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
15164 case IX86_BUILTIN_PFSUBR:
15165 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
15167 case IX86_BUILTIN_PI2FD:
15168 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
15170 case IX86_BUILTIN_PMULHRW:
15171 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
15173 case IX86_BUILTIN_PF2IW:
15174 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
15176 case IX86_BUILTIN_PFNACC:
15177 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
15179 case IX86_BUILTIN_PFPNACC:
15180 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
15182 case IX86_BUILTIN_PI2FW:
15183 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
15185 case IX86_BUILTIN_PSWAPDSI:
15186 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
15188 case IX86_BUILTIN_PSWAPDSF:
15189 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
15191 case IX86_BUILTIN_SQRTSD:
15192 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
15193 case IX86_BUILTIN_LOADUPD:
15194 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
15195 case IX86_BUILTIN_STOREUPD:
15196 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
15198 case IX86_BUILTIN_MFENCE:
15199 emit_insn (gen_sse2_mfence ());
15200 return 0;
15201 case IX86_BUILTIN_LFENCE:
15202 emit_insn (gen_sse2_lfence ());
15203 return 0;
15205 case IX86_BUILTIN_CLFLUSH:
15206 arg0 = TREE_VALUE (arglist);
15207 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15208 icode = CODE_FOR_sse2_clflush;
15209 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
15210 op0 = copy_to_mode_reg (Pmode, op0);
15212 emit_insn (gen_sse2_clflush (op0));
15213 return 0;
15215 case IX86_BUILTIN_MOVNTPD:
15216 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
15217 case IX86_BUILTIN_MOVNTDQ:
15218 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
15219 case IX86_BUILTIN_MOVNTI:
15220 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
15222 case IX86_BUILTIN_LOADDQU:
15223 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
15224 case IX86_BUILTIN_STOREDQU:
15225 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
15227 case IX86_BUILTIN_MONITOR:
15228 arg0 = TREE_VALUE (arglist);
15229 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15230 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15231 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15232 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15233 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
15234 if (!REG_P (op0))
15235 op0 = copy_to_mode_reg (SImode, op0);
15236 if (!REG_P (op1))
15237 op1 = copy_to_mode_reg (SImode, op1);
15238 if (!REG_P (op2))
15239 op2 = copy_to_mode_reg (SImode, op2);
15240 emit_insn (gen_sse3_monitor (op0, op1, op2));
15241 return 0;
15243 case IX86_BUILTIN_MWAIT:
15244 arg0 = TREE_VALUE (arglist);
15245 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15246 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15247 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15248 if (!REG_P (op0))
15249 op0 = copy_to_mode_reg (SImode, op0);
15250 if (!REG_P (op1))
15251 op1 = copy_to_mode_reg (SImode, op1);
15252 emit_insn (gen_sse3_mwait (op0, op1));
15253 return 0;
15255 case IX86_BUILTIN_LDDQU:
15256 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
15257 target, 1);
15259 case IX86_BUILTIN_VEC_INIT_V2SI:
15260 case IX86_BUILTIN_VEC_INIT_V4HI:
15261 case IX86_BUILTIN_VEC_INIT_V8QI:
15262 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
15264 case IX86_BUILTIN_VEC_EXT_V2DF:
15265 case IX86_BUILTIN_VEC_EXT_V2DI:
15266 case IX86_BUILTIN_VEC_EXT_V4SF:
15267 case IX86_BUILTIN_VEC_EXT_V4SI:
15268 case IX86_BUILTIN_VEC_EXT_V8HI:
15269 case IX86_BUILTIN_VEC_EXT_V2SI:
15270 case IX86_BUILTIN_VEC_EXT_V4HI:
15271 return ix86_expand_vec_ext_builtin (arglist, target);
15273 case IX86_BUILTIN_VEC_SET_V8HI:
15274 case IX86_BUILTIN_VEC_SET_V4HI:
15275 return ix86_expand_vec_set_builtin (arglist);
15277 default:
15278 break;
15281 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15282 if (d->code == fcode)
15284 /* Compares are treated specially. */
15285 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15286 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
15287 || d->icode == CODE_FOR_sse2_maskcmpv2df3
15288 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15289 return ix86_expand_sse_compare (d, arglist, target);
15291 return ix86_expand_binop_builtin (d->icode, arglist, target);
15294 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15295 if (d->code == fcode)
15296 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
15298 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15299 if (d->code == fcode)
15300 return ix86_expand_sse_comi (d, arglist, target);
15302 gcc_unreachable ();
15305 /* Store OPERAND to the memory after reload is completed. This means
15306 that we can't easily use assign_stack_local. */
15307 rtx
15308 ix86_force_to_memory (enum machine_mode mode, rtx operand)
15310 rtx result;
15312 gcc_assert (reload_completed);
15313 if (TARGET_RED_ZONE)
15315 result = gen_rtx_MEM (mode,
15316 gen_rtx_PLUS (Pmode,
15317 stack_pointer_rtx,
15318 GEN_INT (-RED_ZONE_SIZE)));
15319 emit_move_insn (result, operand);
15321 else if (!TARGET_RED_ZONE && TARGET_64BIT)
15323 switch (mode)
15325 case HImode:
15326 case SImode:
15327 operand = gen_lowpart (DImode, operand);
15328 /* FALLTHRU */
15329 case DImode:
15330 emit_insn (
15331 gen_rtx_SET (VOIDmode,
15332 gen_rtx_MEM (DImode,
15333 gen_rtx_PRE_DEC (DImode,
15334 stack_pointer_rtx)),
15335 operand));
15336 break;
15337 default:
15338 gcc_unreachable ();
15340 result = gen_rtx_MEM (mode, stack_pointer_rtx);
15342 else
15344 switch (mode)
15346 case DImode:
15348 rtx operands[2];
15349 split_di (&operand, 1, operands, operands + 1);
15350 emit_insn (
15351 gen_rtx_SET (VOIDmode,
15352 gen_rtx_MEM (SImode,
15353 gen_rtx_PRE_DEC (Pmode,
15354 stack_pointer_rtx)),
15355 operands[1]));
15356 emit_insn (
15357 gen_rtx_SET (VOIDmode,
15358 gen_rtx_MEM (SImode,
15359 gen_rtx_PRE_DEC (Pmode,
15360 stack_pointer_rtx)),
15361 operands[0]));
15363 break;
15364 case HImode:
15365 /* It is better to store HImodes as SImodes. */
15366 if (!TARGET_PARTIAL_REG_STALL)
15367 operand = gen_lowpart (SImode, operand);
15368 /* FALLTHRU */
15369 case SImode:
15370 emit_insn (
15371 gen_rtx_SET (VOIDmode,
15372 gen_rtx_MEM (GET_MODE (operand),
15373 gen_rtx_PRE_DEC (SImode,
15374 stack_pointer_rtx)),
15375 operand));
15376 break;
15377 default:
15378 gcc_unreachable ();
15380 result = gen_rtx_MEM (mode, stack_pointer_rtx);
15382 return result;
15385 /* Free operand from the memory. */
15386 void
15387 ix86_free_from_memory (enum machine_mode mode)
15389 if (!TARGET_RED_ZONE)
15391 int size;
15393 if (mode == DImode || TARGET_64BIT)
15394 size = 8;
15395 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
15396 size = 2;
15397 else
15398 size = 4;
15399 /* Use LEA to deallocate stack space. In peephole2 it will be converted
15400 to a pop or add instruction if registers are available. */
15401 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
15402 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
15403 GEN_INT (size))));
15407 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
15408 QImode must go into class Q_REGS.
15409 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
15410 movdf to do mem-to-mem moves through integer regs. */
15411 enum reg_class
15412 ix86_preferred_reload_class (rtx x, enum reg_class class)
15414 /* We're only allowed to return a subclass of CLASS. Many of the
15415 following checks fail for NO_REGS, so eliminate that early. */
15416 if (class == NO_REGS)
15417 return NO_REGS;
15419 /* All classes can load zeros. */
15420 if (x == CONST0_RTX (GET_MODE (x)))
15421 return class;
15423 /* Floating-point constants need more complex checks. */
15424 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
15426 /* General regs can load everything. */
15427 if (reg_class_subset_p (class, GENERAL_REGS))
15428 return class;
15430 /* Floats can load 0 and 1 plus some others. Note that we eliminated
15431 zero above. We only want to wind up preferring 80387 registers if
15432 we plan on doing computation with them. */
15433 if (TARGET_80387
15434 && (TARGET_MIX_SSE_I387
15435 || !(TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (x))))
15436 && standard_80387_constant_p (x))
15438 /* Limit class to non-sse. */
15439 if (class == FLOAT_SSE_REGS)
15440 return FLOAT_REGS;
15441 if (class == FP_TOP_SSE_REGS)
15442 return FP_TOP_REG;
15443 if (class == FP_SECOND_SSE_REGS)
15444 return FP_SECOND_REG;
15445 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
15446 return class;
15449 return NO_REGS;
15451 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
15452 return NO_REGS;
15453 if (MAYBE_SSE_CLASS_P (class) && CONSTANT_P (x))
15454 return NO_REGS;
15456 /* Generally when we see PLUS here, it's the function invariant
15457 (plus soft-fp const_int), which can only be computed into general
15458 regs. */
15459 if (GET_CODE (x) == PLUS)
15460 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
15462 /* QImode constants are easy to load, but non-constant QImode data
15463 must go into Q_REGS. */
15464 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
15466 if (reg_class_subset_p (class, Q_REGS))
15467 return class;
15468 if (reg_class_subset_p (Q_REGS, class))
15469 return Q_REGS;
15470 return NO_REGS;
15473 return class;
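/* Illustrative example: reloading the DFmode constant 1.0 into
   FLOAT_SSE_REGS under x87 math is narrowed to FLOAT_REGS above, since
   standard_80387_constant_p recognizes the constants that fld1/fldz and
   friends can materialize; other FP constants yield NO_REGS and therefore
   end up in the constant pool.  */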
15476 /* If we are copying between general and FP registers, we need a memory
15477 location. The same is true for SSE and MMX registers.
15479 The macro can't work reliably when one of the CLASSES is a class containing
15480 registers from multiple units (SSE, MMX, integer). We avoid this by never
15481 combining those units in a single alternative in the machine description.
15482 Ensure that this constraint holds to avoid unexpected surprises.
15484 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
15485 enforce these sanity checks. */
15487 int
15488 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
15489 enum machine_mode mode, int strict)
15491 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
15492 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
15493 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
15494 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
15495 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
15496 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
15498 gcc_assert (!strict);
15499 return true;
15502 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
15503 return true;
15505 /* ??? This is a lie. We do have moves between mmx/general, and for
15506 mmx/sse2. But by saying we need secondary memory we discourage the
15507 register allocator from using the mmx registers unless needed. */
15508 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
15509 return true;
15511 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
15513 /* SSE1 doesn't have any direct moves from other classes. */
15514 if (!TARGET_SSE2)
15515 return true;
15517 /* If the target says that inter-unit moves are more expensive
15518 than moving through memory, then don't generate them. */
15519 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
15520 return true;
15522 /* Between SSE and general, we have moves no larger than word size. */
15523 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
15524 return true;
15526 /* ??? For the cost of one register reformat penalty, we could use
15527 the same instructions to move SFmode and DFmode data, but the
15528 relevant move patterns don't support those alternatives. */
15529 if (mode == SFmode || mode == DFmode)
15530 return true;
15533 return false;
15536 /* Return true if the registers in CLASS cannot represent the change from
15537 modes FROM to TO. */
15539 bool
15540 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
15541 enum reg_class class)
15543 if (from == to)
15544 return false;
15546 /* x87 registers can't do subreg at all, as all values are reformatted
15547 to extended precision. */
15548 if (MAYBE_FLOAT_CLASS_P (class))
15549 return true;
15551 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
15553 /* Vector registers do not support QI or HImode loads. If we don't
15554 disallow a change to these modes, reload will assume it's ok to
15555 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
15556 the vec_dupv4hi pattern. */
15557 if (GET_MODE_SIZE (from) < 4)
15558 return true;
15560 /* Vector registers do not support subreg with nonzero offsets, which
15561 are otherwise valid for integer registers. Since we can't see
15562 whether we have a nonzero offset from here, prohibit all
15563 nonparadoxical subregs changing size. */
15564 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
15565 return true;
15568 return false;
15571 /* Return the cost of moving data from a register in class CLASS1 to
15572 one in class CLASS2.
15574 It is not required that the cost always equal 2 when FROM is the same as TO;
15575 on some machines it is expensive to move between registers if they are not
15576 general registers. */
15578 int
15579 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
15580 enum reg_class class2)
15582 /* In case we require secondary memory, compute cost of the store followed
15583 by load. In order to avoid bad register allocation choices, we need
15584 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
15586 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
15588 int cost = 1;
15590 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
15591 MEMORY_MOVE_COST (mode, class1, 1));
15592 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
15593 MEMORY_MOVE_COST (mode, class2, 1));
15595 /* In case of copying from a general purpose register we may emit multiple
15596 stores followed by a single load, causing a memory size mismatch stall.
15597 Count this as an arbitrarily high cost of 20. */
15598 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
15599 cost += 20;
15601 /* In the case of FP/MMX moves, the registers actually overlap, and we
15602 have to switch modes in order to treat them differently. */
15603 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
15604 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
15605 cost += 20;
15607 return cost;
15610 /* Moves between SSE/MMX and integer unit are expensive. */
15611 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
15612 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
15613 return ix86_cost->mmxsse_to_integer;
15614 if (MAYBE_FLOAT_CLASS_P (class1))
15615 return ix86_cost->fp_move;
15616 if (MAYBE_SSE_CLASS_P (class1))
15617 return ix86_cost->sse_move;
15618 if (MAYBE_MMX_CLASS_P (class1))
15619 return ix86_cost->mmx_move;
15620 return 2;
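/* Worked example with made-up cost numbers: a DFmode move between an SSE
   class and GENERAL_REGS needs secondary memory, so with hypothetical
   MEMORY_MOVE_COST values of 4 (store) and 6 (load) for both classes the
   result is 1 + 6 + 6 = 13, plus 20 more when copying from the class that
   needs more registers for the mode, or when FP and MMX registers
   overlap.  */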
15623 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
15625 bool
15626 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
15628 /* Flags and only flags can only hold CCmode values. */
15629 if (CC_REGNO_P (regno))
15630 return GET_MODE_CLASS (mode) == MODE_CC;
15631 if (GET_MODE_CLASS (mode) == MODE_CC
15632 || GET_MODE_CLASS (mode) == MODE_RANDOM
15633 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
15634 return 0;
15635 if (FP_REGNO_P (regno))
15636 return VALID_FP_MODE_P (mode);
15637 if (SSE_REGNO_P (regno))
15639 /* We implement the move patterns for all vector modes into and
15640 out of SSE registers, even when no operation instructions
15641 are available. */
15642 return (VALID_SSE_REG_MODE (mode)
15643 || VALID_SSE2_REG_MODE (mode)
15644 || VALID_MMX_REG_MODE (mode)
15645 || VALID_MMX_REG_MODE_3DNOW (mode));
15647 if (MMX_REGNO_P (regno))
15649 /* We implement the move patterns for 3DNOW modes even in MMX mode,
15650 so if the register is available at all, then we can move data of
15651 the given mode into or out of it. */
15652 return (VALID_MMX_REG_MODE (mode)
15653 || VALID_MMX_REG_MODE_3DNOW (mode));
15656 if (mode == QImode)
15658 /* Take care with QImode values - they can be in non-QI regs,
15659 but then they do cause partial register stalls. */
15660 if (regno < 4 || TARGET_64BIT)
15661 return 1;
15662 if (!TARGET_PARTIAL_REG_STALL)
15663 return 1;
15664 return reload_in_progress || reload_completed;
15666 /* We handle both integer and floats in the general purpose registers. */
15667 else if (VALID_INT_MODE_P (mode))
15668 return 1;
15669 else if (VALID_FP_MODE_P (mode))
15670 return 1;
15671 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
15672 on to use that value in smaller contexts, this can easily force a
15673 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
15674 supporting DImode, allow it. */
15675 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
15676 return 1;
15678 return 0;
15681 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
15682 tieable integer mode. */
15684 static bool
15685 ix86_tieable_integer_mode_p (enum machine_mode mode)
15687 switch (mode)
15689 case HImode:
15690 case SImode:
15691 return true;
15693 case QImode:
15694 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
15696 case DImode:
15697 return TARGET_64BIT;
15699 default:
15700 return false;
15704 /* Return true if MODE1 is accessible in a register that can hold MODE2
15705 without copying. That is, all register classes that can hold MODE2
15706 can also hold MODE1. */
15708 bool
15709 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
15711 if (mode1 == mode2)
15712 return true;
15714 if (ix86_tieable_integer_mode_p (mode1)
15715 && ix86_tieable_integer_mode_p (mode2))
15716 return true;
15718 /* MODE2 being XFmode implies fp stack or general regs, which means we
15719 can tie any smaller floating point modes to it. Note that we do not
15720 tie this with TFmode. */
15721 if (mode2 == XFmode)
15722 return mode1 == SFmode || mode1 == DFmode;
15724 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
15725 that we can tie it with SFmode. */
15726 if (mode2 == DFmode)
15727 return mode1 == SFmode;
15729 /* If MODE2 is only appropriate for an SSE register, then tie with
15730 any other mode acceptable to SSE registers. */
15731 if (GET_MODE_SIZE (mode2) >= 8
15732 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
15733 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
15735 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
15736 with any other mode acceptable to MMX registers. */
15737 if (GET_MODE_SIZE (mode2) == 8
15738 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
15739 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
15741 return false;
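/* Worked example: ix86_modes_tieable_p (SFmode, XFmode),
   (DFmode, XFmode) and (SFmode, DFmode) are all true, since whatever can
   hold the wider FP mode can hold the narrower one, while
   (TFmode, XFmode) is not, per the note above.  */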
15744 /* Return the cost of moving data of mode M between a
15745 register and memory. A value of 2 is the default; this cost is
15746 relative to those in `REGISTER_MOVE_COST'.
15748 If moving between registers and memory is more expensive than
15749 between two registers, you should define this macro to express the
15750 relative cost.
15752 Also model the increased cost of moving QImode registers in non
15753 Q_REGS classes. */
15755 int
15756 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
15758 if (FLOAT_CLASS_P (class))
15760 int index;
15761 switch (mode)
15763 case SFmode:
15764 index = 0;
15765 break;
15766 case DFmode:
15767 index = 1;
15768 break;
15769 case XFmode:
15770 index = 2;
15771 break;
15772 default:
15773 return 100;
15775 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
15777 if (SSE_CLASS_P (class))
15779 int index;
15780 switch (GET_MODE_SIZE (mode))
15782 case 4:
15783 index = 0;
15784 break;
15785 case 8:
15786 index = 1;
15787 break;
15788 case 16:
15789 index = 2;
15790 break;
15791 default:
15792 return 100;
15794 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
15796 if (MMX_CLASS_P (class))
15798 int index;
15799 switch (GET_MODE_SIZE (mode))
15801 case 4:
15802 index = 0;
15803 break;
15804 case 8:
15805 index = 1;
15806 break;
15807 default:
15808 return 100;
15810 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
15812 switch (GET_MODE_SIZE (mode))
15814 case 1:
15815 if (in)
15816 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
15817 : ix86_cost->movzbl_load);
15818 else
15819 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
15820 : ix86_cost->int_store[0] + 4);
15821 break;
15822 case 2:
15823 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
15824 default:
15825 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
15826 if (mode == TFmode)
15827 mode = XFmode;
15828 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
15829 * (((int) GET_MODE_SIZE (mode)
15830 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
15834 /* Compute a (partial) cost for rtx X. Return true if the complete
15835 cost has been computed, and false if subexpressions should be
15836 scanned. In either case, *TOTAL contains the cost result. */
15838 static bool
15839 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
15841 enum machine_mode mode = GET_MODE (x);
15843 switch (code)
15845 case CONST_INT:
15846 case CONST:
15847 case LABEL_REF:
15848 case SYMBOL_REF:
15849 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
15850 *total = 3;
15851 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
15852 *total = 2;
15853 else if (flag_pic && SYMBOLIC_CONST (x)
15854 && (!TARGET_64BIT
15855 || (GET_CODE (x) != LABEL_REF
15856 && (GET_CODE (x) != SYMBOL_REF
15857 || !SYMBOL_REF_LOCAL_P (x)))))
15858 *total = 1;
15859 else
15860 *total = 0;
15861 return true;
15863 case CONST_DOUBLE:
15864 if (mode == VOIDmode)
15865 *total = 0;
15866 else
15867 switch (standard_80387_constant_p (x))
15869 case 1: /* 0.0 */
15870 *total = 1;
15871 break;
15872 default: /* Other constants */
15873 *total = 2;
15874 break;
15875 case 0:
15876 case -1:
15877 /* Start with (MEM (SYMBOL_REF)), since that's where
15878 it'll probably end up. Add a penalty for size. */
15879 *total = (COSTS_N_INSNS (1)
15880 + (flag_pic != 0 && !TARGET_64BIT)
15881 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
15882 break;
15884 return true;
15886 case ZERO_EXTEND:
15887 /* The zero extension is often completely free on x86_64, so make
15888 it as cheap as possible. */
15889 if (TARGET_64BIT && mode == DImode
15890 && GET_MODE (XEXP (x, 0)) == SImode)
15891 *total = 1;
15892 else if (TARGET_ZERO_EXTEND_WITH_AND)
15893 *total = COSTS_N_INSNS (ix86_cost->add);
15894 else
15895 *total = COSTS_N_INSNS (ix86_cost->movzx);
15896 return false;
15898 case SIGN_EXTEND:
15899 *total = COSTS_N_INSNS (ix86_cost->movsx);
15900 return false;
15902 case ASHIFT:
15903 if (GET_CODE (XEXP (x, 1)) == CONST_INT
15904 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
15906 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15907 if (value == 1)
15909 *total = COSTS_N_INSNS (ix86_cost->add);
15910 return false;
15912 if ((value == 2 || value == 3)
15913 && ix86_cost->lea <= ix86_cost->shift_const)
15915 *total = COSTS_N_INSNS (ix86_cost->lea);
15916 return false;
15919 /* FALLTHRU */
15921 case ROTATE:
15922 case ASHIFTRT:
15923 case LSHIFTRT:
15924 case ROTATERT:
15925 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
15927 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15929 if (INTVAL (XEXP (x, 1)) > 32)
15930 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
15931 else
15932 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
15934 else
15936 if (GET_CODE (XEXP (x, 1)) == AND)
15937 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
15938 else
15939 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
15942 else
15944 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15945 *total = COSTS_N_INSNS (ix86_cost->shift_const);
15946 else
15947 *total = COSTS_N_INSNS (ix86_cost->shift_var);
15949 return false;
15951 case MULT:
15952 if (FLOAT_MODE_P (mode))
15954 *total = COSTS_N_INSNS (ix86_cost->fmul);
15955 return false;
15957 else
15959 rtx op0 = XEXP (x, 0);
15960 rtx op1 = XEXP (x, 1);
15961 int nbits;
15962 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15964 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15965 for (nbits = 0; value != 0; value &= value - 1)
15966 nbits++;
15968 else
15969 /* This is arbitrary. */
15970 nbits = 7;
15972 /* Compute costs correctly for widening multiplication. */
15973 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
15974 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
15975 == GET_MODE_SIZE (mode))
15977 int is_mulwiden = 0;
15978 enum machine_mode inner_mode = GET_MODE (op0);
15980 if (GET_CODE (op0) == GET_CODE (op1))
15981 is_mulwiden = 1, op1 = XEXP (op1, 0);
15982 else if (GET_CODE (op1) == CONST_INT)
15984 if (GET_CODE (op0) == SIGN_EXTEND)
15985 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
15986 == INTVAL (op1);
15987 else
15988 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
15991 if (is_mulwiden)
15992 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
15995 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15996 + nbits * ix86_cost->mult_bit)
15997 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
15999 return true;
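/* Worked example: for x * 5 the constant 5 is 0b101, so the loop above
   counts nbits == 2 and the cost becomes
   COSTS_N_INSNS (mult_init[MODE_INDEX (mode)] + 2 * mult_bit) plus the
   rtx_cost of the two operands.  */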
16002 case DIV:
16003 case UDIV:
16004 case MOD:
16005 case UMOD:
16006 if (FLOAT_MODE_P (mode))
16007 *total = COSTS_N_INSNS (ix86_cost->fdiv);
16008 else
16009 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
16010 return false;
16012 case PLUS:
16013 if (FLOAT_MODE_P (mode))
16014 *total = COSTS_N_INSNS (ix86_cost->fadd);
16015 else if (GET_MODE_CLASS (mode) == MODE_INT
16016 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
16018 if (GET_CODE (XEXP (x, 0)) == PLUS
16019 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
16020 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
16021 && CONSTANT_P (XEXP (x, 1)))
16023 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
16024 if (val == 2 || val == 4 || val == 8)
16026 *total = COSTS_N_INSNS (ix86_cost->lea);
16027 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
16028 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
16029 outer_code);
16030 *total += rtx_cost (XEXP (x, 1), outer_code);
16031 return true;
16034 else if (GET_CODE (XEXP (x, 0)) == MULT
16035 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
16037 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
16038 if (val == 2 || val == 4 || val == 8)
16040 *total = COSTS_N_INSNS (ix86_cost->lea);
16041 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
16042 *total += rtx_cost (XEXP (x, 1), outer_code);
16043 return true;
16046 else if (GET_CODE (XEXP (x, 0)) == PLUS)
16048 *total = COSTS_N_INSNS (ix86_cost->lea);
16049 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
16050 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
16051 *total += rtx_cost (XEXP (x, 1), outer_code);
16052 return true;
16055 /* FALLTHRU */
16057 case MINUS:
16058 if (FLOAT_MODE_P (mode))
16060 *total = COSTS_N_INSNS (ix86_cost->fadd);
16061 return false;
16063 /* FALLTHRU */
16065 case AND:
16066 case IOR:
16067 case XOR:
16068 if (!TARGET_64BIT && mode == DImode)
16070 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
16071 + (rtx_cost (XEXP (x, 0), outer_code)
16072 << (GET_MODE (XEXP (x, 0)) != DImode))
16073 + (rtx_cost (XEXP (x, 1), outer_code)
16074 << (GET_MODE (XEXP (x, 1)) != DImode)));
16075 return true;
16077 /* FALLTHRU */
16079 case NEG:
16080 if (FLOAT_MODE_P (mode))
16082 *total = COSTS_N_INSNS (ix86_cost->fchs);
16083 return false;
16085 /* FALLTHRU */
16087 case NOT:
16088 if (!TARGET_64BIT && mode == DImode)
16089 *total = COSTS_N_INSNS (ix86_cost->add * 2);
16090 else
16091 *total = COSTS_N_INSNS (ix86_cost->add);
16092 return false;
16094 case COMPARE:
16095 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
16096 && XEXP (XEXP (x, 0), 1) == const1_rtx
16097 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
16098 && XEXP (x, 1) == const0_rtx)
16100 /* This kind of construct is implemented using test[bwl].
16101 Treat it as if we had an AND. */
16102 *total = (COSTS_N_INSNS (ix86_cost->add)
16103 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
16104 + rtx_cost (const1_rtx, outer_code));
16105 return true;
16107 return false;
16109 case FLOAT_EXTEND:
16110 if (!TARGET_SSE_MATH
16111 || mode == XFmode
16112 || (mode == DFmode && !TARGET_SSE2))
16113 *total = 0;
16114 return false;
16116 case ABS:
16117 if (FLOAT_MODE_P (mode))
16118 *total = COSTS_N_INSNS (ix86_cost->fabs);
16119 return false;
16121 case SQRT:
16122 if (FLOAT_MODE_P (mode))
16123 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
16124 return false;
16126 case UNSPEC:
16127 if (XINT (x, 1) == UNSPEC_TP)
16128 *total = 0;
16129 return false;
16131 default:
16132 return false;
16136 #if TARGET_MACHO
16138 static int current_machopic_label_num;
16140 /* Given a symbol name and its associated stub, write out the
16141 definition of the stub. */
16143 void
16144 machopic_output_stub (FILE *file, const char *symb, const char *stub)
16146 unsigned int length;
16147 char *binder_name, *symbol_name, lazy_ptr_name[32];
16148 int label = ++current_machopic_label_num;
16150 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
16151 symb = (*targetm.strip_name_encoding) (symb);
16153 length = strlen (stub);
16154 binder_name = alloca (length + 32);
16155 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
16157 length = strlen (symb);
16158 symbol_name = alloca (length + 32);
16159 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
16161 sprintf (lazy_ptr_name, "L%d$lz", label);
16163 if (MACHOPIC_PURE)
16164 machopic_picsymbol_stub_section ();
16165 else
16166 machopic_symbol_stub_section ();
16168 fprintf (file, "%s:\n", stub);
16169 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
16171 if (MACHOPIC_PURE)
16173 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
16174 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
16175 fprintf (file, "\tjmp %%edx\n");
16177 else
16178 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
16180 fprintf (file, "%s:\n", binder_name);
16182 if (MACHOPIC_PURE)
16184 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
16185 fprintf (file, "\tpushl %%eax\n");
16187 else
16188 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
16190 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
16192 machopic_lazy_symbol_ptr_section ();
16193 fprintf (file, "%s:\n", lazy_ptr_name);
16194 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
16195 fprintf (file, "\t.long %s\n", binder_name);
16197 #endif /* TARGET_MACHO */
16199 /* Order the registers for register allocator. */
16201 void
16202 x86_order_regs_for_local_alloc (void)
16204 int pos = 0;
16205 int i;
16207 /* First allocate the local general purpose registers. */
16208 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
16209 if (GENERAL_REGNO_P (i) && call_used_regs[i])
16210 reg_alloc_order [pos++] = i;
16212 /* Global general purpose registers. */
16213 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
16214 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
16215 reg_alloc_order [pos++] = i;
16217 /* x87 registers come first in case we are doing FP math
16218 using them. */
16219 if (!TARGET_SSE_MATH)
16220 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
16221 reg_alloc_order [pos++] = i;
16223 /* SSE registers. */
16224 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16225 reg_alloc_order [pos++] = i;
16226 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16227 reg_alloc_order [pos++] = i;
16229 /* x87 registers. */
16230 if (TARGET_SSE_MATH)
16231 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
16232 reg_alloc_order [pos++] = i;
16234 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
16235 reg_alloc_order [pos++] = i;
16237 /* Initialize the rest of the array, as we do not allocate some registers
16238 at all. */
16239 while (pos < FIRST_PSEUDO_REGISTER)
16240 reg_alloc_order [pos++] = 0;
16243 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
16244 struct attribute_spec.handler. */
16245 static tree
16246 ix86_handle_struct_attribute (tree *node, tree name,
16247 tree args ATTRIBUTE_UNUSED,
16248 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
16250 tree *type = NULL;
16251 if (DECL_P (*node))
16253 if (TREE_CODE (*node) == TYPE_DECL)
16254 type = &TREE_TYPE (*node);
16256 else
16257 type = node;
16259 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
16260 || TREE_CODE (*type) == UNION_TYPE)))
16262 warning (OPT_Wattributes, "%qs attribute ignored",
16263 IDENTIFIER_POINTER (name));
16264 *no_add_attrs = true;
16267 else if ((is_attribute_p ("ms_struct", name)
16268 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
16269 || ((is_attribute_p ("gcc_struct", name)
16270 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
16272 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
16273 IDENTIFIER_POINTER (name));
16274 *no_add_attrs = true;
16277 return NULL_TREE;
16280 static bool
16281 ix86_ms_bitfield_layout_p (tree record_type)
16283 return (TARGET_MS_BITFIELD_LAYOUT &&
16284 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
16285 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
16288 /* Returns an expression indicating where the this parameter is
16289 located on entry to the FUNCTION. */
16291 static rtx
16292 x86_this_parameter (tree function)
16294 tree type = TREE_TYPE (function);
16296 if (TARGET_64BIT)
16298 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
16299 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
16302 if (ix86_function_regparm (type, function) > 0)
16304 tree parm;
16306 parm = TYPE_ARG_TYPES (type);
16307 /* Figure out whether or not the function has a variable number of
16308 arguments. */
16309 for (; parm; parm = TREE_CHAIN (parm))
16310 if (TREE_VALUE (parm) == void_type_node)
16311 break;
16312 /* If not, the this parameter is in the first argument. */
16313 if (parm)
16315 int regno = 0;
16316 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
16317 regno = 2;
16318 return gen_rtx_REG (SImode, regno);
16322 if (aggregate_value_p (TREE_TYPE (type), type))
16323 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
16324 else
16325 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
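/* Illustrative summary of the cases above: on 64-bit targets `this' is in
   %rdi, or in %rsi when a hidden aggregate-return pointer takes %rdi; on
   ia32 it is in %ecx for fastcall, in %eax for regparm functions with a
   fixed argument list, and otherwise on the stack at 4(%esp), or 8(%esp)
   when an aggregate-return pointer is passed first.  */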
16328 /* Determine whether x86_output_mi_thunk can succeed. */
16330 static bool
16331 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
16332 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
16333 HOST_WIDE_INT vcall_offset, tree function)
16335 /* 64-bit can handle anything. */
16336 if (TARGET_64BIT)
16337 return true;
16339 /* For 32-bit, everything's fine if we have one free register. */
16340 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
16341 return true;
16343 /* Need a free register for vcall_offset. */
16344 if (vcall_offset)
16345 return false;
16347 /* Need a free register for GOT references. */
16348 if (flag_pic && !(*targetm.binds_local_p) (function))
16349 return false;
16351 /* Otherwise ok. */
16352 return true;
16355 /* Output the assembler code for a thunk function. THUNK_DECL is the
16356 declaration for the thunk function itself, FUNCTION is the decl for
16357 the target function. DELTA is an immediate constant offset to be
16358 added to THIS. If VCALL_OFFSET is nonzero, the word at
16359 *(*this + vcall_offset) should be added to THIS. */
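/* Rough C-level sketch of the code emitted below (illustrative only; the
   real output is the assembly written by output_asm_insn):

     this = (char *) this + delta;
     if (vcall_offset)
       this = (char *) this
              + *(ptrdiff_t *) (*(char **) this + vcall_offset);
     goto function;    (a tail jump, possibly via the GOT or a Mach-O stub)
*/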
16361 static void
16362 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
16363 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
16364 HOST_WIDE_INT vcall_offset, tree function)
16366 rtx xops[3];
16367 rtx this = x86_this_parameter (function);
16368 rtx this_reg, tmp;
16370 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
16371 pull it in now and let DELTA benefit. */
16372 if (REG_P (this))
16373 this_reg = this;
16374 else if (vcall_offset)
16376 /* Put the this parameter into %eax. */
16377 xops[0] = this;
16378 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
16379 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
16381 else
16382 this_reg = NULL_RTX;
16384 /* Adjust the this parameter by a fixed constant. */
16385 if (delta)
16387 xops[0] = GEN_INT (delta);
16388 xops[1] = this_reg ? this_reg : this;
16389 if (TARGET_64BIT)
16391 if (!x86_64_general_operand (xops[0], DImode))
16393 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
16394 xops[1] = tmp;
16395 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
16396 xops[0] = tmp;
16397 xops[1] = this;
16399 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
16401 else
16402 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
16405 /* Adjust the this parameter by a value stored in the vtable. */
16406 if (vcall_offset)
16408 if (TARGET_64BIT)
16409 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
16410 else
16412 int tmp_regno = 2 /* ECX */;
16413 if (lookup_attribute ("fastcall",
16414 TYPE_ATTRIBUTES (TREE_TYPE (function))))
16415 tmp_regno = 0 /* EAX */;
16416 tmp = gen_rtx_REG (SImode, tmp_regno);
16419 xops[0] = gen_rtx_MEM (Pmode, this_reg);
16420 xops[1] = tmp;
16421 if (TARGET_64BIT)
16422 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
16423 else
16424 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
16426 /* Adjust the this parameter. */
16427 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
16428 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
16430 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
16431 xops[0] = GEN_INT (vcall_offset);
16432 xops[1] = tmp2;
16433 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
16434 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
16436 xops[1] = this_reg;
16437 if (TARGET_64BIT)
16438 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
16439 else
16440 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
16443 /* If necessary, drop THIS back to its stack slot. */
16444 if (this_reg && this_reg != this)
16446 xops[0] = this_reg;
16447 xops[1] = this;
16448 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
16451 xops[0] = XEXP (DECL_RTL (function), 0);
16452 if (TARGET_64BIT)
16454 if (!flag_pic || (*targetm.binds_local_p) (function))
16455 output_asm_insn ("jmp\t%P0", xops);
16456 else
16458 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
16459 tmp = gen_rtx_CONST (Pmode, tmp);
16460 tmp = gen_rtx_MEM (QImode, tmp);
16461 xops[0] = tmp;
16462 output_asm_insn ("jmp\t%A0", xops);
16465 else
16467 if (!flag_pic || (*targetm.binds_local_p) (function))
16468 output_asm_insn ("jmp\t%P0", xops);
16469 else
16470 #if TARGET_MACHO
16471 if (TARGET_MACHO)
16473 rtx sym_ref = XEXP (DECL_RTL (function), 0);
16474 tmp = (gen_rtx_SYMBOL_REF
16475 (Pmode,
16476 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
16477 tmp = gen_rtx_MEM (QImode, tmp);
16478 xops[0] = tmp;
16479 output_asm_insn ("jmp\t%0", xops);
16481 else
16482 #endif /* TARGET_MACHO */
16484 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
16485 output_set_got (tmp);
16487 xops[1] = tmp;
16488 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
16489 output_asm_insn ("jmp\t{*}%1", xops);
16494 static void
16495 x86_file_start (void)
16497 default_file_start ();
16498 if (X86_FILE_START_VERSION_DIRECTIVE)
16499 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
16500 if (X86_FILE_START_FLTUSED)
16501 fputs ("\t.global\t__fltused\n", asm_out_file);
16502 if (ix86_asm_dialect == ASM_INTEL)
16503 fputs ("\t.intel_syntax\n", asm_out_file);
16506 int
16507 x86_field_alignment (tree field, int computed)
16509 enum machine_mode mode;
16510 tree type = TREE_TYPE (field);
16512 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
16513 return computed;
16514 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
16515 ? get_inner_array_type (type) : type);
16516 if (mode == DFmode || mode == DCmode
16517 || GET_MODE_CLASS (mode) == MODE_INT
16518 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
16519 return MIN (32, computed);
16520 return computed;
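/* Editorial illustration (not part of i386.c): the effect of the 32-bit
   cap above under the default ia32 ABI.  */

struct align_example { int i; double d; };
/* Without -malign-double, the DFmode member's alignment is capped at 32
   bits, so __builtin_offsetof (struct align_example, d) is 4 and
   sizeof (struct align_example) is 12; with -malign-double, or on 64-bit
   targets where COMPUTED is returned unchanged, they are 8 and 16.  */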
16523 /* Output assembler code to FILE to increment profiler label # LABELNO
16524 for profiling a function entry. */
16525 void
16526 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
16528 if (TARGET_64BIT)
16529 if (flag_pic)
16531 #ifndef NO_PROFILE_COUNTERS
16532 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
16533 #endif
16534 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
16536 else
16538 #ifndef NO_PROFILE_COUNTERS
16539 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
16540 #endif
16541 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
16543 else if (flag_pic)
16545 #ifndef NO_PROFILE_COUNTERS
16546 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
16547 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
16548 #endif
16549 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
16551 else
16553 #ifndef NO_PROFILE_COUNTERS
16554 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
16555 PROFILE_COUNT_REGISTER);
16556 #endif
16557 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
16561 /* We don't have exact information about the insn sizes, but we may assume
16562 quite safely that we are informed about all 1-byte insns and memory
16563 address sizes. This is enough to eliminate unnecessary padding in
16564 99% of cases. */
16566 static int
16567 min_insn_size (rtx insn)
16569 int l = 0;
16571 if (!INSN_P (insn) || !active_insn_p (insn))
16572 return 0;
16574 /* Discard alignments we've emitted and jump instructions. */
16575 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
16576 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
16577 return 0;
16578 if (GET_CODE (insn) == JUMP_INSN
16579 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
16580 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
16581 return 0;
16583 /* Important case: calls are always 5 bytes.
16584 It is common to have many calls in a row. */
16585 if (GET_CODE (insn) == CALL_INSN
16586 && symbolic_reference_mentioned_p (PATTERN (insn))
16587 && !SIBLING_CALL_P (insn))
16588 return 5;
16589 if (get_attr_length (insn) <= 1)
16590 return 1;
16592 /* For normal instructions we may rely on the sizes of addresses
16593 and the presence of a symbol to require 4 bytes of encoding.
16594 This is not the case for jumps, where references are PC-relative. */
16595 if (GET_CODE (insn) != JUMP_INSN)
16597 l = get_attr_length_address (insn);
16598 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
16599 l = 4;
16601 if (l)
16602 return 1+l;
16603 else
16604 return 2;
16607 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
16608 16-byte window. */
16610 static void
16611 ix86_avoid_jump_misspredicts (void)
16613 rtx insn, start = get_insns ();
16614 int nbytes = 0, njumps = 0;
16615 int isjump = 0;
16617 /* Look for all minimal intervals of instructions containing 4 jumps.
16618 The intervals are bounded by START and INSN. NBYTES is the total
16619 size of the instructions in the interval, including INSN but not
16620 including START. When NBYTES is smaller than 16 bytes, it is possible
16621 that the end of START and the end of INSN fall into the same 16-byte window.
16623 The smallest offset at which INSN can start in that window is the case where
16624 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
16625 We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN).
16627 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
16630 nbytes += min_insn_size (insn);
16631 if (dump_file)
16632 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
16633 INSN_UID (insn), min_insn_size (insn));
16634 if ((GET_CODE (insn) == JUMP_INSN
16635 && GET_CODE (PATTERN (insn)) != ADDR_VEC
16636 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
16637 || GET_CODE (insn) == CALL_INSN)
16638 njumps++;
16639 else
16640 continue;
16642 while (njumps > 3)
16644 start = NEXT_INSN (start);
16645 if ((GET_CODE (start) == JUMP_INSN
16646 && GET_CODE (PATTERN (start)) != ADDR_VEC
16647 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
16648 || GET_CODE (start) == CALL_INSN)
16649 njumps--, isjump = 1;
16650 else
16651 isjump = 0;
16652 nbytes -= min_insn_size (start);
16654 gcc_assert (njumps >= 0);
16655 if (dump_file)
16656 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
16657 INSN_UID (start), INSN_UID (insn), nbytes);
16659 if (njumps == 3 && isjump && nbytes < 16)
16661 int padsize = 15 - nbytes + min_insn_size (insn);
16663 if (dump_file)
16664 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
16665 INSN_UID (insn), padsize);
16666 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
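/* Editorial sketch (not part of i386.c): the window scan above restated
   over plain arrays.  SIZE[i] is the estimated byte size of insn i and
   IS_JUMP[i] is nonzero for jumps and calls; the function returns the
   index of an insn before which padding would be emitted, or -1.  The
   helper is hypothetical and only approximates the RTL walk.  */

static int
find_dense_jump_window_sketch (const int *size, const int *is_jump, int n)
{
  int i, start = 0, nbytes = 0, njumps = 0, boundary_is_jump = 0;

  for (i = 0; i < n; i++)
    {
      nbytes += size[i];
      if (!is_jump[i])
	continue;
      njumps++;
      /* Shrink the window from the left until it holds at most 3 jumps,
	 remembering whether the insn just pushed out was itself a jump.  */
      while (njumps > 3)
	{
	  boundary_is_jump = is_jump[start];
	  njumps -= is_jump[start];
	  nbytes -= size[start];
	  start++;
	}
      /* Insn I is the 4th jump after the jump at the window boundary, and
	 all of them may share one 16-byte fetch window.  */
      if (njumps == 3 && boundary_is_jump && nbytes < 16)
	return i;
    }
  return -1;
}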
16671 /* The AMD Athlon works faster
16672 when RET is not the destination of a conditional jump or directly preceded
16673 by another jump instruction. We avoid the penalty by replacing the RET
16674 with a longer form of the return instruction in such cases. */
16675 static void
16676 ix86_pad_returns (void)
16678 edge e;
16679 edge_iterator ei;
16681 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
16683 basic_block bb = e->src;
16684 rtx ret = BB_END (bb);
16685 rtx prev;
16686 bool replace = false;
16688 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
16689 || !maybe_hot_bb_p (bb))
16690 continue;
16691 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
16692 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
16693 break;
16694 if (prev && GET_CODE (prev) == CODE_LABEL)
16696 edge e;
16697 edge_iterator ei;
16699 FOR_EACH_EDGE (e, ei, bb->preds)
16700 if (EDGE_FREQUENCY (e) && e->src->index >= 0
16701 && !(e->flags & EDGE_FALLTHRU))
16702 replace = true;
16704 if (!replace)
16706 prev = prev_active_insn (ret);
16707 if (prev
16708 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
16709 || GET_CODE (prev) == CALL_INSN))
16710 replace = true;
16711 /* Empty functions get a branch misprediction even when the jump destination
16712 is not visible to us. */
16713 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
16714 replace = true;
16716 if (replace)
16718 emit_insn_before (gen_return_internal_long (), ret);
16719 delete_insn (ret);
16724 /* Implement machine-specific optimizations. We implement padding of returns
16725 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
16726 static void
16727 ix86_reorg (void)
16729 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
16730 ix86_pad_returns ();
16731 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
16732 ix86_avoid_jump_misspredicts ();
16735 /* Return nonzero when a QImode register that must be represented via a REX
16736 prefix is used. */
16737 bool
16738 x86_extended_QIreg_mentioned_p (rtx insn)
16740 int i;
16741 extract_insn_cached (insn);
16742 for (i = 0; i < recog_data.n_operands; i++)
16743 if (REG_P (recog_data.operand[i])
16744 && REGNO (recog_data.operand[i]) >= 4)
16745 return true;
16746 return false;
16749 /* Return nonzero when P points to a register encoded via a REX prefix.
16750 Called via for_each_rtx. */
16751 static int
16752 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
16754 unsigned int regno;
16755 if (!REG_P (*p))
16756 return 0;
16757 regno = REGNO (*p);
16758 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
16761 /* Return true when INSN mentions a register that must be encoded using a
16762 REX prefix. */
16763 bool
16764 x86_extended_reg_mentioned_p (rtx insn)
16766 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
16769 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
16770 optabs would emit if we didn't have TFmode patterns. */
16772 void
16773 x86_emit_floatuns (rtx operands[2])
16775 rtx neglab, donelab, i0, i1, f0, in, out;
16776 enum machine_mode mode, inmode;
16778 inmode = GET_MODE (operands[1]);
16779 gcc_assert (inmode == SImode || inmode == DImode);
16781 out = operands[0];
16782 in = force_reg (inmode, operands[1]);
16783 mode = GET_MODE (out);
16784 neglab = gen_label_rtx ();
16785 donelab = gen_label_rtx ();
16786 i1 = gen_reg_rtx (Pmode);
16787 f0 = gen_reg_rtx (mode);
16789 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
16791 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
16792 emit_jump_insn (gen_jump (donelab));
16793 emit_barrier ();
16795 emit_label (neglab);
16797 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
16798 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
16799 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
16800 expand_float (f0, i0, 0);
16801 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
16803 emit_label (donelab);
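/* Editorial sketch (not part of i386.c): the same conversion in plain C
   for the DImode case.  When the operand does not fit a signed
   conversion, halve it with the low bit folded back in (so the final
   rounding stays correct), convert, and double the result.  */

static double
floatuns_sketch (unsigned long long x)
{
  unsigned long long half;
  double d;

  if ((long long) x >= 0)
    return (double) (long long) x;
  half = (x >> 1) | (x & 1);
  d = (double) (long long) half;
  return d + d;
}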
16806 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
16807 with all elements equal to VAL. Return true if successful. */
16809 static bool
16810 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
16811 rtx target, rtx val)
16813 enum machine_mode smode, wsmode, wvmode;
16814 rtx x;
16816 switch (mode)
16818 case V2SImode:
16819 case V2SFmode:
16820 if (!mmx_ok && !TARGET_SSE)
16821 return false;
16822 /* FALLTHRU */
16824 case V2DFmode:
16825 case V2DImode:
16826 case V4SFmode:
16827 case V4SImode:
16828 val = force_reg (GET_MODE_INNER (mode), val);
16829 x = gen_rtx_VEC_DUPLICATE (mode, val);
16830 emit_insn (gen_rtx_SET (VOIDmode, target, x));
16831 return true;
16833 case V4HImode:
16834 if (!mmx_ok)
16835 return false;
16836 if (TARGET_SSE || TARGET_3DNOW_A)
16838 val = gen_lowpart (SImode, val);
16839 x = gen_rtx_TRUNCATE (HImode, val);
16840 x = gen_rtx_VEC_DUPLICATE (mode, x);
16841 emit_insn (gen_rtx_SET (VOIDmode, target, x));
16842 return true;
16844 else
16846 smode = HImode;
16847 wsmode = SImode;
16848 wvmode = V2SImode;
16849 goto widen;
16852 case V8QImode:
16853 if (!mmx_ok)
16854 return false;
16855 smode = QImode;
16856 wsmode = HImode;
16857 wvmode = V4HImode;
16858 goto widen;
16859 case V8HImode:
16860 smode = HImode;
16861 wsmode = SImode;
16862 wvmode = V4SImode;
16863 goto widen;
16864 case V16QImode:
16865 smode = QImode;
16866 wsmode = HImode;
16867 wvmode = V8HImode;
16868 goto widen;
16869 widen:
16870 /* Replicate the value once into the next wider mode and recurse. */
16871 val = convert_modes (wsmode, smode, val, true);
16872 x = expand_simple_binop (wsmode, ASHIFT, val,
16873 GEN_INT (GET_MODE_BITSIZE (smode)),
16874 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16875 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
16877 x = gen_reg_rtx (wvmode);
16878 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
16879 gcc_unreachable ();
16880 emit_move_insn (target, gen_lowpart (mode, x));
16881 return true;
16883 default:
16884 return false;
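/* Editorial sketch (not part of i386.c): the scalar widening used above.
   A QImode value is replicated into HImode and then SImode by OR-ing it
   with itself shifted left by the narrower element's width; the resulting
   word is then broadcast as a vector.  */

static unsigned int
splat_byte_sketch (unsigned char b)
{
  unsigned int v = b;
  v |= v << 8;			/* QImode pair -> HImode */
  v |= v << 16;			/* HImode pair -> SImode */
  return v;
}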
16888 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
16889 whose low element is VAR, and other elements are zero. Return true
16890 if successful. */
16892 static bool
16893 ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode,
16894 rtx target, rtx var)
16896 enum machine_mode vsimode;
16897 rtx x;
16899 switch (mode)
16901 case V2SFmode:
16902 case V2SImode:
16903 if (!mmx_ok && !TARGET_SSE)
16904 return false;
16905 /* FALLTHRU */
16907 case V2DFmode:
16908 case V2DImode:
16909 var = force_reg (GET_MODE_INNER (mode), var);
16910 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
16911 emit_insn (gen_rtx_SET (VOIDmode, target, x));
16912 return true;
16914 case V4SFmode:
16915 case V4SImode:
16916 var = force_reg (GET_MODE_INNER (mode), var);
16917 x = gen_rtx_VEC_DUPLICATE (mode, var);
16918 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
16919 emit_insn (gen_rtx_SET (VOIDmode, target, x));
16920 return true;
16922 case V8HImode:
16923 case V16QImode:
16924 vsimode = V4SImode;
16925 goto widen;
16926 case V4HImode:
16927 case V8QImode:
16928 if (!mmx_ok)
16929 return false;
16930 vsimode = V2SImode;
16931 goto widen;
16932 widen:
16933 /* Zero extend the variable element to SImode and recurse. */
16934 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
16936 x = gen_reg_rtx (vsimode);
16937 if (!ix86_expand_vector_init_low_nonzero (mmx_ok, vsimode, x, var))
16938 gcc_unreachable ();
16940 emit_move_insn (target, gen_lowpart (mode, x));
16941 return true;
16943 default:
16944 return false;
16948 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
16949 consisting of the values in VALS. It is known that all elements
16950 except ONE_VAR are constants. Return true if successful. */
16952 static bool
16953 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
16954 rtx target, rtx vals, int one_var)
16956 rtx var = XVECEXP (vals, 0, one_var);
16957 enum machine_mode wmode;
16958 rtx const_vec, x;
16960 XVECEXP (vals, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
16961 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
16963 switch (mode)
16965 case V2DFmode:
16966 case V2DImode:
16967 case V2SFmode:
16968 case V2SImode:
16969 /* For the two-element vectors, it's just as easy to use
16970 the general case. */
16971 return false;
16973 case V4SFmode:
16974 case V4SImode:
16975 case V8HImode:
16976 case V4HImode:
16977 break;
16979 case V16QImode:
16980 wmode = V8HImode;
16981 goto widen;
16982 case V8QImode:
16983 wmode = V4HImode;
16984 goto widen;
16985 widen:
16986 /* There's no way to set one QImode entry easily. Combine
16987 the variable value with its adjacent constant value, and
16988 promote to an HImode set. */
16989 x = XVECEXP (vals, 0, one_var ^ 1);
16990 if (one_var & 1)
16992 var = convert_modes (HImode, QImode, var, true);
16993 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
16994 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16995 x = GEN_INT (INTVAL (x) & 0xff);
16997 else
16999 var = convert_modes (HImode, QImode, var, true);
17000 x = gen_int_mode (INTVAL (x) << 8, HImode);
17002 if (x != const0_rtx)
17003 var = expand_simple_binop (HImode, IOR, var, x, var,
17004 1, OPTAB_LIB_WIDEN);
17006 x = gen_reg_rtx (wmode);
17007 emit_move_insn (x, gen_lowpart (wmode, const_vec));
17008 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
17010 emit_move_insn (target, gen_lowpart (mode, x));
17011 return true;
17013 default:
17014 return false;
17017 emit_move_insn (target, const_vec);
17018 ix86_expand_vector_set (mmx_ok, target, var, one_var);
17019 return true;
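/* Editorial sketch (not part of i386.c): the byte pairing performed in
   the V16QImode/V8QImode case above.  The variable byte and its constant
   neighbour are merged into one HImode lane so that the HImode
   vector-set path can be reused.  */

static unsigned short
pair_bytes_sketch (unsigned char var, unsigned char neighbour, int var_index_is_odd)
{
  if (var_index_is_odd)
    return (unsigned short) ((var << 8) | neighbour);	/* variable in high byte */
  return (unsigned short) ((neighbour << 8) | var);	/* variable in low byte */
}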
17022 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
17023 all values variable, and none identical. */
17025 static void
17026 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
17027 rtx target, rtx vals)
17029 enum machine_mode half_mode = GET_MODE_INNER (mode);
17030 rtx op0 = NULL, op1 = NULL;
17031 bool use_vec_concat = false;
17033 switch (mode)
17035 case V2SFmode:
17036 case V2SImode:
17037 if (!mmx_ok && !TARGET_SSE)
17038 break;
17039 /* FALLTHRU */
17041 case V2DFmode:
17042 case V2DImode:
17043 /* For the two-element vectors, we always implement VEC_CONCAT. */
17044 op0 = XVECEXP (vals, 0, 0);
17045 op1 = XVECEXP (vals, 0, 1);
17046 use_vec_concat = true;
17047 break;
17049 case V4SFmode:
17050 half_mode = V2SFmode;
17051 goto half;
17052 case V4SImode:
17053 half_mode = V2SImode;
17054 goto half;
17055 half:
17057 rtvec v;
17059 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
17060 Recurse to load the two halves. */
17062 op0 = gen_reg_rtx (half_mode);
17063 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
17064 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
17066 op1 = gen_reg_rtx (half_mode);
17067 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
17068 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
17070 use_vec_concat = true;
17072 break;
17074 case V8HImode:
17075 case V16QImode:
17076 case V4HImode:
17077 case V8QImode:
17078 break;
17080 default:
17081 gcc_unreachable ();
17084 if (use_vec_concat)
17086 if (!register_operand (op0, half_mode))
17087 op0 = force_reg (half_mode, op0);
17088 if (!register_operand (op1, half_mode))
17089 op1 = force_reg (half_mode, op1);
17091 emit_insn (gen_rtx_SET (VOIDmode, target,
17092 gen_rtx_VEC_CONCAT (mode, op0, op1)));
17094 else
17096 int i, j, n_elts, n_words, n_elt_per_word;
17097 enum machine_mode inner_mode;
17098 rtx words[4], shift;
17100 inner_mode = GET_MODE_INNER (mode);
17101 n_elts = GET_MODE_NUNITS (mode);
17102 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
17103 n_elt_per_word = n_elts / n_words;
17104 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
17106 for (i = 0; i < n_words; ++i)
17108 rtx word = NULL_RTX;
17110 for (j = 0; j < n_elt_per_word; ++j)
17112 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
17113 elt = convert_modes (word_mode, inner_mode, elt, true);
17115 if (j == 0)
17116 word = elt;
17117 else
17119 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
17120 word, 1, OPTAB_LIB_WIDEN);
17121 word = expand_simple_binop (word_mode, IOR, word, elt,
17122 word, 1, OPTAB_LIB_WIDEN);
17126 words[i] = word;
17129 if (n_words == 1)
17130 emit_move_insn (target, gen_lowpart (mode, words[0]));
17131 else if (n_words == 2)
17133 rtx tmp = gen_reg_rtx (mode);
17134 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
17135 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
17136 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
17137 emit_move_insn (target, tmp);
17139 else if (n_words == 4)
17141 rtx tmp = gen_reg_rtx (V4SImode);
17142 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
17143 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
17144 emit_move_insn (target, gen_lowpart (mode, tmp));
17146 else
17147 gcc_unreachable ();
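/* Editorial sketch (not part of i386.c): the word-building loop above,
   restated for 16-bit elements packed into a 32-bit word.  Elements with
   higher vector indices end up in the more significant bits, which on a
   little-endian target leaves element 0 at the lowest address.  */

static unsigned int
pack_word_sketch (const unsigned short *elts, int n_elt_per_word)
{
  unsigned int word = elts[n_elt_per_word - 1];
  int j;

  for (j = 1; j < n_elt_per_word; ++j)
    word = (word << 16) | elts[n_elt_per_word - 1 - j];
  return word;
}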
17151 /* Initialize vector TARGET via VALS. Suppress the use of MMX
17152 instructions unless MMX_OK is true. */
17154 void
17155 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
17157 enum machine_mode mode = GET_MODE (target);
17158 enum machine_mode inner_mode = GET_MODE_INNER (mode);
17159 int n_elts = GET_MODE_NUNITS (mode);
17160 int n_var = 0, one_var = -1;
17161 bool all_same = true, all_const_zero = true;
17162 int i;
17163 rtx x;
17165 for (i = 0; i < n_elts; ++i)
17167 x = XVECEXP (vals, 0, i);
17168 if (!CONSTANT_P (x))
17169 n_var++, one_var = i;
17170 else if (x != CONST0_RTX (inner_mode))
17171 all_const_zero = false;
17172 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
17173 all_same = false;
17176 /* Constants are best loaded from the constant pool. */
17177 if (n_var == 0)
17179 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
17180 return;
17183 /* If all values are identical, broadcast the value. */
17184 if (all_same
17185 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
17186 XVECEXP (vals, 0, 0)))
17187 return;
17189 /* Values where only one field is non-constant are best loaded from
17190 the pool and overwritten via move later. */
17191 if (n_var == 1)
17193 if (all_const_zero && one_var == 0
17194 && ix86_expand_vector_init_low_nonzero (mmx_ok, mode, target,
17195 XVECEXP (vals, 0, 0)))
17196 return;
17198 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
17199 return;
17202 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
17205 void
17206 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
17208 enum machine_mode mode = GET_MODE (target);
17209 enum machine_mode inner_mode = GET_MODE_INNER (mode);
17210 bool use_vec_merge = false;
17211 rtx tmp;
17213 switch (mode)
17215 case V2SFmode:
17216 case V2SImode:
17217 if (mmx_ok)
17219 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
17220 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
17221 if (elt == 0)
17222 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
17223 else
17224 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
17225 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
17226 return;
17228 break;
17230 case V2DFmode:
17231 case V2DImode:
17233 rtx op0, op1;
17235 /* For the two-element vectors, we implement a VEC_CONCAT with
17236 the extraction of the other element. */
17238 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
17239 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
17241 if (elt == 0)
17242 op0 = val, op1 = tmp;
17243 else
17244 op0 = tmp, op1 = val;
17246 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
17247 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
17249 return;
17251 case V4SFmode:
17252 switch (elt)
17254 case 0:
17255 use_vec_merge = true;
17256 break;
17258 case 1:
17259 /* tmp = target = A B C D */
17260 tmp = copy_to_reg (target);
17261 /* target = A A B B */
17262 emit_insn (gen_sse_unpcklps (target, target, target));
17263 /* target = X A B B */
17264 ix86_expand_vector_set (false, target, val, 0);
17265 /* target = A X C D */
17266 emit_insn (gen_sse_shufps_1 (target, target, tmp,
17267 GEN_INT (1), GEN_INT (0),
17268 GEN_INT (2+4), GEN_INT (3+4)));
17269 return;
17271 case 2:
17272 /* tmp = target = A B C D */
17273 tmp = copy_to_reg (target);
17274 /* tmp = X B C D */
17275 ix86_expand_vector_set (false, tmp, val, 0);
17276 /* target = A B X D */
17277 emit_insn (gen_sse_shufps_1 (target, target, tmp,
17278 GEN_INT (0), GEN_INT (1),
17279 GEN_INT (0+4), GEN_INT (3+4)));
17280 return;
17282 case 3:
17283 /* tmp = target = A B C D */
17284 tmp = copy_to_reg (target);
17285 /* tmp = X B C D */
17286 ix86_expand_vector_set (false, tmp, val, 0);
17287 /* target = A B C X */
17288 emit_insn (gen_sse_shufps_1 (target, target, tmp,
17289 GEN_INT (0), GEN_INT (1),
17290 GEN_INT (2+4), GEN_INT (0+4)));
17291 return;
17293 default:
17294 gcc_unreachable ();
17296 break;
17298 case V4SImode:
17299 /* Element 0 handled by vec_merge below. */
17300 if (elt == 0)
17302 use_vec_merge = true;
17303 break;
17306 if (TARGET_SSE2)
17308 /* With SSE2, use integer shuffles to swap element 0 and ELT,
17309 store into element 0, then shuffle them back. */
17311 rtx order[4];
17313 order[0] = GEN_INT (elt);
17314 order[1] = const1_rtx;
17315 order[2] = const2_rtx;
17316 order[3] = GEN_INT (3);
17317 order[elt] = const0_rtx;
17319 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
17320 order[1], order[2], order[3]));
17322 ix86_expand_vector_set (false, target, val, 0);
17324 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
17325 order[1], order[2], order[3]));
17327 else
17329 /* For SSE1, we have to reuse the V4SF code. */
17330 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
17331 gen_lowpart (SFmode, val), elt);
17333 return;
17335 case V8HImode:
17336 use_vec_merge = TARGET_SSE2;
17337 break;
17338 case V4HImode:
17339 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
17340 break;
17342 case V16QImode:
17343 case V8QImode:
17344 default:
17345 break;
17348 if (use_vec_merge)
17350 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
17351 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
17352 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
17354 else
17356 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
17358 emit_move_insn (mem, target);
17360 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
17361 emit_move_insn (tmp, val);
17363 emit_move_insn (target, mem);
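/* Editorial sketch (not part of i386.c): the memory fall-back at the end
   of ix86_expand_vector_set, shown for a vector of four floats.  The
   vector is spilled to a stack temporary, the element is stored through
   an adjusted address, and the whole vector is reloaded.  */

static void
set_elt_via_memory_sketch (float *vec, float val, int elt)
{
  float tmp[4];

  __builtin_memcpy (tmp, vec, sizeof tmp);	/* emit_move_insn (mem, target) */
  tmp[elt] = val;				/* store VAL at the adjusted address */
  __builtin_memcpy (vec, tmp, sizeof tmp);	/* emit_move_insn (target, mem) */
}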
17367 void
17368 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
17370 enum machine_mode mode = GET_MODE (vec);
17371 enum machine_mode inner_mode = GET_MODE_INNER (mode);
17372 bool use_vec_extr = false;
17373 rtx tmp;
17375 switch (mode)
17377 case V2SImode:
17378 case V2SFmode:
17379 if (!mmx_ok)
17380 break;
17381 /* FALLTHRU */
17383 case V2DFmode:
17384 case V2DImode:
17385 use_vec_extr = true;
17386 break;
17388 case V4SFmode:
17389 switch (elt)
17391 case 0:
17392 tmp = vec;
17393 break;
17395 case 1:
17396 case 3:
17397 tmp = gen_reg_rtx (mode);
17398 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
17399 GEN_INT (elt), GEN_INT (elt),
17400 GEN_INT (elt+4), GEN_INT (elt+4)));
17401 break;
17403 case 2:
17404 tmp = gen_reg_rtx (mode);
17405 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
17406 break;
17408 default:
17409 gcc_unreachable ();
17411 vec = tmp;
17412 use_vec_extr = true;
17413 elt = 0;
17414 break;
17416 case V4SImode:
17417 if (TARGET_SSE2)
17419 switch (elt)
17421 case 0:
17422 tmp = vec;
17423 break;
17425 case 1:
17426 case 3:
17427 tmp = gen_reg_rtx (mode);
17428 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
17429 GEN_INT (elt), GEN_INT (elt),
17430 GEN_INT (elt), GEN_INT (elt)));
17431 break;
17433 case 2:
17434 tmp = gen_reg_rtx (mode);
17435 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
17436 break;
17438 default:
17439 gcc_unreachable ();
17441 vec = tmp;
17442 use_vec_extr = true;
17443 elt = 0;
17445 else
17447 /* For SSE1, we have to reuse the V4SF code. */
17448 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
17449 gen_lowpart (V4SFmode, vec), elt);
17450 return;
17452 break;
17454 case V8HImode:
17455 use_vec_extr = TARGET_SSE2;
17456 break;
17457 case V4HImode:
17458 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
17459 break;
17461 case V16QImode:
17462 case V8QImode:
17463 /* ??? Could extract the appropriate HImode element and shift. */
17464 default:
17465 break;
17468 if (use_vec_extr)
17470 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
17471 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
17473 /* Let the rtl optimizers know about the zero extension performed. */
17474 if (inner_mode == HImode)
17476 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
17477 target = gen_lowpart (SImode, target);
17480 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
17482 else
17484 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
17486 emit_move_insn (mem, vec);
17488 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
17489 emit_move_insn (target, tmp);
17493 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
17494 pattern to reduce; DEST is the destination; IN is the input vector. */
17496 void
17497 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
17499 rtx tmp1, tmp2, tmp3;
17501 tmp1 = gen_reg_rtx (V4SFmode);
17502 tmp2 = gen_reg_rtx (V4SFmode);
17503 tmp3 = gen_reg_rtx (V4SFmode);
17505 emit_insn (gen_sse_movhlps (tmp1, in, in));
17506 emit_insn (fn (tmp2, tmp1, in));
17508 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
17509 GEN_INT (1), GEN_INT (1),
17510 GEN_INT (1+4), GEN_INT (1+4)));
17511 emit_insn (fn (dest, tmp2, tmp3));
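/* Editorial sketch (not part of i386.c): the reduction above in scalar
   form, taking addition as FN.  movhlps pairs lanes {0,1} with {2,3},
   and the final shufps/FN folds the two partial results; the reduced
   value ends up in lane 0 of DEST.  */

static float
reduc_plus_sketch (const float v[4])
{
  float a = v[0] + v[2];
  float b = v[1] + v[3];
  return a + b;
}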
17514 /* Implements target hook vector_mode_supported_p. */
17515 static bool
17516 ix86_vector_mode_supported_p (enum machine_mode mode)
17518 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
17519 return true;
17520 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
17521 return true;
17522 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
17523 return true;
17524 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
17525 return true;
17526 return false;
17529 /* Worker function for TARGET_MD_ASM_CLOBBERS.
17531 We do this in the new i386 backend to maintain source compatibility
17532 with the old cc0-based compiler. */
17534 static tree
17535 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
17536 tree inputs ATTRIBUTE_UNUSED,
17537 tree clobbers)
17539 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
17540 clobbers);
17541 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
17542 clobbers);
17543 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
17544 clobbers);
17545 return clobbers;
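/* Editorial illustration (not part of i386.c): because of this hook, an
   asm statement such as the one below is compiled as if "flags", "fpsr"
   and "dirflag" had been listed as clobbers, so surrounding code never
   assumes those registers survive it.  */

static void
asm_clobber_example (int *p)
{
  __asm__ volatile ("" : "+m" (*p));	/* gets the implicit clobbers */
}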
17548 /* Worker function for REVERSE_CONDITION. */
17550 enum rtx_code
17551 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
17553 return (mode != CCFPmode && mode != CCFPUmode
17554 ? reverse_condition (code)
17555 : reverse_condition_maybe_unordered (code));
17558 /* Output code to perform an x87 FP register move, from OPERANDS[1]
17559 to OPERANDS[0]. */
17561 const char *
17562 output_387_reg_move (rtx insn, rtx *operands)
17564 if (REG_P (operands[1])
17565 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
17567 if (REGNO (operands[0]) == FIRST_STACK_REG
17568 && TARGET_USE_FFREEP)
17569 return "ffreep\t%y0";
17570 return "fstp\t%y0";
17572 if (STACK_TOP_P (operands[0]))
17573 return "fld%z1\t%y1";
17574 return "fst\t%y0";
17577 /* Output code to perform a conditional jump to LABEL if the C2 flag in
17578 the FP status register is set. */
17580 void
17581 ix86_emit_fp_unordered_jump (rtx label)
17583 rtx reg = gen_reg_rtx (HImode);
17584 rtx temp;
17586 emit_insn (gen_x86_fnstsw_1 (reg));
17588 if (TARGET_USE_SAHF)
17590 emit_insn (gen_x86_sahf_1 (reg));
17592 temp = gen_rtx_REG (CCmode, FLAGS_REG);
17593 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
17595 else
17597 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
17599 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
17600 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
17603 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
17604 gen_rtx_LABEL_REF (VOIDmode, label),
17605 pc_rtx);
17606 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
17607 emit_jump_insn (temp);
17610 /* Output code to perform a log1p XFmode calculation. */
17612 void ix86_emit_i387_log1p (rtx op0, rtx op1)
17614 rtx label1 = gen_label_rtx ();
17615 rtx label2 = gen_label_rtx ();
17617 rtx tmp = gen_reg_rtx (XFmode);
17618 rtx tmp2 = gen_reg_rtx (XFmode);
17620 emit_insn (gen_absxf2 (tmp, op1));
17621 emit_insn (gen_cmpxf (tmp,
17622 CONST_DOUBLE_FROM_REAL_VALUE (
17623 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
17624 XFmode)));
17625 emit_jump_insn (gen_bge (label1));
17627 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
17628 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
17629 emit_jump (label2);
17631 emit_label (label1);
17632 emit_move_insn (tmp, CONST1_RTX (XFmode));
17633 emit_insn (gen_addxf3 (tmp, op1, tmp));
17634 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
17635 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
17637 emit_label (label2);
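/* Editorial sketch (not part of i386.c): the split implemented above.
   fyl2xp1 is only accurate for small arguments, so |op1| is compared
   against 1 - sqrt(2)/2 and larger values go through fyl2x on the
   explicit sum 1 + op1.  Libm-style builtins stand in for the x87
   sequences here.  */

static long double
log1p_sketch (long double x)
{
  const long double limit = 0.29289321881345247561810596348408353L;

  if (__builtin_fabsl (x) < limit)
    return __builtin_log1pl (x);	/* fyl2xp1 path: 1 + x is never formed */
  return __builtin_logl (1.0L + x);	/* fyl2x path on the explicit sum */
}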
17640 /* Solaris named-section hook. Parameters are as for
17641 named_section_real. */
17643 static void
17644 i386_solaris_elf_named_section (const char *name, unsigned int flags,
17645 tree decl)
17647 /* With Binutils 2.15, the "@unwind" marker must be specified on
17648 every occurrence of the ".eh_frame" section, not just the first
17649 one. */
17650 if (TARGET_64BIT
17651 && strcmp (name, ".eh_frame") == 0)
17653 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
17654 flags & SECTION_WRITE ? "aw" : "a");
17655 return;
17657 default_elf_asm_named_section (name, flags, decl);
17660 /* Return the mangling of TYPE if it is an extended fundamental type. */
17662 static const char *
17663 ix86_mangle_fundamental_type (tree type)
17665 switch (TYPE_MODE (type))
17667 case TFmode:
17668 /* __float128 is "g". */
17669 return "g";
17670 case XFmode:
17671 /* "long double" or __float80 is "e". */
17672 return "e";
17673 default:
17674 return NULL;
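/* Editorial illustration (not part of i386.c): with this hook, the two
   extended types below contribute the one-letter codes "g" and "e" to
   Itanium-ABI C++ mangled names; for example, a C++ "void f (__float128)"
   mangles to _Z1fg and "void f (long double)" to _Z1fe.  */

__float128 mangles_as_g;	/* TFmode */
long double mangles_as_e;	/* XFmode */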
17678 /* For 32-bit code we can save the PIC register setup by using the
17679 hidden __stack_chk_fail_local function instead of calling
17680 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
17681 register, so it is better to call __stack_chk_fail directly. */
17683 static tree
17684 ix86_stack_protect_fail (void)
17686 return TARGET_64BIT
17687 ? default_external_stack_protect_fail ()
17688 : default_hidden_stack_protect_fail ();
17691 #include "gt-i386.h"