[official-gcc.git] / gcc / config / i386 / i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
52 #ifndef CHECK_STACK_LIMIT
53 #define CHECK_STACK_LIMIT (-1)
54 #endif
56 /* Return index of given mode in mult and division cost tables. */
57 #define MODE_INDEX(mode) \
58 ((mode) == QImode ? 0 \
59 : (mode) == HImode ? 1 \
60 : (mode) == SImode ? 2 \
61 : (mode) == DImode ? 3 \
62 : 4)
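/* For example, MODE_INDEX (SImode) is 2, so it selects the third (SImode)
   entry of the per-mode multiply and divide cost arrays in the tables below. */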
64 /* Processor costs (relative to an add) */
65 static const
 66 struct processor_costs size_cost = { /* costs for tuning for size */
67 2, /* cost of an add instruction */
68 3, /* cost of a lea instruction */
69 2, /* variable shift costs */
70 3, /* constant shift costs */
71 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
72 0, /* cost of multiply per each bit set */
73 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
74 3, /* cost of movsx */
75 3, /* cost of movzx */
76 0, /* "large" insn */
77 2, /* MOVE_RATIO */
78 2, /* cost for loading QImode using movzbl */
79 {2, 2, 2}, /* cost of loading integer registers
80 in QImode, HImode and SImode.
81 Relative to reg-reg move (2). */
82 {2, 2, 2}, /* cost of storing integer registers */
83 2, /* cost of reg,reg fld/fst */
84 {2, 2, 2}, /* cost of loading fp registers
85 in SFmode, DFmode and XFmode */
 86 {2, 2, 2}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
87 3, /* cost of moving MMX register */
88 {3, 3}, /* cost of loading MMX registers
89 in SImode and DImode */
90 {3, 3}, /* cost of storing MMX registers
91 in SImode and DImode */
92 3, /* cost of moving SSE register */
93 {3, 3, 3}, /* cost of loading SSE registers
94 in SImode, DImode and TImode */
95 {3, 3, 3}, /* cost of storing SSE registers
96 in SImode, DImode and TImode */
97 3, /* MMX or SSE register to integer */
98 0, /* size of prefetch block */
99 0, /* number of parallel prefetches */
100 1, /* Branch cost */
101 2, /* cost of FADD and FSUB insns. */
102 2, /* cost of FMUL instruction. */
103 2, /* cost of FDIV instruction. */
104 2, /* cost of FABS instruction. */
105 2, /* cost of FCHS instruction. */
106 2, /* cost of FSQRT instruction. */
109 /* Processor costs (relative to an add) */
110 static const
111 struct processor_costs i386_cost = { /* 386 specific costs */
112 1, /* cost of an add instruction */
113 1, /* cost of a lea instruction */
114 3, /* variable shift costs */
115 2, /* constant shift costs */
116 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
117 1, /* cost of multiply per each bit set */
118 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
119 3, /* cost of movsx */
120 2, /* cost of movzx */
121 15, /* "large" insn */
122 3, /* MOVE_RATIO */
123 4, /* cost for loading QImode using movzbl */
124 {2, 4, 2}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {8, 8, 8}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
 131 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
132 2, /* cost of moving MMX register */
133 {4, 8}, /* cost of loading MMX registers
134 in SImode and DImode */
135 {4, 8}, /* cost of storing MMX registers
136 in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
139 in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
141 in SImode, DImode and TImode */
142 3, /* MMX or SSE register to integer */
143 0, /* size of prefetch block */
144 0, /* number of parallel prefetches */
145 1, /* Branch cost */
146 23, /* cost of FADD and FSUB insns. */
147 27, /* cost of FMUL instruction. */
148 88, /* cost of FDIV instruction. */
149 22, /* cost of FABS instruction. */
150 24, /* cost of FCHS instruction. */
151 122, /* cost of FSQRT instruction. */
154 static const
155 struct processor_costs i486_cost = { /* 486 specific costs */
156 1, /* cost of an add instruction */
157 1, /* cost of a lea instruction */
158 3, /* variable shift costs */
159 2, /* constant shift costs */
160 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
161 1, /* cost of multiply per each bit set */
162 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
163 3, /* cost of movsx */
164 2, /* cost of movzx */
165 15, /* "large" insn */
166 3, /* MOVE_RATIO */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
 175 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
176 2, /* cost of moving MMX register */
177 {4, 8}, /* cost of loading MMX registers
178 in SImode and DImode */
179 {4, 8}, /* cost of storing MMX registers
180 in SImode and DImode */
181 2, /* cost of moving SSE register */
182 {4, 8, 16}, /* cost of loading SSE registers
183 in SImode, DImode and TImode */
184 {4, 8, 16}, /* cost of storing SSE registers
185 in SImode, DImode and TImode */
186 3, /* MMX or SSE register to integer */
187 0, /* size of prefetch block */
188 0, /* number of parallel prefetches */
189 1, /* Branch cost */
190 8, /* cost of FADD and FSUB insns. */
191 16, /* cost of FMUL instruction. */
192 73, /* cost of FDIV instruction. */
193 3, /* cost of FABS instruction. */
194 3, /* cost of FCHS instruction. */
195 83, /* cost of FSQRT instruction. */
198 static const
199 struct processor_costs pentium_cost = {
200 1, /* cost of an add instruction */
201 1, /* cost of a lea instruction */
202 4, /* variable shift costs */
203 1, /* constant shift costs */
204 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
205 0, /* cost of multiply per each bit set */
206 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
207 3, /* cost of movsx */
208 2, /* cost of movzx */
209 8, /* "large" insn */
210 6, /* MOVE_RATIO */
211 6, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {2, 2, 6}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
 219 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
220 8, /* cost of moving MMX register */
221 {8, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {8, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
233 2, /* Branch cost */
234 3, /* cost of FADD and FSUB insns. */
235 3, /* cost of FMUL instruction. */
236 39, /* cost of FDIV instruction. */
237 1, /* cost of FABS instruction. */
238 1, /* cost of FCHS instruction. */
239 70, /* cost of FSQRT instruction. */
242 static const
243 struct processor_costs pentiumpro_cost = {
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
246 1, /* variable shift costs */
247 1, /* constant shift costs */
248 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
249 0, /* cost of multiply per each bit set */
250 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
251 1, /* cost of movsx */
252 1, /* cost of movzx */
253 8, /* "large" insn */
254 6, /* MOVE_RATIO */
255 2, /* cost for loading QImode using movzbl */
256 {4, 4, 4}, /* cost of loading integer registers
257 in QImode, HImode and SImode.
258 Relative to reg-reg move (2). */
259 {2, 2, 2}, /* cost of storing integer registers */
260 2, /* cost of reg,reg fld/fst */
261 {2, 2, 6}, /* cost of loading fp registers
262 in SFmode, DFmode and XFmode */
 263 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
264 2, /* cost of moving MMX register */
265 {2, 2}, /* cost of loading MMX registers
266 in SImode and DImode */
267 {2, 2}, /* cost of storing MMX registers
268 in SImode and DImode */
269 2, /* cost of moving SSE register */
270 {2, 2, 8}, /* cost of loading SSE registers
271 in SImode, DImode and TImode */
272 {2, 2, 8}, /* cost of storing SSE registers
273 in SImode, DImode and TImode */
274 3, /* MMX or SSE register to integer */
275 32, /* size of prefetch block */
276 6, /* number of parallel prefetches */
277 2, /* Branch cost */
278 3, /* cost of FADD and FSUB insns. */
279 5, /* cost of FMUL instruction. */
280 56, /* cost of FDIV instruction. */
281 2, /* cost of FABS instruction. */
282 2, /* cost of FCHS instruction. */
283 56, /* cost of FSQRT instruction. */
286 static const
287 struct processor_costs k6_cost = {
288 1, /* cost of an add instruction */
289 2, /* cost of a lea instruction */
290 1, /* variable shift costs */
291 1, /* constant shift costs */
292 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
293 0, /* cost of multiply per each bit set */
294 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
295 2, /* cost of movsx */
296 2, /* cost of movzx */
297 8, /* "large" insn */
298 4, /* MOVE_RATIO */
299 3, /* cost for loading QImode using movzbl */
300 {4, 5, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 3, 2}, /* cost of storing integer registers */
304 4, /* cost of reg,reg fld/fst */
305 {6, 6, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
 307 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 6, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 1, /* number of parallel prefetches */
321 1, /* Branch cost */
322 2, /* cost of FADD and FSUB insns. */
323 2, /* cost of FMUL instruction. */
324 56, /* cost of FDIV instruction. */
325 2, /* cost of FABS instruction. */
326 2, /* cost of FCHS instruction. */
327 56, /* cost of FSQRT instruction. */
330 static const
331 struct processor_costs athlon_cost = {
332 1, /* cost of an add instruction */
333 2, /* cost of a lea instruction */
334 1, /* variable shift costs */
335 1, /* constant shift costs */
336 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
337 0, /* cost of multiply per each bit set */
338 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
339 1, /* cost of movsx */
340 1, /* cost of movzx */
341 8, /* "large" insn */
342 9, /* MOVE_RATIO */
343 4, /* cost for loading QImode using movzbl */
344 {3, 4, 3}, /* cost of loading integer registers
345 in QImode, HImode and SImode.
346 Relative to reg-reg move (2). */
347 {3, 4, 3}, /* cost of storing integer registers */
348 4, /* cost of reg,reg fld/fst */
349 {4, 4, 12}, /* cost of loading fp registers
350 in SFmode, DFmode and XFmode */
 351 {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
352 2, /* cost of moving MMX register */
353 {4, 4}, /* cost of loading MMX registers
354 in SImode and DImode */
355 {4, 4}, /* cost of storing MMX registers
356 in SImode and DImode */
357 2, /* cost of moving SSE register */
358 {4, 4, 6}, /* cost of loading SSE registers
359 in SImode, DImode and TImode */
360 {4, 4, 5}, /* cost of storing SSE registers
361 in SImode, DImode and TImode */
362 5, /* MMX or SSE register to integer */
363 64, /* size of prefetch block */
364 6, /* number of parallel prefetches */
365 5, /* Branch cost */
366 4, /* cost of FADD and FSUB insns. */
367 4, /* cost of FMUL instruction. */
368 24, /* cost of FDIV instruction. */
369 2, /* cost of FABS instruction. */
370 2, /* cost of FCHS instruction. */
371 35, /* cost of FSQRT instruction. */
374 static const
375 struct processor_costs k8_cost = {
376 1, /* cost of an add instruction */
377 2, /* cost of a lea instruction */
378 1, /* variable shift costs */
379 1, /* constant shift costs */
380 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
381 0, /* cost of multiply per each bit set */
382 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
383 1, /* cost of movsx */
384 1, /* cost of movzx */
385 8, /* "large" insn */
386 9, /* MOVE_RATIO */
387 4, /* cost for loading QImode using movzbl */
388 {3, 4, 3}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {3, 4, 3}, /* cost of storing integer registers */
392 4, /* cost of reg,reg fld/fst */
393 {4, 4, 12}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
 395 {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
396 2, /* cost of moving MMX register */
397 {3, 3}, /* cost of loading MMX registers
398 in SImode and DImode */
399 {4, 4}, /* cost of storing MMX registers
400 in SImode and DImode */
401 2, /* cost of moving SSE register */
402 {4, 3, 6}, /* cost of loading SSE registers
403 in SImode, DImode and TImode */
404 {4, 4, 5}, /* cost of storing SSE registers
405 in SImode, DImode and TImode */
406 5, /* MMX or SSE register to integer */
407 64, /* size of prefetch block */
408 6, /* number of parallel prefetches */
409 5, /* Branch cost */
410 4, /* cost of FADD and FSUB insns. */
411 4, /* cost of FMUL instruction. */
412 19, /* cost of FDIV instruction. */
413 2, /* cost of FABS instruction. */
414 2, /* cost of FCHS instruction. */
415 35, /* cost of FSQRT instruction. */
418 static const
419 struct processor_costs pentium4_cost = {
420 1, /* cost of an add instruction */
421 3, /* cost of a lea instruction */
422 4, /* variable shift costs */
423 4, /* constant shift costs */
424 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
425 0, /* cost of multiply per each bit set */
426 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
427 1, /* cost of movsx */
428 1, /* cost of movzx */
429 16, /* "large" insn */
430 6, /* MOVE_RATIO */
431 2, /* cost for loading QImode using movzbl */
432 {4, 5, 4}, /* cost of loading integer registers
433 in QImode, HImode and SImode.
434 Relative to reg-reg move (2). */
435 {2, 3, 2}, /* cost of storing integer registers */
436 2, /* cost of reg,reg fld/fst */
437 {2, 2, 6}, /* cost of loading fp registers
438 in SFmode, DFmode and XFmode */
 439 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
440 2, /* cost of moving MMX register */
441 {2, 2}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {2, 2}, /* cost of storing MMX registers
444 in SImode and DImode */
445 12, /* cost of moving SSE register */
446 {12, 12, 12}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {2, 2, 8}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 10, /* MMX or SSE register to integer */
451 64, /* size of prefetch block */
452 6, /* number of parallel prefetches */
453 2, /* Branch cost */
454 5, /* cost of FADD and FSUB insns. */
455 7, /* cost of FMUL instruction. */
456 43, /* cost of FDIV instruction. */
457 2, /* cost of FABS instruction. */
458 2, /* cost of FCHS instruction. */
459 43, /* cost of FSQRT instruction. */
462 static const
463 struct processor_costs nocona_cost = {
464 1, /* cost of an add instruction */
465 1, /* cost of a lea instruction */
466 1, /* variable shift costs */
467 1, /* constant shift costs */
468 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
469 0, /* cost of multiply per each bit set */
470 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
471 1, /* cost of movsx */
472 1, /* cost of movzx */
473 16, /* "large" insn */
474 9, /* MOVE_RATIO */
475 4, /* cost for loading QImode using movzbl */
476 {4, 4, 4}, /* cost of loading integer registers
477 in QImode, HImode and SImode.
478 Relative to reg-reg move (2). */
479 {4, 4, 4}, /* cost of storing integer registers */
480 3, /* cost of reg,reg fld/fst */
481 {12, 12, 12}, /* cost of loading fp registers
482 in SFmode, DFmode and XFmode */
 483 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
484 6, /* cost of moving MMX register */
485 {12, 12}, /* cost of loading MMX registers
486 in SImode and DImode */
487 {12, 12}, /* cost of storing MMX registers
488 in SImode and DImode */
489 6, /* cost of moving SSE register */
490 {12, 12, 12}, /* cost of loading SSE registers
491 in SImode, DImode and TImode */
492 {12, 12, 12}, /* cost of storing SSE registers
493 in SImode, DImode and TImode */
494 8, /* MMX or SSE register to integer */
495 128, /* size of prefetch block */
496 8, /* number of parallel prefetches */
497 1, /* Branch cost */
498 6, /* cost of FADD and FSUB insns. */
499 8, /* cost of FMUL instruction. */
500 40, /* cost of FDIV instruction. */
501 3, /* cost of FABS instruction. */
502 3, /* cost of FCHS instruction. */
503 44, /* cost of FSQRT instruction. */
506 const struct processor_costs *ix86_cost = &pentium_cost;
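/* override_options repoints ix86_cost: to &size_cost when optimizing for
   size, otherwise to the cost table of the processor selected by -mtune. */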
508 /* Processor feature/optimization bitmasks. */
509 #define m_386 (1<<PROCESSOR_I386)
510 #define m_486 (1<<PROCESSOR_I486)
511 #define m_PENT (1<<PROCESSOR_PENTIUM)
512 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
513 #define m_K6 (1<<PROCESSOR_K6)
514 #define m_ATHLON (1<<PROCESSOR_ATHLON)
515 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
516 #define m_K8 (1<<PROCESSOR_K8)
517 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
518 #define m_NOCONA (1<<PROCESSOR_NOCONA)
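/* Each x86_* tuning variable below is a mask of the processor bits above;
   i386.h tests them against the active tuning, roughly
   (x86_use_leave & (1 << ix86_tune)), through the corresponding TARGET_* macros. */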
520 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
521 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
522 const int x86_zero_extend_with_and = m_486 | m_PENT;
523 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
524 const int x86_double_with_add = ~m_386;
525 const int x86_use_bit_test = m_386;
526 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
527 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
528 const int x86_fisttp = m_NOCONA;
529 const int x86_3dnow_a = m_ATHLON_K8;
530 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
 531 /* Branch hints were put in P4 based on simulation results, but
 532 after P4 was made, no performance benefit was observed with
 533 branch hints; they also increase code size. As a result,
 534 icc never generates branch hints. */
535 const int x86_branch_hints = 0;
536 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
537 const int x86_partial_reg_stall = m_PPRO;
538 const int x86_use_loop = m_K6;
539 const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
540 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
541 const int x86_use_mov0 = m_K6;
542 const int x86_use_cltd = ~(m_PENT | m_K6);
543 const int x86_read_modify_write = ~m_PENT;
544 const int x86_read_modify = ~(m_PENT | m_PPRO);
545 const int x86_split_long_moves = m_PPRO;
546 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
547 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
548 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
549 const int x86_qimode_math = ~(0);
550 const int x86_promote_qi_regs = 0;
551 const int x86_himode_math = ~(m_PPRO);
552 const int x86_promote_hi_regs = m_PPRO;
553 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
554 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
555 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
556 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
557 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
558 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
559 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
560 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
561 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
562 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
563 const int x86_decompose_lea = m_PENT4 | m_NOCONA;
564 const int x86_shift1 = ~m_486;
565 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
566 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
 567 /* Set for machines where the type and dependencies are resolved on SSE
 568 register parts instead of whole registers, so we may maintain just the
 569 lower part of scalar values in the proper format, leaving the upper part
 570 undefined. */
571 const int x86_sse_split_regs = m_ATHLON_K8;
572 const int x86_sse_typeless_stores = m_ATHLON_K8;
573 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
574 const int x86_use_ffreep = m_ATHLON_K8;
575 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
577 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
 578 integer data in xmm registers, which results in pretty abysmal code. */
579 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
581 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
582 /* Some CPU cores are not able to predict more than 4 branch instructions in
 583 the 16-byte window. */
584 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
585 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT;
586 const int x86_use_bt = m_ATHLON_K8;
587 /* Compare and exchange was added for 80486. */
588 const int x86_cmpxchg = ~m_386;
589 /* Exchange and add was added for 80486. */
590 const int x86_xadd = ~m_386;
 592 /* If the average insn count for a single function invocation is
 593 lower than this constant, emit fast (but longer) prologue and
 594 epilogue code. */
595 #define FAST_PROLOGUE_INSN_COUNT 20
597 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
598 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
599 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
600 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
602 /* Array of the smallest class containing reg number REGNO, indexed by
603 REGNO. Used by REGNO_REG_CLASS in i386.h. */
605 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
607 /* ax, dx, cx, bx */
608 AREG, DREG, CREG, BREG,
609 /* si, di, bp, sp */
610 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
611 /* FP registers */
612 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
613 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
614 /* arg pointer */
615 NON_Q_REGS,
616 /* flags, fpsr, dirflag, frame */
617 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
618 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
619 SSE_REGS, SSE_REGS,
620 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
621 MMX_REGS, MMX_REGS,
622 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
623 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
624 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
625 SSE_REGS, SSE_REGS,
628 /* The "default" register map used in 32bit mode. */
630 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
632 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
633 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
634 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
635 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
636 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
637 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
638 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
641 static int const x86_64_int_parameter_registers[6] =
643 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
644 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
647 static int const x86_64_int_return_registers[4] =
 649 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
652 /* The "default" register map used in 64bit mode. */
653 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
655 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
656 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
657 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
658 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
659 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
660 8,9,10,11,12,13,14,15, /* extended integer registers */
661 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
664 /* Define the register numbers to be used in Dwarf debugging information.
665 The SVR4 reference port C compiler uses the following register numbers
666 in its Dwarf output code:
667 0 for %eax (gcc regno = 0)
668 1 for %ecx (gcc regno = 2)
669 2 for %edx (gcc regno = 1)
670 3 for %ebx (gcc regno = 3)
671 4 for %esp (gcc regno = 7)
672 5 for %ebp (gcc regno = 6)
673 6 for %esi (gcc regno = 4)
674 7 for %edi (gcc regno = 5)
675 The following three DWARF register numbers are never generated by
676 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
677 believes these numbers have these meanings.
678 8 for %eip (no gcc equivalent)
679 9 for %eflags (gcc regno = 17)
680 10 for %trapno (no gcc equivalent)
681 It is not at all clear how we should number the FP stack registers
682 for the x86 architecture. If the version of SDB on x86/svr4 were
683 a bit less brain dead with respect to floating-point then we would
684 have a precedent to follow with respect to DWARF register numbers
685 for x86 FP registers, but the SDB on x86/svr4 is so completely
686 broken with respect to FP registers that it is hardly worth thinking
687 of it as something to strive for compatibility with.
688 The version of x86/svr4 SDB I have at the moment does (partially)
689 seem to believe that DWARF register number 11 is associated with
690 the x86 register %st(0), but that's about all. Higher DWARF
691 register numbers don't seem to be associated with anything in
692 particular, and even for DWARF regno 11, SDB only seems to under-
693 stand that it should say that a variable lives in %st(0) (when
694 asked via an `=' command) if we said it was in DWARF regno 11,
695 but SDB still prints garbage when asked for the value of the
696 variable in question (via a `/' command).
697 (Also note that the labels SDB prints for various FP stack regs
698 when doing an `x' command are all wrong.)
699 Note that these problems generally don't affect the native SVR4
700 C compiler because it doesn't allow the use of -O with -g and
701 because when it is *not* optimizing, it allocates a memory
702 location for each floating-point variable, and the memory
703 location is what gets described in the DWARF AT_location
704 attribute for the variable in question.
705 Regardless of the severe mental illness of the x86/svr4 SDB, we
706 do something sensible here and we use the following DWARF
707 register numbers. Note that these are all stack-top-relative
708 numbers.
709 11 for %st(0) (gcc regno = 8)
710 12 for %st(1) (gcc regno = 9)
711 13 for %st(2) (gcc regno = 10)
712 14 for %st(3) (gcc regno = 11)
713 15 for %st(4) (gcc regno = 12)
714 16 for %st(5) (gcc regno = 13)
715 17 for %st(6) (gcc regno = 14)
716 18 for %st(7) (gcc regno = 15)
718 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
720 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
721 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
722 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
723 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
724 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
725 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
726 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
729 /* Test and compare insns in i386.md store the information needed to
730 generate branch and scc insns here. */
732 rtx ix86_compare_op0 = NULL_RTX;
733 rtx ix86_compare_op1 = NULL_RTX;
734 rtx ix86_compare_emitted = NULL_RTX;
736 #define MAX_386_STACK_LOCALS 3
737 /* Size of the register save area. */
738 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
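/* With the 64-bit ABI values REGPARM_MAX == 6, UNITS_PER_WORD == 8 and
   SSE_REGPARM_MAX == 8, this works out to 6*8 + 8*16 = 176 bytes, the size
   of the register save area used for varargs. */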
740 /* Define the structure for the machine field in struct function. */
742 struct stack_local_entry GTY(())
744 unsigned short mode;
745 unsigned short n;
746 rtx rtl;
747 struct stack_local_entry *next;
 750 /* Structure describing stack frame layout.
 751    Stack grows downward:
 753    [arguments]
 754                                           <- ARG_POINTER
 755    saved pc
 757    saved frame pointer if frame_pointer_needed
 758                                           <- HARD_FRAME_POINTER
 759    [saved regs]
 761    [padding1]            \
 763    [va_arg registers]    (
 764                           > to_allocate   <- FRAME_POINTER
 765    [frame]               (
 767    [padding2]            /
769 struct ix86_frame
771 int nregs;
772 int padding1;
773 int va_arg_size;
774 HOST_WIDE_INT frame;
775 int padding2;
776 int outgoing_arguments_size;
777 int red_zone_size;
779 HOST_WIDE_INT to_allocate;
780 /* The offsets relative to ARG_POINTER. */
781 HOST_WIDE_INT frame_pointer_offset;
782 HOST_WIDE_INT hard_frame_pointer_offset;
783 HOST_WIDE_INT stack_pointer_offset;
785 /* When save_regs_using_mov is set, emit prologue using
786 move instead of push instructions. */
787 bool save_regs_using_mov;
790 /* Code model option as passed by user. */
791 static const char *ix86_cmodel_string;
792 /* Parsed value. */
793 enum cmodel ix86_cmodel;
794 /* Asm dialect. */
795 static const char *ix86_asm_string;
796 enum asm_dialect ix86_asm_dialect = ASM_ATT;
 797 /* TLS dialect. */
798 static const char *ix86_tls_dialect_string;
799 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
801 /* Which unit we are generating floating point math for. */
802 enum fpmath_unit ix86_fpmath;
 804 /* Which CPU we are scheduling for. */
805 enum processor_type ix86_tune;
806 /* Which instruction set architecture to use. */
807 enum processor_type ix86_arch;
809 /* Strings to hold which cpu and instruction set architecture to use. */
810 const char *ix86_tune_string; /* for -mtune=<xxx> */
811 const char *ix86_arch_string; /* for -march=<xxx> */
812 static const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
814 /* # of registers to use to pass arguments. */
815 static const char *ix86_regparm_string;
 817 /* True if the SSE prefetch instruction is not a NOP. */
818 int x86_prefetch_sse;
820 /* ix86_regparm_string as a number */
821 static int ix86_regparm;
823 /* Alignment to use for loops and jumps: */
825 /* Power of two alignment for loops. */
826 static const char *ix86_align_loops_string;
828 /* Power of two alignment for non-loop jumps. */
829 static const char *ix86_align_jumps_string;
831 /* Power of two alignment for stack boundary in bytes. */
832 static const char *ix86_preferred_stack_boundary_string;
834 /* Preferred alignment for stack boundary in bits. */
835 unsigned int ix86_preferred_stack_boundary;
837 /* Values 1-5: see jump.c */
838 int ix86_branch_cost;
839 static const char *ix86_branch_cost_string;
841 /* Power of two alignment for functions. */
842 static const char *ix86_align_funcs_string;
844 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
845 char internal_label_prefix[16];
846 int internal_label_prefix_len;
848 static bool ix86_handle_option (size_t, const char *, int);
849 static void output_pic_addr_const (FILE *, rtx, int);
850 static void put_condition_code (enum rtx_code, enum machine_mode,
851 int, int, FILE *);
852 static const char *get_some_local_dynamic_name (void);
853 static int get_some_local_dynamic_name_1 (rtx *, void *);
854 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
855 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
856 rtx *);
857 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
858 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
859 enum machine_mode);
860 static rtx get_thread_pointer (int);
861 static rtx legitimize_tls_address (rtx, enum tls_model, int);
862 static void get_pc_thunk_name (char [32], unsigned int);
863 static rtx gen_push (rtx);
864 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
865 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
866 static struct machine_function * ix86_init_machine_status (void);
867 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
868 static int ix86_nsaved_regs (void);
869 static void ix86_emit_save_regs (void);
870 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
871 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
872 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
873 static HOST_WIDE_INT ix86_GOT_alias_set (void);
874 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
875 static rtx ix86_expand_aligntest (rtx, int);
876 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
877 static int ix86_issue_rate (void);
878 static int ix86_adjust_cost (rtx, rtx, rtx, int);
879 static int ia32_multipass_dfa_lookahead (void);
880 static void ix86_init_mmx_sse_builtins (void);
881 static rtx x86_this_parameter (tree);
882 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
883 HOST_WIDE_INT, tree);
884 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
885 static void x86_file_start (void);
886 static void ix86_reorg (void);
887 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
888 static tree ix86_build_builtin_va_list (void);
889 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
890 tree, int *, int);
891 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
892 static bool ix86_vector_mode_supported_p (enum machine_mode);
894 static int ix86_address_cost (rtx);
895 static bool ix86_cannot_force_const_mem (rtx);
896 static rtx ix86_delegitimize_address (rtx);
898 struct builtin_description;
899 static rtx ix86_expand_sse_comi (const struct builtin_description *,
900 tree, rtx);
901 static rtx ix86_expand_sse_compare (const struct builtin_description *,
902 tree, rtx);
903 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
904 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
905 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
906 static rtx ix86_expand_store_builtin (enum insn_code, tree);
907 static rtx safe_vector_operand (rtx, enum machine_mode);
908 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
909 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
910 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
911 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
912 static int ix86_fp_comparison_cost (enum rtx_code code);
913 static unsigned int ix86_select_alt_pic_regnum (void);
914 static int ix86_save_reg (unsigned int, int);
915 static void ix86_compute_frame_layout (struct ix86_frame *);
916 static int ix86_comp_type_attributes (tree, tree);
917 static int ix86_function_regparm (tree, tree);
918 const struct attribute_spec ix86_attribute_table[];
919 static bool ix86_function_ok_for_sibcall (tree, tree);
920 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
921 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
922 static int ix86_value_regno (enum machine_mode, tree);
923 static bool contains_128bit_aligned_vector_p (tree);
924 static rtx ix86_struct_value_rtx (tree, int);
925 static bool ix86_ms_bitfield_layout_p (tree);
926 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
927 static int extended_reg_mentioned_1 (rtx *, void *);
928 static bool ix86_rtx_costs (rtx, int, int, int *);
929 static int min_insn_size (rtx);
930 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
931 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
932 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
933 tree, bool);
934 static void ix86_init_builtins (void);
935 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
937 /* This function is only used on Solaris. */
938 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
939 ATTRIBUTE_UNUSED;
 941 /* Register class used for passing a given 64-bit part of the argument.
 942 These represent classes as documented by the PS ABI, with the exception
 943 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
 944 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
 946 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
 947 whenever possible (the upper half does contain padding).
949 enum x86_64_reg_class
951 X86_64_NO_CLASS,
952 X86_64_INTEGER_CLASS,
953 X86_64_INTEGERSI_CLASS,
954 X86_64_SSE_CLASS,
955 X86_64_SSESF_CLASS,
956 X86_64_SSEDF_CLASS,
957 X86_64_SSEUP_CLASS,
958 X86_64_X87_CLASS,
959 X86_64_X87UP_CLASS,
960 X86_64_COMPLEX_X87_CLASS,
961 X86_64_MEMORY_CLASS
963 static const char * const x86_64_reg_class_name[] = {
964 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
965 "sseup", "x87", "x87up", "cplx87", "no"
968 #define MAX_CLASSES 4
970 /* Table of constants used by fldpi, fldln2, etc.... */
971 static REAL_VALUE_TYPE ext_80387_constants_table [5];
972 static bool ext_80387_constants_init = 0;
973 static void init_ext_80387_constants (void);
975 /* Initialize the GCC target structure. */
976 #undef TARGET_ATTRIBUTE_TABLE
977 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
978 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
979 # undef TARGET_MERGE_DECL_ATTRIBUTES
980 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
981 #endif
983 #undef TARGET_COMP_TYPE_ATTRIBUTES
984 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
986 #undef TARGET_INIT_BUILTINS
987 #define TARGET_INIT_BUILTINS ix86_init_builtins
988 #undef TARGET_EXPAND_BUILTIN
989 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
991 #undef TARGET_ASM_FUNCTION_EPILOGUE
992 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
994 #undef TARGET_ASM_OPEN_PAREN
995 #define TARGET_ASM_OPEN_PAREN ""
996 #undef TARGET_ASM_CLOSE_PAREN
997 #define TARGET_ASM_CLOSE_PAREN ""
999 #undef TARGET_ASM_ALIGNED_HI_OP
1000 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1001 #undef TARGET_ASM_ALIGNED_SI_OP
1002 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1003 #ifdef ASM_QUAD
1004 #undef TARGET_ASM_ALIGNED_DI_OP
1005 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1006 #endif
1008 #undef TARGET_ASM_UNALIGNED_HI_OP
1009 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1010 #undef TARGET_ASM_UNALIGNED_SI_OP
1011 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1012 #undef TARGET_ASM_UNALIGNED_DI_OP
1013 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1015 #undef TARGET_SCHED_ADJUST_COST
1016 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1017 #undef TARGET_SCHED_ISSUE_RATE
1018 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1019 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1020 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1021 ia32_multipass_dfa_lookahead
1023 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1024 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1026 #ifdef HAVE_AS_TLS
1027 #undef TARGET_HAVE_TLS
1028 #define TARGET_HAVE_TLS true
1029 #endif
1030 #undef TARGET_CANNOT_FORCE_CONST_MEM
1031 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1033 #undef TARGET_DELEGITIMIZE_ADDRESS
1034 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1036 #undef TARGET_MS_BITFIELD_LAYOUT_P
1037 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1039 #undef TARGET_ASM_OUTPUT_MI_THUNK
1040 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1041 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1042 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1044 #undef TARGET_ASM_FILE_START
1045 #define TARGET_ASM_FILE_START x86_file_start
1047 #undef TARGET_DEFAULT_TARGET_FLAGS
1048 #define TARGET_DEFAULT_TARGET_FLAGS \
1049 (TARGET_DEFAULT \
1050 | TARGET_64BIT_DEFAULT \
1051 | TARGET_SUBTARGET_DEFAULT \
1052 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1054 #undef TARGET_HANDLE_OPTION
1055 #define TARGET_HANDLE_OPTION ix86_handle_option
1057 #undef TARGET_RTX_COSTS
1058 #define TARGET_RTX_COSTS ix86_rtx_costs
1059 #undef TARGET_ADDRESS_COST
1060 #define TARGET_ADDRESS_COST ix86_address_cost
1062 #undef TARGET_FIXED_CONDITION_CODE_REGS
1063 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1064 #undef TARGET_CC_MODES_COMPATIBLE
1065 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1067 #undef TARGET_MACHINE_DEPENDENT_REORG
1068 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1070 #undef TARGET_BUILD_BUILTIN_VA_LIST
1071 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1073 #undef TARGET_MD_ASM_CLOBBERS
1074 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1076 #undef TARGET_PROMOTE_PROTOTYPES
1077 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1078 #undef TARGET_STRUCT_VALUE_RTX
1079 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1080 #undef TARGET_SETUP_INCOMING_VARARGS
1081 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1082 #undef TARGET_MUST_PASS_IN_STACK
1083 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1084 #undef TARGET_PASS_BY_REFERENCE
1085 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1087 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1088 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1090 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1091 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1093 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1094 #undef TARGET_INSERT_ATTRIBUTES
1095 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1096 #endif
1098 struct gcc_target targetm = TARGET_INITIALIZER;
1101 /* The svr4 ABI for the i386 says that records and unions are returned
1102 in memory. */
1103 #ifndef DEFAULT_PCC_STRUCT_RETURN
1104 #define DEFAULT_PCC_STRUCT_RETURN 1
1105 #endif
1107 /* Implement TARGET_HANDLE_OPTION. */
1109 static bool
1110 ix86_handle_option (size_t code, const char *arg, int value)
1112 switch (code)
1114 case OPT_m3dnow:
1115 if (!value)
1117 target_flags &= ~MASK_3DNOW_A;
1118 target_flags_explicit |= MASK_3DNOW_A;
1120 return true;
1122 case OPT_malign_functions_:
1123 ix86_align_funcs_string = arg;
1124 return true;
1126 case OPT_malign_jumps_:
1127 ix86_align_jumps_string = arg;
1128 return true;
1130 case OPT_malign_loops_:
1131 ix86_align_loops_string = arg;
1132 return true;
1134 case OPT_march_:
1135 ix86_arch_string = arg;
1136 return true;
1138 case OPT_masm_:
1139 ix86_asm_string = arg;
1140 return true;
1142 case OPT_mbranch_cost_:
1143 ix86_branch_cost_string = arg;
1144 return true;
1146 case OPT_mcmodel_:
1147 ix86_cmodel_string = arg;
1148 return true;
1150 case OPT_mfpmath_:
1151 ix86_fpmath_string = arg;
1152 return true;
1154 case OPT_mmmx:
1155 if (!value)
1157 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1158 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1160 return true;
1162 case OPT_mpreferred_stack_boundary_:
1163 ix86_preferred_stack_boundary_string = arg;
1164 return true;
1166 case OPT_mregparm_:
1167 ix86_regparm_string = arg;
1168 return true;
1170 case OPT_msse:
1171 if (!value)
1173 target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1174 target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1176 return true;
1178 case OPT_msse2:
1179 if (!value)
1181 target_flags &= ~MASK_SSE3;
1182 target_flags_explicit |= MASK_SSE3;
1184 return true;
1186 case OPT_mtls_dialect_:
1187 ix86_tls_dialect_string = arg;
1188 return true;
1190 case OPT_mtune_:
1191 ix86_tune_string = arg;
1192 return true;
1194 default:
1195 return true;
1199 /* Sometimes certain combinations of command options do not make
1200 sense on a particular target machine. You can define a macro
1201 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1202 defined, is executed once just after all the command options have
1203 been parsed.
1205 Don't use this macro to turn on various extra optimizations for
1206 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1208 void
1209 override_options (void)
1211 int i;
1212 int ix86_tune_defaulted = 0;
1214 /* Comes from final.c -- no real reason to change it. */
1215 #define MAX_CODE_ALIGN 16
1217 static struct ptt
1219 const struct processor_costs *cost; /* Processor costs */
1220 const int target_enable; /* Target flags to enable. */
1221 const int target_disable; /* Target flags to disable. */
1222 const int align_loop; /* Default alignments. */
1223 const int align_loop_max_skip;
1224 const int align_jump;
1225 const int align_jump_max_skip;
1226 const int align_func;
1228 const processor_target_table[PROCESSOR_max] =
1230 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1231 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1232 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1233 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1234 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1235 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1236 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1237 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1238 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
1241 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1242 static struct pta
1244 const char *const name; /* processor name or nickname. */
1245 const enum processor_type processor;
1246 const enum pta_flags
1248 PTA_SSE = 1,
1249 PTA_SSE2 = 2,
1250 PTA_SSE3 = 4,
1251 PTA_MMX = 8,
1252 PTA_PREFETCH_SSE = 16,
1253 PTA_3DNOW = 32,
1254 PTA_3DNOW_A = 64,
1255 PTA_64BIT = 128
1256 } flags;
1258 const processor_alias_table[] =
1260 {"i386", PROCESSOR_I386, 0},
1261 {"i486", PROCESSOR_I486, 0},
1262 {"i586", PROCESSOR_PENTIUM, 0},
1263 {"pentium", PROCESSOR_PENTIUM, 0},
1264 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1265 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1266 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1267 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1268 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1269 {"i686", PROCESSOR_PENTIUMPRO, 0},
1270 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1271 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1272 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1273 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1274 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1275 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1276 | PTA_MMX | PTA_PREFETCH_SSE},
1277 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1278 | PTA_MMX | PTA_PREFETCH_SSE},
1279 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1280 | PTA_MMX | PTA_PREFETCH_SSE},
1281 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1282 | PTA_MMX | PTA_PREFETCH_SSE},
1283 {"k6", PROCESSOR_K6, PTA_MMX},
1284 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1285 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1286 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1287 | PTA_3DNOW_A},
1288 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1289 | PTA_3DNOW | PTA_3DNOW_A},
1290 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1291 | PTA_3DNOW_A | PTA_SSE},
1292 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1293 | PTA_3DNOW_A | PTA_SSE},
1294 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1295 | PTA_3DNOW_A | PTA_SSE},
1296 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1297 | PTA_SSE | PTA_SSE2 },
1298 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1299 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1300 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1301 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1302 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1303 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1304 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1305 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1308 int const pta_size = ARRAY_SIZE (processor_alias_table);
1310 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1311 SUBTARGET_OVERRIDE_OPTIONS;
1312 #endif
1314 /* Set the default values for switches whose default depends on TARGET_64BIT
1315 in case they weren't overwritten by command line options. */
1316 if (TARGET_64BIT)
1318 if (flag_omit_frame_pointer == 2)
1319 flag_omit_frame_pointer = 1;
1320 if (flag_asynchronous_unwind_tables == 2)
1321 flag_asynchronous_unwind_tables = 1;
1322 if (flag_pcc_struct_return == 2)
1323 flag_pcc_struct_return = 0;
1325 else
1327 if (flag_omit_frame_pointer == 2)
1328 flag_omit_frame_pointer = 0;
1329 if (flag_asynchronous_unwind_tables == 2)
1330 flag_asynchronous_unwind_tables = 0;
1331 if (flag_pcc_struct_return == 2)
1332 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1335 if (!ix86_tune_string && ix86_arch_string)
1336 ix86_tune_string = ix86_arch_string;
1337 if (!ix86_tune_string)
1339 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1340 ix86_tune_defaulted = 1;
1342 if (!ix86_arch_string)
1343 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1345 if (ix86_cmodel_string != 0)
1347 if (!strcmp (ix86_cmodel_string, "small"))
1348 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1349 else if (flag_pic)
1350 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1351 else if (!strcmp (ix86_cmodel_string, "32"))
1352 ix86_cmodel = CM_32;
1353 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1354 ix86_cmodel = CM_KERNEL;
1355 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1356 ix86_cmodel = CM_MEDIUM;
1357 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1358 ix86_cmodel = CM_LARGE;
1359 else
1360 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1362 else
1364 ix86_cmodel = CM_32;
1365 if (TARGET_64BIT)
1366 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1368 if (ix86_asm_string != 0)
1370 if (!strcmp (ix86_asm_string, "intel"))
1371 ix86_asm_dialect = ASM_INTEL;
1372 else if (!strcmp (ix86_asm_string, "att"))
1373 ix86_asm_dialect = ASM_ATT;
1374 else
1375 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1377 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1378 error ("code model %qs not supported in the %s bit mode",
1379 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1380 if (ix86_cmodel == CM_LARGE)
1381 sorry ("code model %<large%> not supported yet");
1382 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1383 sorry ("%i-bit mode not compiled in",
1384 (target_flags & MASK_64BIT) ? 64 : 32);
1386 for (i = 0; i < pta_size; i++)
1387 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1389 ix86_arch = processor_alias_table[i].processor;
1390 /* Default cpu tuning to the architecture. */
1391 ix86_tune = ix86_arch;
1392 if (processor_alias_table[i].flags & PTA_MMX
1393 && !(target_flags_explicit & MASK_MMX))
1394 target_flags |= MASK_MMX;
1395 if (processor_alias_table[i].flags & PTA_3DNOW
1396 && !(target_flags_explicit & MASK_3DNOW))
1397 target_flags |= MASK_3DNOW;
1398 if (processor_alias_table[i].flags & PTA_3DNOW_A
1399 && !(target_flags_explicit & MASK_3DNOW_A))
1400 target_flags |= MASK_3DNOW_A;
1401 if (processor_alias_table[i].flags & PTA_SSE
1402 && !(target_flags_explicit & MASK_SSE))
1403 target_flags |= MASK_SSE;
1404 if (processor_alias_table[i].flags & PTA_SSE2
1405 && !(target_flags_explicit & MASK_SSE2))
1406 target_flags |= MASK_SSE2;
1407 if (processor_alias_table[i].flags & PTA_SSE3
1408 && !(target_flags_explicit & MASK_SSE3))
1409 target_flags |= MASK_SSE3;
1410 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1411 x86_prefetch_sse = true;
1412 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1413 error ("CPU you selected does not support x86-64 "
1414 "instruction set");
1415 break;
1418 if (i == pta_size)
1419 error ("bad value (%s) for -march= switch", ix86_arch_string);
1421 for (i = 0; i < pta_size; i++)
1422 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1424 ix86_tune = processor_alias_table[i].processor;
1425 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1427 if (ix86_tune_defaulted)
1429 ix86_tune_string = "x86-64";
1430 for (i = 0; i < pta_size; i++)
1431 if (! strcmp (ix86_tune_string,
1432 processor_alias_table[i].name))
1433 break;
1434 ix86_tune = processor_alias_table[i].processor;
1436 else
1437 error ("CPU you selected does not support x86-64 "
1438 "instruction set");
1440 /* Intel CPUs have always interpreted SSE prefetch instructions as
1441 NOPs; so, we can enable SSE prefetch instructions even when
1442 -mtune (rather than -march) points us to a processor that has them.
1443 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1444 higher processors. */
1445 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1446 x86_prefetch_sse = true;
1447 break;
1449 if (i == pta_size)
1450 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1452 if (optimize_size)
1453 ix86_cost = &size_cost;
1454 else
1455 ix86_cost = processor_target_table[ix86_tune].cost;
1456 target_flags |= processor_target_table[ix86_tune].target_enable;
1457 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1459 /* Arrange to set up i386_stack_locals for all functions. */
1460 init_machine_status = ix86_init_machine_status;
1462 /* Validate -mregparm= value. */
1463 if (ix86_regparm_string)
1465 i = atoi (ix86_regparm_string);
1466 if (i < 0 || i > REGPARM_MAX)
1467 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1468 else
1469 ix86_regparm = i;
1471 else
1472 if (TARGET_64BIT)
1473 ix86_regparm = REGPARM_MAX;
1475 /* If the user has provided any of the -malign-* options,
1476 warn and use that value only if -falign-* is not set.
1477 Remove this code in GCC 3.2 or later. */
1478 if (ix86_align_loops_string)
1480 warning (0, "-malign-loops is obsolete, use -falign-loops");
1481 if (align_loops == 0)
1483 i = atoi (ix86_align_loops_string);
1484 if (i < 0 || i > MAX_CODE_ALIGN)
1485 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1486 else
1487 align_loops = 1 << i;
1491 if (ix86_align_jumps_string)
1493 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1494 if (align_jumps == 0)
1496 i = atoi (ix86_align_jumps_string);
1497 if (i < 0 || i > MAX_CODE_ALIGN)
1498 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1499 else
1500 align_jumps = 1 << i;
1504 if (ix86_align_funcs_string)
1506 warning (0, "-malign-functions is obsolete, use -falign-functions");
1507 if (align_functions == 0)
1509 i = atoi (ix86_align_funcs_string);
1510 if (i < 0 || i > MAX_CODE_ALIGN)
1511 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1512 else
1513 align_functions = 1 << i;
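/* The -malign-* values above are log2 byte counts: -malign-functions=4,
   for example, requests 1 << 4 = 16-byte function alignment. */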
1517 /* Default align_* from the processor table. */
1518 if (align_loops == 0)
1520 align_loops = processor_target_table[ix86_tune].align_loop;
1521 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1523 if (align_jumps == 0)
1525 align_jumps = processor_target_table[ix86_tune].align_jump;
1526 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1528 if (align_functions == 0)
1530 align_functions = processor_target_table[ix86_tune].align_func;
1533 /* Validate -mpreferred-stack-boundary= value, or provide default.
1534 The default of 128 bits is for Pentium III's SSE __m128, but we
1535 don't want additional code to keep the stack aligned when
1536 optimizing for code size. */
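/* Editorial illustration (not part of the original sources):
   -mpreferred-stack-boundary takes the log2 of the boundary in bytes, so

     gcc -mpreferred-stack-boundary=4 foo.c

   requests (1 << 4) * 8 = 128 bits (16 bytes), which matches the default
   above except when optimizing 32-bit code for size.  */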
1537 ix86_preferred_stack_boundary = (optimize_size
1538 ? TARGET_64BIT ? 128 : 32
1539 : 128);
1540 if (ix86_preferred_stack_boundary_string)
1542 i = atoi (ix86_preferred_stack_boundary_string);
1543 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1544 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1545 TARGET_64BIT ? 4 : 2);
1546 else
1547 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1550 /* Validate -mbranch-cost= value, or provide default. */
1551 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1552 if (ix86_branch_cost_string)
1554 i = atoi (ix86_branch_cost_string);
1555 if (i < 0 || i > 5)
1556 error ("-mbranch-cost=%d is not between 0 and 5", i);
1557 else
1558 ix86_branch_cost = i;
1561 if (ix86_tls_dialect_string)
1563 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1564 ix86_tls_dialect = TLS_DIALECT_GNU;
1565 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1566 ix86_tls_dialect = TLS_DIALECT_SUN;
1567 else
1568 error ("bad value (%s) for -mtls-dialect= switch",
1569 ix86_tls_dialect_string);
1572 /* Keep nonleaf frame pointers. */
1573 if (flag_omit_frame_pointer)
1574 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1575 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1576 flag_omit_frame_pointer = 1;
1578 /* If we're doing fast math, we don't care about comparison order
1579 wrt NaNs. This lets us use a shorter comparison sequence. */
1580 if (flag_unsafe_math_optimizations)
1581 target_flags &= ~MASK_IEEE_FP;
1583 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1584 since the insns won't need emulation. */
1585 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1586 target_flags &= ~MASK_NO_FANCY_MATH_387;
1588 /* Likewise, if the target doesn't have a 387, or we've specified
1589 software floating point, don't use 387 inline intrinsics. */
1590 if (!TARGET_80387)
1591 target_flags |= MASK_NO_FANCY_MATH_387;
1593 /* Turn on SSE2 builtins for -msse3. */
1594 if (TARGET_SSE3)
1595 target_flags |= MASK_SSE2;
1597 /* Turn on SSE builtins for -msse2. */
1598 if (TARGET_SSE2)
1599 target_flags |= MASK_SSE;
1601 /* Turn on MMX builtins for -msse. */
1602 if (TARGET_SSE)
1604 target_flags |= MASK_MMX & ~target_flags_explicit;
1605 x86_prefetch_sse = true;
1608 /* Turn on MMX builtins for 3Dnow. */
1609 if (TARGET_3DNOW)
1610 target_flags |= MASK_MMX;
1612 if (TARGET_64BIT)
1614 if (TARGET_ALIGN_DOUBLE)
1615 error ("-malign-double makes no sense in 64-bit mode");
1616 if (TARGET_RTD)
1617 error ("-mrtd calling convention not supported in 64-bit mode");
1619 /* Enable by default the SSE and MMX builtins. Do allow the user to
1620 explicitly disable any of these. In particular, disabling SSE and
1621 MMX for kernel code is extremely useful. */
1622 target_flags
1623 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1624 & ~target_flags_explicit);
1626 else
1628 /* The i386 ABI does not specify a red zone. It still makes sense to use it
1629 when the programmer takes care to keep the stack from being destroyed. */
1630 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1631 target_flags |= MASK_NO_RED_ZONE;
1634 ix86_fpmath = TARGET_FPMATH_DEFAULT;
1636 if (ix86_fpmath_string != 0)
1638 if (! strcmp (ix86_fpmath_string, "387"))
1639 ix86_fpmath = FPMATH_387;
1640 else if (! strcmp (ix86_fpmath_string, "sse"))
1642 if (!TARGET_SSE)
1644 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1645 ix86_fpmath = FPMATH_387;
1647 else
1648 ix86_fpmath = FPMATH_SSE;
1650 else if (! strcmp (ix86_fpmath_string, "387,sse")
1651 || ! strcmp (ix86_fpmath_string, "sse,387"))
1653 if (!TARGET_SSE)
1655 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1656 ix86_fpmath = FPMATH_387;
1658 else if (!TARGET_80387)
1660 warning (0, "387 instruction set disabled, using SSE arithmetics");
1661 ix86_fpmath = FPMATH_SSE;
1663 else
1664 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1666 else
1667 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1670 /* If the i387 is disabled, then do not return values in it. */
1671 if (!TARGET_80387)
1672 target_flags &= ~MASK_FLOAT_RETURNS;
1674 if ((x86_accumulate_outgoing_args & TUNEMASK)
1675 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1676 && !optimize_size)
1677 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1679 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1681 char *p;
1682 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1683 p = strchr (internal_label_prefix, 'X');
1684 internal_label_prefix_len = p - internal_label_prefix;
1685 *p = '\0';
1688 /* When the scheduling description is not available, disable the scheduler pass
1689 so that it won't slow down compilation and make x87 code slower. */
1690 if (!TARGET_SCHEDULE)
1691 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
1694 void
1695 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1697 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1698 make the problem with not enough registers even worse. */
1699 #ifdef INSN_SCHEDULING
1700 if (level > 1)
1701 flag_schedule_insns = 0;
1702 #endif
1704 /* The default values of these switches depend on TARGET_64BIT,
1705 which is not known at this moment. Mark these values with 2 and
1706 let the user override them. If no command line option
1707 specifies them, we will set the defaults in override_options. */
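/* A minimal sketch of the tri-state convention described above (editorial,
   not part of the original sources):

     flag_omit_frame_pointer == 0   user passed -fno-omit-frame-pointer
     flag_omit_frame_pointer == 1   user passed -fomit-frame-pointer
     flag_omit_frame_pointer == 2   no option given; override_options later
                                    picks the real default once TARGET_64BIT
                                    is known.  */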
1708 if (optimize >= 1)
1709 flag_omit_frame_pointer = 2;
1710 flag_pcc_struct_return = 2;
1711 flag_asynchronous_unwind_tables = 2;
1712 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
1713 SUBTARGET_OPTIMIZATION_OPTIONS;
1714 #endif
1717 /* Table of valid machine attributes. */
1718 const struct attribute_spec ix86_attribute_table[] =
1720 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1721 /* Stdcall attribute says callee is responsible for popping arguments
1722 if they are not variable. */
1723 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1724 /* Fastcall attribute says callee is responsible for popping arguments
1725 if they are not variable. */
1726 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1727 /* Cdecl attribute says the callee is a normal C declaration */
1728 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1729 /* Regparm attribute specifies how many integer arguments are to be
1730 passed in registers. */
1731 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1732 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1733 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
1734 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
1735 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1736 #endif
1737 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1738 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1739 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1740 SUBTARGET_ATTRIBUTE_TABLE,
1741 #endif
1742 { NULL, 0, 0, false, false, false, NULL }
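/* Editorial illustration of the attributes registered above, using
   hypothetical declarations (not part of the original sources):

     int __attribute__((stdcall))    f1 (int a, int b);        // callee pops its args
     int __attribute__((fastcall))   f2 (int a, int b);        // a in %ecx, b in %edx
     int __attribute__((regparm(3))) f3 (int a, int b, int c); // %eax, %edx, %ecx
     struct s { char c; int i; } __attribute__((ms_struct));   // MS-compatible layout

   Combining fastcall with stdcall or with regparm is rejected by the
   handlers below.  */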
1745 /* Decide whether we can make a sibling call to a function. DECL is the
1746 declaration of the function being targeted by the call and EXP is the
1747 CALL_EXPR representing the call. */
1749 static bool
1750 ix86_function_ok_for_sibcall (tree decl, tree exp)
1752 tree func;
1754 /* If we are generating position-independent code, we cannot sibcall
1755 optimize any indirect call, or a direct call to a global function,
1756 as the PLT requires %ebx be live. */
1757 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1758 return false;
1760 if (decl)
1761 func = decl;
1762 else
1763 func = NULL;
1765 /* If we are returning floats on the 80387 register stack, we cannot
1766 make a sibcall from a function that doesn't return a float to a
1767 function that does or, conversely, from a function that does return
1768 a float to a function that doesn't; the necessary stack adjustment
1769 would not be executed. */
1770 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp), func))
1771 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
1772 cfun->decl)))
1773 return false;
1775 /* If this call is indirect, we'll need to be able to use a call-clobbered
1776 register for the address of the target function. Make sure that all
1777 such registers are not used for passing parameters. */
1778 if (!decl && !TARGET_64BIT)
1780 tree type;
1782 /* We're looking at the CALL_EXPR, we need the type of the function. */
1783 type = TREE_OPERAND (exp, 0); /* pointer expression */
1784 type = TREE_TYPE (type); /* pointer type */
1785 type = TREE_TYPE (type); /* function type */
1787 if (ix86_function_regparm (type, NULL) >= 3)
1789 /* ??? Need to count the actual number of registers to be used,
1790 not the possible number of registers. Fix later. */
1791 return false;
1795 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1796 /* Dllimport'd functions are also called indirectly. */
1797 if (decl && lookup_attribute ("dllimport", DECL_ATTRIBUTES (decl))
1798 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
1799 return false;
1800 #endif
1802 /* Otherwise okay. That also includes certain types of indirect calls. */
1803 return true;
1806 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1807 arguments as in struct attribute_spec.handler. */
1808 static tree
1809 ix86_handle_cdecl_attribute (tree *node, tree name,
1810 tree args ATTRIBUTE_UNUSED,
1811 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1813 if (TREE_CODE (*node) != FUNCTION_TYPE
1814 && TREE_CODE (*node) != METHOD_TYPE
1815 && TREE_CODE (*node) != FIELD_DECL
1816 && TREE_CODE (*node) != TYPE_DECL)
1818 warning (OPT_Wattributes, "%qs attribute only applies to functions",
1819 IDENTIFIER_POINTER (name));
1820 *no_add_attrs = true;
1822 else
1824 if (is_attribute_p ("fastcall", name))
1826 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1828 error ("fastcall and stdcall attributes are not compatible");
1830 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1832 error ("fastcall and regparm attributes are not compatible");
1835 else if (is_attribute_p ("stdcall", name))
1837 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1839 error ("fastcall and stdcall attributes are not compatible");
1844 if (TARGET_64BIT)
1846 warning (OPT_Wattributes, "%qs attribute ignored",
1847 IDENTIFIER_POINTER (name));
1848 *no_add_attrs = true;
1851 return NULL_TREE;
1854 /* Handle a "regparm" attribute;
1855 arguments as in struct attribute_spec.handler. */
1856 static tree
1857 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1858 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1860 if (TREE_CODE (*node) != FUNCTION_TYPE
1861 && TREE_CODE (*node) != METHOD_TYPE
1862 && TREE_CODE (*node) != FIELD_DECL
1863 && TREE_CODE (*node) != TYPE_DECL)
1865 warning (OPT_Wattributes, "%qs attribute only applies to functions",
1866 IDENTIFIER_POINTER (name));
1867 *no_add_attrs = true;
1869 else
1871 tree cst;
1873 cst = TREE_VALUE (args);
1874 if (TREE_CODE (cst) != INTEGER_CST)
1876 warning (OPT_Wattributes,
1877 "%qs attribute requires an integer constant argument",
1878 IDENTIFIER_POINTER (name));
1879 *no_add_attrs = true;
1881 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1883 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
1884 IDENTIFIER_POINTER (name), REGPARM_MAX);
1885 *no_add_attrs = true;
1888 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1890 error ("fastcall and regparm attributes are not compatible");
1894 return NULL_TREE;
1897 /* Return 0 if the attributes for two types are incompatible, 1 if they
1898 are compatible, and 2 if they are nearly compatible (which causes a
1899 warning to be generated). */
1901 static int
1902 ix86_comp_type_attributes (tree type1, tree type2)
1904 /* Check for mismatch of non-default calling convention. */
1905 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1907 if (TREE_CODE (type1) != FUNCTION_TYPE)
1908 return 1;
1910 /* Check for mismatched fastcall types */
1911 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1912 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1913 return 0;
1915 /* Check for mismatched return types (cdecl vs stdcall). */
1916 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1917 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1918 return 0;
1919 if (ix86_function_regparm (type1, NULL)
1920 != ix86_function_regparm (type2, NULL))
1921 return 0;
1922 return 1;
1925 /* Return the regparm value for a function with the indicated TYPE and DECL.
1926 DECL may be NULL when the function is called indirectly
1927 or when considering a libcall. */
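/* Editorial example of the logic below, using hypothetical 32-bit
   declarations compiled without -mregparm (not part of the original sources):

     int __attribute__((regparm(2))) f (int a, int b);  // a in %eax, b in %edx
     static int g (int a) { return a; }                 // if cgraph marks g local,
                                                        // regparm is promoted to 3
                                                        // (2 for nested functions)

   Without an attribute or the local-function promotion, the default
   ix86_regparm (normally 0) applies and arguments stay on the stack.  */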
1929 static int
1930 ix86_function_regparm (tree type, tree decl)
1932 tree attr;
1933 int regparm = ix86_regparm;
1934 bool user_convention = false;
1936 if (!TARGET_64BIT)
1938 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1939 if (attr)
1941 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1942 user_convention = true;
1945 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1947 regparm = 2;
1948 user_convention = true;
1951 /* Use register calling convention for local functions when possible. */
1952 if (!TARGET_64BIT && !user_convention && decl
1953 && flag_unit_at_a_time && !profile_flag)
1955 struct cgraph_local_info *i = cgraph_local_info (decl);
1956 if (i && i->local)
1958 /* We can't use regparm(3) for nested functions as these use
1959 static chain pointer in third argument. */
1960 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1961 regparm = 2;
1962 else
1963 regparm = 3;
1967 return regparm;
1970 /* Return true if EAX is live at the start of the function. Used by
1971 ix86_expand_prologue to determine if we need special help before
1972 calling allocate_stack_worker. */
1974 static bool
1975 ix86_eax_live_at_start_p (void)
1977 /* Cheat. Don't bother working forward from ix86_function_regparm
1978 to the function type to whether an actual argument is located in
1979 eax. Instead just look at cfg info, which is still close enough
1980 to correct at this point. This gives false positives for broken
1981 functions that might use uninitialized data that happens to be
1982 allocated in eax, but who cares? */
1983 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1986 /* Value is the number of bytes of arguments automatically
1987 popped when returning from a subroutine call.
1988 FUNDECL is the declaration node of the function (as a tree),
1989 FUNTYPE is the data type of the function (as a tree),
1990 or for a library call it is an identifier node for the subroutine name.
1991 SIZE is the number of bytes of arguments passed on the stack.
1993 On the 80386, the RTD insn may be used to pop them if the number
1994 of args is fixed, but if the number is variable then the caller
1995 must pop them all. RTD can't be used for library calls now
1996 because the library is compiled with the Unix compiler.
1997 Use of RTD is a selectable option, since it is incompatible with
1998 standard Unix calling sequences. If the option is not selected,
1999 the caller must always pop the args.
2001 The attribute stdcall is equivalent to RTD on a per module basis. */
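/* Editorial illustration (hypothetical declarations, not part of the
   original sources):

     int __attribute__((stdcall)) f (int a, int b);  // fixed arguments
     int __attribute__((stdcall)) g (int a, ...);    // variable arguments

   For f, ix86_return_pops_args returns 8 and the callee ends with "ret $8";
   for g it returns 0 and the caller pops the arguments.  */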
2004 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2006 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2008 /* Cdecl functions override -mrtd, and never pop the stack. */
2009 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2011 /* Stdcall and fastcall functions will pop the stack if not
2012 variable args. */
2013 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2014 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2015 rtd = 1;
2017 if (rtd
2018 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2019 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2020 == void_type_node)))
2021 return size;
2024 /* Lose any fake structure return argument if it is passed on the stack. */
2025 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2026 && !TARGET_64BIT
2027 && !KEEP_AGGREGATE_RETURN_POINTER)
2029 int nregs = ix86_function_regparm (funtype, fundecl);
2031 if (!nregs)
2032 return GET_MODE_SIZE (Pmode);
2035 return 0;
2038 /* Argument support functions. */
2040 /* Return true when register may be used to pass function parameters. */
2041 bool
2042 ix86_function_arg_regno_p (int regno)
2044 int i;
2045 if (!TARGET_64BIT)
2046 return (regno < REGPARM_MAX
2047 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
2048 if (SSE_REGNO_P (regno) && TARGET_SSE)
2049 return true;
2050 /* RAX is used as hidden argument to va_arg functions. */
2051 if (!regno)
2052 return true;
2053 for (i = 0; i < REGPARM_MAX; i++)
2054 if (regno == x86_64_int_parameter_registers[i])
2055 return true;
2056 return false;
2059 /* Return true if we do not know how to pass TYPE solely in registers. */
2061 static bool
2062 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2064 if (must_pass_in_stack_var_size_or_pad (mode, type))
2065 return true;
2067 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2068 The layout_type routine is crafty and tries to trick us into passing
2069 currently unsupported vector types on the stack by using TImode. */
2070 return (!TARGET_64BIT && mode == TImode
2071 && type && TREE_CODE (type) != VECTOR_TYPE);
2074 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2075 for a call to a function whose data type is FNTYPE.
2076 For a library call, FNTYPE is 0. */
2078 void
2079 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2080 tree fntype, /* tree ptr for function decl */
2081 rtx libname, /* SYMBOL_REF of library name or 0 */
2082 tree fndecl)
2084 static CUMULATIVE_ARGS zero_cum;
2085 tree param, next_param;
2087 if (TARGET_DEBUG_ARG)
2089 fprintf (stderr, "\ninit_cumulative_args (");
2090 if (fntype)
2091 fprintf (stderr, "fntype code = %s, ret code = %s",
2092 tree_code_name[(int) TREE_CODE (fntype)],
2093 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2094 else
2095 fprintf (stderr, "no fntype");
2097 if (libname)
2098 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2101 *cum = zero_cum;
2103 /* Set up the number of registers to use for passing arguments. */
2104 if (fntype)
2105 cum->nregs = ix86_function_regparm (fntype, fndecl);
2106 else
2107 cum->nregs = ix86_regparm;
2108 if (TARGET_SSE)
2109 cum->sse_nregs = SSE_REGPARM_MAX;
2110 if (TARGET_MMX)
2111 cum->mmx_nregs = MMX_REGPARM_MAX;
2112 cum->warn_sse = true;
2113 cum->warn_mmx = true;
2114 cum->maybe_vaarg = false;
2116 /* Use the ecx and edx registers if the function has the fastcall attribute. */
2117 if (fntype && !TARGET_64BIT)
2119 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2121 cum->nregs = 2;
2122 cum->fastcall = 1;
2126 /* Determine if this function has variable arguments. This is
2127 indicated by the last argument being 'void_type_node' if there
2128 are no variable arguments. If there are variable arguments, then
2129 we won't pass anything in registers in 32-bit mode. */
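/* For example (editorial note, not part of the original sources), even with
   -mregparm=3 a 32-bit prototype such as

     int f (int a, ...);

   passes 'a' on the stack, because the loop below clears cum->nregs when the
   last listed argument type is not void_type_node.  */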
2131 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2133 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2134 param != 0; param = next_param)
2136 next_param = TREE_CHAIN (param);
2137 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2139 if (!TARGET_64BIT)
2141 cum->nregs = 0;
2142 cum->sse_nregs = 0;
2143 cum->mmx_nregs = 0;
2144 cum->warn_sse = 0;
2145 cum->warn_mmx = 0;
2146 cum->fastcall = 0;
2148 cum->maybe_vaarg = true;
2152 if ((!fntype && !libname)
2153 || (fntype && !TYPE_ARG_TYPES (fntype)))
2154 cum->maybe_vaarg = true;
2156 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
2157 in SSE registers even in 32-bit mode, and allow not just 3 but up to
2158 8 SSE arguments in registers. */
2159 if (!TARGET_64BIT && !cum->maybe_vaarg && !cum->fastcall
2160 && cum->sse_nregs == SSE_REGPARM_MAX && fndecl
2161 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2163 struct cgraph_local_info *i = cgraph_local_info (fndecl);
2164 if (i && i->local)
2166 cum->sse_nregs = 8;
2167 cum->float_in_sse = true;
2171 if (TARGET_DEBUG_ARG)
2172 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2174 return;
2177 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2178 But in the case of vector types, it is some vector mode.
2180 When we have only some of our vector isa extensions enabled, then there
2181 are some modes for which vector_mode_supported_p is false. For these
2182 modes, the generic vector support in gcc will choose some non-vector mode
2183 in order to implement the type. By computing the natural mode, we'll
2184 select the proper ABI location for the operand and not depend on whatever
2185 the middle-end decides to do with these vector types. */
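/* Editorial example (not part of the original sources): with SSE disabled,

     typedef float v4sf __attribute__((vector_size (16)));

   is laid out by the middle end in some non-vector mode, but the loop below
   still computes V4SFmode as its natural mode, so the ABI location of such
   an argument does not depend on which ISA extensions are enabled.  */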
2187 static enum machine_mode
2188 type_natural_mode (tree type)
2190 enum machine_mode mode = TYPE_MODE (type);
2192 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2194 HOST_WIDE_INT size = int_size_in_bytes (type);
2195 if ((size == 8 || size == 16)
2196 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2197 && TYPE_VECTOR_SUBPARTS (type) > 1)
2199 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2201 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2202 mode = MIN_MODE_VECTOR_FLOAT;
2203 else
2204 mode = MIN_MODE_VECTOR_INT;
2206 /* Get the mode which has this inner mode and number of units. */
2207 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2208 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2209 && GET_MODE_INNER (mode) == innermode)
2210 return mode;
2212 gcc_unreachable ();
2216 return mode;
2219 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2220 this may not agree with the mode that the type system has chosen for the
2221 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2222 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2224 static rtx
2225 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2226 unsigned int regno)
2228 rtx tmp;
2230 if (orig_mode != BLKmode)
2231 tmp = gen_rtx_REG (orig_mode, regno);
2232 else
2234 tmp = gen_rtx_REG (mode, regno);
2235 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2236 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2239 return tmp;
2242 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
2243 of this code is to classify each eightbyte of an incoming argument by register
2244 class and assign registers accordingly. */
2246 /* Return the union class of CLASS1 and CLASS2.
2247 See the x86-64 PS ABI for details. */
2249 static enum x86_64_reg_class
2250 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2252 /* Rule #1: If both classes are equal, this is the resulting class. */
2253 if (class1 == class2)
2254 return class1;
2256 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2257 the other class. */
2258 if (class1 == X86_64_NO_CLASS)
2259 return class2;
2260 if (class2 == X86_64_NO_CLASS)
2261 return class1;
2263 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2264 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2265 return X86_64_MEMORY_CLASS;
2267 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2268 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2269 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2270 return X86_64_INTEGERSI_CLASS;
2271 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2272 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2273 return X86_64_INTEGER_CLASS;
2275 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2276 MEMORY is used. */
2277 if (class1 == X86_64_X87_CLASS
2278 || class1 == X86_64_X87UP_CLASS
2279 || class1 == X86_64_COMPLEX_X87_CLASS
2280 || class2 == X86_64_X87_CLASS
2281 || class2 == X86_64_X87UP_CLASS
2282 || class2 == X86_64_COMPLEX_X87_CLASS)
2283 return X86_64_MEMORY_CLASS;
2285 /* Rule #6: Otherwise class SSE is used. */
2286 return X86_64_SSE_CLASS;
2289 /* Classify the argument of type TYPE and mode MODE.
2290 CLASSES will be filled by the register class used to pass each word
2291 of the operand. The number of words is returned. In case the parameter
2292 should be passed in memory, 0 is returned. As a special case for zero
2293 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2295 BIT_OFFSET is used internally for handling records and specifies the
2296 offset in bits, modulo 256, to avoid overflow cases.
2298 See the x86-64 PS ABI for details.
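/* A worked example of the classification (editorial, not part of the
   original sources), assuming the LP64 x86-64 ABI:

     struct s { double d; long l; };   // 16 bytes

   would be classified into two eightbytes, classes[0] = X86_64_SSEDF_CLASS
   and classes[1] = X86_64_INTEGER_CLASS, so it is passed in one SSE register
   and one general purpose register.  A struct larger than 16 bytes, or one
   containing a long double, is passed in memory (return value 0).  */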
2301 static int
2302 classify_argument (enum machine_mode mode, tree type,
2303 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2305 HOST_WIDE_INT bytes =
2306 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2307 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2309 /* Variable sized entities are always passed/returned in memory. */
2310 if (bytes < 0)
2311 return 0;
2313 if (mode != VOIDmode
2314 && targetm.calls.must_pass_in_stack (mode, type))
2315 return 0;
2317 if (type && AGGREGATE_TYPE_P (type))
2319 int i;
2320 tree field;
2321 enum x86_64_reg_class subclasses[MAX_CLASSES];
2323 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2324 if (bytes > 16)
2325 return 0;
2327 for (i = 0; i < words; i++)
2328 classes[i] = X86_64_NO_CLASS;
2330 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2331 signal the memory class, so handle this as a special case. */
2332 if (!words)
2334 classes[0] = X86_64_NO_CLASS;
2335 return 1;
2338 /* Classify each field of record and merge classes. */
2339 switch (TREE_CODE (type))
2341 case RECORD_TYPE:
2342 /* For classes first merge in the field of the subclasses. */
2343 if (TYPE_BINFO (type))
2345 tree binfo, base_binfo;
2346 int basenum;
2348 for (binfo = TYPE_BINFO (type), basenum = 0;
2349 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2351 int num;
2352 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2353 tree type = BINFO_TYPE (base_binfo);
2355 num = classify_argument (TYPE_MODE (type),
2356 type, subclasses,
2357 (offset + bit_offset) % 256);
2358 if (!num)
2359 return 0;
2360 for (i = 0; i < num; i++)
2362 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2363 classes[i + pos] =
2364 merge_classes (subclasses[i], classes[i + pos]);
2368 /* And now merge the fields of structure. */
2369 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2371 if (TREE_CODE (field) == FIELD_DECL)
2373 int num;
2375 /* Bitfields are always classified as integer. Handle them
2376 early, since later code would consider them to be
2377 misaligned integers. */
2378 if (DECL_BIT_FIELD (field))
2380 for (i = int_bit_position (field) / 8 / 8;
2381 i < (int_bit_position (field)
2382 + tree_low_cst (DECL_SIZE (field), 0)
2383 + 63) / 8 / 8; i++)
2384 classes[i] =
2385 merge_classes (X86_64_INTEGER_CLASS,
2386 classes[i]);
2388 else
2390 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2391 TREE_TYPE (field), subclasses,
2392 (int_bit_position (field)
2393 + bit_offset) % 256);
2394 if (!num)
2395 return 0;
2396 for (i = 0; i < num; i++)
2398 int pos =
2399 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2400 classes[i + pos] =
2401 merge_classes (subclasses[i], classes[i + pos]);
2406 break;
2408 case ARRAY_TYPE:
2409 /* Arrays are handled as small records. */
2411 int num;
2412 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2413 TREE_TYPE (type), subclasses, bit_offset);
2414 if (!num)
2415 return 0;
2417 /* The partial classes are now full classes. */
2418 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2419 subclasses[0] = X86_64_SSE_CLASS;
2420 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2421 subclasses[0] = X86_64_INTEGER_CLASS;
2423 for (i = 0; i < words; i++)
2424 classes[i] = subclasses[i % num];
2426 break;
2428 case UNION_TYPE:
2429 case QUAL_UNION_TYPE:
2430 /* Unions are similar to RECORD_TYPE but the offset is always 0.
2433 /* Unions are not derived. */
2434 gcc_assert (!TYPE_BINFO (type)
2435 || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
2436 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2438 if (TREE_CODE (field) == FIELD_DECL)
2440 int num;
2441 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2442 TREE_TYPE (field), subclasses,
2443 bit_offset);
2444 if (!num)
2445 return 0;
2446 for (i = 0; i < num; i++)
2447 classes[i] = merge_classes (subclasses[i], classes[i]);
2450 break;
2452 default:
2453 gcc_unreachable ();
2456 /* Final merger cleanup. */
2457 for (i = 0; i < words; i++)
2459 /* If one class is MEMORY, everything should be passed in
2460 memory. */
2461 if (classes[i] == X86_64_MEMORY_CLASS)
2462 return 0;
2464 /* The X86_64_SSEUP_CLASS should be always preceded by
2465 X86_64_SSE_CLASS. */
2466 if (classes[i] == X86_64_SSEUP_CLASS
2467 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2468 classes[i] = X86_64_SSE_CLASS;
2470 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2471 if (classes[i] == X86_64_X87UP_CLASS
2472 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2473 classes[i] = X86_64_SSE_CLASS;
2475 return words;
2478 /* Compute the alignment needed. We align all types to natural boundaries with
2479 the exception of XFmode, which is aligned to 64 bits. */
2480 if (mode != VOIDmode && mode != BLKmode)
2482 int mode_alignment = GET_MODE_BITSIZE (mode);
2484 if (mode == XFmode)
2485 mode_alignment = 128;
2486 else if (mode == XCmode)
2487 mode_alignment = 256;
2488 if (COMPLEX_MODE_P (mode))
2489 mode_alignment /= 2;
2490 /* Misaligned fields are always returned in memory. */
2491 if (bit_offset % mode_alignment)
2492 return 0;
2495 /* For V1xx modes, just use the base mode. */
2496 if (VECTOR_MODE_P (mode)
2497 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
2498 mode = GET_MODE_INNER (mode);
2500 /* Classification of atomic types. */
2501 switch (mode)
2503 case DImode:
2504 case SImode:
2505 case HImode:
2506 case QImode:
2507 case CSImode:
2508 case CHImode:
2509 case CQImode:
2510 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2511 classes[0] = X86_64_INTEGERSI_CLASS;
2512 else
2513 classes[0] = X86_64_INTEGER_CLASS;
2514 return 1;
2515 case CDImode:
2516 case TImode:
2517 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2518 return 2;
2519 case CTImode:
2520 return 0;
2521 case SFmode:
2522 if (!(bit_offset % 64))
2523 classes[0] = X86_64_SSESF_CLASS;
2524 else
2525 classes[0] = X86_64_SSE_CLASS;
2526 return 1;
2527 case DFmode:
2528 classes[0] = X86_64_SSEDF_CLASS;
2529 return 1;
2530 case XFmode:
2531 classes[0] = X86_64_X87_CLASS;
2532 classes[1] = X86_64_X87UP_CLASS;
2533 return 2;
2534 case TFmode:
2535 classes[0] = X86_64_SSE_CLASS;
2536 classes[1] = X86_64_SSEUP_CLASS;
2537 return 2;
2538 case SCmode:
2539 classes[0] = X86_64_SSE_CLASS;
2540 return 1;
2541 case DCmode:
2542 classes[0] = X86_64_SSEDF_CLASS;
2543 classes[1] = X86_64_SSEDF_CLASS;
2544 return 2;
2545 case XCmode:
2546 classes[0] = X86_64_COMPLEX_X87_CLASS;
2547 return 1;
2548 case TCmode:
2549 /* This mode is larger than 16 bytes. */
2550 return 0;
2551 case V4SFmode:
2552 case V4SImode:
2553 case V16QImode:
2554 case V8HImode:
2555 case V2DFmode:
2556 case V2DImode:
2557 classes[0] = X86_64_SSE_CLASS;
2558 classes[1] = X86_64_SSEUP_CLASS;
2559 return 2;
2560 case V2SFmode:
2561 case V2SImode:
2562 case V4HImode:
2563 case V8QImode:
2564 classes[0] = X86_64_SSE_CLASS;
2565 return 1;
2566 case BLKmode:
2567 case VOIDmode:
2568 return 0;
2569 default:
2570 gcc_assert (VECTOR_MODE_P (mode));
2572 if (bytes > 16)
2573 return 0;
2575 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
2577 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2578 classes[0] = X86_64_INTEGERSI_CLASS;
2579 else
2580 classes[0] = X86_64_INTEGER_CLASS;
2581 classes[1] = X86_64_INTEGER_CLASS;
2582 return 1 + (bytes > 8);
2586 /* Examine the argument and return the number of registers required in each
2587 class. Return 0 iff the parameter should be passed in memory. */
2588 static int
2589 examine_argument (enum machine_mode mode, tree type, int in_return,
2590 int *int_nregs, int *sse_nregs)
2592 enum x86_64_reg_class class[MAX_CLASSES];
2593 int n = classify_argument (mode, type, class, 0);
2595 *int_nregs = 0;
2596 *sse_nregs = 0;
2597 if (!n)
2598 return 0;
2599 for (n--; n >= 0; n--)
2600 switch (class[n])
2602 case X86_64_INTEGER_CLASS:
2603 case X86_64_INTEGERSI_CLASS:
2604 (*int_nregs)++;
2605 break;
2606 case X86_64_SSE_CLASS:
2607 case X86_64_SSESF_CLASS:
2608 case X86_64_SSEDF_CLASS:
2609 (*sse_nregs)++;
2610 break;
2611 case X86_64_NO_CLASS:
2612 case X86_64_SSEUP_CLASS:
2613 break;
2614 case X86_64_X87_CLASS:
2615 case X86_64_X87UP_CLASS:
2616 if (!in_return)
2617 return 0;
2618 break;
2619 case X86_64_COMPLEX_X87_CLASS:
2620 return in_return ? 2 : 0;
2621 case X86_64_MEMORY_CLASS:
2622 gcc_unreachable ();
2624 return 1;
2627 /* Construct container for the argument used by GCC interface. See
2628 FUNCTION_ARG for the detailed description. */
2630 static rtx
2631 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
2632 tree type, int in_return, int nintregs, int nsseregs,
2633 const int *intreg, int sse_regno)
2635 enum machine_mode tmpmode;
2636 int bytes =
2637 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2638 enum x86_64_reg_class class[MAX_CLASSES];
2639 int n;
2640 int i;
2641 int nexps = 0;
2642 int needed_sseregs, needed_intregs;
2643 rtx exp[MAX_CLASSES];
2644 rtx ret;
2646 n = classify_argument (mode, type, class, 0);
2647 if (TARGET_DEBUG_ARG)
2649 if (!n)
2650 fprintf (stderr, "Memory class\n");
2651 else
2653 fprintf (stderr, "Classes:");
2654 for (i = 0; i < n; i++)
2656 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2658 fprintf (stderr, "\n");
2661 if (!n)
2662 return NULL;
2663 if (!examine_argument (mode, type, in_return, &needed_intregs,
2664 &needed_sseregs))
2665 return NULL;
2666 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2667 return NULL;
2669 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2670 some less clueful developer tries to use floating-point anyway. */
2671 if (needed_sseregs && !TARGET_SSE)
2673 static bool issued_error;
2674 if (!issued_error)
2676 issued_error = true;
2677 if (in_return)
2678 error ("SSE register return with SSE disabled");
2679 else
2680 error ("SSE register argument with SSE disabled");
2682 return NULL;
2685 /* First construct simple cases. Avoid SCmode, since we want to use
2686 a single register to pass this type. */
2687 if (n == 1 && mode != SCmode)
2688 switch (class[0])
2690 case X86_64_INTEGER_CLASS:
2691 case X86_64_INTEGERSI_CLASS:
2692 return gen_rtx_REG (mode, intreg[0]);
2693 case X86_64_SSE_CLASS:
2694 case X86_64_SSESF_CLASS:
2695 case X86_64_SSEDF_CLASS:
2696 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
2697 case X86_64_X87_CLASS:
2698 case X86_64_COMPLEX_X87_CLASS:
2699 return gen_rtx_REG (mode, FIRST_STACK_REG);
2700 case X86_64_NO_CLASS:
2701 /* Zero sized array, struct or class. */
2702 return NULL;
2703 default:
2704 gcc_unreachable ();
2706 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2707 && mode != BLKmode)
2708 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2709 if (n == 2
2710 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2711 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2712 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2713 && class[1] == X86_64_INTEGER_CLASS
2714 && (mode == CDImode || mode == TImode || mode == TFmode)
2715 && intreg[0] + 1 == intreg[1])
2716 return gen_rtx_REG (mode, intreg[0]);
2718 /* Otherwise figure out the entries of the PARALLEL. */
2719 for (i = 0; i < n; i++)
2721 switch (class[i])
2723 case X86_64_NO_CLASS:
2724 break;
2725 case X86_64_INTEGER_CLASS:
2726 case X86_64_INTEGERSI_CLASS:
2727 /* Merge TImodes on aligned occasions here too. */
2728 if (i * 8 + 8 > bytes)
2729 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2730 else if (class[i] == X86_64_INTEGERSI_CLASS)
2731 tmpmode = SImode;
2732 else
2733 tmpmode = DImode;
2734 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
2735 if (tmpmode == BLKmode)
2736 tmpmode = DImode;
2737 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2738 gen_rtx_REG (tmpmode, *intreg),
2739 GEN_INT (i*8));
2740 intreg++;
2741 break;
2742 case X86_64_SSESF_CLASS:
2743 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2744 gen_rtx_REG (SFmode,
2745 SSE_REGNO (sse_regno)),
2746 GEN_INT (i*8));
2747 sse_regno++;
2748 break;
2749 case X86_64_SSEDF_CLASS:
2750 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2751 gen_rtx_REG (DFmode,
2752 SSE_REGNO (sse_regno)),
2753 GEN_INT (i*8));
2754 sse_regno++;
2755 break;
2756 case X86_64_SSE_CLASS:
2757 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2758 tmpmode = TImode;
2759 else
2760 tmpmode = DImode;
2761 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2762 gen_rtx_REG (tmpmode,
2763 SSE_REGNO (sse_regno)),
2764 GEN_INT (i*8));
2765 if (tmpmode == TImode)
2766 i++;
2767 sse_regno++;
2768 break;
2769 default:
2770 gcc_unreachable ();
2774 /* Empty aligned struct, union or class. */
2775 if (nexps == 0)
2776 return NULL;
2778 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2779 for (i = 0; i < nexps; i++)
2780 XVECEXP (ret, 0, i) = exp [i];
2781 return ret;
2784 /* Update the data in CUM to advance over an argument
2785 of mode MODE and data type TYPE.
2786 (TYPE is null for libcalls where that information may not be available.) */
2788 void
2789 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2790 tree type, int named)
2792 int bytes =
2793 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2794 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2796 if (type)
2797 mode = type_natural_mode (type);
2799 if (TARGET_DEBUG_ARG)
2800 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
2801 "mode=%s, named=%d)\n\n",
2802 words, cum->words, cum->nregs, cum->sse_nregs,
2803 GET_MODE_NAME (mode), named);
2805 if (TARGET_64BIT)
2807 int int_nregs, sse_nregs;
2808 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2809 cum->words += words;
2810 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2812 cum->nregs -= int_nregs;
2813 cum->sse_nregs -= sse_nregs;
2814 cum->regno += int_nregs;
2815 cum->sse_regno += sse_nregs;
2817 else
2818 cum->words += words;
2820 else
2822 switch (mode)
2824 default:
2825 break;
2827 case BLKmode:
2828 if (bytes < 0)
2829 break;
2830 /* FALLTHRU */
2832 case DImode:
2833 case SImode:
2834 case HImode:
2835 case QImode:
2836 cum->words += words;
2837 cum->nregs -= words;
2838 cum->regno += words;
2840 if (cum->nregs <= 0)
2842 cum->nregs = 0;
2843 cum->regno = 0;
2845 break;
2847 case DFmode:
2848 if (!TARGET_SSE2)
2849 break;
2850 case SFmode:
2851 if (!cum->float_in_sse)
2852 break;
2853 /* FALLTHRU */
2855 case TImode:
2856 case V16QImode:
2857 case V8HImode:
2858 case V4SImode:
2859 case V2DImode:
2860 case V4SFmode:
2861 case V2DFmode:
2862 if (!type || !AGGREGATE_TYPE_P (type))
2864 cum->sse_words += words;
2865 cum->sse_nregs -= 1;
2866 cum->sse_regno += 1;
2867 if (cum->sse_nregs <= 0)
2869 cum->sse_nregs = 0;
2870 cum->sse_regno = 0;
2873 break;
2875 case V8QImode:
2876 case V4HImode:
2877 case V2SImode:
2878 case V2SFmode:
2879 if (!type || !AGGREGATE_TYPE_P (type))
2881 cum->mmx_words += words;
2882 cum->mmx_nregs -= 1;
2883 cum->mmx_regno += 1;
2884 if (cum->mmx_nregs <= 0)
2886 cum->mmx_nregs = 0;
2887 cum->mmx_regno = 0;
2890 break;
2895 /* Define where to put the arguments to a function.
2896 Value is zero to push the argument on the stack,
2897 or a hard register in which to store the argument.
2899 MODE is the argument's machine mode.
2900 TYPE is the data type of the argument (as a tree).
2901 This is null for libcalls where that information may
2902 not be available.
2903 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2904 the preceding args and about the function being called.
2905 NAMED is nonzero if this argument is a named parameter
2906 (otherwise it is an extra parameter matching an ellipsis). */
2909 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
2910 tree type, int named)
2912 enum machine_mode mode = orig_mode;
2913 rtx ret = NULL_RTX;
2914 int bytes =
2915 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2916 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2917 static bool warnedsse, warnedmmx;
2919 /* To simplify the code below, represent vector types with a vector mode
2920 even if MMX/SSE are not active. */
2921 if (type && TREE_CODE (type) == VECTOR_TYPE)
2922 mode = type_natural_mode (type);
2924 /* Handle a hidden AL argument containing the number of SSE registers used
2925 when calling varargs x86-64 functions. For the i386 ABI just return
2926 constm1_rtx to avoid any AL settings. */
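/* Editorial example (not part of the original sources): a 64-bit varargs
   call such as

     printf ("%f\n", 3.14);

   passes the double in %xmm0 and sets %al to 1, the number of SSE registers
   used; a varargs callee compiled by GCC reads it in its prologue (see
   ix86_setup_incoming_varargs below) to decide how many SSE registers to
   dump into the register save area.  */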
2927 if (mode == VOIDmode)
2929 if (TARGET_64BIT)
2930 return GEN_INT (cum->maybe_vaarg
2931 ? (cum->sse_nregs < 0
2932 ? SSE_REGPARM_MAX
2933 : cum->sse_regno)
2934 : -1);
2935 else
2936 return constm1_rtx;
2938 if (TARGET_64BIT)
2939 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
2940 cum->sse_nregs,
2941 &x86_64_int_parameter_registers [cum->regno],
2942 cum->sse_regno);
2943 else
2944 switch (mode)
2946 /* For now, pass fp/complex values on the stack. */
2947 default:
2948 break;
2950 case BLKmode:
2951 if (bytes < 0)
2952 break;
2953 /* FALLTHRU */
2954 case DImode:
2955 case SImode:
2956 case HImode:
2957 case QImode:
2958 if (words <= cum->nregs)
2960 int regno = cum->regno;
2962 /* Fastcall allocates the first two DWORD (SImode) or
2963 smaller arguments to ECX and EDX. */
2964 if (cum->fastcall)
2966 if (mode == BLKmode || mode == DImode)
2967 break;
2969 /* ECX not EAX is the first allocated register. */
2970 if (regno == 0)
2971 regno = 2;
2973 ret = gen_rtx_REG (mode, regno);
2975 break;
2976 case DFmode:
2977 if (!TARGET_SSE2)
2978 break;
2979 case SFmode:
2980 if (!cum->float_in_sse)
2981 break;
2982 /* FALLTHRU */
2983 case TImode:
2984 case V16QImode:
2985 case V8HImode:
2986 case V4SImode:
2987 case V2DImode:
2988 case V4SFmode:
2989 case V2DFmode:
2990 if (!type || !AGGREGATE_TYPE_P (type))
2992 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2994 warnedsse = true;
2995 warning (0, "SSE vector argument without SSE enabled "
2996 "changes the ABI");
2998 if (cum->sse_nregs)
2999 ret = gen_reg_or_parallel (mode, orig_mode,
3000 cum->sse_regno + FIRST_SSE_REG);
3002 break;
3003 case V8QImode:
3004 case V4HImode:
3005 case V2SImode:
3006 case V2SFmode:
3007 if (!type || !AGGREGATE_TYPE_P (type))
3009 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3011 warnedmmx = true;
3012 warning (0, "MMX vector argument without MMX enabled "
3013 "changes the ABI");
3015 if (cum->mmx_nregs)
3016 ret = gen_reg_or_parallel (mode, orig_mode,
3017 cum->mmx_regno + FIRST_MMX_REG);
3019 break;
3022 if (TARGET_DEBUG_ARG)
3024 fprintf (stderr,
3025 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3026 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3028 if (ret)
3029 print_simple_rtl (stderr, ret);
3030 else
3031 fprintf (stderr, ", stack");
3033 fprintf (stderr, " )\n");
3036 return ret;
3039 /* A C expression that indicates when an argument must be passed by
3040 reference. If nonzero for an argument, a copy of that argument is
3041 made in memory and a pointer to the argument is passed instead of
3042 the argument itself. The pointer is passed in whatever way is
3043 appropriate for passing a pointer to that type. */
3045 static bool
3046 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3047 enum machine_mode mode ATTRIBUTE_UNUSED,
3048 tree type, bool named ATTRIBUTE_UNUSED)
3050 if (!TARGET_64BIT)
3051 return 0;
3053 if (type && int_size_in_bytes (type) == -1)
3055 if (TARGET_DEBUG_ARG)
3056 fprintf (stderr, "function_arg_pass_by_reference\n");
3057 return 1;
3060 return 0;
3063 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
3064 ABI. Only called if TARGET_SSE. */
3065 static bool
3066 contains_128bit_aligned_vector_p (tree type)
3068 enum machine_mode mode = TYPE_MODE (type);
3069 if (SSE_REG_MODE_P (mode)
3070 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3071 return true;
3072 if (TYPE_ALIGN (type) < 128)
3073 return false;
3075 if (AGGREGATE_TYPE_P (type))
3077 /* Walk the aggregates recursively. */
3078 switch (TREE_CODE (type))
3080 case RECORD_TYPE:
3081 case UNION_TYPE:
3082 case QUAL_UNION_TYPE:
3084 tree field;
3086 if (TYPE_BINFO (type))
3088 tree binfo, base_binfo;
3089 int i;
3091 for (binfo = TYPE_BINFO (type), i = 0;
3092 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
3093 if (contains_128bit_aligned_vector_p
3094 (BINFO_TYPE (base_binfo)))
3095 return true;
3097 /* And now check the fields of the structure. */
3098 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3100 if (TREE_CODE (field) == FIELD_DECL
3101 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3102 return true;
3104 break;
3107 case ARRAY_TYPE:
3108 /* Just for use if some language passes arrays by value. */
3109 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3110 return true;
3112 default:
3113 gcc_unreachable ();
3116 return false;
3119 /* Gives the alignment boundary, in bits, of an argument with the
3120 specified mode and type. */
3123 ix86_function_arg_boundary (enum machine_mode mode, tree type)
3125 int align;
3126 if (type)
3127 align = TYPE_ALIGN (type);
3128 else
3129 align = GET_MODE_ALIGNMENT (mode);
3130 if (align < PARM_BOUNDARY)
3131 align = PARM_BOUNDARY;
3132 if (!TARGET_64BIT)
3134 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3135 make an exception for SSE modes since these require 128bit
3136 alignment.
3138 The handling here differs from field_alignment. ICC aligns MMX
3139 arguments to 4 byte boundaries, while structure fields are aligned
3140 to 8 byte boundaries. */
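/* Editorial example (not part of the original sources): on ia32 with SSE
   enabled, an __m128 argument, or any aggregate containing one, gets a
   128-bit boundary from this function, while a plain double keeps the
   32-bit PARM_BOUNDARY.  */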
3141 if (!TARGET_SSE)
3142 align = PARM_BOUNDARY;
3143 else if (!type)
3145 if (!SSE_REG_MODE_P (mode))
3146 align = PARM_BOUNDARY;
3148 else
3150 if (!contains_128bit_aligned_vector_p (type))
3151 align = PARM_BOUNDARY;
3154 if (align > 128)
3155 align = 128;
3156 return align;
3159 /* Return true if N is a possible register number of function value. */
3160 bool
3161 ix86_function_value_regno_p (int regno)
3163 if (!TARGET_64BIT)
3165 return ((regno) == 0
3166 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3167 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
3169 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
3170 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
3171 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
3174 /* Define how to find the value returned by a function.
3175 VALTYPE is the data type of the value (as a tree).
3176 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3177 otherwise, FUNC is 0. */
3179 ix86_function_value (tree valtype, tree func)
3181 enum machine_mode natmode = type_natural_mode (valtype);
3183 if (TARGET_64BIT)
3185 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3186 1, REGPARM_MAX, SSE_REGPARM_MAX,
3187 x86_64_int_return_registers, 0);
3188 /* For zero sized structures, construct_container returns NULL, but we
3189 need to keep the rest of the compiler happy by returning a meaningful value. */
3190 if (!ret)
3191 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3192 return ret;
3194 else
3195 return gen_rtx_REG (TYPE_MODE (valtype), ix86_value_regno (natmode, func));
3198 /* Return nonzero iff TYPE is returned in memory. */
3200 ix86_return_in_memory (tree type)
3202 int needed_intregs, needed_sseregs, size;
3203 enum machine_mode mode = type_natural_mode (type);
3205 if (TARGET_64BIT)
3206 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3208 if (mode == BLKmode)
3209 return 1;
3211 size = int_size_in_bytes (type);
3213 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3214 return 0;
3216 if (VECTOR_MODE_P (mode) || mode == TImode)
3218 /* User-created vectors small enough to fit in EAX. */
3219 if (size < 8)
3220 return 0;
3222 /* MMX/3dNow values are returned on the stack, since we've
3223 got to EMMS/FEMMS before returning. */
3224 if (size == 8)
3225 return 1;
3227 /* SSE values are returned in XMM0, except when it doesn't exist. */
3228 if (size == 16)
3229 return (TARGET_SSE ? 0 : 1);
3232 if (mode == XFmode)
3233 return 0;
3235 if (size > 12)
3236 return 1;
3237 return 0;
3240 /* When returning SSE vector types, we have a choice of either
3241 (1) being abi incompatible with a -march switch, or
3242 (2) generating an error.
3243 Given no good solution, I think the safest thing is one warning.
3244 The user won't be able to use -Werror, but....
3246 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3247 called in response to actually generating a caller or callee that
3248 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3249 via aggregate_value_p for general type probing from tree-ssa. */
3251 static rtx
3252 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3254 static bool warned;
3256 if (!TARGET_SSE && type && !warned)
3258 /* Look at the return type of the function, not the function type. */
3259 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3261 if (mode == TImode
3262 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3264 warned = true;
3265 warning (0, "SSE vector return without SSE enabled changes the ABI");
3269 return NULL;
3272 /* Define how to find the value returned by a library function
3273 assuming the value has mode MODE. */
3275 ix86_libcall_value (enum machine_mode mode)
3277 if (TARGET_64BIT)
3279 switch (mode)
3281 case SFmode:
3282 case SCmode:
3283 case DFmode:
3284 case DCmode:
3285 case TFmode:
3286 return gen_rtx_REG (mode, FIRST_SSE_REG);
3287 case XFmode:
3288 case XCmode:
3289 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3290 case TCmode:
3291 return NULL;
3292 default:
3293 return gen_rtx_REG (mode, 0);
3296 else
3297 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL));
3300 /* Given a mode, return the register to use for a return value. */
3302 static int
3303 ix86_value_regno (enum machine_mode mode, tree func)
3305 gcc_assert (!TARGET_64BIT);
3307 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3308 we prevent this case when SSE is not available. */
3309 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3310 return FIRST_SSE_REG;
3312 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
3313 if (GET_MODE_CLASS (mode) != MODE_FLOAT || !TARGET_FLOAT_RETURNS_IN_80387)
3314 return 0;
3316 /* Floating point return values in %st(0), except for local functions when
3317 SSE math is enabled. */
3318 if (func && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH
3319 && flag_unit_at_a_time)
3321 struct cgraph_local_info *i = cgraph_local_info (func);
3322 if (i && i->local)
3323 return FIRST_SSE_REG;
3326 return FIRST_FLOAT_REG;
3329 /* Create the va_list data type. */
3331 static tree
3332 ix86_build_builtin_va_list (void)
3334 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3336 /* For i386 we use a plain pointer to the argument area. */
3337 if (!TARGET_64BIT)
3338 return build_pointer_type (char_type_node);
3340 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3341 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3343 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3344 unsigned_type_node);
3345 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3346 unsigned_type_node);
3347 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3348 ptr_type_node);
3349 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3350 ptr_type_node);
3352 va_list_gpr_counter_field = f_gpr;
3353 va_list_fpr_counter_field = f_fpr;
3355 DECL_FIELD_CONTEXT (f_gpr) = record;
3356 DECL_FIELD_CONTEXT (f_fpr) = record;
3357 DECL_FIELD_CONTEXT (f_ovf) = record;
3358 DECL_FIELD_CONTEXT (f_sav) = record;
3360 TREE_CHAIN (record) = type_decl;
3361 TYPE_NAME (record) = type_decl;
3362 TYPE_FIELDS (record) = f_gpr;
3363 TREE_CHAIN (f_gpr) = f_fpr;
3364 TREE_CHAIN (f_fpr) = f_ovf;
3365 TREE_CHAIN (f_ovf) = f_sav;
3367 layout_type (record);
3369 /* The correct type is an array type of one element. */
3370 return build_array_type (record, build_index_type (size_zero_node));
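/* Roughly the C equivalent of the record built above (editorial sketch, not
   part of the original sources), matching the va_list layout required by the
   x86-64 ABI:

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];
*/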
3373 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3375 static void
3376 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3377 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3378 int no_rtl)
3380 CUMULATIVE_ARGS next_cum;
3381 rtx save_area = NULL_RTX, mem;
3382 rtx label;
3383 rtx label_ref;
3384 rtx tmp_reg;
3385 rtx nsse_reg;
3386 int set;
3387 tree fntype;
3388 int stdarg_p;
3389 int i;
3391 if (!TARGET_64BIT)
3392 return;
3394 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
3395 return;
3397 /* Indicate to allocate space on the stack for varargs save area. */
3398 ix86_save_varrargs_registers = 1;
3400 cfun->stack_alignment_needed = 128;
3402 fntype = TREE_TYPE (current_function_decl);
3403 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3404 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3405 != void_type_node));
3407 /* For varargs, we do not want to skip the dummy va_dcl argument.
3408 For stdargs, we do want to skip the last named argument. */
3409 next_cum = *cum;
3410 if (stdarg_p)
3411 function_arg_advance (&next_cum, mode, type, 1);
3413 if (!no_rtl)
3414 save_area = frame_pointer_rtx;
3416 set = get_varargs_alias_set ();
3418 for (i = next_cum.regno;
3419 i < ix86_regparm
3420 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
3421 i++)
3423 mem = gen_rtx_MEM (Pmode,
3424 plus_constant (save_area, i * UNITS_PER_WORD));
3425 set_mem_alias_set (mem, set);
3426 emit_move_insn (mem, gen_rtx_REG (Pmode,
3427 x86_64_int_parameter_registers[i]));
3430 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
3432 /* Now emit code to save SSE registers. The AX parameter contains the number
3433 of SSE parameter registers used to call this function. We use the
3434 sse_prologue_save insn template, which produces a computed jump across
3435 the SSE saves. We need some preparation work to get this working. */
3437 label = gen_label_rtx ();
3438 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3440 /* Compute the address to jump to:
3441 label - eax*4 + nnamed_sse_arguments*4 */
3442 tmp_reg = gen_reg_rtx (Pmode);
3443 nsse_reg = gen_reg_rtx (Pmode);
3444 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3445 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3446 gen_rtx_MULT (Pmode, nsse_reg,
3447 GEN_INT (4))));
3448 if (next_cum.sse_regno)
3449 emit_move_insn
3450 (nsse_reg,
3451 gen_rtx_CONST (DImode,
3452 gen_rtx_PLUS (DImode,
3453 label_ref,
3454 GEN_INT (next_cum.sse_regno * 4))));
3455 else
3456 emit_move_insn (nsse_reg, label_ref);
3457 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3459 /* Compute the address of the memory block we save into. We always use a
3460 pointer pointing 127 bytes after the first byte to store - this is needed to
3461 keep the instruction size limited to 4 bytes. */
3462 tmp_reg = gen_reg_rtx (Pmode);
3463 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3464 plus_constant (save_area,
3465 8 * REGPARM_MAX + 127)));
3466 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3467 set_mem_alias_set (mem, set);
3468 set_mem_align (mem, BITS_PER_WORD);
3470 /* And finally do the dirty job! */
3471 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3472 GEN_INT (next_cum.sse_regno), label));
3477 /* Implement va_start. */
3479 void
3480 ix86_va_start (tree valist, rtx nextarg)
3482 HOST_WIDE_INT words, n_gpr, n_fpr;
3483 tree f_gpr, f_fpr, f_ovf, f_sav;
3484 tree gpr, fpr, ovf, sav, t;
3486 /* Only the 64-bit target needs something special. */
3487 if (!TARGET_64BIT)
3489 std_expand_builtin_va_start (valist, nextarg);
3490 return;
3493 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3494 f_fpr = TREE_CHAIN (f_gpr);
3495 f_ovf = TREE_CHAIN (f_fpr);
3496 f_sav = TREE_CHAIN (f_ovf);
3498 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3499 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3500 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3501 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3502 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3504 /* Count number of gp and fp argument registers used. */
3505 words = current_function_args_info.words;
3506 n_gpr = current_function_args_info.regno;
3507 n_fpr = current_function_args_info.sse_regno;
3509 if (TARGET_DEBUG_ARG)
3510 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3511 (int) words, (int) n_gpr, (int) n_fpr);
3513 if (cfun->va_list_gpr_size)
3515 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3516 build_int_cst (NULL_TREE, n_gpr * 8));
3517 TREE_SIDE_EFFECTS (t) = 1;
3518 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3521 if (cfun->va_list_fpr_size)
3523 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3524 build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
3525 TREE_SIDE_EFFECTS (t) = 1;
3526 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3529 /* Find the overflow area. */
3530 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3531 if (words != 0)
3532 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3533 build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
3534 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3535 TREE_SIDE_EFFECTS (t) = 1;
3536 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3538 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
3540 /* Find the register save area.
3541 The function prologue saves it right above the stack frame. */
3542 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3543 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3544 TREE_SIDE_EFFECTS (t) = 1;
3545 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
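/* For illustration only, a rough sketch of the psABI va_list record that
   the assignments above initialize (field names taken from the ABI, not
   from this file):

       typedef struct {
         unsigned int gp_offset;       // n_gpr * 8
         unsigned int fp_offset;       // 8 * REGPARM_MAX + n_fpr * 16
         void *overflow_arg_area;      // incoming args + words * UNITS_PER_WORD
         void *reg_save_area;          // register save block from the prologue
       } __va_list_tag;

   va_arg then consumes the two offsets until the named-register slots run
   out and falls back to overflow_arg_area.  */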
3549 /* Implement va_arg. */
3551 tree
3552 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3554 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3555 tree f_gpr, f_fpr, f_ovf, f_sav;
3556 tree gpr, fpr, ovf, sav, t;
3557 int size, rsize;
3558 tree lab_false, lab_over = NULL_TREE;
3559 tree addr, t2;
3560 rtx container;
3561 int indirect_p = 0;
3562 tree ptrtype;
3563 enum machine_mode nat_mode;
3565 /* Only the 64-bit target needs something special. */
3566 if (!TARGET_64BIT)
3567 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3569 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3570 f_fpr = TREE_CHAIN (f_gpr);
3571 f_ovf = TREE_CHAIN (f_fpr);
3572 f_sav = TREE_CHAIN (f_ovf);
3574 valist = build_va_arg_indirect_ref (valist);
3575 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3576 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3577 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3578 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3580 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
3581 if (indirect_p)
3582 type = build_pointer_type (type);
3583 size = int_size_in_bytes (type);
3584 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3586 nat_mode = type_natural_mode (type);
3587 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
3588 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3590 /* Pull the value out of the saved registers. */
3592 addr = create_tmp_var (ptr_type_node, "addr");
3593 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3595 if (container)
3597 int needed_intregs, needed_sseregs;
3598 bool need_temp;
3599 tree int_addr, sse_addr;
3601 lab_false = create_artificial_label ();
3602 lab_over = create_artificial_label ();
3604 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
3606 need_temp = (!REG_P (container)
3607 && ((needed_intregs && TYPE_ALIGN (type) > 64)
3608 || TYPE_ALIGN (type) > 128));
3610 /* In case we are passing a structure, verify that it is a consecutive block
3611 in the register save area.  If not, we need to do moves. */
3612 if (!need_temp && !REG_P (container))
3614 /* Verify that all registers are strictly consecutive */
3615 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3617 int i;
3619 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3621 rtx slot = XVECEXP (container, 0, i);
3622 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3623 || INTVAL (XEXP (slot, 1)) != i * 16)
3624 need_temp = 1;
3627 else
3629 int i;
3631 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3633 rtx slot = XVECEXP (container, 0, i);
3634 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3635 || INTVAL (XEXP (slot, 1)) != i * 8)
3636 need_temp = 1;
3640 if (!need_temp)
3642 int_addr = addr;
3643 sse_addr = addr;
3645 else
3647 int_addr = create_tmp_var (ptr_type_node, "int_addr");
3648 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3649 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3650 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3653 /* First ensure that we fit completely in registers. */
3654 if (needed_intregs)
3656 t = build_int_cst (TREE_TYPE (gpr),
3657 (REGPARM_MAX - needed_intregs + 1) * 8);
3658 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
3659 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3660 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3661 gimplify_and_add (t, pre_p);
3663 if (needed_sseregs)
3665 t = build_int_cst (TREE_TYPE (fpr),
3666 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
3667 + REGPARM_MAX * 8);
3668 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
3669 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3670 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3671 gimplify_and_add (t, pre_p);
3674 /* Compute index to start of area used for integer regs. */
3675 if (needed_intregs)
3677 /* int_addr = gpr + sav; */
3678 t = fold_convert (ptr_type_node, gpr);
3679 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
3680 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
3681 gimplify_and_add (t, pre_p);
3683 if (needed_sseregs)
3685 /* sse_addr = fpr + sav; */
3686 t = fold_convert (ptr_type_node, fpr);
3687 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
3688 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
3689 gimplify_and_add (t, pre_p);
3691 if (need_temp)
3693 int i;
3694 tree temp = create_tmp_var (type, "va_arg_tmp");
3696 /* addr = &temp; */
3697 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
3698 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
3699 gimplify_and_add (t, pre_p);
3701 for (i = 0; i < XVECLEN (container, 0); i++)
3703 rtx slot = XVECEXP (container, 0, i);
3704 rtx reg = XEXP (slot, 0);
3705 enum machine_mode mode = GET_MODE (reg);
3706 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
3707 tree addr_type = build_pointer_type (piece_type);
3708 tree src_addr, src;
3709 int src_offset;
3710 tree dest_addr, dest;
3712 if (SSE_REGNO_P (REGNO (reg)))
3714 src_addr = sse_addr;
3715 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3717 else
3719 src_addr = int_addr;
3720 src_offset = REGNO (reg) * 8;
3722 src_addr = fold_convert (addr_type, src_addr);
3723 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
3724 size_int (src_offset)));
3725 src = build_va_arg_indirect_ref (src_addr);
3727 dest_addr = fold_convert (addr_type, addr);
3728 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
3729 size_int (INTVAL (XEXP (slot, 1)))));
3730 dest = build_va_arg_indirect_ref (dest_addr);
3732 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
3733 gimplify_and_add (t, pre_p);
3737 if (needed_intregs)
3739 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3740 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
3741 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3742 gimplify_and_add (t, pre_p);
3744 if (needed_sseregs)
3746 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3747 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
3748 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3749 gimplify_and_add (t, pre_p);
3752 t = build1 (GOTO_EXPR, void_type_node, lab_over);
3753 gimplify_and_add (t, pre_p);
3755 t = build1 (LABEL_EXPR, void_type_node, lab_false);
3756 append_to_statement_list (t, pre_p);
3759 /* ... otherwise out of the overflow area. */
3761 /* Care for on-stack alignment if needed. */
3762 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3763 t = ovf;
3764 else
3766 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3767 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf,
3768 build_int_cst (TREE_TYPE (ovf), align - 1));
3769 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3770 build_int_cst (TREE_TYPE (t), -align));
3772 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
3774 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
3775 gimplify_and_add (t2, pre_p);
3777 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
3778 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
3779 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3780 gimplify_and_add (t, pre_p);
3782 if (container)
3784 t = build1 (LABEL_EXPR, void_type_node, lab_over);
3785 append_to_statement_list (t, pre_p);
3788 ptrtype = build_pointer_type (type);
3789 addr = fold_convert (ptrtype, addr);
3791 if (indirect_p)
3792 addr = build_va_arg_indirect_ref (addr);
3793 return build_va_arg_indirect_ref (addr);
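/* A sketch, for illustration only, of what the GIMPLE built above does for
   a simple scalar case such as va_arg (ap, int):

       if (ap.gp_offset >= 48)                    // 48 == REGPARM_MAX * 8
         goto from_stack;
       addr = ap.reg_save_area + ap.gp_offset;
       ap.gp_offset += 8;
       goto done;
     from_stack:
       addr = ap.overflow_arg_area;               // after any alignment fixup
       ap.overflow_arg_area += 8;
     done:
       result = *(int *) addr;

   The container/need_temp paths above extend this to aggregates spread over
   several integer and SSE registers.  */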
3796 /* Return nonzero if OPNUM's MEM should be matched
3797 in movabs* patterns. */
3800 ix86_check_movabs (rtx insn, int opnum)
3802 rtx set, mem;
3804 set = PATTERN (insn);
3805 if (GET_CODE (set) == PARALLEL)
3806 set = XVECEXP (set, 0, 0);
3807 gcc_assert (GET_CODE (set) == SET);
3808 mem = XEXP (set, opnum);
3809 while (GET_CODE (mem) == SUBREG)
3810 mem = SUBREG_REG (mem);
3811 gcc_assert (GET_CODE (mem) == MEM);
3812 return (volatile_ok || !MEM_VOLATILE_P (mem));
3815 /* Initialize the table of extra 80387 mathematical constants. */
3817 static void
3818 init_ext_80387_constants (void)
3820 static const char * cst[5] =
3822 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
3823 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
3824 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
3825 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
3826 "3.1415926535897932385128089594061862044", /* 4: fldpi */
3828 int i;
3830 for (i = 0; i < 5; i++)
3832 real_from_string (&ext_80387_constants_table[i], cst[i]);
3833 /* Ensure each constant is rounded to XFmode precision. */
3834 real_convert (&ext_80387_constants_table[i],
3835 XFmode, &ext_80387_constants_table[i]);
3838 ext_80387_constants_init = 1;
3841 /* Return true if the constant is something that can be loaded with
3842 a special instruction. */
3845 standard_80387_constant_p (rtx x)
3847 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3848 return -1;
3850 if (x == CONST0_RTX (GET_MODE (x)))
3851 return 1;
3852 if (x == CONST1_RTX (GET_MODE (x)))
3853 return 2;
3855 /* For XFmode constants, try to find a special 80387 instruction when
3856 optimizing for size or on those CPUs that benefit from them. */
3857 if (GET_MODE (x) == XFmode
3858 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
3860 REAL_VALUE_TYPE r;
3861 int i;
3863 if (! ext_80387_constants_init)
3864 init_ext_80387_constants ();
3866 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3867 for (i = 0; i < 5; i++)
3868 if (real_identical (&r, &ext_80387_constants_table[i]))
3869 return i + 3;
3872 return 0;
3875 /* Return the opcode of the special instruction to be used to load
3876 the constant X. */
3878 const char *
3879 standard_80387_constant_opcode (rtx x)
3881 switch (standard_80387_constant_p (x))
3883 case 1:
3884 return "fldz";
3885 case 2:
3886 return "fld1";
3887 case 3:
3888 return "fldlg2";
3889 case 4:
3890 return "fldln2";
3891 case 5:
3892 return "fldl2e";
3893 case 6:
3894 return "fldl2t";
3895 case 7:
3896 return "fldpi";
3897 default:
3898 gcc_unreachable ();
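/* For example (illustration only): an XFmode CONST_DOUBLE holding pi makes
   standard_80387_constant_p return 7 when the extended constants are
   enabled for the current tuning, and the switch above then selects
   "fldpi", avoiding a constant-pool load.  */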
3902 /* Return the CONST_DOUBLE representing the 80387 constant that is
3903 loaded by the specified special instruction. The argument IDX
3904 matches the return value from standard_80387_constant_p. */
3907 standard_80387_constant_rtx (int idx)
3909 int i;
3911 if (! ext_80387_constants_init)
3912 init_ext_80387_constants ();
3914 switch (idx)
3916 case 3:
3917 case 4:
3918 case 5:
3919 case 6:
3920 case 7:
3921 i = idx - 3;
3922 break;
3924 default:
3925 gcc_unreachable ();
3928 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
3929 XFmode);
3932 /* Return 1 if X is an FP constant we can load into an SSE register without using memory. */
3935 standard_sse_constant_p (rtx x)
3937 if (x == const0_rtx)
3938 return 1;
3939 return (x == CONST0_RTX (GET_MODE (x)));
3942 /* Returns 1 if OP contains a symbol reference */
3945 symbolic_reference_mentioned_p (rtx op)
3947 const char *fmt;
3948 int i;
3950 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3951 return 1;
3953 fmt = GET_RTX_FORMAT (GET_CODE (op));
3954 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3956 if (fmt[i] == 'E')
3958 int j;
3960 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3961 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3962 return 1;
3965 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3966 return 1;
3969 return 0;
3972 /* Return 1 if it is appropriate to emit `ret' instructions in the
3973 body of a function. Do this only if the epilogue is simple, needing a
3974 couple of insns. Prior to reloading, we can't tell how many registers
3975 must be saved, so return 0 then. Return 0 if there is no frame
3976 marker to de-allocate. */
3979 ix86_can_use_return_insn_p (void)
3981 struct ix86_frame frame;
3983 if (! reload_completed || frame_pointer_needed)
3984 return 0;
3986 /* Don't allow more than 32K bytes of pop, since that's all we can do
3987 with one instruction. */
3988 if (current_function_pops_args
3989 && current_function_args_size >= 32768)
3990 return 0;
3992 ix86_compute_frame_layout (&frame);
3993 return frame.to_allocate == 0 && frame.nregs == 0;
3996 /* Value should be nonzero if functions must have frame pointers.
3997 Zero means the frame pointer need not be set up (and parms may
3998 be accessed via the stack pointer) in functions that seem suitable. */
4001 ix86_frame_pointer_required (void)
4003 /* If we accessed previous frames, then the generated code expects
4004 to be able to access the saved ebp value in our frame. */
4005 if (cfun->machine->accesses_prev_frame)
4006 return 1;
4008 /* Several x86 os'es need a frame pointer for other reasons,
4009 usually pertaining to setjmp. */
4010 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4011 return 1;
4013 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4014 the frame pointer by default. Turn it back on now if we've not
4015 got a leaf function. */
4016 if (TARGET_OMIT_LEAF_FRAME_POINTER
4017 && (!current_function_is_leaf))
4018 return 1;
4020 if (current_function_profile)
4021 return 1;
4023 return 0;
4026 /* Record that the current function accesses previous call frames. */
4028 void
4029 ix86_setup_frame_addresses (void)
4031 cfun->machine->accesses_prev_frame = 1;
4034 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4035 # define USE_HIDDEN_LINKONCE 1
4036 #else
4037 # define USE_HIDDEN_LINKONCE 0
4038 #endif
4040 static int pic_labels_used;
4042 /* Fills in the label name that should be used for a pc thunk for
4043 the given register. */
4045 static void
4046 get_pc_thunk_name (char name[32], unsigned int regno)
4048 if (USE_HIDDEN_LINKONCE)
4049 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4050 else
4051 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4055 /* This function generates code for -fpic that loads %ebx with
4056 the return address of the caller and then returns. */
4058 void
4059 ix86_file_end (void)
4061 rtx xops[2];
4062 int regno;
4064 for (regno = 0; regno < 8; ++regno)
4066 char name[32];
4068 if (! ((pic_labels_used >> regno) & 1))
4069 continue;
4071 get_pc_thunk_name (name, regno);
4073 if (USE_HIDDEN_LINKONCE)
4075 tree decl;
4077 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4078 error_mark_node);
4079 TREE_PUBLIC (decl) = 1;
4080 TREE_STATIC (decl) = 1;
4081 DECL_ONE_ONLY (decl) = 1;
4083 (*targetm.asm_out.unique_section) (decl, 0);
4084 named_section (decl, NULL, 0);
4086 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4087 fputs ("\t.hidden\t", asm_out_file);
4088 assemble_name (asm_out_file, name);
4089 fputc ('\n', asm_out_file);
4090 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4092 else
4094 text_section ();
4095 ASM_OUTPUT_LABEL (asm_out_file, name);
4098 xops[0] = gen_rtx_REG (SImode, regno);
4099 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4100 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4101 output_asm_insn ("ret", xops);
4104 if (NEED_INDICATE_EXEC_STACK)
4105 file_end_indicate_exec_stack ();
4108 /* Emit code for the SET_GOT patterns. */
4110 const char *
4111 output_set_got (rtx dest)
4113 rtx xops[3];
4115 xops[0] = dest;
4116 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4118 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4120 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4122 if (!flag_pic)
4123 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4124 else
4125 output_asm_insn ("call\t%a2", xops);
4127 #if TARGET_MACHO
4128 /* Output the "canonical" label name ("Lxx$pb") here too. This
4129 is what will be referred to by the Mach-O PIC subsystem. */
4130 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4131 #endif
4132 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4133 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4135 if (flag_pic)
4136 output_asm_insn ("pop{l}\t%0", xops);
4138 else
4140 char name[32];
4141 get_pc_thunk_name (name, REGNO (dest));
4142 pic_labels_used |= 1 << REGNO (dest);
4144 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4145 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4146 output_asm_insn ("call\t%X2", xops);
4149 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4150 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4151 else if (!TARGET_MACHO)
4152 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4154 return "";
4157 /* Generate an "push" pattern for input ARG. */
4159 static rtx
4160 gen_push (rtx arg)
4162 return gen_rtx_SET (VOIDmode,
4163 gen_rtx_MEM (Pmode,
4164 gen_rtx_PRE_DEC (Pmode,
4165 stack_pointer_rtx)),
4166 arg);
4169 /* Return >= 0 if there is an unused call-clobbered register available
4170 for the entire function. */
4172 static unsigned int
4173 ix86_select_alt_pic_regnum (void)
4175 if (current_function_is_leaf && !current_function_profile)
4177 int i;
4178 for (i = 2; i >= 0; --i)
4179 if (!regs_ever_live[i])
4180 return i;
4183 return INVALID_REGNUM;
4186 /* Return 1 if we need to save REGNO. */
4187 static int
4188 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4190 if (pic_offset_table_rtx
4191 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4192 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4193 || current_function_profile
4194 || current_function_calls_eh_return
4195 || current_function_uses_const_pool))
4197 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4198 return 0;
4199 return 1;
4202 if (current_function_calls_eh_return && maybe_eh_return)
4204 unsigned i;
4205 for (i = 0; ; i++)
4207 unsigned test = EH_RETURN_DATA_REGNO (i);
4208 if (test == INVALID_REGNUM)
4209 break;
4210 if (test == regno)
4211 return 1;
4215 return (regs_ever_live[regno]
4216 && !call_used_regs[regno]
4217 && !fixed_regs[regno]
4218 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4221 /* Return number of registers to be saved on the stack. */
4223 static int
4224 ix86_nsaved_regs (void)
4226 int nregs = 0;
4227 int regno;
4229 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4230 if (ix86_save_reg (regno, true))
4231 nregs++;
4232 return nregs;
4235 /* Return the offset between two registers, one to be eliminated, and the other
4236 its replacement, at the start of a routine. */
4238 HOST_WIDE_INT
4239 ix86_initial_elimination_offset (int from, int to)
4241 struct ix86_frame frame;
4242 ix86_compute_frame_layout (&frame);
4244 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4245 return frame.hard_frame_pointer_offset;
4246 else if (from == FRAME_POINTER_REGNUM
4247 && to == HARD_FRAME_POINTER_REGNUM)
4248 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4249 else
4251 gcc_assert (to == STACK_POINTER_REGNUM);
4253 if (from == ARG_POINTER_REGNUM)
4254 return frame.stack_pointer_offset;
4256 gcc_assert (from == FRAME_POINTER_REGNUM);
4257 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4261 /* Fill structure ix86_frame about frame of currently computed function. */
4263 static void
4264 ix86_compute_frame_layout (struct ix86_frame *frame)
4266 HOST_WIDE_INT total_size;
4267 unsigned int stack_alignment_needed;
4268 HOST_WIDE_INT offset;
4269 unsigned int preferred_alignment;
4270 HOST_WIDE_INT size = get_frame_size ();
4272 frame->nregs = ix86_nsaved_regs ();
4273 total_size = size;
4275 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4276 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4278 /* During reload iteration the number of registers saved can change.
4279 Recompute the value as needed.  Do not recompute when the number of registers
4280 didn't change, as reload does multiple calls to the function and does not
4281 expect the decision to change within a single iteration. */
4282 if (!optimize_size
4283 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4285 int count = frame->nregs;
4287 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4288 /* The fast prologue uses move instead of push to save registers. This
4289 is significantly longer, but also executes faster as modern hardware
4290 can execute the moves in parallel, but can't do that for push/pop.
4292 Be careful about choosing which prologue to emit: when the function takes
4293 many instructions to execute we may as well use the slow version, likewise
4294 when the function is known to be outside a hot spot (this is known with
4295 profile feedback only).  Weight the size of the function by the number of
4296 registers to save, as it is cheap to use one or two push instructions but
4297 very slow to use many of them. */
4298 if (count)
4299 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4300 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4301 || (flag_branch_probabilities
4302 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4303 cfun->machine->use_fast_prologue_epilogue = false;
4304 else
4305 cfun->machine->use_fast_prologue_epilogue
4306 = !expensive_function_p (count);
4308 if (TARGET_PROLOGUE_USING_MOVE
4309 && cfun->machine->use_fast_prologue_epilogue)
4310 frame->save_regs_using_mov = true;
4311 else
4312 frame->save_regs_using_mov = false;
4315 /* Skip return address and saved base pointer. */
4316 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4318 frame->hard_frame_pointer_offset = offset;
4320 /* Do some sanity checking of stack_alignment_needed and
4321 preferred_alignment, since the i386 port is the only one using those
4322 features, which may break easily. */
4324 gcc_assert (!size || stack_alignment_needed);
4325 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
4326 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
4327 gcc_assert (stack_alignment_needed
4328 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
4330 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4331 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4333 /* Register save area */
4334 offset += frame->nregs * UNITS_PER_WORD;
4336 /* Va-arg area */
4337 if (ix86_save_varrargs_registers)
4339 offset += X86_64_VARARGS_SIZE;
4340 frame->va_arg_size = X86_64_VARARGS_SIZE;
4342 else
4343 frame->va_arg_size = 0;
4345 /* Align start of frame for local function. */
4346 frame->padding1 = ((offset + stack_alignment_needed - 1)
4347 & -stack_alignment_needed) - offset;
4349 offset += frame->padding1;
4351 /* Frame pointer points here. */
4352 frame->frame_pointer_offset = offset;
4354 offset += size;
4356 /* Add the outgoing arguments area.  It can be skipped if we eliminated
4357 all the function calls as dead code.
4358 Skipping is, however, impossible when the function calls alloca, as the
4359 alloca expander assumes that the last current_function_outgoing_args_size
4360 bytes of the stack frame are unused. */
4361 if (ACCUMULATE_OUTGOING_ARGS
4362 && (!current_function_is_leaf || current_function_calls_alloca))
4364 offset += current_function_outgoing_args_size;
4365 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4367 else
4368 frame->outgoing_arguments_size = 0;
4370 /* Align stack boundary. Only needed if we're calling another function
4371 or using alloca. */
4372 if (!current_function_is_leaf || current_function_calls_alloca)
4373 frame->padding2 = ((offset + preferred_alignment - 1)
4374 & -preferred_alignment) - offset;
4375 else
4376 frame->padding2 = 0;
4378 offset += frame->padding2;
4380 /* We've reached end of stack frame. */
4381 frame->stack_pointer_offset = offset;
4383 /* Size the prologue needs to allocate. */
4384 frame->to_allocate =
4385 (size + frame->padding1 + frame->padding2
4386 + frame->outgoing_arguments_size + frame->va_arg_size);
4388 if ((!frame->to_allocate && frame->nregs <= 1)
4389 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
4390 frame->save_regs_using_mov = false;
4392 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
4393 && current_function_is_leaf)
4395 frame->red_zone_size = frame->to_allocate;
4396 if (frame->save_regs_using_mov)
4397 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
4398 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4399 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4401 else
4402 frame->red_zone_size = 0;
4403 frame->to_allocate -= frame->red_zone_size;
4404 frame->stack_pointer_offset -= frame->red_zone_size;
4405 #if 0
4406 fprintf (stderr, "nregs: %i\n", frame->nregs);
4407 fprintf (stderr, "size: %i\n", size);
4408 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4409 fprintf (stderr, "padding1: %i\n", frame->padding1);
4410 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4411 fprintf (stderr, "padding2: %i\n", frame->padding2);
4412 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4413 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4414 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4415 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4416 frame->hard_frame_pointer_offset);
4417 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4418 #endif
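/* A rough picture of the frame layout computed above, for illustration
   only, with higher addresses first:

       incoming arguments
       return address
       saved frame pointer (if any)      <- hard_frame_pointer_offset
       saved general registers           (frame->nregs words)
       va-arg register save area         (frame->va_arg_size, 64-bit only)
       padding1
       local variables                   <- frame_pointer_offset
       outgoing argument area
       padding2                          <- stack_pointer_offset
                                            (minus red_zone_size, if any)  */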
4421 /* Emit code to save registers in the prologue. */
4423 static void
4424 ix86_emit_save_regs (void)
4426 int regno;
4427 rtx insn;
4429 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4430 if (ix86_save_reg (regno, true))
4432 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4433 RTX_FRAME_RELATED_P (insn) = 1;
4437 /* Emit code to save registers using MOV insns.  The first register
4438 is saved at POINTER + OFFSET. */
4439 static void
4440 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
4442 int regno;
4443 rtx insn;
4445 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4446 if (ix86_save_reg (regno, true))
4448 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4449 Pmode, offset),
4450 gen_rtx_REG (Pmode, regno));
4451 RTX_FRAME_RELATED_P (insn) = 1;
4452 offset += UNITS_PER_WORD;
4456 /* Expand prologue or epilogue stack adjustment.
4457 The pattern exists to put a dependency on all ebp-based memory accesses.
4458 STYLE should be negative if instructions should be marked as frame related,
4459 zero if the %r11 register is live and cannot be freely used, and positive
4460 otherwise. */
4462 static void
4463 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
4465 rtx insn;
4467 if (! TARGET_64BIT)
4468 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
4469 else if (x86_64_immediate_operand (offset, DImode))
4470 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
4471 else
4473 rtx r11;
4474 /* r11 is used by indirect sibcall return as well, set before the
4475 epilogue and used after the epilogue. ATM indirect sibcall
4476 shouldn't be used together with huge frame sizes in one
4477 function because of the frame_size check in sibcall.c. */
4478 gcc_assert (style);
4479 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4480 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
4481 if (style < 0)
4482 RTX_FRAME_RELATED_P (insn) = 1;
4483 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
4484 offset));
4486 if (style < 0)
4487 RTX_FRAME_RELATED_P (insn) = 1;
4490 /* Expand the prologue into a bunch of separate insns. */
4492 void
4493 ix86_expand_prologue (void)
4495 rtx insn;
4496 bool pic_reg_used;
4497 struct ix86_frame frame;
4498 HOST_WIDE_INT allocate;
4500 ix86_compute_frame_layout (&frame);
4502 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4503 slower on all targets. Also sdb doesn't like it. */
4505 if (frame_pointer_needed)
4507 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4508 RTX_FRAME_RELATED_P (insn) = 1;
4510 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4511 RTX_FRAME_RELATED_P (insn) = 1;
4514 allocate = frame.to_allocate;
4516 if (!frame.save_regs_using_mov)
4517 ix86_emit_save_regs ();
4518 else
4519 allocate += frame.nregs * UNITS_PER_WORD;
4521 /* When using the red zone we may start saving registers before allocating
4522 the stack frame, saving one cycle of the prologue. */
4523 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
4524 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
4525 : stack_pointer_rtx,
4526 -frame.nregs * UNITS_PER_WORD);
4528 if (allocate == 0)
4530 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4531 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4532 GEN_INT (-allocate), -1);
4533 else
4535 /* Only valid for Win32. */
4536 rtx eax = gen_rtx_REG (SImode, 0);
4537 bool eax_live = ix86_eax_live_at_start_p ();
4538 rtx t;
4540 gcc_assert (!TARGET_64BIT);
4542 if (eax_live)
4544 emit_insn (gen_push (eax));
4545 allocate -= 4;
4548 emit_move_insn (eax, GEN_INT (allocate));
4550 insn = emit_insn (gen_allocate_stack_worker (eax));
4551 RTX_FRAME_RELATED_P (insn) = 1;
4552 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
4553 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
4554 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
4555 t, REG_NOTES (insn));
4557 if (eax_live)
4559 if (frame_pointer_needed)
4560 t = plus_constant (hard_frame_pointer_rtx,
4561 allocate
4562 - frame.to_allocate
4563 - frame.nregs * UNITS_PER_WORD);
4564 else
4565 t = plus_constant (stack_pointer_rtx, allocate);
4566 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
4570 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
4572 if (!frame_pointer_needed || !frame.to_allocate)
4573 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4574 else
4575 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4576 -frame.nregs * UNITS_PER_WORD);
4579 pic_reg_used = false;
4580 if (pic_offset_table_rtx
4581 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4582 || current_function_profile))
4584 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4586 if (alt_pic_reg_used != INVALID_REGNUM)
4587 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4589 pic_reg_used = true;
4592 if (pic_reg_used)
4594 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4596 /* Even with accurate pre-reload life analysis, we can wind up
4597 deleting all references to the pic register after reload.
4598 Consider if cross-jumping unifies two sides of a branch
4599 controlled by a comparison vs the only read from a global.
4600 In which case, allow the set_got to be deleted, though we're
4601 too late to do anything about the ebx save in the prologue. */
4602 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4605 /* Prevent function calls from being scheduled before the call to mcount.
4606 In the pic_reg_used case, make sure that the got load isn't deleted. */
4607 if (current_function_profile)
4608 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
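/* For illustration: a small 32-bit function that needs a frame pointer and
   saves %ebx typically gets the prologue expanded above as

       pushl   %ebp
       movl    %esp, %ebp
       pushl   %ebx
       subl    $N, %esp            # N == frame.to_allocate

   whereas with save_regs_using_mov the subtraction covers the register
   slots as well and the saves become plain moves into the new frame.  */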
4611 /* Emit code to restore saved registers using MOV insns. First register
4612 is restored from POINTER + OFFSET. */
4613 static void
4614 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
4615 int maybe_eh_return)
4617 int regno;
4618 rtx base_address = gen_rtx_MEM (Pmode, pointer);
4620 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4621 if (ix86_save_reg (regno, maybe_eh_return))
4623 /* Ensure that adjust_address won't be forced to produce a pointer
4624 outside the range allowed by the x86-64 instruction set. */
4625 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
4627 rtx r11;
4629 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4630 emit_move_insn (r11, GEN_INT (offset));
4631 emit_insn (gen_adddi3 (r11, r11, pointer));
4632 base_address = gen_rtx_MEM (Pmode, r11);
4633 offset = 0;
4635 emit_move_insn (gen_rtx_REG (Pmode, regno),
4636 adjust_address (base_address, Pmode, offset));
4637 offset += UNITS_PER_WORD;
4641 /* Restore function stack, frame, and registers. */
4643 void
4644 ix86_expand_epilogue (int style)
4646 int regno;
4647 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4648 struct ix86_frame frame;
4649 HOST_WIDE_INT offset;
4651 ix86_compute_frame_layout (&frame);
4653 /* Calculate start of saved registers relative to ebp. Special care
4654 must be taken for the normal return case of a function using
4655 eh_return: the eax and edx registers are marked as saved, but not
4656 restored along this path. */
4657 offset = frame.nregs;
4658 if (current_function_calls_eh_return && style != 2)
4659 offset -= 2;
4660 offset *= -UNITS_PER_WORD;
4662 /* If we're only restoring one register and sp is not valid then
4663 use a move instruction to restore the register, since it's
4664 less work than reloading sp and popping the register.
4666 The default code results in a stack adjustment using an add/lea instruction,
4667 while this code results in a LEAVE instruction (or its discrete equivalent),
4668 so it is profitable in some other cases as well, especially when there
4669 are no registers to restore.  We also use this code when TARGET_USE_LEAVE
4670 and there is exactly one register to pop.  This heuristic may need some
4671 tuning in the future. */
4672 if ((!sp_valid && frame.nregs <= 1)
4673 || (TARGET_EPILOGUE_USING_MOVE
4674 && cfun->machine->use_fast_prologue_epilogue
4675 && (frame.nregs > 1 || frame.to_allocate))
4676 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4677 || (frame_pointer_needed && TARGET_USE_LEAVE
4678 && cfun->machine->use_fast_prologue_epilogue
4679 && frame.nregs == 1)
4680 || current_function_calls_eh_return)
4682 /* Restore registers.  We can use ebp or esp to address the memory
4683 locations.  If both are available, default to ebp, since offsets
4684 are known to be small.  The only exception is esp pointing directly to
4685 the end of the block of saved registers, where we may simplify the
4686 addressing mode. */
4688 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4689 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4690 frame.to_allocate, style == 2);
4691 else
4692 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4693 offset, style == 2);
4695 /* eh_return epilogues need %ecx added to the stack pointer. */
4696 if (style == 2)
4698 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4700 if (frame_pointer_needed)
4702 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4703 tmp = plus_constant (tmp, UNITS_PER_WORD);
4704 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4706 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4707 emit_move_insn (hard_frame_pointer_rtx, tmp);
4709 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
4710 const0_rtx, style);
4712 else
4714 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4715 tmp = plus_constant (tmp, (frame.to_allocate
4716 + frame.nregs * UNITS_PER_WORD));
4717 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4720 else if (!frame_pointer_needed)
4721 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4722 GEN_INT (frame.to_allocate
4723 + frame.nregs * UNITS_PER_WORD),
4724 style);
4725 /* If not an i386, mov & pop is faster than "leave". */
4726 else if (TARGET_USE_LEAVE || optimize_size
4727 || !cfun->machine->use_fast_prologue_epilogue)
4728 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4729 else
4731 pro_epilogue_adjust_stack (stack_pointer_rtx,
4732 hard_frame_pointer_rtx,
4733 const0_rtx, style);
4734 if (TARGET_64BIT)
4735 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4736 else
4737 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4740 else
4742 /* First step is to deallocate the stack frame so that we can
4743 pop the registers. */
4744 if (!sp_valid)
4746 gcc_assert (frame_pointer_needed);
4747 pro_epilogue_adjust_stack (stack_pointer_rtx,
4748 hard_frame_pointer_rtx,
4749 GEN_INT (offset), style);
4751 else if (frame.to_allocate)
4752 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4753 GEN_INT (frame.to_allocate), style);
4755 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4756 if (ix86_save_reg (regno, false))
4758 if (TARGET_64BIT)
4759 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4760 else
4761 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4763 if (frame_pointer_needed)
4765 /* Leave results in shorter dependency chains on CPUs that are
4766 able to grok it fast. */
4767 if (TARGET_USE_LEAVE)
4768 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4769 else if (TARGET_64BIT)
4770 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4771 else
4772 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4776 /* Sibcall epilogues don't want a return instruction. */
4777 if (style == 0)
4778 return;
4780 if (current_function_pops_args && current_function_args_size)
4782 rtx popc = GEN_INT (current_function_pops_args);
4784 /* i386 can only pop 64K bytes. If asked to pop more, pop
4785 return address, do explicit add, and jump indirectly to the
4786 caller. */
4788 if (current_function_pops_args >= 65536)
4790 rtx ecx = gen_rtx_REG (SImode, 2);
4792 /* There is no "pascal" calling convention in 64bit ABI. */
4793 gcc_assert (!TARGET_64BIT);
4795 emit_insn (gen_popsi1 (ecx));
4796 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4797 emit_jump_insn (gen_return_indirect_internal (ecx));
4799 else
4800 emit_jump_insn (gen_return_pop_internal (popc));
4802 else
4803 emit_jump_insn (gen_return_internal ());
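/* For illustration, the common shapes of the epilogue expanded above are

       leave                       # frame pointer present, TARGET_USE_LEAVE
       ret

   or, without a frame pointer,

       addl    $N, %esp            # deallocate frame.to_allocate
       popl    %ebx
       ret

   and "ret $N" when the callee pops its own arguments, as for stdcall.  */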
4806 /* Reset from the function's potential modifications. */
4808 static void
4809 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4810 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4812 if (pic_offset_table_rtx)
4813 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4816 /* Extract the parts of an RTL expression that is a valid memory address
4817 for an instruction.  Return 0 if the structure of the address is
4818 grossly off.  Return -1 if the address contains ASHIFT, so it is not
4819 strictly valid, but is still used for computing the length of a lea instruction. */
4822 ix86_decompose_address (rtx addr, struct ix86_address *out)
4824 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
4825 rtx base_reg, index_reg;
4826 HOST_WIDE_INT scale = 1;
4827 rtx scale_rtx = NULL_RTX;
4828 int retval = 1;
4829 enum ix86_address_seg seg = SEG_DEFAULT;
4831 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
4832 base = addr;
4833 else if (GET_CODE (addr) == PLUS)
4835 rtx addends[4], op;
4836 int n = 0, i;
4838 op = addr;
4841 if (n >= 4)
4842 return 0;
4843 addends[n++] = XEXP (op, 1);
4844 op = XEXP (op, 0);
4846 while (GET_CODE (op) == PLUS);
4847 if (n >= 4)
4848 return 0;
4849 addends[n] = op;
4851 for (i = n; i >= 0; --i)
4853 op = addends[i];
4854 switch (GET_CODE (op))
4856 case MULT:
4857 if (index)
4858 return 0;
4859 index = XEXP (op, 0);
4860 scale_rtx = XEXP (op, 1);
4861 break;
4863 case UNSPEC:
4864 if (XINT (op, 1) == UNSPEC_TP
4865 && TARGET_TLS_DIRECT_SEG_REFS
4866 && seg == SEG_DEFAULT)
4867 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
4868 else
4869 return 0;
4870 break;
4872 case REG:
4873 case SUBREG:
4874 if (!base)
4875 base = op;
4876 else if (!index)
4877 index = op;
4878 else
4879 return 0;
4880 break;
4882 case CONST:
4883 case CONST_INT:
4884 case SYMBOL_REF:
4885 case LABEL_REF:
4886 if (disp)
4887 return 0;
4888 disp = op;
4889 break;
4891 default:
4892 return 0;
4896 else if (GET_CODE (addr) == MULT)
4898 index = XEXP (addr, 0); /* index*scale */
4899 scale_rtx = XEXP (addr, 1);
4901 else if (GET_CODE (addr) == ASHIFT)
4903 rtx tmp;
4905 /* We're called for lea too, which implements ashift on occasion. */
4906 index = XEXP (addr, 0);
4907 tmp = XEXP (addr, 1);
4908 if (GET_CODE (tmp) != CONST_INT)
4909 return 0;
4910 scale = INTVAL (tmp);
4911 if ((unsigned HOST_WIDE_INT) scale > 3)
4912 return 0;
4913 scale = 1 << scale;
4914 retval = -1;
4916 else
4917 disp = addr; /* displacement */
4919 /* Extract the integral value of scale. */
4920 if (scale_rtx)
4922 if (GET_CODE (scale_rtx) != CONST_INT)
4923 return 0;
4924 scale = INTVAL (scale_rtx);
4927 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
4928 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
4930 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
4931 if (base_reg && index_reg && scale == 1
4932 && (index_reg == arg_pointer_rtx
4933 || index_reg == frame_pointer_rtx
4934 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
4936 rtx tmp;
4937 tmp = base, base = index, index = tmp;
4938 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
4941 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4942 if ((base_reg == hard_frame_pointer_rtx
4943 || base_reg == frame_pointer_rtx
4944 || base_reg == arg_pointer_rtx) && !disp)
4945 disp = const0_rtx;
4947 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4948 Avoid this by transforming to [%esi+0]. */
4949 if (ix86_tune == PROCESSOR_K6 && !optimize_size
4950 && base_reg && !index_reg && !disp
4951 && REG_P (base_reg)
4952 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
4953 disp = const0_rtx;
4955 /* Special case: encode reg+reg instead of reg*2. */
4956 if (!base && index && scale && scale == 2)
4957 base = index, base_reg = index_reg, scale = 1;
4959 /* Special case: scaling cannot be encoded without base or displacement. */
4960 if (!base && !disp && index && scale != 1)
4961 disp = const0_rtx;
4963 out->base = base;
4964 out->index = index;
4965 out->disp = disp;
4966 out->scale = scale;
4967 out->seg = seg;
4969 return retval;
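/* Example, for illustration only: the address

       (plus (plus (mult (reg %eax) (const_int 4)) (reg %ebx))
             (const_int 12))

   decomposes into base = %ebx, index = %eax, scale = 4, disp = 12,
   i.e. the operand printed as 12(%ebx,%eax,4).  */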
4972 /* Return the cost of the memory address x.
4973 For i386, it is better to use a complex address than let gcc copy
4974 the address into a reg and make a new pseudo.  But not if the address
4975 requires two regs - that would mean more pseudos with longer
4976 lifetimes. */
4977 static int
4978 ix86_address_cost (rtx x)
4980 struct ix86_address parts;
4981 int cost = 1;
4982 int ok = ix86_decompose_address (x, &parts);
4984 gcc_assert (ok);
4986 if (parts.base && GET_CODE (parts.base) == SUBREG)
4987 parts.base = SUBREG_REG (parts.base);
4988 if (parts.index && GET_CODE (parts.index) == SUBREG)
4989 parts.index = SUBREG_REG (parts.index);
4991 /* More complex memory references are better. */
4992 if (parts.disp && parts.disp != const0_rtx)
4993 cost--;
4994 if (parts.seg != SEG_DEFAULT)
4995 cost--;
4997 /* Attempt to minimize number of registers in the address. */
4998 if ((parts.base
4999 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5000 || (parts.index
5001 && (!REG_P (parts.index)
5002 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5003 cost++;
5005 if (parts.base
5006 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5007 && parts.index
5008 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5009 && parts.base != parts.index)
5010 cost++;
5012 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
5013 since its predecode logic can't detect the length of such instructions
5014 and decoding degenerates to vector decoded.  Increase the cost of such
5015 addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
5016 to split such addresses or even refuse them at all.
5018 The following addressing modes are affected:
5019 [base+scale*index]
5020 [scale*index+disp]
5021 [base+index]
5023 The first and last case may be avoidable by explicitly coding the zero into
5024 the memory address, but I don't have an AMD-K6 machine handy to check this
5025 theory. */
5027 if (TARGET_K6
5028 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5029 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5030 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5031 cost += 10;
5033 return cost;
5036 /* If X is a machine specific address (i.e. a symbol or label being
5037 referenced as a displacement from the GOT implemented using an
5038 UNSPEC), then return the base term. Otherwise return X. */
5041 ix86_find_base_term (rtx x)
5043 rtx term;
5045 if (TARGET_64BIT)
5047 if (GET_CODE (x) != CONST)
5048 return x;
5049 term = XEXP (x, 0);
5050 if (GET_CODE (term) == PLUS
5051 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5052 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5053 term = XEXP (term, 0);
5054 if (GET_CODE (term) != UNSPEC
5055 || XINT (term, 1) != UNSPEC_GOTPCREL)
5056 return x;
5058 term = XVECEXP (term, 0, 0);
5060 if (GET_CODE (term) != SYMBOL_REF
5061 && GET_CODE (term) != LABEL_REF)
5062 return x;
5064 return term;
5067 term = ix86_delegitimize_address (x);
5069 if (GET_CODE (term) != SYMBOL_REF
5070 && GET_CODE (term) != LABEL_REF)
5071 return x;
5073 return term;
5076 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
5077 this is used to form addresses to local data when -fPIC is in
5078 use. */
5080 static bool
5081 darwin_local_data_pic (rtx disp)
5083 if (GET_CODE (disp) == MINUS)
5085 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5086 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5087 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5089 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5090 if (! strcmp (sym_name, "<pic base>"))
5091 return true;
5095 return false;
5098 /* Determine if a given RTX is a valid constant. We already know this
5099 satisfies CONSTANT_P. */
5101 bool
5102 legitimate_constant_p (rtx x)
5104 switch (GET_CODE (x))
5106 case CONST:
5107 x = XEXP (x, 0);
5109 if (GET_CODE (x) == PLUS)
5111 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5112 return false;
5113 x = XEXP (x, 0);
5116 if (TARGET_MACHO && darwin_local_data_pic (x))
5117 return true;
5119 /* Only some unspecs are valid as "constants". */
5120 if (GET_CODE (x) == UNSPEC)
5121 switch (XINT (x, 1))
5123 case UNSPEC_TPOFF:
5124 case UNSPEC_NTPOFF:
5125 return local_exec_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
5126 case UNSPEC_DTPOFF:
5127 return local_dynamic_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
5128 default:
5129 return false;
5132 /* We must have drilled down to a symbol. */
5133 if (!symbolic_operand (x, Pmode))
5134 return false;
5135 /* FALLTHRU */
5137 case SYMBOL_REF:
5138 /* TLS symbols are never valid. */
5139 if (tls_symbolic_operand (x, Pmode))
5140 return false;
5141 break;
5143 default:
5144 break;
5147 /* Otherwise we handle everything else in the move patterns. */
5148 return true;
5151 /* Determine if it's legal to put X into the constant pool. This
5152 is not possible for the address of thread-local symbols, which
5153 is checked above. */
5155 static bool
5156 ix86_cannot_force_const_mem (rtx x)
5158 return !legitimate_constant_p (x);
5161 /* Determine if a given RTX is a valid constant address. */
5163 bool
5164 constant_address_p (rtx x)
5166 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5169 /* Nonzero if the constant value X is a legitimate general operand
5170 when generating PIC code. It is given that flag_pic is on and
5171 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5173 bool
5174 legitimate_pic_operand_p (rtx x)
5176 rtx inner;
5178 switch (GET_CODE (x))
5180 case CONST:
5181 inner = XEXP (x, 0);
5183 /* Only some unspecs are valid as "constants". */
5184 if (GET_CODE (inner) == UNSPEC)
5185 switch (XINT (inner, 1))
5187 case UNSPEC_TPOFF:
5188 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5189 default:
5190 return false;
5192 /* FALLTHRU */
5194 case SYMBOL_REF:
5195 case LABEL_REF:
5196 return legitimate_pic_address_disp_p (x);
5198 default:
5199 return true;
5203 /* Determine if a given CONST RTX is a valid memory displacement
5204 in PIC mode. */
5207 legitimate_pic_address_disp_p (rtx disp)
5209 bool saw_plus;
5211 /* In 64bit mode we can allow direct addresses of symbols and labels
5212 when they are not dynamic symbols. */
5213 if (TARGET_64BIT)
5215 /* TLS references should always be enclosed in UNSPEC. */
5216 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5217 return 0;
5218 if (GET_CODE (disp) == SYMBOL_REF
5219 && ix86_cmodel == CM_SMALL_PIC
5220 && SYMBOL_REF_LOCAL_P (disp))
5221 return 1;
5222 if (GET_CODE (disp) == LABEL_REF)
5223 return 1;
5224 if (GET_CODE (disp) == CONST
5225 && GET_CODE (XEXP (disp, 0)) == PLUS)
5227 rtx op0 = XEXP (XEXP (disp, 0), 0);
5228 rtx op1 = XEXP (XEXP (disp, 0), 1);
5230 /* TLS references should always be enclosed in UNSPEC. */
5231 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5232 return 0;
5233 if (((GET_CODE (op0) == SYMBOL_REF
5234 && ix86_cmodel == CM_SMALL_PIC
5235 && SYMBOL_REF_LOCAL_P (op0))
5236 || GET_CODE (op0) == LABEL_REF)
5237 && GET_CODE (op1) == CONST_INT
5238 && INTVAL (op1) < 16*1024*1024
5239 && INTVAL (op1) >= -16*1024*1024)
5240 return 1;
5243 if (GET_CODE (disp) != CONST)
5244 return 0;
5245 disp = XEXP (disp, 0);
5247 if (TARGET_64BIT)
5249 /* It is unsafe to allow PLUS expressions here; this limits the allowed
5250 distance of GOT tables.  We should not need these anyway. */
5251 if (GET_CODE (disp) != UNSPEC
5252 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5253 return 0;
5255 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5256 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5257 return 0;
5258 return 1;
5261 saw_plus = false;
5262 if (GET_CODE (disp) == PLUS)
5264 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5265 return 0;
5266 disp = XEXP (disp, 0);
5267 saw_plus = true;
5270 if (TARGET_MACHO && darwin_local_data_pic (disp))
5271 return 1;
5273 if (GET_CODE (disp) != UNSPEC)
5274 return 0;
5276 switch (XINT (disp, 1))
5278 case UNSPEC_GOT:
5279 if (saw_plus)
5280 return false;
5281 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5282 case UNSPEC_GOTOFF:
5283 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5284 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5285 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5286 return false;
5287 case UNSPEC_GOTTPOFF:
5288 case UNSPEC_GOTNTPOFF:
5289 case UNSPEC_INDNTPOFF:
5290 if (saw_plus)
5291 return false;
5292 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5293 case UNSPEC_NTPOFF:
5294 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5295 case UNSPEC_DTPOFF:
5296 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5299 return 0;
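/* For illustration, typical displacements accepted above are symbol@GOTOFF
   for local data, symbol@GOT (only without an added offset) for global
   data, and the TLS forms @GOTTPOFF, @GOTNTPOFF, @INDNTPOFF, @NTPOFF and
   @DTPOFF; in 64-bit mode the only UNSPEC accepted is symbol@GOTPCREL.  */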
5302 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5303 memory address for an instruction. The MODE argument is the machine mode
5304 for the MEM expression that wants to use this address.
5306 It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
5307 convert common non-canonical forms to canonical form so that they will
5308 be recognized. */
5311 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5313 struct ix86_address parts;
5314 rtx base, index, disp;
5315 HOST_WIDE_INT scale;
5316 const char *reason = NULL;
5317 rtx reason_rtx = NULL_RTX;
5319 if (TARGET_DEBUG_ADDR)
5321 fprintf (stderr,
5322 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5323 GET_MODE_NAME (mode), strict);
5324 debug_rtx (addr);
5327 if (ix86_decompose_address (addr, &parts) <= 0)
5329 reason = "decomposition failed";
5330 goto report_error;
5333 base = parts.base;
5334 index = parts.index;
5335 disp = parts.disp;
5336 scale = parts.scale;
5338 /* Validate base register.
5340 Don't allow SUBREG's that span more than a word here. It can lead to spill
5341 failures when the base is one word out of a two word structure, which is
5342 represented internally as a DImode int. */
5344 if (base)
5346 rtx reg;
5347 reason_rtx = base;
5349 if (REG_P (base))
5350 reg = base;
5351 else if (GET_CODE (base) == SUBREG
5352 && REG_P (SUBREG_REG (base))
5353 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
5354 <= UNITS_PER_WORD)
5355 reg = SUBREG_REG (base);
5356 else
5358 reason = "base is not a register";
5359 goto report_error;
5362 if (GET_MODE (base) != Pmode)
5364 reason = "base is not in Pmode";
5365 goto report_error;
5368 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5369 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5371 reason = "base is not valid";
5372 goto report_error;
5376 /* Validate index register.
5378 Don't allow SUBREG's that span more than a word here -- same as above. */
5380 if (index)
5382 rtx reg;
5383 reason_rtx = index;
5385 if (REG_P (index))
5386 reg = index;
5387 else if (GET_CODE (index) == SUBREG
5388 && REG_P (SUBREG_REG (index))
5389 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
5390 <= UNITS_PER_WORD)
5391 reg = SUBREG_REG (index);
5392 else
5394 reason = "index is not a register";
5395 goto report_error;
5398 if (GET_MODE (index) != Pmode)
5400 reason = "index is not in Pmode";
5401 goto report_error;
5404 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5405 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5407 reason = "index is not valid";
5408 goto report_error;
5412 /* Validate scale factor. */
5413 if (scale != 1)
5415 reason_rtx = GEN_INT (scale);
5416 if (!index)
5418 reason = "scale without index";
5419 goto report_error;
5422 if (scale != 2 && scale != 4 && scale != 8)
5424 reason = "scale is not a valid multiplier";
5425 goto report_error;
5429 /* Validate displacement. */
5430 if (disp)
5432 reason_rtx = disp;
5434 if (GET_CODE (disp) == CONST
5435 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5436 switch (XINT (XEXP (disp, 0), 1))
5438 case UNSPEC_GOT:
5439 case UNSPEC_GOTOFF:
5440 case UNSPEC_GOTPCREL:
5441 gcc_assert (flag_pic);
5442 goto is_legitimate_pic;
5444 case UNSPEC_GOTTPOFF:
5445 case UNSPEC_GOTNTPOFF:
5446 case UNSPEC_INDNTPOFF:
5447 case UNSPEC_NTPOFF:
5448 case UNSPEC_DTPOFF:
5449 break;
5451 default:
5452 reason = "invalid address unspec";
5453 goto report_error;
5456 else if (flag_pic && (SYMBOLIC_CONST (disp)
5457 #if TARGET_MACHO
5458 && !machopic_operand_p (disp)
5459 #endif
5462 is_legitimate_pic:
5463 if (TARGET_64BIT && (index || base))
5465 /* foo@dtpoff(%rX) is ok. */
5466 if (GET_CODE (disp) != CONST
5467 || GET_CODE (XEXP (disp, 0)) != PLUS
5468 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5469 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5470 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5471 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5473 reason = "non-constant pic memory reference";
5474 goto report_error;
5477 else if (! legitimate_pic_address_disp_p (disp))
5479 reason = "displacement is an invalid pic construct";
5480 goto report_error;
5483 /* This code used to verify that a symbolic pic displacement
5484 includes the pic_offset_table_rtx register.
5486 While this is good idea, unfortunately these constructs may
5487 be created by "adds using lea" optimization for incorrect
5488 code like:
5490 int a;
5491 int foo(int i)
5493 return *(&a+i);
5496 This code is nonsensical, but results in addressing the
5497 GOT table with the pic_offset_table_rtx base. We can't
5498 just refuse it easily, since it gets matched by the
5499 "addsi3" pattern, which later gets split to lea when the
5500 output register differs from the input. While this
5501 could be handled by a separate addsi pattern for this case
5502 that never results in lea, disabling this test seems to be
5503 the easier and correct fix for the crash. */
5505 else if (GET_CODE (disp) != LABEL_REF
5506 && GET_CODE (disp) != CONST_INT
5507 && (GET_CODE (disp) != CONST
5508 || !legitimate_constant_p (disp))
5509 && (GET_CODE (disp) != SYMBOL_REF
5510 || !legitimate_constant_p (disp)))
5512 reason = "displacement is not constant";
5513 goto report_error;
5515 else if (TARGET_64BIT
5516 && !x86_64_immediate_operand (disp, VOIDmode))
5518 reason = "displacement is out of range";
5519 goto report_error;
5523 /* Everything looks valid. */
5524 if (TARGET_DEBUG_ADDR)
5525 fprintf (stderr, "Success.\n");
5526 return TRUE;
5528 report_error:
5529 if (TARGET_DEBUG_ADDR)
5531 fprintf (stderr, "Error: %s\n", reason);
5532 debug_rtx (reason_rtx);
5534 return FALSE;
5537 /* Return a unique alias set for the GOT. */
5539 static HOST_WIDE_INT
5540 ix86_GOT_alias_set (void)
5542 static HOST_WIDE_INT set = -1;
5543 if (set == -1)
5544 set = new_alias_set ();
5545 return set;
5548 /* Return a legitimate reference for ORIG (an address) using the
5549 register REG. If REG is 0, a new pseudo is generated.
5551 There are two types of references that must be handled:
5553 1. Global data references must load the address from the GOT, via
5554 the PIC reg. An insn is emitted to do this load, and the reg is
5555 returned.
5557 2. Static data references, constant pool addresses, and code labels
5558 compute the address as an offset from the GOT, whose base is in
5559 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
5560 differentiate them from global data objects. The returned
5561 address is the PIC reg + an unspec constant.
5563 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5564 reg also appears in the address. */
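/* As an illustration of the two cases (assuming 32-bit ELF with the
   GOT pointer already in %ebx), the RTL built below corresponds
   roughly to:

     global 'foo':  (mem (plus pic_reg (const (unspec [foo] UNSPEC_GOT))))
                    i.e. movl foo@GOT(%ebx), %reg
     local 'bar':   (plus pic_reg (const (unspec [bar] UNSPEC_GOTOFF)))
                    i.e. leal bar@GOTOFF(%ebx), %reg

   On 64-bit targets the global case instead becomes a
   @GOTPCREL(%rip) memory load.  */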
5566 static rtx
5567 legitimize_pic_address (rtx orig, rtx reg)
5569 rtx addr = orig;
5570 rtx new = orig;
5571 rtx base;
5573 #if TARGET_MACHO
5574 if (reg == 0)
5575 reg = gen_reg_rtx (Pmode);
5576 /* Use the generic Mach-O PIC machinery. */
5577 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5578 #endif
5580 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5581 new = addr;
5582 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5584 /* This symbol may be referenced via a displacement from the PIC
5585 base address (@GOTOFF). */
5587 if (reload_in_progress)
5588 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5589 if (GET_CODE (addr) == CONST)
5590 addr = XEXP (addr, 0);
5591 if (GET_CODE (addr) == PLUS)
5593 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
5594 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
5596 else
5597 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5598 new = gen_rtx_CONST (Pmode, new);
5599 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5601 if (reg != 0)
5603 emit_move_insn (reg, new);
5604 new = reg;
5607 else if (GET_CODE (addr) == SYMBOL_REF)
5609 if (TARGET_64BIT)
5611 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5612 new = gen_rtx_CONST (Pmode, new);
5613 new = gen_const_mem (Pmode, new);
5614 set_mem_alias_set (new, ix86_GOT_alias_set ());
5616 if (reg == 0)
5617 reg = gen_reg_rtx (Pmode);
5618 /* Use gen_movsi directly, otherwise the address is loaded
5619 into a register for CSE. We don't want to CSE these addresses;
5620 instead we CSE addresses from the GOT table, so skip this. */
5621 emit_insn (gen_movsi (reg, new));
5622 new = reg;
5624 else
5626 /* This symbol must be referenced via a load from the
5627 Global Offset Table (@GOT). */
5629 if (reload_in_progress)
5630 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5631 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5632 new = gen_rtx_CONST (Pmode, new);
5633 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5634 new = gen_const_mem (Pmode, new);
5635 set_mem_alias_set (new, ix86_GOT_alias_set ());
5637 if (reg == 0)
5638 reg = gen_reg_rtx (Pmode);
5639 emit_move_insn (reg, new);
5640 new = reg;
5643 else
5645 if (GET_CODE (addr) == CONST)
5647 addr = XEXP (addr, 0);
5649 /* We must match stuff we generate before. Assume the only
5650 unspecs that can get here are ours. Not that we could do
5651 anything with them anyway.... */
5652 if (GET_CODE (addr) == UNSPEC
5653 || (GET_CODE (addr) == PLUS
5654 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5655 return orig;
5656 gcc_assert (GET_CODE (addr) == PLUS);
5658 if (GET_CODE (addr) == PLUS)
5660 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5662 /* Check first to see if this is a constant offset from a @GOTOFF
5663 symbol reference. */
5664 if (local_symbolic_operand (op0, Pmode)
5665 && GET_CODE (op1) == CONST_INT)
5667 if (!TARGET_64BIT)
5669 if (reload_in_progress)
5670 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5671 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5672 UNSPEC_GOTOFF);
5673 new = gen_rtx_PLUS (Pmode, new, op1);
5674 new = gen_rtx_CONST (Pmode, new);
5675 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5677 if (reg != 0)
5679 emit_move_insn (reg, new);
5680 new = reg;
5683 else
5685 if (INTVAL (op1) < -16*1024*1024
5686 || INTVAL (op1) >= 16*1024*1024)
5687 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
5690 else
5692 base = legitimize_pic_address (XEXP (addr, 0), reg);
5693 new = legitimize_pic_address (XEXP (addr, 1),
5694 base == reg ? NULL_RTX : reg);
5696 if (GET_CODE (new) == CONST_INT)
5697 new = plus_constant (base, INTVAL (new));
5698 else
5700 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5702 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5703 new = XEXP (new, 1);
5705 new = gen_rtx_PLUS (Pmode, base, new);
5710 return new;
5713 /* Load the thread pointer. If TO_REG is true, force it into a register. */
5715 static rtx
5716 get_thread_pointer (int to_reg)
5718 rtx tp, reg, insn;
5720 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5721 if (!to_reg)
5722 return tp;
5724 reg = gen_reg_rtx (Pmode);
5725 insn = gen_rtx_SET (VOIDmode, reg, tp);
5726 insn = emit_insn (insn);
5728 return reg;
5731 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
5732 false if we expect this to be used for a memory address and true if
5733 we expect to load the address into a register. */
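/* Roughly, the four TLS models handled below map to:

     global dynamic: a __tls_get_addr call per symbol,
     local dynamic:  one __tls_get_addr call for the module base,
                     then constant @DTPOFF offsets from it,
     initial exec:   load the offset from the GOT (@GOTTPOFF /
                     @GOTNTPOFF / @INDNTPOFF) and add the thread
                     pointer,
     local exec:     a link-time constant @TPOFF / @NTPOFF offset
                     from the thread pointer.

   The thread pointer itself is the UNSPEC_TP built by
   get_thread_pointer above (typically accessed through %gs on
   32-bit and %fs on 64-bit GNU/Linux).  */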
5735 static rtx
5736 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
5738 rtx dest, base, off, pic;
5739 int type;
5741 switch (model)
5743 case TLS_MODEL_GLOBAL_DYNAMIC:
5744 dest = gen_reg_rtx (Pmode);
5745 if (TARGET_64BIT)
5747 rtx rax = gen_rtx_REG (Pmode, 0), insns;
5749 start_sequence ();
5750 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
5751 insns = get_insns ();
5752 end_sequence ();
5754 emit_libcall_block (insns, dest, rax, x);
5756 else
5757 emit_insn (gen_tls_global_dynamic_32 (dest, x));
5758 break;
5760 case TLS_MODEL_LOCAL_DYNAMIC:
5761 base = gen_reg_rtx (Pmode);
5762 if (TARGET_64BIT)
5764 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
5766 start_sequence ();
5767 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
5768 insns = get_insns ();
5769 end_sequence ();
5771 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
5772 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
5773 emit_libcall_block (insns, base, rax, note);
5775 else
5776 emit_insn (gen_tls_local_dynamic_base_32 (base));
5778 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5779 off = gen_rtx_CONST (Pmode, off);
5781 return gen_rtx_PLUS (Pmode, base, off);
5783 case TLS_MODEL_INITIAL_EXEC:
5784 if (TARGET_64BIT)
5786 pic = NULL;
5787 type = UNSPEC_GOTNTPOFF;
5789 else if (flag_pic)
5791 if (reload_in_progress)
5792 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5793 pic = pic_offset_table_rtx;
5794 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
5796 else if (!TARGET_GNU_TLS)
5798 pic = gen_reg_rtx (Pmode);
5799 emit_insn (gen_set_got (pic));
5800 type = UNSPEC_GOTTPOFF;
5802 else
5804 pic = NULL;
5805 type = UNSPEC_INDNTPOFF;
5808 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
5809 off = gen_rtx_CONST (Pmode, off);
5810 if (pic)
5811 off = gen_rtx_PLUS (Pmode, pic, off);
5812 off = gen_const_mem (Pmode, off);
5813 set_mem_alias_set (off, ix86_GOT_alias_set ());
5815 if (TARGET_64BIT || TARGET_GNU_TLS)
5817 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5818 off = force_reg (Pmode, off);
5819 return gen_rtx_PLUS (Pmode, base, off);
5821 else
5823 base = get_thread_pointer (true);
5824 dest = gen_reg_rtx (Pmode);
5825 emit_insn (gen_subsi3 (dest, base, off));
5827 break;
5829 case TLS_MODEL_LOCAL_EXEC:
5830 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5831 (TARGET_64BIT || TARGET_GNU_TLS)
5832 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5833 off = gen_rtx_CONST (Pmode, off);
5835 if (TARGET_64BIT || TARGET_GNU_TLS)
5837 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5838 return gen_rtx_PLUS (Pmode, base, off);
5840 else
5842 base = get_thread_pointer (true);
5843 dest = gen_reg_rtx (Pmode);
5844 emit_insn (gen_subsi3 (dest, base, off));
5846 break;
5848 default:
5849 gcc_unreachable ();
5852 return dest;
5855 /* Try machine-dependent ways of modifying an illegitimate address
5856 to be legitimate. If we find one, return the new, valid address.
5857 This macro is used in only one place: `memory_address' in explow.c.
5859 OLDX is the address as it was before break_out_memory_refs was called.
5860 In some cases it is useful to look at this to decide what needs to be done.
5862 MODE and WIN are passed so that this macro can use
5863 GO_IF_LEGITIMATE_ADDRESS.
5865 It is always safe for this macro to do nothing. It exists to recognize
5866 opportunities to optimize the output.
5868 For the 80386, we handle X+REG by loading X into a register R and
5869 using R+REG. R will go in a general reg and indexing will be used.
5870 However, if REG is a broken-out memory address or multiplication,
5871 nothing needs to be done because REG can certainly go in a general reg.
5873 When -fpic is used, special handling is needed for symbolic references.
5874 See comments by legitimize_pic_address in i386.c for details. */
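/* For example, an address such as

     (plus (ashift (reg A) (const_int 2)) (plus (reg B) (const_int 8)))

   is rewritten below towards the canonical i386 form
   base + index*scale + disp, i.e. roughly

     (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 8))

   which GO_IF_LEGITIMATE_ADDRESS can then accept directly.  */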
5876 rtx
5877 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
5879 int changed = 0;
5880 unsigned log;
5882 if (TARGET_DEBUG_ADDR)
5884 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5885 GET_MODE_NAME (mode));
5886 debug_rtx (x);
5889 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
5890 if (log)
5891 return legitimize_tls_address (x, log, false);
5892 if (GET_CODE (x) == CONST
5893 && GET_CODE (XEXP (x, 0)) == PLUS
5894 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
5895 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
5897 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
5898 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
5901 if (flag_pic && SYMBOLIC_CONST (x))
5902 return legitimize_pic_address (x, 0);
5904 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5905 if (GET_CODE (x) == ASHIFT
5906 && GET_CODE (XEXP (x, 1)) == CONST_INT
5907 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
5909 changed = 1;
5910 log = INTVAL (XEXP (x, 1));
5911 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5912 GEN_INT (1 << log));
5915 if (GET_CODE (x) == PLUS)
5917 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5919 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5920 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5921 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
5923 changed = 1;
5924 log = INTVAL (XEXP (XEXP (x, 0), 1));
5925 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5926 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5927 GEN_INT (1 << log));
5930 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5931 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5932 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
5934 changed = 1;
5935 log = INTVAL (XEXP (XEXP (x, 1), 1));
5936 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5937 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5938 GEN_INT (1 << log));
5941 /* Put multiply first if it isn't already. */
5942 if (GET_CODE (XEXP (x, 1)) == MULT)
5944 rtx tmp = XEXP (x, 0);
5945 XEXP (x, 0) = XEXP (x, 1);
5946 XEXP (x, 1) = tmp;
5947 changed = 1;
5950 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5951 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5952 created by virtual register instantiation, register elimination, and
5953 similar optimizations. */
5954 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5956 changed = 1;
5957 x = gen_rtx_PLUS (Pmode,
5958 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5959 XEXP (XEXP (x, 1), 0)),
5960 XEXP (XEXP (x, 1), 1));
5963 /* Canonicalize
5964 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5965 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5966 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5967 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5968 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5969 && CONSTANT_P (XEXP (x, 1)))
5971 rtx constant;
5972 rtx other = NULL_RTX;
5974 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5976 constant = XEXP (x, 1);
5977 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5979 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5981 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5982 other = XEXP (x, 1);
5984 else
5985 constant = 0;
5987 if (constant)
5989 changed = 1;
5990 x = gen_rtx_PLUS (Pmode,
5991 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5992 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5993 plus_constant (other, INTVAL (constant)));
5997 if (changed && legitimate_address_p (mode, x, FALSE))
5998 return x;
6000 if (GET_CODE (XEXP (x, 0)) == MULT)
6002 changed = 1;
6003 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6006 if (GET_CODE (XEXP (x, 1)) == MULT)
6008 changed = 1;
6009 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6012 if (changed
6013 && GET_CODE (XEXP (x, 1)) == REG
6014 && GET_CODE (XEXP (x, 0)) == REG)
6015 return x;
6017 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6019 changed = 1;
6020 x = legitimize_pic_address (x, 0);
6023 if (changed && legitimate_address_p (mode, x, FALSE))
6024 return x;
6026 if (GET_CODE (XEXP (x, 0)) == REG)
6028 rtx temp = gen_reg_rtx (Pmode);
6029 rtx val = force_operand (XEXP (x, 1), temp);
6030 if (val != temp)
6031 emit_move_insn (temp, val);
6033 XEXP (x, 1) = temp;
6034 return x;
6037 else if (GET_CODE (XEXP (x, 1)) == REG)
6039 rtx temp = gen_reg_rtx (Pmode);
6040 rtx val = force_operand (XEXP (x, 0), temp);
6041 if (val != temp)
6042 emit_move_insn (temp, val);
6044 XEXP (x, 0) = temp;
6045 return x;
6049 return x;
6052 /* Print an integer constant expression in assembler syntax. Addition
6053 and subtraction are the only arithmetic that may appear in these
6054 expressions. FILE is the stdio stream to write to, X is the rtx, and
6055 CODE is the operand print code from the output string. */
6057 static void
6058 output_pic_addr_const (FILE *file, rtx x, int code)
6060 char buf[256];
6062 switch (GET_CODE (x))
6064 case PC:
6065 gcc_assert (flag_pic);
6066 putc ('.', file);
6067 break;
6069 case SYMBOL_REF:
6070 assemble_name (file, XSTR (x, 0));
6071 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6072 fputs ("@PLT", file);
6073 break;
6075 case LABEL_REF:
6076 x = XEXP (x, 0);
6077 /* FALLTHRU */
6078 case CODE_LABEL:
6079 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6080 assemble_name (asm_out_file, buf);
6081 break;
6083 case CONST_INT:
6084 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6085 break;
6087 case CONST:
6088 /* This used to output parentheses around the expression,
6089 but that does not work on the 386 (either ATT or BSD assembler). */
6090 output_pic_addr_const (file, XEXP (x, 0), code);
6091 break;
6093 case CONST_DOUBLE:
6094 if (GET_MODE (x) == VOIDmode)
6096 /* We can use %d if the number is <32 bits and positive. */
6097 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6098 fprintf (file, "0x%lx%08lx",
6099 (unsigned long) CONST_DOUBLE_HIGH (x),
6100 (unsigned long) CONST_DOUBLE_LOW (x));
6101 else
6102 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6104 else
6105 /* We can't handle floating point constants;
6106 PRINT_OPERAND must handle them. */
6107 output_operand_lossage ("floating constant misused");
6108 break;
6110 case PLUS:
6111 /* Some assemblers need integer constants to appear first. */
6112 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6114 output_pic_addr_const (file, XEXP (x, 0), code);
6115 putc ('+', file);
6116 output_pic_addr_const (file, XEXP (x, 1), code);
6118 else
6120 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
6121 output_pic_addr_const (file, XEXP (x, 1), code);
6122 putc ('+', file);
6123 output_pic_addr_const (file, XEXP (x, 0), code);
6125 break;
6127 case MINUS:
6128 if (!TARGET_MACHO)
6129 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6130 output_pic_addr_const (file, XEXP (x, 0), code);
6131 putc ('-', file);
6132 output_pic_addr_const (file, XEXP (x, 1), code);
6133 if (!TARGET_MACHO)
6134 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6135 break;
6137 case UNSPEC:
6138 gcc_assert (XVECLEN (x, 0) == 1);
6139 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6140 switch (XINT (x, 1))
6142 case UNSPEC_GOT:
6143 fputs ("@GOT", file);
6144 break;
6145 case UNSPEC_GOTOFF:
6146 fputs ("@GOTOFF", file);
6147 break;
6148 case UNSPEC_GOTPCREL:
6149 fputs ("@GOTPCREL(%rip)", file);
6150 break;
6151 case UNSPEC_GOTTPOFF:
6152 /* FIXME: This might be @TPOFF in Sun ld too. */
6153 fputs ("@GOTTPOFF", file);
6154 break;
6155 case UNSPEC_TPOFF:
6156 fputs ("@TPOFF", file);
6157 break;
6158 case UNSPEC_NTPOFF:
6159 if (TARGET_64BIT)
6160 fputs ("@TPOFF", file);
6161 else
6162 fputs ("@NTPOFF", file);
6163 break;
6164 case UNSPEC_DTPOFF:
6165 fputs ("@DTPOFF", file);
6166 break;
6167 case UNSPEC_GOTNTPOFF:
6168 if (TARGET_64BIT)
6169 fputs ("@GOTTPOFF(%rip)", file);
6170 else
6171 fputs ("@GOTNTPOFF", file);
6172 break;
6173 case UNSPEC_INDNTPOFF:
6174 fputs ("@INDNTPOFF", file);
6175 break;
6176 default:
6177 output_operand_lossage ("invalid UNSPEC as operand");
6178 break;
6180 break;
6182 default:
6183 output_operand_lossage ("invalid expression as operand");
6187 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6188 We need to emit DTP-relative relocations. */
6190 void
6191 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6193 fputs (ASM_LONG, file);
6194 output_addr_const (file, x);
6195 fputs ("@DTPOFF", file);
6196 switch (size)
6198 case 4:
6199 break;
6200 case 8:
6201 fputs (", 0", file);
6202 break;
6203 default:
6204 gcc_unreachable ();
6208 /* In the name of slightly smaller debug output, and to cater to
6209 general assembler lossage, recognize PIC+GOTOFF and turn it back
6210 into a direct symbol reference. */
6212 static rtx
6213 ix86_delegitimize_address (rtx orig_x)
6215 rtx x = orig_x, y;
6217 if (GET_CODE (x) == MEM)
6218 x = XEXP (x, 0);
6220 if (TARGET_64BIT)
6222 if (GET_CODE (x) != CONST
6223 || GET_CODE (XEXP (x, 0)) != UNSPEC
6224 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6225 || GET_CODE (orig_x) != MEM)
6226 return orig_x;
6227 return XVECEXP (XEXP (x, 0), 0, 0);
6230 if (GET_CODE (x) != PLUS
6231 || GET_CODE (XEXP (x, 1)) != CONST)
6232 return orig_x;
6234 if (GET_CODE (XEXP (x, 0)) == REG
6235 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6236 /* %ebx + GOT/GOTOFF */
6237 y = NULL;
6238 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6240 /* %ebx + %reg * scale + GOT/GOTOFF */
6241 y = XEXP (x, 0);
6242 if (GET_CODE (XEXP (y, 0)) == REG
6243 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6244 y = XEXP (y, 1);
6245 else if (GET_CODE (XEXP (y, 1)) == REG
6246 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6247 y = XEXP (y, 0);
6248 else
6249 return orig_x;
6250 if (GET_CODE (y) != REG
6251 && GET_CODE (y) != MULT
6252 && GET_CODE (y) != ASHIFT)
6253 return orig_x;
6255 else
6256 return orig_x;
6258 x = XEXP (XEXP (x, 1), 0);
6259 if (GET_CODE (x) == UNSPEC
6260 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6261 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6263 if (y)
6264 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6265 return XVECEXP (x, 0, 0);
6268 if (GET_CODE (x) == PLUS
6269 && GET_CODE (XEXP (x, 0)) == UNSPEC
6270 && GET_CODE (XEXP (x, 1)) == CONST_INT
6271 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6272 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6273 && GET_CODE (orig_x) != MEM)))
6275 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6276 if (y)
6277 return gen_rtx_PLUS (Pmode, y, x);
6278 return x;
6281 return orig_x;
6284 static void
6285 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6286 int fp, FILE *file)
6288 const char *suffix;
6290 if (mode == CCFPmode || mode == CCFPUmode)
6292 enum rtx_code second_code, bypass_code;
6293 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6294 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
6295 code = ix86_fp_compare_code_to_integer (code);
6296 mode = CCmode;
6298 if (reverse)
6299 code = reverse_condition (code);
6301 switch (code)
6303 case EQ:
6304 suffix = "e";
6305 break;
6306 case NE:
6307 suffix = "ne";
6308 break;
6309 case GT:
6310 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
6311 suffix = "g";
6312 break;
6313 case GTU:
6314 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6315 Those same assemblers have the same but opposite lossage on cmov. */
6316 gcc_assert (mode == CCmode);
6317 suffix = fp ? "nbe" : "a";
6318 break;
6319 case LT:
6320 switch (mode)
6322 case CCNOmode:
6323 case CCGOCmode:
6324 suffix = "s";
6325 break;
6327 case CCmode:
6328 case CCGCmode:
6329 suffix = "l";
6330 break;
6332 default:
6333 gcc_unreachable ();
6335 break;
6336 case LTU:
6337 gcc_assert (mode == CCmode);
6338 suffix = "b";
6339 break;
6340 case GE:
6341 switch (mode)
6343 case CCNOmode:
6344 case CCGOCmode:
6345 suffix = "ns";
6346 break;
6348 case CCmode:
6349 case CCGCmode:
6350 suffix = "ge";
6351 break;
6353 default:
6354 gcc_unreachable ();
6356 break;
6357 case GEU:
6358 /* ??? As above. */
6359 gcc_assert (mode == CCmode);
6360 suffix = fp ? "nb" : "ae";
6361 break;
6362 case LE:
6363 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
6364 suffix = "le";
6365 break;
6366 case LEU:
6367 gcc_assert (mode == CCmode);
6368 suffix = "be";
6369 break;
6370 case UNORDERED:
6371 suffix = fp ? "u" : "p";
6372 break;
6373 case ORDERED:
6374 suffix = fp ? "nu" : "np";
6375 break;
6376 default:
6377 gcc_unreachable ();
6379 fputs (suffix, file);
6382 /* Print the name of register X to FILE based on its machine mode and number.
6383 If CODE is 'w', pretend the mode is HImode.
6384 If CODE is 'b', pretend the mode is QImode.
6385 If CODE is 'k', pretend the mode is SImode.
6386 If CODE is 'q', pretend the mode is DImode.
6387 If CODE is 'h', pretend the reg is the 'high' byte register.
6388 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
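/* For instance, with operand register %eax (hard reg 0):
   code 'b' prints %al, 'h' prints %ah, 'w' prints %ax,
   'k' prints %eax and 'q' prints %rax (64-bit only).  */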
6390 void
6391 print_reg (rtx x, int code, FILE *file)
6393 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
6394 && REGNO (x) != FRAME_POINTER_REGNUM
6395 && REGNO (x) != FLAGS_REG
6396 && REGNO (x) != FPSR_REG);
6398 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6399 putc ('%', file);
6401 if (code == 'w' || MMX_REG_P (x))
6402 code = 2;
6403 else if (code == 'b')
6404 code = 1;
6405 else if (code == 'k')
6406 code = 4;
6407 else if (code == 'q')
6408 code = 8;
6409 else if (code == 'y')
6410 code = 3;
6411 else if (code == 'h')
6412 code = 0;
6413 else
6414 code = GET_MODE_SIZE (GET_MODE (x));
6416 /* Irritatingly, AMD extended registers use a different naming convention
6417 from the normal registers. */
6418 if (REX_INT_REG_P (x))
6420 gcc_assert (TARGET_64BIT);
6421 switch (code)
6423 case 0:
6424 error ("extended registers have no high halves");
6425 break;
6426 case 1:
6427 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6428 break;
6429 case 2:
6430 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6431 break;
6432 case 4:
6433 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6434 break;
6435 case 8:
6436 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6437 break;
6438 default:
6439 error ("unsupported operand size for extended register");
6440 break;
6442 return;
6444 switch (code)
6446 case 3:
6447 if (STACK_TOP_P (x))
6449 fputs ("st(0)", file);
6450 break;
6452 /* FALLTHRU */
6453 case 8:
6454 case 4:
6455 case 12:
6456 if (! ANY_FP_REG_P (x))
6457 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6458 /* FALLTHRU */
6459 case 16:
6460 case 2:
6461 normal:
6462 fputs (hi_reg_name[REGNO (x)], file);
6463 break;
6464 case 1:
6465 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
6466 goto normal;
6467 fputs (qi_reg_name[REGNO (x)], file);
6468 break;
6469 case 0:
6470 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
6471 goto normal;
6472 fputs (qi_high_reg_name[REGNO (x)], file);
6473 break;
6474 default:
6475 gcc_unreachable ();
6479 /* Locate some local-dynamic symbol still in use by this function
6480 so that we can print its name in some tls_local_dynamic_base
6481 pattern. */
6483 static const char *
6484 get_some_local_dynamic_name (void)
6486 rtx insn;
6488 if (cfun->machine->some_ld_name)
6489 return cfun->machine->some_ld_name;
6491 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6492 if (INSN_P (insn)
6493 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6494 return cfun->machine->some_ld_name;
6496 gcc_unreachable ();
6499 static int
6500 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
6502 rtx x = *px;
6504 if (GET_CODE (x) == SYMBOL_REF
6505 && local_dynamic_symbolic_operand (x, Pmode))
6507 cfun->machine->some_ld_name = XSTR (x, 0);
6508 return 1;
6511 return 0;
6514 /* Meaning of CODE:
6515 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6516 C -- print opcode suffix for set/cmov insn.
6517 c -- like C, but print reversed condition
6518 F,f -- likewise, but for floating-point.
6519 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
6520 otherwise nothing
6521 R -- print the prefix for register names.
6522 z -- print the opcode suffix for the size of the current operand.
6523 * -- print a star (in certain assembler syntax)
6524 A -- print an absolute memory reference.
6525 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6526 s -- print a shift double count, followed by the assembler's argument
6527 delimiter.
6528 b -- print the QImode name of the register for the indicated operand.
6529 %b0 would print %al if operands[0] is reg 0.
6530 w -- likewise, print the HImode name of the register.
6531 k -- likewise, print the SImode name of the register.
6532 q -- likewise, print the DImode name of the register.
6533 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6534 y -- print "st(0)" instead of "st" as a register.
6535 D -- print condition for SSE cmp instruction.
6536 P -- if PIC, print an @PLT suffix.
6537 X -- don't print any sort of PIC '@' suffix for a symbol.
6538 & -- print some in-use local-dynamic symbol name.
6539 H -- print a memory address offset by 8; used for sse high-parts. */
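/* For example, in an insn template such as "add%z0\t{%2, %0|%0, %2}",
   %z0 expands to the size suffix for operand 0 ('l' for an SImode
   register) and the {att|intel} braces select between the two
   assembler dialects.  */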
6542 void
6543 print_operand (FILE *file, rtx x, int code)
6545 if (code)
6547 switch (code)
6549 case '*':
6550 if (ASSEMBLER_DIALECT == ASM_ATT)
6551 putc ('*', file);
6552 return;
6554 case '&':
6555 assemble_name (file, get_some_local_dynamic_name ());
6556 return;
6558 case 'A':
6559 switch (ASSEMBLER_DIALECT)
6561 case ASM_ATT:
6562 putc ('*', file);
6563 break;
6565 case ASM_INTEL:
6566 /* Intel syntax. For absolute addresses, registers should not
6567 be surrounded by brackets. */
6568 if (GET_CODE (x) != REG)
6570 putc ('[', file);
6571 PRINT_OPERAND (file, x, 0);
6572 putc (']', file);
6573 return;
6575 break;
6577 default:
6578 gcc_unreachable ();
6581 PRINT_OPERAND (file, x, 0);
6582 return;
6585 case 'L':
6586 if (ASSEMBLER_DIALECT == ASM_ATT)
6587 putc ('l', file);
6588 return;
6590 case 'W':
6591 if (ASSEMBLER_DIALECT == ASM_ATT)
6592 putc ('w', file);
6593 return;
6595 case 'B':
6596 if (ASSEMBLER_DIALECT == ASM_ATT)
6597 putc ('b', file);
6598 return;
6600 case 'Q':
6601 if (ASSEMBLER_DIALECT == ASM_ATT)
6602 putc ('l', file);
6603 return;
6605 case 'S':
6606 if (ASSEMBLER_DIALECT == ASM_ATT)
6607 putc ('s', file);
6608 return;
6610 case 'T':
6611 if (ASSEMBLER_DIALECT == ASM_ATT)
6612 putc ('t', file);
6613 return;
6615 case 'z':
6616 /* 387 opcodes don't get size suffixes if the operands are
6617 registers. */
6618 if (STACK_REG_P (x))
6619 return;
6621 /* Likewise if using Intel opcodes. */
6622 if (ASSEMBLER_DIALECT == ASM_INTEL)
6623 return;
6625 /* Derive the size suffix of the op from the size of the operand. */
6626 switch (GET_MODE_SIZE (GET_MODE (x)))
6628 case 2:
6629 #ifdef HAVE_GAS_FILDS_FISTS
6630 putc ('s', file);
6631 #endif
6632 return;
6634 case 4:
6635 if (GET_MODE (x) == SFmode)
6637 putc ('s', file);
6638 return;
6640 else
6641 putc ('l', file);
6642 return;
6644 case 12:
6645 case 16:
6646 putc ('t', file);
6647 return;
6649 case 8:
6650 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6652 #ifdef GAS_MNEMONICS
6653 putc ('q', file);
6654 #else
6655 putc ('l', file);
6656 putc ('l', file);
6657 #endif
6659 else
6660 putc ('l', file);
6661 return;
6663 default:
6664 gcc_unreachable ();
6667 case 'b':
6668 case 'w':
6669 case 'k':
6670 case 'q':
6671 case 'h':
6672 case 'y':
6673 case 'X':
6674 case 'P':
6675 break;
6677 case 's':
6678 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6680 PRINT_OPERAND (file, x, 0);
6681 putc (',', file);
6683 return;
6685 case 'D':
6686 /* A little bit of braindamage here. The SSE compare instructions
6687 use completely different names for the comparisons than the
6688 fp conditional moves do. */
6689 switch (GET_CODE (x))
6691 case EQ:
6692 case UNEQ:
6693 fputs ("eq", file);
6694 break;
6695 case LT:
6696 case UNLT:
6697 fputs ("lt", file);
6698 break;
6699 case LE:
6700 case UNLE:
6701 fputs ("le", file);
6702 break;
6703 case UNORDERED:
6704 fputs ("unord", file);
6705 break;
6706 case NE:
6707 case LTGT:
6708 fputs ("neq", file);
6709 break;
6710 case UNGE:
6711 case GE:
6712 fputs ("nlt", file);
6713 break;
6714 case UNGT:
6715 case GT:
6716 fputs ("nle", file);
6717 break;
6718 case ORDERED:
6719 fputs ("ord", file);
6720 break;
6721 default:
6722 gcc_unreachable ();
6724 return;
6725 case 'O':
6726 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6727 if (ASSEMBLER_DIALECT == ASM_ATT)
6729 switch (GET_MODE (x))
6731 case HImode: putc ('w', file); break;
6732 case SImode:
6733 case SFmode: putc ('l', file); break;
6734 case DImode:
6735 case DFmode: putc ('q', file); break;
6736 default: gcc_unreachable ();
6738 putc ('.', file);
6740 #endif
6741 return;
6742 case 'C':
6743 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6744 return;
6745 case 'F':
6746 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6747 if (ASSEMBLER_DIALECT == ASM_ATT)
6748 putc ('.', file);
6749 #endif
6750 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6751 return;
6753 /* Like above, but reverse condition */
6754 case 'c':
6755 /* Check to see if argument to %c is really a constant
6756 and not a condition code which needs to be reversed. */
6757 if (!COMPARISON_P (x))
6759 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6760 return;
6762 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6763 return;
6764 case 'f':
6765 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6766 if (ASSEMBLER_DIALECT == ASM_ATT)
6767 putc ('.', file);
6768 #endif
6769 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6770 return;
6772 case 'H':
6773 /* It doesn't actually matter what mode we use here, as we're
6774 only going to use this for printing. */
6775 x = adjust_address_nv (x, DImode, 8);
6776 break;
6778 case '+':
6780 rtx x;
6782 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6783 return;
6785 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6786 if (x)
6788 int pred_val = INTVAL (XEXP (x, 0));
6790 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6791 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6793 int taken = pred_val > REG_BR_PROB_BASE / 2;
6794 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6796 /* Emit hints only when the default branch prediction
6797 heuristics would fail. */
6798 if (taken != cputaken)
6800 /* We use 3e (DS) prefix for taken branches and
6801 2e (CS) prefix for not taken branches. */
6802 if (taken)
6803 fputs ("ds ; ", file);
6804 else
6805 fputs ("cs ; ", file);
6809 return;
6811 default:
6812 output_operand_lossage ("invalid operand code '%c'", code);
6816 if (GET_CODE (x) == REG)
6817 print_reg (x, code, file);
6819 else if (GET_CODE (x) == MEM)
6821 /* No `byte ptr' prefix for call instructions. */
6822 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6824 const char * size;
6825 switch (GET_MODE_SIZE (GET_MODE (x)))
6827 case 1: size = "BYTE"; break;
6828 case 2: size = "WORD"; break;
6829 case 4: size = "DWORD"; break;
6830 case 8: size = "QWORD"; break;
6831 case 12: size = "XWORD"; break;
6832 case 16: size = "XMMWORD"; break;
6833 default:
6834 gcc_unreachable ();
6837 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6838 if (code == 'b')
6839 size = "BYTE";
6840 else if (code == 'w')
6841 size = "WORD";
6842 else if (code == 'k')
6843 size = "DWORD";
6845 fputs (size, file);
6846 fputs (" PTR ", file);
6849 x = XEXP (x, 0);
6850 /* Avoid (%rip) for call operands. */
6851 if (CONSTANT_ADDRESS_P (x) && code == 'P'
6852 && GET_CODE (x) != CONST_INT)
6853 output_addr_const (file, x);
6854 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6855 output_operand_lossage ("invalid constraints for operand");
6856 else
6857 output_address (x);
6860 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6862 REAL_VALUE_TYPE r;
6863 long l;
6865 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6866 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6868 if (ASSEMBLER_DIALECT == ASM_ATT)
6869 putc ('$', file);
6870 fprintf (file, "0x%08lx", l);
6873 /* These float cases don't actually occur as immediate operands. */
6874 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6876 char dstr[30];
6878 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6879 fprintf (file, "%s", dstr);
6882 else if (GET_CODE (x) == CONST_DOUBLE
6883 && GET_MODE (x) == XFmode)
6885 char dstr[30];
6887 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6888 fprintf (file, "%s", dstr);
6891 else
6893 /* We have patterns that allow zero sets of memory, for instance.
6894 In 64-bit mode, we should probably support all 8-byte vectors,
6895 since we can in fact encode that into an immediate. */
6896 if (GET_CODE (x) == CONST_VECTOR)
6898 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
6899 x = const0_rtx;
6902 if (code != 'P')
6904 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6906 if (ASSEMBLER_DIALECT == ASM_ATT)
6907 putc ('$', file);
6909 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6910 || GET_CODE (x) == LABEL_REF)
6912 if (ASSEMBLER_DIALECT == ASM_ATT)
6913 putc ('$', file);
6914 else
6915 fputs ("OFFSET FLAT:", file);
6918 if (GET_CODE (x) == CONST_INT)
6919 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6920 else if (flag_pic)
6921 output_pic_addr_const (file, x, code);
6922 else
6923 output_addr_const (file, x);
6927 /* Print a memory operand whose address is ADDR. */
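/* Roughly, for an address with base=ebx, index=ecx, scale=4, disp=8
   the two dialect branches below emit "8(%ebx,%ecx,4)" in AT&T
   syntax and "[ebx+8+ecx*4]" in Intel syntax.  */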
6929 void
6930 print_operand_address (FILE *file, rtx addr)
6932 struct ix86_address parts;
6933 rtx base, index, disp;
6934 int scale;
6935 int ok = ix86_decompose_address (addr, &parts);
6937 gcc_assert (ok);
6939 base = parts.base;
6940 index = parts.index;
6941 disp = parts.disp;
6942 scale = parts.scale;
6944 switch (parts.seg)
6946 case SEG_DEFAULT:
6947 break;
6948 case SEG_FS:
6949 case SEG_GS:
6950 if (USER_LABEL_PREFIX[0] == 0)
6951 putc ('%', file);
6952 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
6953 break;
6954 default:
6955 gcc_unreachable ();
6958 if (!base && !index)
6960 /* A displacement-only address requires special attention. */
6962 if (GET_CODE (disp) == CONST_INT)
6964 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
6966 if (USER_LABEL_PREFIX[0] == 0)
6967 putc ('%', file);
6968 fputs ("ds:", file);
6970 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
6972 else if (flag_pic)
6973 output_pic_addr_const (file, disp, 0);
6974 else
6975 output_addr_const (file, disp);
6977 /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode. */
6978 if (TARGET_64BIT
6979 && ((GET_CODE (disp) == SYMBOL_REF
6980 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
6981 || GET_CODE (disp) == LABEL_REF
6982 || (GET_CODE (disp) == CONST
6983 && GET_CODE (XEXP (disp, 0)) == PLUS
6984 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
6985 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
6986 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
6987 fputs ("(%rip)", file);
6989 else
6991 if (ASSEMBLER_DIALECT == ASM_ATT)
6993 if (disp)
6995 if (flag_pic)
6996 output_pic_addr_const (file, disp, 0);
6997 else if (GET_CODE (disp) == LABEL_REF)
6998 output_asm_label (disp);
6999 else
7000 output_addr_const (file, disp);
7003 putc ('(', file);
7004 if (base)
7005 print_reg (base, 0, file);
7006 if (index)
7008 putc (',', file);
7009 print_reg (index, 0, file);
7010 if (scale != 1)
7011 fprintf (file, ",%d", scale);
7013 putc (')', file);
7015 else
7017 rtx offset = NULL_RTX;
7019 if (disp)
7021 /* Pull out the offset of a symbol; print any symbol itself. */
7022 if (GET_CODE (disp) == CONST
7023 && GET_CODE (XEXP (disp, 0)) == PLUS
7024 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7026 offset = XEXP (XEXP (disp, 0), 1);
7027 disp = gen_rtx_CONST (VOIDmode,
7028 XEXP (XEXP (disp, 0), 0));
7031 if (flag_pic)
7032 output_pic_addr_const (file, disp, 0);
7033 else if (GET_CODE (disp) == LABEL_REF)
7034 output_asm_label (disp);
7035 else if (GET_CODE (disp) == CONST_INT)
7036 offset = disp;
7037 else
7038 output_addr_const (file, disp);
7041 putc ('[', file);
7042 if (base)
7044 print_reg (base, 0, file);
7045 if (offset)
7047 if (INTVAL (offset) >= 0)
7048 putc ('+', file);
7049 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7052 else if (offset)
7053 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7054 else
7055 putc ('0', file);
7057 if (index)
7059 putc ('+', file);
7060 print_reg (index, 0, file);
7061 if (scale != 1)
7062 fprintf (file, "*%d", scale);
7064 putc (']', file);
7069 bool
7070 output_addr_const_extra (FILE *file, rtx x)
7072 rtx op;
7074 if (GET_CODE (x) != UNSPEC)
7075 return false;
7077 op = XVECEXP (x, 0, 0);
7078 switch (XINT (x, 1))
7080 case UNSPEC_GOTTPOFF:
7081 output_addr_const (file, op);
7082 /* FIXME: This might be @TPOFF in Sun ld. */
7083 fputs ("@GOTTPOFF", file);
7084 break;
7085 case UNSPEC_TPOFF:
7086 output_addr_const (file, op);
7087 fputs ("@TPOFF", file);
7088 break;
7089 case UNSPEC_NTPOFF:
7090 output_addr_const (file, op);
7091 if (TARGET_64BIT)
7092 fputs ("@TPOFF", file);
7093 else
7094 fputs ("@NTPOFF", file);
7095 break;
7096 case UNSPEC_DTPOFF:
7097 output_addr_const (file, op);
7098 fputs ("@DTPOFF", file);
7099 break;
7100 case UNSPEC_GOTNTPOFF:
7101 output_addr_const (file, op);
7102 if (TARGET_64BIT)
7103 fputs ("@GOTTPOFF(%rip)", file);
7104 else
7105 fputs ("@GOTNTPOFF", file);
7106 break;
7107 case UNSPEC_INDNTPOFF:
7108 output_addr_const (file, op);
7109 fputs ("@INDNTPOFF", file);
7110 break;
7112 default:
7113 return false;
7116 return true;
7119 /* Split one or more DImode RTL references into pairs of SImode
7120 references. The RTL can be REG, offsettable MEM, integer constant, or
7121 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7122 split and "num" is its length. lo_half and hi_half are output arrays
7123 that parallel "operands". */
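/* E.g. a DImode pseudo splits into its SImode subregs at byte
   offsets 0 (low word) and 4 (high word), a DImode MEM into two
   adjacent SImode MEMs, and a constant such as (const_int 1) into
   lo = 1, hi = 0 on this little-endian target.  */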
7125 void
7126 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7128 while (num--)
7130 rtx op = operands[num];
7132 /* simplify_subreg refuses to split volatile memory addresses,
7133 but we still have to handle them. */
7134 if (GET_CODE (op) == MEM)
7136 lo_half[num] = adjust_address (op, SImode, 0);
7137 hi_half[num] = adjust_address (op, SImode, 4);
7139 else
7141 lo_half[num] = simplify_gen_subreg (SImode, op,
7142 GET_MODE (op) == VOIDmode
7143 ? DImode : GET_MODE (op), 0);
7144 hi_half[num] = simplify_gen_subreg (SImode, op,
7145 GET_MODE (op) == VOIDmode
7146 ? DImode : GET_MODE (op), 4);
7150 /* Split one or more TImode RTL references into pairs of DImode
7151 references. The RTL can be REG, offsettable MEM, integer constant, or
7152 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
7153 split and "num" is its length. lo_half and hi_half are output arrays
7154 that parallel "operands". */
7156 void
7157 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7159 while (num--)
7161 rtx op = operands[num];
7163 /* simplify_subreg refuses to split volatile memory addresses, but we
7164 still have to handle them. */
7165 if (GET_CODE (op) == MEM)
7167 lo_half[num] = adjust_address (op, DImode, 0);
7168 hi_half[num] = adjust_address (op, DImode, 8);
7170 else
7172 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7173 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7178 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7179 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7180 is the expression of the binary operation. The output may either be
7181 emitted here, or returned to the caller, like all output_* functions.
7183 There is no guarantee that the operands are the same mode, as they
7184 might be within FLOAT or FLOAT_EXTEND expressions. */
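/* For an SSE scalar add in SFmode this returns
   "addss\t{%2, %0|%0, %2}"; for the 387 it picks one of the
   fadd/fsub/fmul/fdiv forms below, choosing the popping or reversed
   variant depending on which stack registers die.  */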
7186 #ifndef SYSV386_COMPAT
7187 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7188 wants to fix the assemblers because that causes incompatibility
7189 with gcc. No-one wants to fix gcc because that causes
7190 incompatibility with assemblers... You can use the option of
7191 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7192 #define SYSV386_COMPAT 1
7193 #endif
7195 const char *
7196 output_387_binary_op (rtx insn, rtx *operands)
7198 static char buf[30];
7199 const char *p;
7200 const char *ssep;
7201 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
7203 #ifdef ENABLE_CHECKING
7204 /* Even if we do not want to check the inputs, this documents the input
7205 constraints, which helps in understanding the following code. */
7206 if (STACK_REG_P (operands[0])
7207 && ((REG_P (operands[1])
7208 && REGNO (operands[0]) == REGNO (operands[1])
7209 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7210 || (REG_P (operands[2])
7211 && REGNO (operands[0]) == REGNO (operands[2])
7212 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7213 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7214 ; /* ok */
7215 else
7216 gcc_assert (is_sse);
7217 #endif
7219 switch (GET_CODE (operands[3]))
7221 case PLUS:
7222 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7223 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7224 p = "fiadd";
7225 else
7226 p = "fadd";
7227 ssep = "add";
7228 break;
7230 case MINUS:
7231 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7232 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7233 p = "fisub";
7234 else
7235 p = "fsub";
7236 ssep = "sub";
7237 break;
7239 case MULT:
7240 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7241 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7242 p = "fimul";
7243 else
7244 p = "fmul";
7245 ssep = "mul";
7246 break;
7248 case DIV:
7249 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7250 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7251 p = "fidiv";
7252 else
7253 p = "fdiv";
7254 ssep = "div";
7255 break;
7257 default:
7258 gcc_unreachable ();
7261 if (is_sse)
7263 strcpy (buf, ssep);
7264 if (GET_MODE (operands[0]) == SFmode)
7265 strcat (buf, "ss\t{%2, %0|%0, %2}");
7266 else
7267 strcat (buf, "sd\t{%2, %0|%0, %2}");
7268 return buf;
7270 strcpy (buf, p);
7272 switch (GET_CODE (operands[3]))
7274 case MULT:
7275 case PLUS:
7276 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7278 rtx temp = operands[2];
7279 operands[2] = operands[1];
7280 operands[1] = temp;
7283 /* We know operands[0] == operands[1]. */
7285 if (GET_CODE (operands[2]) == MEM)
7287 p = "%z2\t%2";
7288 break;
7291 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7293 if (STACK_TOP_P (operands[0]))
7294 /* How is it that we are storing to a dead operand[2]?
7295 Well, presumably operands[1] is dead too. We can't
7296 store the result to st(0) as st(0) gets popped on this
7297 instruction. Instead store to operands[2] (which I
7298 think has to be st(1)). st(1) will be popped later.
7299 gcc <= 2.8.1 didn't have this check and generated
7300 assembly code that the Unixware assembler rejected. */
7301 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7302 else
7303 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7304 break;
7307 if (STACK_TOP_P (operands[0]))
7308 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7309 else
7310 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7311 break;
7313 case MINUS:
7314 case DIV:
7315 if (GET_CODE (operands[1]) == MEM)
7317 p = "r%z1\t%1";
7318 break;
7321 if (GET_CODE (operands[2]) == MEM)
7323 p = "%z2\t%2";
7324 break;
7327 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7329 #if SYSV386_COMPAT
7330 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7331 derived assemblers, confusingly reverse the direction of
7332 the operation for fsub{r} and fdiv{r} when the
7333 destination register is not st(0). The Intel assembler
7334 doesn't have this brain damage. Read !SYSV386_COMPAT to
7335 figure out what the hardware really does. */
7336 if (STACK_TOP_P (operands[0]))
7337 p = "{p\t%0, %2|rp\t%2, %0}";
7338 else
7339 p = "{rp\t%2, %0|p\t%0, %2}";
7340 #else
7341 if (STACK_TOP_P (operands[0]))
7342 /* As above for fmul/fadd, we can't store to st(0). */
7343 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7344 else
7345 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7346 #endif
7347 break;
7350 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7352 #if SYSV386_COMPAT
7353 if (STACK_TOP_P (operands[0]))
7354 p = "{rp\t%0, %1|p\t%1, %0}";
7355 else
7356 p = "{p\t%1, %0|rp\t%0, %1}";
7357 #else
7358 if (STACK_TOP_P (operands[0]))
7359 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7360 else
7361 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7362 #endif
7363 break;
7366 if (STACK_TOP_P (operands[0]))
7368 if (STACK_TOP_P (operands[1]))
7369 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7370 else
7371 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7372 break;
7374 else if (STACK_TOP_P (operands[1]))
7376 #if SYSV386_COMPAT
7377 p = "{\t%1, %0|r\t%0, %1}";
7378 #else
7379 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7380 #endif
7382 else
7384 #if SYSV386_COMPAT
7385 p = "{r\t%2, %0|\t%0, %2}";
7386 #else
7387 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7388 #endif
7390 break;
7392 default:
7393 gcc_unreachable ();
7396 strcat (buf, p);
7397 return buf;
7400 /* Output code to initialize control word copies used by trunc?f?i and
7401 rounding patterns. CURRENT_MODE is set to the current control word,
7402 while NEW_MODE is set to the new control word. */
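/* The x87 rounding control lives in bits 10-11 of the control word:
   0x0400 rounds down (floor), 0x0800 rounds up (ceil) and 0x0c00
   truncates; bit 5 (0x0020) masks the precision exception, which is
   what the nearbyint() variant needs.  The insv path below appears
   to set just the byte holding these bits, which is only profitable
   when partial-register writes do not stall.  */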
7404 void
7405 emit_i387_cw_initialization (rtx current_mode, rtx new_mode, int mode)
7407 rtx reg = gen_reg_rtx (HImode);
7409 emit_insn (gen_x86_fnstcw_1 (current_mode));
7410 emit_move_insn (reg, current_mode);
7412 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7413 && !TARGET_64BIT)
7415 switch (mode)
7417 case I387_CW_FLOOR:
7418 /* round down toward -oo */
7419 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
7420 break;
7422 case I387_CW_CEIL:
7423 /* round up toward +oo */
7424 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
7425 break;
7427 case I387_CW_TRUNC:
7428 /* round toward zero (truncate) */
7429 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7430 break;
7432 case I387_CW_MASK_PM:
7433 /* mask precision exception for nearbyint() */
7434 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7435 break;
7437 default:
7438 gcc_unreachable ();
7441 else
7443 switch (mode)
7445 case I387_CW_FLOOR:
7446 /* round down toward -oo */
7447 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7448 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
7449 break;
7451 case I387_CW_CEIL:
7452 /* round up toward +oo */
7453 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7454 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
7455 break;
7457 case I387_CW_TRUNC:
7458 /* round toward zero (truncate) */
7459 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
7460 break;
7462 case I387_CW_MASK_PM:
7463 /* mask precision exception for nearbyint() */
7464 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7465 break;
7467 default:
7468 gcc_unreachable ();
7472 emit_move_insn (new_mode, reg);
7475 /* Output code for INSN to convert a float to a signed int. OPERANDS
7476 are the insn operands. The output may be [HSD]Imode and the input
7477 operand may be [SDX]Fmode. */
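/* The x87 default rounding mode is round-to-nearest, while C float
   to integer conversion truncates, so unless the caller asks for the
   SSE3 fisttp instruction this emits an fldcw/fistp/fldcw sandwich
   using the control word copies prepared by
   emit_i387_cw_initialization above.  */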
7479 const char *
7480 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
7482 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7483 int dimode_p = GET_MODE (operands[0]) == DImode;
7484 int round_mode = get_attr_i387_cw (insn);
7486 /* Jump through a hoop or two for DImode, since the hardware has no
7487 non-popping instruction. We used to do this a different way, but
7488 that was somewhat fragile and broke with post-reload splitters. */
7489 if ((dimode_p || fisttp) && !stack_top_dies)
7490 output_asm_insn ("fld\t%y1", operands);
7492 gcc_assert (STACK_TOP_P (operands[1]));
7493 gcc_assert (GET_CODE (operands[0]) == MEM);
7495 if (fisttp)
7496 output_asm_insn ("fisttp%z0\t%0", operands);
7497 else
7499 if (round_mode != I387_CW_ANY)
7500 output_asm_insn ("fldcw\t%3", operands);
7501 if (stack_top_dies || dimode_p)
7502 output_asm_insn ("fistp%z0\t%0", operands);
7503 else
7504 output_asm_insn ("fist%z0\t%0", operands);
7505 if (round_mode != I387_CW_ANY)
7506 output_asm_insn ("fldcw\t%2", operands);
7509 return "";
7512 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7513 should be used. UNORDERED_P is true when fucom should be used. */
7515 const char *
7516 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
7518 int stack_top_dies;
7519 rtx cmp_op0, cmp_op1;
7520 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
7522 if (eflags_p)
7524 cmp_op0 = operands[0];
7525 cmp_op1 = operands[1];
7527 else
7529 cmp_op0 = operands[1];
7530 cmp_op1 = operands[2];
7533 if (is_sse)
7535 if (GET_MODE (operands[0]) == SFmode)
7536 if (unordered_p)
7537 return "ucomiss\t{%1, %0|%0, %1}";
7538 else
7539 return "comiss\t{%1, %0|%0, %1}";
7540 else
7541 if (unordered_p)
7542 return "ucomisd\t{%1, %0|%0, %1}";
7543 else
7544 return "comisd\t{%1, %0|%0, %1}";
7547 gcc_assert (STACK_TOP_P (cmp_op0));
7549 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7551 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
7553 if (stack_top_dies)
7555 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
7556 return TARGET_USE_FFREEP ? "ffreep\t%y1" : "fstp\t%y1";
7558 else
7559 return "ftst\n\tfnstsw\t%0";
7562 if (STACK_REG_P (cmp_op1)
7563 && stack_top_dies
7564 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7565 && REGNO (cmp_op1) != FIRST_STACK_REG)
7567 /* If both the top of the 387 stack and the other operand (also a
7568 stack register) die, then this must be a `fcompp' float
7569 compare. */
7571 if (eflags_p)
7573 /* There is no double popping fcomi variant. Fortunately,
7574 eflags is immune from the fstp's cc clobbering. */
7575 if (unordered_p)
7576 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7577 else
7578 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7579 return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
7581 else
7583 if (unordered_p)
7584 return "fucompp\n\tfnstsw\t%0";
7585 else
7586 return "fcompp\n\tfnstsw\t%0";
7589 else
7591 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7593 static const char * const alt[16] =
7595 "fcom%z2\t%y2\n\tfnstsw\t%0",
7596 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7597 "fucom%z2\t%y2\n\tfnstsw\t%0",
7598 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7600 "ficom%z2\t%y2\n\tfnstsw\t%0",
7601 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7602 NULL,
7603 NULL,
7605 "fcomi\t{%y1, %0|%0, %y1}",
7606 "fcomip\t{%y1, %0|%0, %y1}",
7607 "fucomi\t{%y1, %0|%0, %y1}",
7608 "fucomip\t{%y1, %0|%0, %y1}",
7610 NULL,
7611 NULL,
7612 NULL,
7613 NULL
7616 int mask;
7617 const char *ret;
7619 mask = eflags_p << 3;
7620 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
7621 mask |= unordered_p << 1;
7622 mask |= stack_top_dies;
7624 gcc_assert (mask < 16);
7625 ret = alt[mask];
7626 gcc_assert (ret);
7628 return ret;
7632 void
7633 ix86_output_addr_vec_elt (FILE *file, int value)
7635 const char *directive = ASM_LONG;
7637 #ifdef ASM_QUAD
7638 if (TARGET_64BIT)
7639 directive = ASM_QUAD;
7640 #else
7641 gcc_assert (!TARGET_64BIT);
7642 #endif
7644 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7647 void
7648 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
7650 if (TARGET_64BIT)
7651 fprintf (file, "%s%s%d-%s%d\n",
7652 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7653 else if (HAVE_AS_GOTOFF_IN_DATA)
7654 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7655 #if TARGET_MACHO
7656 else if (TARGET_MACHO)
7658 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
7659 machopic_output_function_base_name (file);
7660 fprintf(file, "\n");
7662 #endif
7663 else
7664 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7665 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7668 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7669 for the target. */
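/* "xor reg, reg" is both shorter and usually faster than
   "mov $0, reg", but it clobbers the flags, so the xor form is
   wrapped in a PARALLEL with a (clobber (reg:CC 17)) below and is
   only used once reload has settled the register widths.  */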
7671 void
7672 ix86_expand_clear (rtx dest)
7674 rtx tmp;
7676 /* We play register width games, which are only valid after reload. */
7677 gcc_assert (reload_completed);
7679 /* Avoid HImode and its attendant prefix byte. */
7680 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7681 dest = gen_rtx_REG (SImode, REGNO (dest));
7683 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7685 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7686 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7688 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7689 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7692 emit_insn (tmp);
7695 /* X is an unchanging MEM. If it is a constant pool reference, return
7696 the constant pool rtx, else NULL. */
7698 rtx
7699 maybe_get_pool_constant (rtx x)
7701 x = ix86_delegitimize_address (XEXP (x, 0));
7703 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7704 return get_pool_constant (x);
7706 return NULL_RTX;
7709 void
7710 ix86_expand_move (enum machine_mode mode, rtx operands[])
7712 int strict = (reload_in_progress || reload_completed);
7713 rtx op0, op1;
7714 enum tls_model model;
7716 op0 = operands[0];
7717 op1 = operands[1];
7719 if (GET_CODE (op1) == SYMBOL_REF)
7721 model = SYMBOL_REF_TLS_MODEL (op1);
7722 if (model)
7724 op1 = legitimize_tls_address (op1, model, true);
7725 op1 = force_operand (op1, op0);
7726 if (op1 == op0)
7727 return;
7730 else if (GET_CODE (op1) == CONST
7731 && GET_CODE (XEXP (op1, 0)) == PLUS
7732 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
7734 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
7735 if (model)
7737 rtx addend = XEXP (XEXP (op1, 0), 1);
7738 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
7739 op1 = force_operand (op1, NULL);
7740 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
7741 op0, 1, OPTAB_DIRECT);
7742 if (op1 == op0)
7743 return;
7747 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7749 #if TARGET_MACHO
7750 if (MACHOPIC_PURE)
7752 rtx temp = ((reload_in_progress
7753 || ((op0 && GET_CODE (op0) == REG)
7754 && mode == Pmode))
7755 ? op0 : gen_reg_rtx (Pmode));
7756 op1 = machopic_indirect_data_reference (op1, temp);
7757 op1 = machopic_legitimize_pic_address (op1, mode,
7758 temp == op1 ? 0 : temp);
7760 else if (MACHOPIC_INDIRECT)
7761 op1 = machopic_indirect_data_reference (op1, 0);
7762 if (op0 == op1)
7763 return;
7764 #else
7765 if (GET_CODE (op0) == MEM)
7766 op1 = force_reg (Pmode, op1);
7767 else
7768 op1 = legitimize_address (op1, op1, Pmode);
7769 #endif /* TARGET_MACHO */
7771 else
7773 if (GET_CODE (op0) == MEM
7774 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7775 || !push_operand (op0, mode))
7776 && GET_CODE (op1) == MEM)
7777 op1 = force_reg (mode, op1);
7779 if (push_operand (op0, mode)
7780 && ! general_no_elim_operand (op1, mode))
7781 op1 = copy_to_mode_reg (mode, op1);
7783 /* Force large constants in 64-bit compilation into a register
7784 to get them CSEed. */
7785 if (TARGET_64BIT && mode == DImode
7786 && immediate_operand (op1, mode)
7787 && !x86_64_zext_immediate_operand (op1, VOIDmode)
7788 && !register_operand (op0, mode)
7789 && optimize && !reload_completed && !reload_in_progress)
7790 op1 = copy_to_mode_reg (mode, op1);
7792 if (FLOAT_MODE_P (mode))
7794 /* If we are loading a floating point constant to a register,
7795 force the value to memory now, since we'll get better code
7796 out of the back end. */
7798 if (strict)
7800 else if (GET_CODE (op1) == CONST_DOUBLE)
7802 op1 = validize_mem (force_const_mem (mode, op1));
7803 if (!register_operand (op0, mode))
7805 rtx temp = gen_reg_rtx (mode);
7806 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
7807 emit_move_insn (op0, temp);
7808 return;
7814 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
7817 void
7818 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
7820 rtx op0 = operands[0], op1 = operands[1];
7822 /* Force constants other than zero into memory. We do not know how
7823 the instructions used to build constants modify the upper 64 bits
7824 of the register, once we have that information we may be able
7825 to handle some of them more efficiently. */
7826 if ((reload_in_progress | reload_completed) == 0
7827 && register_operand (op0, mode)
7828 && CONSTANT_P (op1) && op1 != CONST0_RTX (mode))
7829 op1 = validize_mem (force_const_mem (mode, op1));
7831 /* Make operand1 a register if it isn't already. */
7832 if (!no_new_pseudos
7833 && !register_operand (op0, mode)
7834 && !register_operand (op1, mode))
7836 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
7837 return;
7840 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
7843 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
7844 straight to ix86_expand_vector_move. */
7846 void
7847 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
7849 rtx op0, op1, m;
7851 op0 = operands[0];
7852 op1 = operands[1];
7854 if (MEM_P (op1))
7856 /* If we're optimizing for size, movups is the smallest. */
7857 if (optimize_size)
7859 op0 = gen_lowpart (V4SFmode, op0);
7860 op1 = gen_lowpart (V4SFmode, op1);
7861 emit_insn (gen_sse_movups (op0, op1));
7862 return;
7865 /* ??? If we have typed data, then it would appear that using
7866 movdqu is the only way to get unaligned data loaded with
7867 integer type. */
7868 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
7870 op0 = gen_lowpart (V16QImode, op0);
7871 op1 = gen_lowpart (V16QImode, op1);
7872 emit_insn (gen_sse2_movdqu (op0, op1));
7873 return;
7876 if (TARGET_SSE2 && mode == V2DFmode)
7878 rtx zero;
7880 /* When SSE registers are split into halves, we can avoid
7881 writing to the top half twice. */
7882 if (TARGET_SSE_SPLIT_REGS)
7884 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
7885 zero = op0;
7887 else
7889 /* ??? Not sure about the best option for the Intel chips.
7890 The following would seem to satisfy; the register is
7891 entirely cleared, breaking the dependency chain. We
7892 then store to the upper half, with a dependency depth
7893 of one. A rumor has it that Intel recommends two movsd
7894 followed by an unpacklpd, but this is unconfirmed. And
7895 given that the dependency depth of the unpacklpd would
7896 still be one, I'm not sure why this would be better. */
7897 zero = CONST0_RTX (V2DFmode);
7900 m = adjust_address (op1, DFmode, 0);
7901 emit_insn (gen_sse2_loadlpd (op0, zero, m));
7902 m = adjust_address (op1, DFmode, 8);
7903 emit_insn (gen_sse2_loadhpd (op0, op0, m));
7905 else
7907 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
7908 emit_move_insn (op0, CONST0_RTX (mode));
7909 else
7910 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
7912 if (mode != V4SFmode)
7913 op0 = gen_lowpart (V4SFmode, op0);
7914 m = adjust_address (op1, V2SFmode, 0);
7915 emit_insn (gen_sse_loadlps (op0, op0, m));
7916 m = adjust_address (op1, V2SFmode, 8);
7917 emit_insn (gen_sse_loadhps (op0, op0, m));
7920 else if (MEM_P (op0))
7922 /* If we're optimizing for size, movups is the smallest. */
7923 if (optimize_size)
7925 op0 = gen_lowpart (V4SFmode, op0);
7926 op1 = gen_lowpart (V4SFmode, op1);
7927 emit_insn (gen_sse_movups (op0, op1));
7928 return;
7931 /* ??? Similar to above, only less clear because of quote
7932 typeless stores unquote. */
7933 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
7934 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
7936 op0 = gen_lowpart (V16QImode, op0);
7937 op1 = gen_lowpart (V16QImode, op1);
7938 emit_insn (gen_sse2_movdqu (op0, op1));
7939 return;
7942 if (TARGET_SSE2 && mode == V2DFmode)
7944 m = adjust_address (op0, DFmode, 0);
7945 emit_insn (gen_sse2_storelpd (m, op1));
7946 m = adjust_address (op0, DFmode, 8);
7947 emit_insn (gen_sse2_storehpd (m, op1));
7949 else
7951 if (mode != V4SFmode)
7952 op1 = gen_lowpart (V4SFmode, op1);
7953 m = adjust_address (op0, V2SFmode, 0);
7954 emit_insn (gen_sse_storelps (m, op1));
7955 m = adjust_address (op0, V2SFmode, 8);
7956 emit_insn (gen_sse_storehps (m, op1));
7959 else
7960 gcc_unreachable ();
7963 /* Expand a push in MODE. This is some mode for which we do not support
7964 proper push instructions, at least from the registers that we expect
7965 the value to live in. */
7967 void
7968 ix86_expand_push (enum machine_mode mode, rtx x)
7970 rtx tmp;
7972 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
7973 GEN_INT (-GET_MODE_SIZE (mode)),
7974 stack_pointer_rtx, 1, OPTAB_DIRECT);
7975 if (tmp != stack_pointer_rtx)
7976 emit_move_insn (stack_pointer_rtx, tmp);
7978 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
7979 emit_move_insn (tmp, x);
7982 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
7983 destination to use for the operation. If different from the true
7984 destination in operands[0], a copy operation will be required. */
7986 rtx
7987 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
7988 rtx operands[])
7990 int matching_memory;
7991 rtx src1, src2, dst;
7993 dst = operands[0];
7994 src1 = operands[1];
7995 src2 = operands[2];
7997 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7998 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7999 && (rtx_equal_p (dst, src2)
8000 || immediate_operand (src1, mode)))
8002 rtx temp = src1;
8003 src1 = src2;
8004 src2 = temp;
8007 /* If the destination is memory, and we do not have matching source
8008 operands, do things in registers. */
8009 matching_memory = 0;
8010 if (GET_CODE (dst) == MEM)
8012 if (rtx_equal_p (dst, src1))
8013 matching_memory = 1;
8014 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8015 && rtx_equal_p (dst, src2))
8016 matching_memory = 2;
8017 else
8018 dst = gen_reg_rtx (mode);
8021 /* Both source operands cannot be in memory. */
8022 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8024 if (matching_memory != 2)
8025 src2 = force_reg (mode, src2);
8026 else
8027 src1 = force_reg (mode, src1);
8030 /* If the operation is not commutable, source 1 cannot be a constant
8031 or non-matching memory. */
8032 if ((CONSTANT_P (src1)
8033 || (!matching_memory && GET_CODE (src1) == MEM))
8034 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8035 src1 = force_reg (mode, src1);
8037 /* If optimizing, copy to regs to improve CSE */
8038 if (optimize && ! no_new_pseudos)
8040 if (GET_CODE (dst) == MEM)
8041 dst = gen_reg_rtx (mode);
8042 if (GET_CODE (src1) == MEM)
8043 src1 = force_reg (mode, src1);
8044 if (GET_CODE (src2) == MEM)
8045 src2 = force_reg (mode, src2);
8048 src1 = operands[1] = src1;
8049 src2 = operands[2] = src2;
8050 return dst;
8053 /* Similarly, but assume that the destination has already been
8054 set up properly. */
8056 void
8057 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
8058 enum machine_mode mode, rtx operands[])
8060 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
8061 gcc_assert (dst == operands[0]);
8064 /* Attempt to expand a binary operator. Make the expansion closer to the
8065 actual machine, than just general_operand, which will allow 3 separate
8066 memory references (one output, two input) in a single insn. */
8068 void
8069 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8070 rtx operands[])
8072 rtx src1, src2, dst, op, clob;
8074 dst = ix86_fixup_binary_operands (code, mode, operands);
8075 src1 = operands[1];
8076 src2 = operands[2];
8078 /* Emit the instruction. */
8080 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8081 if (reload_in_progress)
8083 /* Reload doesn't know about the flags register, and doesn't know that
8084 it doesn't want to clobber it. We can only do this with PLUS. */
8085 gcc_assert (code == PLUS);
8086 emit_insn (op);
8088 else
8090 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8091 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8094 /* Fix up the destination if needed. */
8095 if (dst != operands[0])
8096 emit_move_insn (operands[0], dst);
8099 /* Return TRUE or FALSE depending on whether the binary operator meets the
8100 appropriate constraints. */
8102 int
8103 ix86_binary_operator_ok (enum rtx_code code,
8104 enum machine_mode mode ATTRIBUTE_UNUSED,
8105 rtx operands[3])
8107 /* Both source operands cannot be in memory. */
8108 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8109 return 0;
8110 /* If the operation is not commutable, source 1 cannot be a constant. */
8111 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8112 return 0;
8113 /* If the destination is memory, we must have a matching source operand. */
8114 if (GET_CODE (operands[0]) == MEM
8115 && ! (rtx_equal_p (operands[0], operands[1])
8116 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8117 && rtx_equal_p (operands[0], operands[2]))))
8118 return 0;
8119 /* If the operation is not commutable and the source 1 is memory, we must
8120 have a matching destination. */
8121 if (GET_CODE (operands[1]) == MEM
8122 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
8123 && ! rtx_equal_p (operands[0], operands[1]))
8124 return 0;
8125 return 1;
8128 /* Attempt to expand a unary operator. Make the expansion closer to the
8129 actual machine, than just general_operand, which will allow 2 separate
8130 memory references (one output, one input) in a single insn. */
8132 void
8133 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8134 rtx operands[])
8136 int matching_memory;
8137 rtx src, dst, op, clob;
8139 dst = operands[0];
8140 src = operands[1];
8142 /* If the destination is memory, and we do not have matching source
8143 operands, do things in registers. */
8144 matching_memory = 0;
8145 if (MEM_P (dst))
8147 if (rtx_equal_p (dst, src))
8148 matching_memory = 1;
8149 else
8150 dst = gen_reg_rtx (mode);
8153 /* When source operand is memory, destination must match. */
8154 if (MEM_P (src) && !matching_memory)
8155 src = force_reg (mode, src);
8157 /* If optimizing, copy to regs to improve CSE. */
8158 if (optimize && ! no_new_pseudos)
8160 if (GET_CODE (dst) == MEM)
8161 dst = gen_reg_rtx (mode);
8162 if (GET_CODE (src) == MEM)
8163 src = force_reg (mode, src);
8166 /* Emit the instruction. */
8168 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8169 if (reload_in_progress || code == NOT)
8171 /* Reload doesn't know about the flags register, and doesn't know that
8172 it doesn't want to clobber it. */
8173 gcc_assert (code == NOT);
8174 emit_insn (op);
8176 else
8178 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8179 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8182 /* Fix up the destination if needed. */
8183 if (dst != operands[0])
8184 emit_move_insn (operands[0], dst);
8187 /* Return TRUE or FALSE depending on whether the unary operator meets the
8188 appropriate constraints. */
8190 int
8191 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8192 enum machine_mode mode ATTRIBUTE_UNUSED,
8193 rtx operands[2] ATTRIBUTE_UNUSED)
8195 /* If one of operands is memory, source and destination must match. */
8196 if ((GET_CODE (operands[0]) == MEM
8197 || GET_CODE (operands[1]) == MEM)
8198 && ! rtx_equal_p (operands[0], operands[1]))
8199 return FALSE;
8200 return TRUE;
8203 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
8204 Create a mask for the sign bit in MODE for an SSE register. If VECT is
8205 true, then replicate the mask for all elements of the vector register.
8206 If INVERT is true, then create a mask excluding the sign bit. */
8208 rtx
8209 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
8211 enum machine_mode vec_mode;
8212 HOST_WIDE_INT hi, lo;
8213 int shift = 63;
8214 rtvec v;
8215 rtx mask;
8217 /* Find the sign bit, sign extended to 2*HWI. */
8218 if (mode == SFmode)
8219 lo = 0x80000000, hi = lo < 0;
8220 else if (HOST_BITS_PER_WIDE_INT >= 64)
8221 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
8222 else
8223 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
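/* When HOST_WIDE_INT is narrower than 64 bits, bit 63 cannot live in LO,
   so it is placed at the corresponding position within HI instead.  */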
8225 if (invert)
8226 lo = ~lo, hi = ~hi;
8228 /* Force this value into the low part of a fp vector constant. */
8229 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
8230 mask = gen_lowpart (mode, mask);
8232 if (mode == SFmode)
8234 if (vect)
8235 v = gen_rtvec (4, mask, mask, mask, mask);
8236 else
8237 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
8238 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
8239 vec_mode = V4SFmode;
8241 else
8243 if (vect)
8244 v = gen_rtvec (2, mask, mask);
8245 else
8246 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
8247 vec_mode = V2DFmode;
8250 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
8253 /* Generate code for floating point ABS or NEG. */
8255 void
8256 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
8257 rtx operands[])
8259 rtx mask, set, use, clob, dst, src;
8260 bool matching_memory;
8261 bool use_sse = false;
8262 bool vector_mode = VECTOR_MODE_P (mode);
8263 enum machine_mode elt_mode = mode;
8265 if (vector_mode)
8267 elt_mode = GET_MODE_INNER (mode);
8268 use_sse = true;
8270 else if (TARGET_SSE_MATH)
8271 use_sse = SSE_FLOAT_MODE_P (mode);
8273 /* NEG and ABS performed with SSE use bitwise mask operations.
8274 Create the appropriate mask now. */
8275 if (use_sse)
8276 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
8277 else
8279 /* When not using SSE, we don't use the mask, but prefer to keep the
8280 same general form of the insn pattern to reduce duplication when
8281 it comes time to split. */
8282 mask = const0_rtx;
8285 dst = operands[0];
8286 src = operands[1];
8288 /* If the destination is memory, and we don't have matching source
8289 operands, do things in registers. */
8290 matching_memory = false;
8291 if (MEM_P (dst))
8293 if (rtx_equal_p (dst, src) && (!optimize || no_new_pseudos))
8294 matching_memory = true;
8295 else
8296 dst = gen_reg_rtx (mode);
8298 if (MEM_P (src) && !matching_memory)
8299 src = force_reg (mode, src);
8301 if (vector_mode)
8303 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
8304 set = gen_rtx_SET (VOIDmode, dst, set);
8305 emit_insn (set);
8307 else
8309 set = gen_rtx_fmt_e (code, mode, src);
8310 set = gen_rtx_SET (VOIDmode, dst, set);
8311 use = gen_rtx_USE (VOIDmode, mask);
8312 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8313 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));
8316 if (dst != operands[0])
8317 emit_move_insn (operands[0], dst);
8320 /* Expand a copysign operation. Special case operand 0 being a constant. */
8322 void
8323 ix86_expand_copysign (rtx operands[])
8325 enum machine_mode mode, vmode;
8326 rtx dest, op0, op1, mask, nmask;
8328 dest = operands[0];
8329 op0 = operands[1];
8330 op1 = operands[2];
8332 mode = GET_MODE (dest);
8333 vmode = mode == SFmode ? V4SFmode : V2DFmode;
8335 if (GET_CODE (op0) == CONST_DOUBLE)
8337 rtvec v;
8339 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
8340 op0 = simplify_unary_operation (ABS, mode, op0, mode);
8342 if (op0 == CONST0_RTX (mode))
8343 op0 = CONST0_RTX (vmode);
8344 else
8346 if (mode == SFmode)
8347 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
8348 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
8349 else
8350 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
8351 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
8354 mask = ix86_build_signbit_mask (mode, 0, 0);
8356 if (mode == SFmode)
8357 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
8358 else
8359 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
8361 else
8363 nmask = ix86_build_signbit_mask (mode, 0, 1);
8364 mask = ix86_build_signbit_mask (mode, 0, 0);
8366 if (mode == SFmode)
8367 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
8368 else
8369 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
8373 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
8374 be a constant, and so has already been expanded into a vector constant. */
8376 void
8377 ix86_split_copysign_const (rtx operands[])
8379 enum machine_mode mode, vmode;
8380 rtx dest, op0, op1, mask, x;
8382 dest = operands[0];
8383 op0 = operands[1];
8384 op1 = operands[2];
8385 mask = operands[3];
8387 mode = GET_MODE (dest);
8388 vmode = GET_MODE (mask);
8390 dest = simplify_gen_subreg (vmode, dest, mode, 0);
8391 x = gen_rtx_AND (vmode, dest, mask);
8392 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8394 if (op0 != CONST0_RTX (vmode))
8396 x = gen_rtx_IOR (vmode, dest, op0);
8397 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8401 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
8402 so we have to do two masks. */
8404 void
8405 ix86_split_copysign_var (rtx operands[])
8407 enum machine_mode mode, vmode;
8408 rtx dest, scratch, op0, op1, mask, nmask, x;
8410 dest = operands[0];
8411 scratch = operands[1];
8412 op0 = operands[2];
8413 op1 = operands[3];
8414 nmask = operands[4];
8415 mask = operands[5];
8417 mode = GET_MODE (dest);
8418 vmode = GET_MODE (mask);
8420 if (rtx_equal_p (op0, op1))
8422 /* Shouldn't happen often (it's useless, obviously), but when it does
8423 we'd generate incorrect code if we continue below. */
8424 emit_move_insn (dest, op0);
8425 return;
8428 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
8430 gcc_assert (REGNO (op1) == REGNO (scratch));
8432 x = gen_rtx_AND (vmode, scratch, mask);
8433 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
8435 dest = mask;
8436 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
8437 x = gen_rtx_NOT (vmode, dest);
8438 x = gen_rtx_AND (vmode, x, op0);
8439 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8441 else
8443 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
8445 x = gen_rtx_AND (vmode, scratch, mask);
8447 else /* alternative 2,4 */
8449 gcc_assert (REGNO (mask) == REGNO (scratch));
8450 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
8451 x = gen_rtx_AND (vmode, scratch, op1);
8453 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
8455 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
8457 dest = simplify_gen_subreg (vmode, op0, mode, 0);
8458 x = gen_rtx_AND (vmode, dest, nmask);
8460 else /* alternative 3,4 */
8462 gcc_assert (REGNO (nmask) == REGNO (dest));
8463 dest = nmask;
8464 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
8465 x = gen_rtx_AND (vmode, dest, op0);
8467 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8470 x = gen_rtx_IOR (vmode, dest, scratch);
8471 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8474 /* Return TRUE or FALSE depending on whether the first SET in INSN
8475 has source and destination with matching CC modes, and that the
8476 CC mode is at least as constrained as REQ_MODE. */
8478 int
8479 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8481 rtx set;
8482 enum machine_mode set_mode;
8484 set = PATTERN (insn);
8485 if (GET_CODE (set) == PARALLEL)
8486 set = XVECEXP (set, 0, 0);
8487 gcc_assert (GET_CODE (set) == SET);
8488 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
8490 set_mode = GET_MODE (SET_DEST (set));
8491 switch (set_mode)
8493 case CCNOmode:
8494 if (req_mode != CCNOmode
8495 && (req_mode != CCmode
8496 || XEXP (SET_SRC (set), 1) != const0_rtx))
8497 return 0;
8498 break;
8499 case CCmode:
8500 if (req_mode == CCGCmode)
8501 return 0;
8502 /* FALLTHRU */
8503 case CCGCmode:
8504 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8505 return 0;
8506 /* FALLTHRU */
8507 case CCGOCmode:
8508 if (req_mode == CCZmode)
8509 return 0;
8510 /* FALLTHRU */
8511 case CCZmode:
8512 break;
8514 default:
8515 gcc_unreachable ();
8518 return (GET_MODE (SET_SRC (set)) == set_mode);
8521 /* Generate insn patterns to do an integer compare of OPERANDS. */
8523 static rtx
8524 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8526 enum machine_mode cmpmode;
8527 rtx tmp, flags;
8529 cmpmode = SELECT_CC_MODE (code, op0, op1);
8530 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8532 /* This is very simple, but making the interface the same as in the
8533 FP case makes the rest of the code easier. */
8534 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8535 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8537 /* Return the test that should be put into the flags user, i.e.
8538 the bcc, scc, or cmov instruction. */
8539 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8542 /* Figure out whether to use ordered or unordered fp comparisons.
8543 Return the appropriate mode to use. */
8545 enum machine_mode
8546 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8548 /* ??? In order to make all comparisons reversible, we do all comparisons
8549 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8550 trapping and nontrapping forms of comparisons, we can make inequality
8551 comparisons trapping again, since it results in better code when using
8552 FCOM based compares. */
8553 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8556 enum machine_mode
8557 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8559 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8560 return ix86_fp_compare_mode (code);
8561 switch (code)
8563 /* Only zero flag is needed. */
8564 case EQ: /* ZF=0 */
8565 case NE: /* ZF!=0 */
8566 return CCZmode;
8567 /* Codes needing carry flag. */
8568 case GEU: /* CF=0 */
8569 case GTU: /* CF=0 & ZF=0 */
8570 case LTU: /* CF=1 */
8571 case LEU: /* CF=1 | ZF=1 */
8572 return CCmode;
8573 /* Codes possibly doable only with sign flag when
8574 comparing against zero. */
8575 case GE: /* SF=OF or SF=0 */
8576 case LT: /* SF<>OF or SF=1 */
8577 if (op1 == const0_rtx)
8578 return CCGOCmode;
8579 else
8580 /* For other cases Carry flag is not required. */
8581 return CCGCmode;
8582 /* Codes doable only with sign flag when comparing
8583 against zero, but we lack a jump instruction for it,
8584 so we need to use relational tests against the overflow
8585 flag, which thus needs to be zero. */
8586 case GT: /* ZF=0 & SF=OF */
8587 case LE: /* ZF=1 | SF<>OF */
8588 if (op1 == const0_rtx)
8589 return CCNOmode;
8590 else
8591 return CCGCmode;
8592 /* The strcmp pattern does (use flags), and combine may ask us for the
8593 proper mode. */
8594 case USE:
8595 return CCmode;
8596 default:
8597 gcc_unreachable ();
8601 /* Return the fixed registers used for condition codes. */
8603 static bool
8604 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8606 *p1 = FLAGS_REG;
8607 *p2 = FPSR_REG;
8608 return true;
8611 /* If two condition code modes are compatible, return a condition code
8612 mode which is compatible with both. Otherwise, return
8613 VOIDmode. */
8615 static enum machine_mode
8616 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8618 if (m1 == m2)
8619 return m1;
8621 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8622 return VOIDmode;
8624 if ((m1 == CCGCmode && m2 == CCGOCmode)
8625 || (m1 == CCGOCmode && m2 == CCGCmode))
8626 return CCGCmode;
8628 switch (m1)
8630 default:
8631 gcc_unreachable ();
8633 case CCmode:
8634 case CCGCmode:
8635 case CCGOCmode:
8636 case CCNOmode:
8637 case CCZmode:
8638 switch (m2)
8640 default:
8641 return VOIDmode;
8643 case CCmode:
8644 case CCGCmode:
8645 case CCGOCmode:
8646 case CCNOmode:
8647 case CCZmode:
8648 return CCmode;
8651 case CCFPmode:
8652 case CCFPUmode:
8653 /* These are only compatible with themselves, which we already
8654 checked above. */
8655 return VOIDmode;
8659 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8661 int
8662 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8664 enum rtx_code swapped_code = swap_condition (code);
8665 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8666 || (ix86_fp_comparison_cost (swapped_code)
8667 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8670 /* Swap, force into registers, or otherwise massage the two operands
8671 to a fp comparison. The operands are updated in place; the new
8672 comparison code is returned. */
8674 static enum rtx_code
8675 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8677 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8678 rtx op0 = *pop0, op1 = *pop1;
8679 enum machine_mode op_mode = GET_MODE (op0);
8680 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
8682 /* All of the unordered compare instructions only work on registers.
8683 The same is true of the fcomi compare instructions. The same is
8684 true of the XFmode compare instructions if not comparing with
8685 zero (ftst insn is used in this case). */
8687 if (!is_sse
8688 && (fpcmp_mode == CCFPUmode
8689 || (op_mode == XFmode
8690 && ! (standard_80387_constant_p (op0) == 1
8691 || standard_80387_constant_p (op1) == 1))
8692 || ix86_use_fcomi_compare (code)))
8694 op0 = force_reg (op_mode, op0);
8695 op1 = force_reg (op_mode, op1);
8697 else
8699 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8700 things around if they appear profitable, otherwise force op0
8701 into a register. */
8703 if (standard_80387_constant_p (op0) == 0
8704 || (GET_CODE (op0) == MEM
8705 && ! (standard_80387_constant_p (op1) == 0
8706 || GET_CODE (op1) == MEM)))
8708 rtx tmp;
8709 tmp = op0, op0 = op1, op1 = tmp;
8710 code = swap_condition (code);
8713 if (GET_CODE (op0) != REG)
8714 op0 = force_reg (op_mode, op0);
8716 if (CONSTANT_P (op1))
8718 int tmp = standard_80387_constant_p (op1);
8719 if (tmp == 0)
8720 op1 = validize_mem (force_const_mem (op_mode, op1));
8721 else if (tmp == 1)
8723 if (TARGET_CMOVE)
8724 op1 = force_reg (op_mode, op1);
8726 else
8727 op1 = force_reg (op_mode, op1);
8731 /* Try to rearrange the comparison to make it cheaper. */
8732 if (ix86_fp_comparison_cost (code)
8733 > ix86_fp_comparison_cost (swap_condition (code))
8734 && (GET_CODE (op1) == REG || !no_new_pseudos))
8736 rtx tmp;
8737 tmp = op0, op0 = op1, op1 = tmp;
8738 code = swap_condition (code);
8739 if (GET_CODE (op0) != REG)
8740 op0 = force_reg (op_mode, op0);
8743 *pop0 = op0;
8744 *pop1 = op1;
8745 return code;
8748 /* Convert comparison codes we use to represent FP comparison to integer
8749 code that will result in proper branch. Return UNKNOWN if no such code
8750 is available. */
8752 enum rtx_code
8753 ix86_fp_compare_code_to_integer (enum rtx_code code)
8755 switch (code)
8757 case GT:
8758 return GTU;
8759 case GE:
8760 return GEU;
8761 case ORDERED:
8762 case UNORDERED:
8763 return code;
8764 break;
8765 case UNEQ:
8766 return EQ;
8767 break;
8768 case UNLT:
8769 return LTU;
8770 break;
8771 case UNLE:
8772 return LEU;
8773 break;
8774 case LTGT:
8775 return NE;
8776 break;
8777 default:
8778 return UNKNOWN;
8782 /* Split comparison code CODE into comparisons we can do using branch
8783 instructions. BYPASS_CODE is comparison code for branch that will
8784 branch around FIRST_CODE and SECOND_CODE. If one of the branches
8785 is not required, its value is set to UNKNOWN.
8786 We never require more than two branches. */
8788 void
8789 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8790 enum rtx_code *first_code,
8791 enum rtx_code *second_code)
8793 *first_code = code;
8794 *bypass_code = UNKNOWN;
8795 *second_code = UNKNOWN;
8797 /* The fcomi comparison sets flags as follows:
8799 cmp ZF PF CF
8800 > 0 0 0
8801 < 0 0 1
8802 = 1 0 0
8803 un 1 1 1 */
8805 switch (code)
8807 case GT: /* GTU - CF=0 & ZF=0 */
8808 case GE: /* GEU - CF=0 */
8809 case ORDERED: /* PF=0 */
8810 case UNORDERED: /* PF=1 */
8811 case UNEQ: /* EQ - ZF=1 */
8812 case UNLT: /* LTU - CF=1 */
8813 case UNLE: /* LEU - CF=1 | ZF=1 */
8814 case LTGT: /* EQ - ZF=0 */
8815 break;
8816 case LT: /* LTU - CF=1 - fails on unordered */
8817 *first_code = UNLT;
8818 *bypass_code = UNORDERED;
8819 break;
8820 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8821 *first_code = UNLE;
8822 *bypass_code = UNORDERED;
8823 break;
8824 case EQ: /* EQ - ZF=1 - fails on unordered */
8825 *first_code = UNEQ;
8826 *bypass_code = UNORDERED;
8827 break;
8828 case NE: /* NE - ZF=0 - fails on unordered */
8829 *first_code = LTGT;
8830 *second_code = UNORDERED;
8831 break;
8832 case UNGE: /* GEU - CF=0 - fails on unordered */
8833 *first_code = GE;
8834 *second_code = UNORDERED;
8835 break;
8836 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8837 *first_code = GT;
8838 *second_code = UNORDERED;
8839 break;
8840 default:
8841 gcc_unreachable ();
8843 if (!TARGET_IEEE_FP)
8845 *second_code = UNKNOWN;
8846 *bypass_code = UNKNOWN;
8850 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
8851 All following functions use the number of instructions as their cost metric.
8852 In the future this should be tweaked to compute bytes for optimize_size and
8853 take into account the performance of various instructions on various CPUs. */
8854 static int
8855 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8857 if (!TARGET_IEEE_FP)
8858 return 4;
8859 /* The cost of code output by ix86_expand_fp_compare. */
8860 switch (code)
8862 case UNLE:
8863 case UNLT:
8864 case LTGT:
8865 case GT:
8866 case GE:
8867 case UNORDERED:
8868 case ORDERED:
8869 case UNEQ:
8870 return 4;
8871 break;
8872 case LT:
8873 case NE:
8874 case EQ:
8875 case UNGE:
8876 return 5;
8877 break;
8878 case LE:
8879 case UNGT:
8880 return 6;
8881 break;
8882 default:
8883 gcc_unreachable ();
8887 /* Return cost of comparison done using fcomi operation.
8888 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8889 static int
8890 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
8892 enum rtx_code bypass_code, first_code, second_code;
8893 /* Return arbitrarily high cost when instruction is not supported - this
8894 prevents gcc from using it. */
8895 if (!TARGET_CMOVE)
8896 return 1024;
8897 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8898 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
8901 /* Return cost of comparison done using sahf operation.
8902 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8903 static int
8904 ix86_fp_comparison_sahf_cost (enum rtx_code code)
8906 enum rtx_code bypass_code, first_code, second_code;
8907 /* Return an arbitrarily high cost when the instruction is not preferred - this
8908 keeps gcc from using it. */
8909 if (!TARGET_USE_SAHF && !optimize_size)
8910 return 1024;
8911 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8912 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
8915 /* Compute cost of the comparison done using any method.
8916 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8917 static int
8918 ix86_fp_comparison_cost (enum rtx_code code)
8920 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8921 int min;
8923 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8924 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8926 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8927 if (min > sahf_cost)
8928 min = sahf_cost;
8929 if (min > fcomi_cost)
8930 min = fcomi_cost;
8931 return min;
8934 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8936 static rtx
8937 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
8938 rtx *second_test, rtx *bypass_test)
8940 enum machine_mode fpcmp_mode, intcmp_mode;
8941 rtx tmp, tmp2;
8942 int cost = ix86_fp_comparison_cost (code);
8943 enum rtx_code bypass_code, first_code, second_code;
8945 fpcmp_mode = ix86_fp_compare_mode (code);
8946 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8948 if (second_test)
8949 *second_test = NULL_RTX;
8950 if (bypass_test)
8951 *bypass_test = NULL_RTX;
8953 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8955 /* Do fcomi/sahf based test when profitable. */
8956 if ((bypass_code == UNKNOWN || bypass_test)
8957 && (second_code == UNKNOWN || second_test)
8958 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8960 if (TARGET_CMOVE)
8962 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8963 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8964 tmp);
8965 emit_insn (tmp);
8967 else
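/* Without fcomi, go through the FPU status word: fnstsw (via UNSPEC_FNSTSW)
   copies it into the scratch register and sahf then moves its high byte (AH)
   into the EFLAGS condition bits.  */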
8969 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8970 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8971 if (!scratch)
8972 scratch = gen_reg_rtx (HImode);
8973 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8974 emit_insn (gen_x86_sahf_1 (scratch));
8977 /* The FP codes work out to act like unsigned. */
8978 intcmp_mode = fpcmp_mode;
8979 code = first_code;
8980 if (bypass_code != UNKNOWN)
8981 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8982 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8983 const0_rtx);
8984 if (second_code != UNKNOWN)
8985 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8986 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8987 const0_rtx);
8989 else
8991 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8992 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8993 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8994 if (!scratch)
8995 scratch = gen_reg_rtx (HImode);
8996 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8998 /* In the unordered case, we have to check C2 for NaN's, which
8999 doesn't happen to work out to anything nice combination-wise.
9000 So do some bit twiddling on the value we've got in AH to come
9001 up with an appropriate set of condition codes. */
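/* Within AH the FPU status bits are C0 = 0x01, C2 = 0x04 and C3 = 0x40,
   so e.g. the 0x45 masks below test C0|C2|C3.  */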
9003 intcmp_mode = CCNOmode;
9004 switch (code)
9006 case GT:
9007 case UNGT:
9008 if (code == GT || !TARGET_IEEE_FP)
9010 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9011 code = EQ;
9013 else
9015 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9016 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9017 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9018 intcmp_mode = CCmode;
9019 code = GEU;
9021 break;
9022 case LT:
9023 case UNLT:
9024 if (code == LT && TARGET_IEEE_FP)
9026 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9027 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9028 intcmp_mode = CCmode;
9029 code = EQ;
9031 else
9033 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9034 code = NE;
9036 break;
9037 case GE:
9038 case UNGE:
9039 if (code == GE || !TARGET_IEEE_FP)
9041 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9042 code = EQ;
9044 else
9046 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9047 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9048 GEN_INT (0x01)));
9049 code = NE;
9051 break;
9052 case LE:
9053 case UNLE:
9054 if (code == LE && TARGET_IEEE_FP)
9056 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9057 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9058 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9059 intcmp_mode = CCmode;
9060 code = LTU;
9062 else
9064 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9065 code = NE;
9067 break;
9068 case EQ:
9069 case UNEQ:
9070 if (code == EQ && TARGET_IEEE_FP)
9072 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9073 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9074 intcmp_mode = CCmode;
9075 code = EQ;
9077 else
9079 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9080 code = NE;
9081 break;
9083 break;
9084 case NE:
9085 case LTGT:
9086 if (code == NE && TARGET_IEEE_FP)
9088 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9089 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9090 GEN_INT (0x40)));
9091 code = NE;
9093 else
9095 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9096 code = EQ;
9098 break;
9100 case UNORDERED:
9101 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9102 code = NE;
9103 break;
9104 case ORDERED:
9105 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9106 code = EQ;
9107 break;
9109 default:
9110 gcc_unreachable ();
9114 /* Return the test that should be put into the flags user, i.e.
9115 the bcc, scc, or cmov instruction. */
9116 return gen_rtx_fmt_ee (code, VOIDmode,
9117 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9118 const0_rtx);
9121 rtx
9122 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9124 rtx op0, op1, ret;
9125 op0 = ix86_compare_op0;
9126 op1 = ix86_compare_op1;
9128 if (second_test)
9129 *second_test = NULL_RTX;
9130 if (bypass_test)
9131 *bypass_test = NULL_RTX;
9133 if (ix86_compare_emitted)
9135 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
9136 ix86_compare_emitted = NULL_RTX;
9138 else if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9139 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9140 second_test, bypass_test);
9141 else
9142 ret = ix86_expand_int_compare (code, op0, op1);
9144 return ret;
9147 /* Return true if the CODE will result in nontrivial jump sequence. */
9148 bool
9149 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9151 enum rtx_code bypass_code, first_code, second_code;
9152 if (!TARGET_CMOVE)
9153 return true;
9154 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9155 return bypass_code != UNKNOWN || second_code != UNKNOWN;
9158 void
9159 ix86_expand_branch (enum rtx_code code, rtx label)
9161 rtx tmp;
9163 switch (GET_MODE (ix86_compare_op0))
9165 case QImode:
9166 case HImode:
9167 case SImode:
9168 simple:
9169 tmp = ix86_expand_compare (code, NULL, NULL);
9170 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9171 gen_rtx_LABEL_REF (VOIDmode, label),
9172 pc_rtx);
9173 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9174 return;
9176 case SFmode:
9177 case DFmode:
9178 case XFmode:
9180 rtvec vec;
9181 int use_fcomi;
9182 enum rtx_code bypass_code, first_code, second_code;
9184 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9185 &ix86_compare_op1);
9187 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9189 /* Check whether we will use the natural sequence with one jump. If
9190 so, we can expand the jump early. Otherwise delay expansion by
9191 creating a compound insn so as not to confuse optimizers. */
9192 if (bypass_code == UNKNOWN && second_code == UNKNOWN
9193 && TARGET_CMOVE)
9195 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9196 gen_rtx_LABEL_REF (VOIDmode, label),
9197 pc_rtx, NULL_RTX, NULL_RTX);
9199 else
9201 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9202 ix86_compare_op0, ix86_compare_op1);
9203 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9204 gen_rtx_LABEL_REF (VOIDmode, label),
9205 pc_rtx);
9206 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9208 use_fcomi = ix86_use_fcomi_compare (code);
9209 vec = rtvec_alloc (3 + !use_fcomi);
9210 RTVEC_ELT (vec, 0) = tmp;
9211 RTVEC_ELT (vec, 1)
9212 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9213 RTVEC_ELT (vec, 2)
9214 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9215 if (! use_fcomi)
9216 RTVEC_ELT (vec, 3)
9217 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9219 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9221 return;
9224 case DImode:
9225 if (TARGET_64BIT)
9226 goto simple;
9227 /* Expand DImode branch into multiple compare+branch. */
9229 rtx lo[2], hi[2], label2;
9230 enum rtx_code code1, code2, code3;
9232 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9234 tmp = ix86_compare_op0;
9235 ix86_compare_op0 = ix86_compare_op1;
9236 ix86_compare_op1 = tmp;
9237 code = swap_condition (code);
9239 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9240 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9242 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9243 avoid two branches. This costs one extra insn, so disable when
9244 optimizing for size. */
9246 if ((code == EQ || code == NE)
9247 && (!optimize_size
9248 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9250 rtx xor0, xor1;
9252 xor1 = hi[0];
9253 if (hi[1] != const0_rtx)
9254 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9255 NULL_RTX, 0, OPTAB_WIDEN);
9257 xor0 = lo[0];
9258 if (lo[1] != const0_rtx)
9259 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9260 NULL_RTX, 0, OPTAB_WIDEN);
9262 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9263 NULL_RTX, 0, OPTAB_WIDEN);
9265 ix86_compare_op0 = tmp;
9266 ix86_compare_op1 = const0_rtx;
9267 ix86_expand_branch (code, label);
9268 return;
9271 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9272 op1 is a constant and the low word is zero, then we can just
9273 examine the high word. */
9275 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9276 switch (code)
9278 case LT: case LTU: case GE: case GEU:
9279 ix86_compare_op0 = hi[0];
9280 ix86_compare_op1 = hi[1];
9281 ix86_expand_branch (code, label);
9282 return;
9283 default:
9284 break;
9287 /* Otherwise, we need two or three jumps. */
9289 label2 = gen_label_rtx ();
9291 code1 = code;
9292 code2 = swap_condition (code);
9293 code3 = unsigned_condition (code);
9295 switch (code)
9297 case LT: case GT: case LTU: case GTU:
9298 break;
9300 case LE: code1 = LT; code2 = GT; break;
9301 case GE: code1 = GT; code2 = LT; break;
9302 case LEU: code1 = LTU; code2 = GTU; break;
9303 case GEU: code1 = GTU; code2 = LTU; break;
9305 case EQ: code1 = UNKNOWN; code2 = NE; break;
9306 case NE: code2 = UNKNOWN; break;
9308 default:
9309 gcc_unreachable ();
9313 * a < b =>
9314 * if (hi(a) < hi(b)) goto true;
9315 * if (hi(a) > hi(b)) goto false;
9316 * if (lo(a) < lo(b)) goto true;
9317 * false:
9320 ix86_compare_op0 = hi[0];
9321 ix86_compare_op1 = hi[1];
9323 if (code1 != UNKNOWN)
9324 ix86_expand_branch (code1, label);
9325 if (code2 != UNKNOWN)
9326 ix86_expand_branch (code2, label2);
9328 ix86_compare_op0 = lo[0];
9329 ix86_compare_op1 = lo[1];
9330 ix86_expand_branch (code3, label);
9332 if (code2 != UNKNOWN)
9333 emit_label (label2);
9334 return;
9337 default:
9338 gcc_unreachable ();
9342 /* Split branch based on floating point condition. */
9343 void
9344 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9345 rtx target1, rtx target2, rtx tmp, rtx pushed)
9347 rtx second, bypass;
9348 rtx label = NULL_RTX;
9349 rtx condition;
9350 int bypass_probability = -1, second_probability = -1, probability = -1;
9351 rtx i;
9353 if (target2 != pc_rtx)
9355 rtx tmp = target2;
9356 code = reverse_condition_maybe_unordered (code);
9357 target2 = target1;
9358 target1 = tmp;
9361 condition = ix86_expand_fp_compare (code, op1, op2,
9362 tmp, &second, &bypass);
9364 /* Remove pushed operand from stack. */
9365 if (pushed)
9366 ix86_free_from_memory (GET_MODE (pushed));
9368 if (split_branch_probability >= 0)
9370 /* Distribute the probabilities across the jumps.
9371 Assume that the BYPASS and SECOND tests are always
9372 for UNORDERED. */
9373 probability = split_branch_probability;
9375 /* A value of 1 is low enough that the probability need not
9376 be updated. Later we may run some experiments and see
9377 if unordered values are more frequent in practice. */
9378 if (bypass)
9379 bypass_probability = 1;
9380 if (second)
9381 second_probability = 1;
9383 if (bypass != NULL_RTX)
9385 label = gen_label_rtx ();
9386 i = emit_jump_insn (gen_rtx_SET
9387 (VOIDmode, pc_rtx,
9388 gen_rtx_IF_THEN_ELSE (VOIDmode,
9389 bypass,
9390 gen_rtx_LABEL_REF (VOIDmode,
9391 label),
9392 pc_rtx)));
9393 if (bypass_probability >= 0)
9394 REG_NOTES (i)
9395 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9396 GEN_INT (bypass_probability),
9397 REG_NOTES (i));
9399 i = emit_jump_insn (gen_rtx_SET
9400 (VOIDmode, pc_rtx,
9401 gen_rtx_IF_THEN_ELSE (VOIDmode,
9402 condition, target1, target2)));
9403 if (probability >= 0)
9404 REG_NOTES (i)
9405 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9406 GEN_INT (probability),
9407 REG_NOTES (i));
9408 if (second != NULL_RTX)
9410 i = emit_jump_insn (gen_rtx_SET
9411 (VOIDmode, pc_rtx,
9412 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9413 target2)));
9414 if (second_probability >= 0)
9415 REG_NOTES (i)
9416 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9417 GEN_INT (second_probability),
9418 REG_NOTES (i));
9420 if (label != NULL_RTX)
9421 emit_label (label);
9424 int
9425 ix86_expand_setcc (enum rtx_code code, rtx dest)
9427 rtx ret, tmp, tmpreg, equiv;
9428 rtx second_test, bypass_test;
9430 if (GET_MODE (ix86_compare_op0) == DImode
9431 && !TARGET_64BIT)
9432 return 0; /* FAIL */
9434 gcc_assert (GET_MODE (dest) == QImode);
9436 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9437 PUT_MODE (ret, QImode);
9439 tmp = dest;
9440 tmpreg = dest;
9442 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
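/* If the FP compare produced a BYPASS or SECOND test, fold it into the
   result: the reversed bypass condition is ANDed in, while an additional
   second condition is ORed in.  */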
9443 if (bypass_test || second_test)
9445 rtx test = second_test;
9446 int bypass = 0;
9447 rtx tmp2 = gen_reg_rtx (QImode);
9448 if (bypass_test)
9450 gcc_assert (!second_test);
9451 test = bypass_test;
9452 bypass = 1;
9453 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9455 PUT_MODE (test, QImode);
9456 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9458 if (bypass)
9459 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9460 else
9461 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9464 /* Attach a REG_EQUAL note describing the comparison result. */
9465 if (ix86_compare_op0 && ix86_compare_op1)
9467 equiv = simplify_gen_relational (code, QImode,
9468 GET_MODE (ix86_compare_op0),
9469 ix86_compare_op0, ix86_compare_op1);
9470 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9473 return 1; /* DONE */
9476 /* Expand comparison setting or clearing carry flag. Return true when
9477 successful and set pop for the operation. */
9478 static bool
9479 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9481 enum machine_mode mode =
9482 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9484 /* Do not handle DImode compares that go through the special path. Also we can't
9485 deal with FP compares yet; this would be possible to add. */
9486 if ((mode == DImode && !TARGET_64BIT))
9487 return false;
9488 if (FLOAT_MODE_P (mode))
9490 rtx second_test = NULL, bypass_test = NULL;
9491 rtx compare_op, compare_seq;
9493 /* Shortcut: the following common codes never translate into carry flag compares. */
9494 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9495 || code == ORDERED || code == UNORDERED)
9496 return false;
9498 /* These comparisons require zero flag; swap operands so they won't. */
9499 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9500 && !TARGET_IEEE_FP)
9502 rtx tmp = op0;
9503 op0 = op1;
9504 op1 = tmp;
9505 code = swap_condition (code);
9508 /* Try to expand the comparison and verify that we end up with carry flag
9509 based comparison. This fails to be true only when we decide to expand the
9510 comparison using arithmetic, which is not a common scenario. */
9511 start_sequence ();
9512 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9513 &second_test, &bypass_test);
9514 compare_seq = get_insns ();
9515 end_sequence ();
9517 if (second_test || bypass_test)
9518 return false;
9519 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9520 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9521 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9522 else
9523 code = GET_CODE (compare_op);
9524 if (code != LTU && code != GEU)
9525 return false;
9526 emit_insn (compare_seq);
9527 *pop = compare_op;
9528 return true;
9530 if (!INTEGRAL_MODE_P (mode))
9531 return false;
9532 switch (code)
9534 case LTU:
9535 case GEU:
9536 break;
9538 /* Convert a==0 into (unsigned)a<1. */
9539 case EQ:
9540 case NE:
9541 if (op1 != const0_rtx)
9542 return false;
9543 op1 = const1_rtx;
9544 code = (code == EQ ? LTU : GEU);
9545 break;
9547 /* Convert a>b into b<a or a>=b-1. */
9548 case GTU:
9549 case LEU:
9550 if (GET_CODE (op1) == CONST_INT)
9552 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9553 /* Bail out on overflow. We still can swap operands but that
9554 would force loading of the constant into register. */
9555 if (op1 == const0_rtx
9556 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9557 return false;
9558 code = (code == GTU ? GEU : LTU);
9560 else
9562 rtx tmp = op1;
9563 op1 = op0;
9564 op0 = tmp;
9565 code = (code == GTU ? LTU : GEU);
9567 break;
9569 /* Convert a>=0 into (unsigned)a<0x80000000. */
9570 case LT:
9571 case GE:
9572 if (mode == DImode || op1 != const0_rtx)
9573 return false;
9574 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9575 code = (code == LT ? GEU : LTU);
9576 break;
9577 case LE:
9578 case GT:
9579 if (mode == DImode || op1 != constm1_rtx)
9580 return false;
9581 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9582 code = (code == LE ? GEU : LTU);
9583 break;
9585 default:
9586 return false;
9588 /* Swapping operands may cause constant to appear as first operand. */
9589 if (!nonimmediate_operand (op0, VOIDmode))
9591 if (no_new_pseudos)
9592 return false;
9593 op0 = force_reg (mode, op0);
9595 ix86_compare_op0 = op0;
9596 ix86_compare_op1 = op1;
9597 *pop = ix86_expand_compare (code, NULL, NULL);
9598 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
9599 return true;
9602 int
9603 ix86_expand_int_movcc (rtx operands[])
9605 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9606 rtx compare_seq, compare_op;
9607 rtx second_test, bypass_test;
9608 enum machine_mode mode = GET_MODE (operands[0]);
9609 bool sign_bit_compare_p = false;
9611 start_sequence ();
9612 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9613 compare_seq = get_insns ();
9614 end_sequence ();
9616 compare_code = GET_CODE (compare_op);
9618 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9619 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9620 sign_bit_compare_p = true;
9622 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9623 HImode insns, we'd be swallowed in word prefix ops. */
9625 if ((mode != HImode || TARGET_FAST_PREFIX)
9626 && (mode != DImode || TARGET_64BIT)
9627 && GET_CODE (operands[2]) == CONST_INT
9628 && GET_CODE (operands[3]) == CONST_INT)
9630 rtx out = operands[0];
9631 HOST_WIDE_INT ct = INTVAL (operands[2]);
9632 HOST_WIDE_INT cf = INTVAL (operands[3]);
9633 HOST_WIDE_INT diff;
9635 diff = ct - cf;
9636 /* Sign bit compares are better done using shifts than by using
9637 sbb. */
9638 if (sign_bit_compare_p
9639 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9640 ix86_compare_op1, &compare_op))
9642 /* Detect overlap between destination and compare sources. */
9643 rtx tmp = out;
9645 if (!sign_bit_compare_p)
9647 bool fpcmp = false;
9649 compare_code = GET_CODE (compare_op);
9651 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9652 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9654 fpcmp = true;
9655 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9658 /* To simplify rest of code, restrict to the GEU case. */
9659 if (compare_code == LTU)
9661 HOST_WIDE_INT tmp = ct;
9662 ct = cf;
9663 cf = tmp;
9664 compare_code = reverse_condition (compare_code);
9665 code = reverse_condition (code);
9667 else
9669 if (fpcmp)
9670 PUT_CODE (compare_op,
9671 reverse_condition_maybe_unordered
9672 (GET_CODE (compare_op)));
9673 else
9674 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9676 diff = ct - cf;
9678 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9679 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9680 tmp = gen_reg_rtx (mode);
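/* The x86_mov*cc_0_m1 patterns emit the sbb reg,reg idiom sketched in the
   size comments below: all ones when the carry flag is set, zero otherwise.  */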
9682 if (mode == DImode)
9683 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9684 else
9685 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9687 else
9689 if (code == GT || code == GE)
9690 code = reverse_condition (code);
9691 else
9693 HOST_WIDE_INT tmp = ct;
9694 ct = cf;
9695 cf = tmp;
9696 diff = ct - cf;
9698 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9699 ix86_compare_op1, VOIDmode, 0, -1);
9702 if (diff == 1)
9705 * cmpl op0,op1
9706 * sbbl dest,dest
9707 * [addl dest, ct]
9709 * Size 5 - 8.
9711 if (ct)
9712 tmp = expand_simple_binop (mode, PLUS,
9713 tmp, GEN_INT (ct),
9714 copy_rtx (tmp), 1, OPTAB_DIRECT);
9716 else if (cf == -1)
9719 * cmpl op0,op1
9720 * sbbl dest,dest
9721 * orl $ct, dest
9723 * Size 8.
9725 tmp = expand_simple_binop (mode, IOR,
9726 tmp, GEN_INT (ct),
9727 copy_rtx (tmp), 1, OPTAB_DIRECT);
9729 else if (diff == -1 && ct)
9732 * cmpl op0,op1
9733 * sbbl dest,dest
9734 * notl dest
9735 * [addl dest, cf]
9737 * Size 8 - 11.
9739 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9740 if (cf)
9741 tmp = expand_simple_binop (mode, PLUS,
9742 copy_rtx (tmp), GEN_INT (cf),
9743 copy_rtx (tmp), 1, OPTAB_DIRECT);
9745 else
9748 * cmpl op0,op1
9749 * sbbl dest,dest
9750 * [notl dest]
9751 * andl cf - ct, dest
9752 * [addl dest, ct]
9754 * Size 8 - 11.
9757 if (cf == 0)
9759 cf = ct;
9760 ct = 0;
9761 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9764 tmp = expand_simple_binop (mode, AND,
9765 copy_rtx (tmp),
9766 gen_int_mode (cf - ct, mode),
9767 copy_rtx (tmp), 1, OPTAB_DIRECT);
9768 if (ct)
9769 tmp = expand_simple_binop (mode, PLUS,
9770 copy_rtx (tmp), GEN_INT (ct),
9771 copy_rtx (tmp), 1, OPTAB_DIRECT);
9774 if (!rtx_equal_p (tmp, out))
9775 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9777 return 1; /* DONE */
9780 if (diff < 0)
9782 HOST_WIDE_INT tmp;
9783 tmp = ct, ct = cf, cf = tmp;
9784 diff = -diff;
9785 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9787 /* We may be reversing unordered compare to normal compare, that
9788 is not valid in general (we may convert non-trapping condition
9789 to trapping one), however on i386 we currently emit all
9790 comparisons unordered. */
9791 compare_code = reverse_condition_maybe_unordered (compare_code);
9792 code = reverse_condition_maybe_unordered (code);
9794 else
9796 compare_code = reverse_condition (compare_code);
9797 code = reverse_condition (code);
9801 compare_code = UNKNOWN;
9802 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9803 && GET_CODE (ix86_compare_op1) == CONST_INT)
9805 if (ix86_compare_op1 == const0_rtx
9806 && (code == LT || code == GE))
9807 compare_code = code;
9808 else if (ix86_compare_op1 == constm1_rtx)
9810 if (code == LE)
9811 compare_code = LT;
9812 else if (code == GT)
9813 compare_code = GE;
9817 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9818 if (compare_code != UNKNOWN
9819 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9820 && (cf == -1 || ct == -1))
9822 /* If lea code below could be used, only optimize
9823 if it results in a 2 insn sequence. */
9825 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9826 || diff == 3 || diff == 5 || diff == 9)
9827 || (compare_code == LT && ct == -1)
9828 || (compare_code == GE && cf == -1))
9831 * notl op1 (if necessary)
9832 * sarl $31, op1
9833 * orl cf, op1
9835 if (ct != -1)
9837 cf = ct;
9838 ct = -1;
9839 code = reverse_condition (code);
9842 out = emit_store_flag (out, code, ix86_compare_op0,
9843 ix86_compare_op1, VOIDmode, 0, -1);
9845 out = expand_simple_binop (mode, IOR,
9846 out, GEN_INT (cf),
9847 out, 1, OPTAB_DIRECT);
9848 if (out != operands[0])
9849 emit_move_insn (operands[0], out);
9851 return 1; /* DONE */
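/* In the sequence above, sarl $31 replicates the sign bit of op1, producing
   0 for non-negative values and -1 for negative ones; the following orl
   with cf therefore yields cf or -1, exactly the two arms of
   (op0 < 0) ? -1 : cf.  */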
9856 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9857 || diff == 3 || diff == 5 || diff == 9)
9858 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9859 && (mode != DImode
9860 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
9863 * xorl dest,dest
9864 * cmpl op1,op2
9865 * setcc dest
9866 * lea cf(dest*(ct-cf)),dest
9868 * Size 14.
9870 * This also catches the degenerate setcc-only case.
9873 rtx tmp;
9874 int nops;
9876 out = emit_store_flag (out, code, ix86_compare_op0,
9877 ix86_compare_op1, VOIDmode, 0, 1);
9879 nops = 0;
9880 /* On x86_64 the lea instruction operates on Pmode, so we need
9881 to get the arithmetic done in the proper mode to match. */

9882 if (diff == 1)
9883 tmp = copy_rtx (out);
9884 else
9886 rtx out1;
9887 out1 = copy_rtx (out);
9888 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9889 nops++;
9890 if (diff & 1)
9892 tmp = gen_rtx_PLUS (mode, tmp, out1);
9893 nops++;
9896 if (cf != 0)
9898 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9899 nops++;
9901 if (!rtx_equal_p (tmp, out))
9903 if (nops == 1)
9904 out = force_operand (tmp, copy_rtx (out));
9905 else
9906 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9908 if (!rtx_equal_p (out, operands[0]))
9909 emit_move_insn (operands[0], copy_rtx (out));
9911 return 1; /* DONE */
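/* Worked instance of the lea form above: for ct = 13, cf = 4 we have
   diff = 9, and the lea computes 4 + dest * 9 (encodable as base dest,
   index dest, scale 8, displacement 4), i.e. 4 when setcc stored 0 and
   13 when it stored 1.  */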
9915 * General case: Jumpful:
9916 * xorl dest,dest cmpl op1, op2
9917 * cmpl op1, op2 movl ct, dest
9918 * setcc dest jcc 1f
9919 * decl dest movl cf, dest
9920 * andl (cf-ct),dest 1:
9921 * addl ct,dest
9923 * Size 20. Size 14.
9925 * This is reasonably steep, but branch mispredict costs are
9926 * high on modern cpus, so consider failing only if optimizing
9927 * for space.
9930 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9931 && BRANCH_COST >= 2)
9933 if (cf == 0)
9935 cf = ct;
9936 ct = 0;
9937 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9938 /* We may be reversing unordered compare to normal compare,
9939 that is not valid in general (we may convert non-trapping
9940 condition to trapping one), however on i386 we currently
9941 emit all comparisons unordered. */
9942 code = reverse_condition_maybe_unordered (code);
9943 else
9945 code = reverse_condition (code);
9946 if (compare_code != UNKNOWN)
9947 compare_code = reverse_condition (compare_code);
9951 if (compare_code != UNKNOWN)
9953 /* notl op1 (if needed)
9954 sarl $31, op1
9955 andl (cf-ct), op1
9956 addl ct, op1
9958 For x < 0 (resp. x <= -1) there will be no notl,
9959 so if possible swap the constants to get rid of the
9960 complement.
9961 True/false will be -1/0 while code below (store flag
9962 followed by decrement) is 0/-1, so the constants need
9963 to be exchanged once more. */
9965 if (compare_code == GE || !cf)
9967 code = reverse_condition (code);
9968 compare_code = LT;
9970 else
9972 HOST_WIDE_INT tmp = cf;
9973 cf = ct;
9974 ct = tmp;
9977 out = emit_store_flag (out, code, ix86_compare_op0,
9978 ix86_compare_op1, VOIDmode, 0, -1);
9980 else
9982 out = emit_store_flag (out, code, ix86_compare_op0,
9983 ix86_compare_op1, VOIDmode, 0, 1);
9985 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9986 copy_rtx (out), 1, OPTAB_DIRECT);
9989 out = expand_simple_binop (mode, AND, copy_rtx (out),
9990 gen_int_mode (cf - ct, mode),
9991 copy_rtx (out), 1, OPTAB_DIRECT);
9992 if (ct)
9993 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9994 copy_rtx (out), 1, OPTAB_DIRECT);
9995 if (!rtx_equal_p (out, operands[0]))
9996 emit_move_insn (operands[0], copy_rtx (out));
9998 return 1; /* DONE */
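/* Worked instance of the sequence above: with ct = 10, cf = 3 the
   setcc/decl pair leaves -1 or 0 in dest; andl with cf - ct = -7 gives
   -7 or 0, and the final addl of 10 gives 3 or 10.  */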
10002 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10004 /* Try a few things more with specific constants and a variable. */
10006 optab op;
10007 rtx var, orig_out, out, tmp;
10009 if (BRANCH_COST <= 2)
10010 return 0; /* FAIL */
10012 /* If one of the two operands is an interesting constant, load a
10013 constant with the above and mask it in with a logical operation. */
10015 if (GET_CODE (operands[2]) == CONST_INT)
10017 var = operands[3];
10018 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10019 operands[3] = constm1_rtx, op = and_optab;
10020 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10021 operands[3] = const0_rtx, op = ior_optab;
10022 else
10023 return 0; /* FAIL */
10025 else if (GET_CODE (operands[3]) == CONST_INT)
10027 var = operands[2];
10028 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10029 operands[2] = constm1_rtx, op = and_optab;
10030 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
10031 operands[2] = const0_rtx, op = ior_optab;
10032 else
10033 return 0; /* FAIL */
10035 else
10036 return 0; /* FAIL */
10038 orig_out = operands[0];
10039 tmp = gen_reg_rtx (mode);
10040 operands[0] = tmp;
10042 /* Recurse to get the constant loaded. */
10043 if (ix86_expand_int_movcc (operands) == 0)
10044 return 0; /* FAIL */
10046 /* Mask in the interesting variable. */
10047 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10048 OPTAB_WIDEN);
10049 if (!rtx_equal_p (out, orig_out))
10050 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10052 return 1; /* DONE */
10056 * For comparison with above,
10058 * movl cf,dest
10059 * movl ct,tmp
10060 * cmpl op1,op2
10061 * cmovcc tmp,dest
10063 * Size 15.
10066 if (! nonimmediate_operand (operands[2], mode))
10067 operands[2] = force_reg (mode, operands[2]);
10068 if (! nonimmediate_operand (operands[3], mode))
10069 operands[3] = force_reg (mode, operands[3]);
10071 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10073 rtx tmp = gen_reg_rtx (mode);
10074 emit_move_insn (tmp, operands[3]);
10075 operands[3] = tmp;
10077 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10079 rtx tmp = gen_reg_rtx (mode);
10080 emit_move_insn (tmp, operands[2]);
10081 operands[2] = tmp;
10084 if (! register_operand (operands[2], VOIDmode)
10085 && (mode == QImode
10086 || ! register_operand (operands[3], VOIDmode)))
10087 operands[2] = force_reg (mode, operands[2]);
10089 if (mode == QImode
10090 && ! register_operand (operands[3], VOIDmode))
10091 operands[3] = force_reg (mode, operands[3]);
10093 emit_insn (compare_seq);
10094 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10095 gen_rtx_IF_THEN_ELSE (mode,
10096 compare_op, operands[2],
10097 operands[3])));
10098 if (bypass_test)
10099 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10100 gen_rtx_IF_THEN_ELSE (mode,
10101 bypass_test,
10102 copy_rtx (operands[3]),
10103 copy_rtx (operands[0]))));
10104 if (second_test)
10105 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10106 gen_rtx_IF_THEN_ELSE (mode,
10107 second_test,
10108 copy_rtx (operands[2]),
10109 copy_rtx (operands[0]))));
10111 return 1; /* DONE */
10114 /* Swap, force into registers, or otherwise massage the two operands
10115 to an sse comparison with a mask result. Thus we differ a bit from
10116 ix86_prepare_fp_compare_args which expects to produce a flags result.
10118 The DEST operand exists to help determine whether to commute commutative
10119 operators. The POP0/POP1 operands are updated in place. The new
10120 comparison code is returned, or UNKNOWN if not implementable. */
10122 static enum rtx_code
10123 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
10124 rtx *pop0, rtx *pop1)
10126 rtx tmp;
10128 switch (code)
10130 case LTGT:
10131 case UNEQ:
10132 /* We have no LTGT as an operator. We could implement it with
10133 NE & ORDERED, but this requires an extra temporary. It's
10134 not clear that it's worth it. */
10135 return UNKNOWN;
10137 case LT:
10138 case LE:
10139 case UNGT:
10140 case UNGE:
10141 /* These are supported directly. */
10142 break;
10144 case EQ:
10145 case NE:
10146 case UNORDERED:
10147 case ORDERED:
10148 /* For commutative operators, try to canonicalize the destination
10149 operand to be first in the comparison - this helps reload to
10150 avoid extra moves. */
10151 if (!dest || !rtx_equal_p (dest, *pop1))
10152 break;
10153 /* FALLTHRU */
10155 case GE:
10156 case GT:
10157 case UNLE:
10158 case UNLT:
10159 /* These are not supported directly. Swap the comparison operands
10160 to transform into something that is supported. */
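/* E.g. a GT b is rewritten here as b LT a, the form the SSE compare
   instructions (cmpltps/cmpltsd and friends) support directly.  */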
10161 tmp = *pop0;
10162 *pop0 = *pop1;
10163 *pop1 = tmp;
10164 code = swap_condition (code);
10165 break;
10167 default:
10168 gcc_unreachable ();
10171 return code;
10174 /* Detect conditional moves that exactly match min/max operational
10175 semantics. Note that this is IEEE safe, as long as we don't
10176 interchange the operands.
10178 Returns FALSE if this conditional move doesn't match a MIN/MAX,
10179 and TRUE if the operation is successful and instructions are emitted. */
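/* E.g. an LT compare whose arms are exactly the compared operands,
   x < y ? x : y, is recognized here as a MIN.  Unless both
   -ffinite-math-only and -funsafe-math-optimizations are in effect, the
   UNSPEC form below is used so later passes cannot rearrange the
   operand-order-dependent NaN/zero behaviour of the hardware min/max.  */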
10181 static bool
10182 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
10183 rtx cmp_op1, rtx if_true, rtx if_false)
10185 enum machine_mode mode;
10186 bool is_min;
10187 rtx tmp;
10189 if (code == LT)
10191 else if (code == UNGE)
10193 tmp = if_true;
10194 if_true = if_false;
10195 if_false = tmp;
10197 else
10198 return false;
10200 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
10201 is_min = true;
10202 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
10203 is_min = false;
10204 else
10205 return false;
10207 mode = GET_MODE (dest);
10209 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
10210 but MODE may be a vector mode and thus not appropriate. */
10211 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
10213 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
10214 rtvec v;
10216 if_true = force_reg (mode, if_true);
10217 v = gen_rtvec (2, if_true, if_false);
10218 tmp = gen_rtx_UNSPEC (mode, v, u);
10220 else
10222 code = is_min ? SMIN : SMAX;
10223 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
10226 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
10227 return true;
10230 /* Expand an sse vector comparison. Return the register with the result. */
10232 static rtx
10233 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
10234 rtx op_true, rtx op_false)
10236 enum machine_mode mode = GET_MODE (dest);
10237 rtx x;
10239 cmp_op0 = force_reg (mode, cmp_op0);
10240 if (!nonimmediate_operand (cmp_op1, mode))
10241 cmp_op1 = force_reg (mode, cmp_op1);
10243 if (optimize
10244 || reg_overlap_mentioned_p (dest, op_true)
10245 || reg_overlap_mentioned_p (dest, op_false))
10246 dest = gen_reg_rtx (mode);
10248 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
10249 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10251 return dest;
10254 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
10255 operations. This is used for both scalar and vector conditional moves. */
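/* The general form below computes dest = (cmp & op_true) | (~cmp & op_false),
   relying on the comparison result being an all-zeros / all-ones mask per
   element; the two special cases skip the andnot/ior when one arm is
   already zero.  */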
10257 static void
10258 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
10260 enum machine_mode mode = GET_MODE (dest);
10261 rtx t2, t3, x;
10263 if (op_false == CONST0_RTX (mode))
10265 op_true = force_reg (mode, op_true);
10266 x = gen_rtx_AND (mode, cmp, op_true);
10267 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10269 else if (op_true == CONST0_RTX (mode))
10271 op_false = force_reg (mode, op_false);
10272 x = gen_rtx_NOT (mode, cmp);
10273 x = gen_rtx_AND (mode, x, op_false);
10274 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10276 else
10278 op_true = force_reg (mode, op_true);
10279 op_false = force_reg (mode, op_false);
10281 t2 = gen_reg_rtx (mode);
10282 if (optimize)
10283 t3 = gen_reg_rtx (mode);
10284 else
10285 t3 = dest;
10287 x = gen_rtx_AND (mode, op_true, cmp);
10288 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
10290 x = gen_rtx_NOT (mode, cmp);
10291 x = gen_rtx_AND (mode, x, op_false);
10292 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
10294 x = gen_rtx_IOR (mode, t3, t2);
10295 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10299 /* Expand a floating-point conditional move. Return true if successful. */
10302 ix86_expand_fp_movcc (rtx operands[])
10304 enum machine_mode mode = GET_MODE (operands[0]);
10305 enum rtx_code code = GET_CODE (operands[1]);
10306 rtx tmp, compare_op, second_test, bypass_test;
10308 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
10310 enum machine_mode cmode;
10312 /* Since we've no cmove for sse registers, don't force bad register
10313 allocation just to gain access to it. Deny movcc when the
10314 comparison mode doesn't match the move mode. */
10315 cmode = GET_MODE (ix86_compare_op0);
10316 if (cmode == VOIDmode)
10317 cmode = GET_MODE (ix86_compare_op1);
10318 if (cmode != mode)
10319 return 0;
10321 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
10322 &ix86_compare_op0,
10323 &ix86_compare_op1);
10324 if (code == UNKNOWN)
10325 return 0;
10327 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
10328 ix86_compare_op1, operands[2],
10329 operands[3]))
10330 return 1;
10332 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
10333 ix86_compare_op1, operands[2], operands[3]);
10334 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
10335 return 1;
10338 /* The floating point conditional move instructions don't directly
10339 support conditions resulting from a signed integer comparison. */
10341 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10346 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10348 gcc_assert (!second_test && !bypass_test);
10349 tmp = gen_reg_rtx (QImode);
10350 ix86_expand_setcc (code, tmp);
10351 code = NE;
10352 ix86_compare_op0 = tmp;
10353 ix86_compare_op1 = const0_rtx;
10354 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10356 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10358 tmp = gen_reg_rtx (mode);
10359 emit_move_insn (tmp, operands[3]);
10360 operands[3] = tmp;
10362 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10364 tmp = gen_reg_rtx (mode);
10365 emit_move_insn (tmp, operands[2]);
10366 operands[2] = tmp;
10369 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10370 gen_rtx_IF_THEN_ELSE (mode, compare_op,
10371 operands[2], operands[3])));
10372 if (bypass_test)
10373 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10374 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
10375 operands[3], operands[0])));
10376 if (second_test)
10377 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10378 gen_rtx_IF_THEN_ELSE (mode, second_test,
10379 operands[2], operands[0])));
10381 return 1;
10384 /* Expand a floating-point vector conditional move; a vcond operation
10385 rather than a movcc operation. */
10387 bool
10388 ix86_expand_fp_vcond (rtx operands[])
10390 enum rtx_code code = GET_CODE (operands[3]);
10391 rtx cmp;
10393 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
10394 &operands[4], &operands[5]);
10395 if (code == UNKNOWN)
10396 return false;
10398 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
10399 operands[5], operands[1], operands[2]))
10400 return true;
10402 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
10403 operands[1], operands[2]);
10404 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
10405 return true;
10408 /* Expand a signed integral vector conditional move. */
10410 bool
10411 ix86_expand_int_vcond (rtx operands[], bool unsignedp)
10413 enum machine_mode mode = GET_MODE (operands[0]);
10414 enum rtx_code code = GET_CODE (operands[3]);
10415 rtx cmp, x;
10417 if (unsignedp)
10418 code = signed_condition (code);
10419 if (code == NE || code == LE || code == GE)
10421 /* Inverse of a supported code. */
10422 x = operands[1];
10423 operands[1] = operands[2];
10424 operands[2] = x;
10425 code = reverse_condition (code);
10427 if (code == LT)
10429 /* Swap of a supported code. */
10430 x = operands[4];
10431 operands[4] = operands[5];
10432 operands[5] = x;
10433 code = swap_condition (code);
10435 gcc_assert (code == EQ || code == GT);
10437 /* Unlike floating-point, we can rely on the optimizers to have already
10438 converted to MIN/MAX expressions, so we don't have to handle that. */
10440 /* Unsigned GT is not directly supported. We can zero-extend QI and
10441 HImode elements to the next wider element size, use a signed compare,
10442 then repack. For three extra instructions, this is definitely a win. */
10443 if (code == GT && unsignedp)
10445 rtx o0l, o0h, o1l, o1h, cl, ch, zero;
10446 enum machine_mode wider;
10447 rtx (*unpackl) (rtx, rtx, rtx);
10448 rtx (*unpackh) (rtx, rtx, rtx);
10449 rtx (*pack) (rtx, rtx, rtx);
10451 switch (mode)
10453 case V16QImode:
10454 wider = V8HImode;
10455 unpackl = gen_sse2_punpcklbw;
10456 unpackh = gen_sse2_punpckhbw;
10457 pack = gen_sse2_packsswb;
10458 break;
10459 case V8HImode:
10460 wider = V4SImode;
10461 unpackl = gen_sse2_punpcklwd;
10462 unpackh = gen_sse2_punpckhwd;
10463 pack = gen_sse2_packssdw;
10464 break;
10465 default:
10466 gcc_unreachable ();
10469 operands[4] = force_reg (mode, operands[4]);
10470 operands[5] = force_reg (mode, operands[5]);
10472 o0l = gen_reg_rtx (wider);
10473 o0h = gen_reg_rtx (wider);
10474 o1l = gen_reg_rtx (wider);
10475 o1h = gen_reg_rtx (wider);
10476 cl = gen_reg_rtx (wider);
10477 ch = gen_reg_rtx (wider);
10478 cmp = gen_reg_rtx (mode);
10479 zero = force_reg (mode, CONST0_RTX (mode));
10481 emit_insn (unpackl (gen_lowpart (mode, o0l), operands[4], zero));
10482 emit_insn (unpackh (gen_lowpart (mode, o0h), operands[4], zero));
10483 emit_insn (unpackl (gen_lowpart (mode, o1l), operands[5], zero));
10484 emit_insn (unpackh (gen_lowpart (mode, o1h), operands[5], zero));
10486 x = gen_rtx_GT (wider, o0l, o1l);
10487 emit_insn (gen_rtx_SET (VOIDmode, cl, x));
10489 x = gen_rtx_GT (wider, o0h, o1h);
10490 emit_insn (gen_rtx_SET (VOIDmode, ch, x));
10492 emit_insn (pack (cmp, cl, ch));
10494 else
10495 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
10496 operands[1], operands[2]);
10498 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
10499 return true;
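/* In the V16QImode case above, for example, each operand is interleaved
   with zero into two V8HImode halves; the zero-extended values are
   non-negative and fit in 15 bits, so the signed GT compare on the halves
   gives the unsigned result, and packsswb folds the 0 / -1 word masks back
   into byte masks unchanged.  */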
10502 /* Expand conditional increment or decrement using adc/sbb instructions.
10503 The default case using setcc followed by the conditional move can be
10504 done by generic code. */
10506 ix86_expand_int_addcc (rtx operands[])
10508 enum rtx_code code = GET_CODE (operands[1]);
10509 rtx compare_op;
10510 rtx val = const0_rtx;
10511 bool fpcmp = false;
10512 enum machine_mode mode = GET_MODE (operands[0]);
10514 if (operands[3] != const1_rtx
10515 && operands[3] != constm1_rtx)
10516 return 0;
10517 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10518 ix86_compare_op1, &compare_op))
10519 return 0;
10520 code = GET_CODE (compare_op);
10522 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10523 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10525 fpcmp = true;
10526 code = ix86_fp_compare_code_to_integer (code);
10529 if (code != LTU)
10531 val = constm1_rtx;
10532 if (fpcmp)
10533 PUT_CODE (compare_op,
10534 reverse_condition_maybe_unordered
10535 (GET_CODE (compare_op)));
10536 else
10537 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10539 PUT_MODE (compare_op, mode);
10541 /* Construct either adc or sbb insn. */
10542 if ((code == LTU) == (operands[3] == constm1_rtx))
10544 switch (GET_MODE (operands[0]))
10546 case QImode:
10547 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10548 break;
10549 case HImode:
10550 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10551 break;
10552 case SImode:
10553 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10554 break;
10555 case DImode:
10556 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10557 break;
10558 default:
10559 gcc_unreachable ();
10562 else
10564 switch (GET_MODE (operands[0]))
10566 case QImode:
10567 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10568 break;
10569 case HImode:
10570 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10571 break;
10572 case SImode:
10573 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10574 break;
10575 case DImode:
10576 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10577 break;
10578 default:
10579 gcc_unreachable ();
10582 return 1; /* DONE */
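/* E.g. the conditional increment x += (unsigned) (a < b) comes out of the
   LTU path above as roughly
	cmpl	b, a
	adcl	$0, x
   i.e. the borrow from the compare is folded straight into the add.  */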
10586 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10587 works for floating point parameters and non-offsettable memories.
10588 For pushes, it returns just stack offsets; the values will be saved
10589 in the right order. Maximally three parts are generated. */
10591 static int
10592 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10594 int size;
10596 if (!TARGET_64BIT)
10597 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10598 else
10599 size = (GET_MODE_SIZE (mode) + 4) / 8;
10601 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
10602 gcc_assert (size >= 2 && size <= 3);
10604 /* Optimize constant pool reference to immediates. This is used by fp
10605 moves, that force all constants to memory to allow combining. */
10606 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
10608 rtx tmp = maybe_get_pool_constant (operand);
10609 if (tmp)
10610 operand = tmp;
10613 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10615 /* The only non-offsetable memories we handle are pushes. */
10616 int ok = push_operand (operand, VOIDmode);
10618 gcc_assert (ok);
10620 operand = copy_rtx (operand);
10621 PUT_MODE (operand, Pmode);
10622 parts[0] = parts[1] = parts[2] = operand;
10623 return size;
10626 if (GET_CODE (operand) == CONST_VECTOR)
10628 enum machine_mode imode = int_mode_for_mode (mode);
10629 /* Caution: if we looked through a constant pool memory above,
10630 the operand may actually have a different mode now. That's
10631 ok, since we want to pun this all the way back to an integer. */
10632 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
10633 gcc_assert (operand != NULL);
10634 mode = imode;
10637 if (!TARGET_64BIT)
10639 if (mode == DImode)
10640 split_di (&operand, 1, &parts[0], &parts[1]);
10641 else
10643 if (REG_P (operand))
10645 gcc_assert (reload_completed);
10646 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10647 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10648 if (size == 3)
10649 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10651 else if (offsettable_memref_p (operand))
10653 operand = adjust_address (operand, SImode, 0);
10654 parts[0] = operand;
10655 parts[1] = adjust_address (operand, SImode, 4);
10656 if (size == 3)
10657 parts[2] = adjust_address (operand, SImode, 8);
10659 else if (GET_CODE (operand) == CONST_DOUBLE)
10661 REAL_VALUE_TYPE r;
10662 long l[4];
10664 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10665 switch (mode)
10667 case XFmode:
10668 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10669 parts[2] = gen_int_mode (l[2], SImode);
10670 break;
10671 case DFmode:
10672 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10673 break;
10674 default:
10675 gcc_unreachable ();
10677 parts[1] = gen_int_mode (l[1], SImode);
10678 parts[0] = gen_int_mode (l[0], SImode);
10680 else
10681 gcc_unreachable ();
10684 else
10686 if (mode == TImode)
10687 split_ti (&operand, 1, &parts[0], &parts[1]);
10688 if (mode == XFmode || mode == TFmode)
10690 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
10691 if (REG_P (operand))
10693 gcc_assert (reload_completed);
10694 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10695 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10697 else if (offsettable_memref_p (operand))
10699 operand = adjust_address (operand, DImode, 0);
10700 parts[0] = operand;
10701 parts[1] = adjust_address (operand, upper_mode, 8);
10703 else if (GET_CODE (operand) == CONST_DOUBLE)
10705 REAL_VALUE_TYPE r;
10706 long l[4];
10708 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10709 real_to_target (l, &r, mode);
10711 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10712 if (HOST_BITS_PER_WIDE_INT >= 64)
10713 parts[0]
10714 = gen_int_mode
10715 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10716 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10717 DImode);
10718 else
10719 parts[0] = immed_double_const (l[0], l[1], DImode);
10721 if (upper_mode == SImode)
10722 parts[1] = gen_int_mode (l[2], SImode);
10723 else if (HOST_BITS_PER_WIDE_INT >= 64)
10724 parts[1]
10725 = gen_int_mode
10726 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10727 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10728 DImode);
10729 else
10730 parts[1] = immed_double_const (l[2], l[3], DImode);
10732 else
10733 gcc_unreachable ();
10737 return size;
10740 /* Emit insns to perform a move or push of DI, DF, and XF values.
10741 All required insns are emitted here. Operands 2-4 contain the
10742 input values in the correct order; operands 5-7 contain the
10743 output values. */
10745 void
10746 ix86_split_long_move (rtx operands[])
10748 rtx part[2][3];
10749 int nparts;
10750 int push = 0;
10751 int collisions = 0;
10752 enum machine_mode mode = GET_MODE (operands[0]);
10754 /* The DFmode expanders may ask us to move a double.
10755 For a 64bit target this is a single move. By hiding the fact
10756 here we simplify the i386.md splitters. */
10757 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10759 /* Optimize constant pool reference to immediates. This is used by
10760 fp moves, that force all constants to memory to allow combining. */
10762 if (GET_CODE (operands[1]) == MEM
10763 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10764 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10765 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10766 if (push_operand (operands[0], VOIDmode))
10768 operands[0] = copy_rtx (operands[0]);
10769 PUT_MODE (operands[0], Pmode);
10771 else
10772 operands[0] = gen_lowpart (DImode, operands[0]);
10773 operands[1] = gen_lowpart (DImode, operands[1]);
10774 emit_move_insn (operands[0], operands[1]);
10775 return;
10778 /* The only non-offsettable memory we handle is push. */
10779 if (push_operand (operands[0], VOIDmode))
10780 push = 1;
10781 else
10782 gcc_assert (GET_CODE (operands[0]) != MEM
10783 || offsettable_memref_p (operands[0]));
10785 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10786 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10788 /* When emitting push, take care for source operands on the stack. */
10789 if (push && GET_CODE (operands[1]) == MEM
10790 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10792 if (nparts == 3)
10793 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10794 XEXP (part[1][2], 0));
10795 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10796 XEXP (part[1][1], 0));
10799 /* We need to do copy in the right order in case an address register
10800 of the source overlaps the destination. */
10801 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10803 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10804 collisions++;
10805 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10806 collisions++;
10807 if (nparts == 3
10808 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10809 collisions++;
10811 /* Collision in the middle part can be handled by reordering. */
10812 if (collisions == 1 && nparts == 3
10813 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10815 rtx tmp;
10816 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10817 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10820 /* If there are more collisions, we can't handle it by reordering.
10821 Do an lea to the last part and use only one colliding move. */
10822 else if (collisions > 1)
10824 rtx base;
10826 collisions = 1;
10828 base = part[0][nparts - 1];
10830 /* Handle the case when the last part isn't valid for lea.
10831 Happens in 64-bit mode storing the 12-byte XFmode. */
10832 if (GET_MODE (base) != Pmode)
10833 base = gen_rtx_REG (Pmode, REGNO (base));
10835 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10836 part[1][0] = replace_equiv_address (part[1][0], base);
10837 part[1][1] = replace_equiv_address (part[1][1],
10838 plus_constant (base, UNITS_PER_WORD));
10839 if (nparts == 3)
10840 part[1][2] = replace_equiv_address (part[1][2],
10841 plus_constant (base, 8));
10845 if (push)
10847 if (!TARGET_64BIT)
10849 if (nparts == 3)
10851 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10852 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10853 emit_move_insn (part[0][2], part[1][2]);
10856 else
10858 /* In 64bit mode we don't have a 32bit push available. In case this is
10859 a register, that is OK - we will just use the larger counterpart. We
10860 also retype memory - these come from an attempt to avoid the REX
10861 prefix on moving the second half of a TFmode value. */
10862 if (GET_MODE (part[1][1]) == SImode)
10864 switch (GET_CODE (part[1][1]))
10866 case MEM:
10867 part[1][1] = adjust_address (part[1][1], DImode, 0);
10868 break;
10870 case REG:
10871 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10872 break;
10874 default:
10875 gcc_unreachable ();
10878 if (GET_MODE (part[1][0]) == SImode)
10879 part[1][0] = part[1][1];
10882 emit_move_insn (part[0][1], part[1][1]);
10883 emit_move_insn (part[0][0], part[1][0]);
10884 return;
10887 /* Choose correct order to not overwrite the source before it is copied. */
10888 if ((REG_P (part[0][0])
10889 && REG_P (part[1][1])
10890 && (REGNO (part[0][0]) == REGNO (part[1][1])
10891 || (nparts == 3
10892 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10893 || (collisions > 0
10894 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10896 if (nparts == 3)
10898 operands[2] = part[0][2];
10899 operands[3] = part[0][1];
10900 operands[4] = part[0][0];
10901 operands[5] = part[1][2];
10902 operands[6] = part[1][1];
10903 operands[7] = part[1][0];
10905 else
10907 operands[2] = part[0][1];
10908 operands[3] = part[0][0];
10909 operands[5] = part[1][1];
10910 operands[6] = part[1][0];
10913 else
10915 if (nparts == 3)
10917 operands[2] = part[0][0];
10918 operands[3] = part[0][1];
10919 operands[4] = part[0][2];
10920 operands[5] = part[1][0];
10921 operands[6] = part[1][1];
10922 operands[7] = part[1][2];
10924 else
10926 operands[2] = part[0][0];
10927 operands[3] = part[0][1];
10928 operands[5] = part[1][0];
10929 operands[6] = part[1][1];
10933 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
10934 if (optimize_size)
10936 if (GET_CODE (operands[5]) == CONST_INT
10937 && operands[5] != const0_rtx
10938 && REG_P (operands[2]))
10940 if (GET_CODE (operands[6]) == CONST_INT
10941 && INTVAL (operands[6]) == INTVAL (operands[5]))
10942 operands[6] = operands[2];
10944 if (nparts == 3
10945 && GET_CODE (operands[7]) == CONST_INT
10946 && INTVAL (operands[7]) == INTVAL (operands[5]))
10947 operands[7] = operands[2];
10950 if (nparts == 3
10951 && GET_CODE (operands[6]) == CONST_INT
10952 && operands[6] != const0_rtx
10953 && REG_P (operands[3])
10954 && GET_CODE (operands[7]) == CONST_INT
10955 && INTVAL (operands[7]) == INTVAL (operands[6]))
10956 operands[7] = operands[3];
10959 emit_move_insn (operands[2], operands[5]);
10960 emit_move_insn (operands[3], operands[6]);
10961 if (nparts == 3)
10962 emit_move_insn (operands[4], operands[7]);
10964 return;
10967 /* Helper function of ix86_split_ashldi used to generate an SImode
10968 left shift by a constant, either using a single shift or
10969 a sequence of add instructions. */
10971 static void
10972 ix86_expand_ashlsi3_const (rtx operand, int count)
10974 if (count == 1)
10975 emit_insn (gen_addsi3 (operand, operand, operand));
10976 else if (!optimize_size
10977 && count * ix86_cost->add <= ix86_cost->shift_const)
10979 int i;
10980 for (i=0; i<count; i++)
10981 emit_insn (gen_addsi3 (operand, operand, operand));
10983 else
10984 emit_insn (gen_ashlsi3 (operand, operand, GEN_INT (count)));
10987 void
10988 ix86_split_ashldi (rtx *operands, rtx scratch)
10990 rtx low[2], high[2];
10991 int count;
10993 if (GET_CODE (operands[2]) == CONST_INT)
10995 split_di (operands, 2, low, high);
10996 count = INTVAL (operands[2]) & 63;
10998 if (count >= 32)
11000 emit_move_insn (high[0], low[1]);
11001 emit_move_insn (low[0], const0_rtx);
11003 if (count > 32)
11004 ix86_expand_ashlsi3_const (high[0], count - 32);
11006 else
11008 if (!rtx_equal_p (operands[0], operands[1]))
11009 emit_move_insn (operands[0], operands[1]);
11010 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
11011 ix86_expand_ashlsi3_const (low[0], count);
11013 return;
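/* E.g. a constant DImode shift left by 40 is expanded above as: move the
   low source word into the high result word, clear the low word, then
   shift the high word left by 40 - 32 = 8.  */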
11016 split_di (operands, 1, low, high);
11018 if (operands[1] == const1_rtx)
11020 /* Assuming we've chosen QImode-capable registers, then 1LL << N
11021 can be done with two 32-bit shifts, no branches, no cmoves. */
11022 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
11024 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
11026 ix86_expand_clear (low[0]);
11027 ix86_expand_clear (high[0]);
11028 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));
11030 d = gen_lowpart (QImode, low[0]);
11031 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
11032 s = gen_rtx_EQ (QImode, flags, const0_rtx);
11033 emit_insn (gen_rtx_SET (VOIDmode, d, s));
11035 d = gen_lowpart (QImode, high[0]);
11036 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
11037 s = gen_rtx_NE (QImode, flags, const0_rtx);
11038 emit_insn (gen_rtx_SET (VOIDmode, d, s));
11041 /* Otherwise, we can get the same results by manually performing
11042 a bit extract operation on bit 5, and then performing the two
11043 shifts. The two methods of getting 0/1 into low/high are exactly
11044 the same size. Avoiding the shift in the bit extract case helps
11045 pentium4 a bit; no one else seems to care much either way. */
11046 else
11048 rtx x;
11050 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
11051 x = gen_rtx_ZERO_EXTEND (SImode, operands[2]);
11052 else
11053 x = gen_lowpart (SImode, operands[2]);
11054 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
11056 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (5)));
11057 emit_insn (gen_andsi3 (high[0], high[0], GEN_INT (1)));
11058 emit_move_insn (low[0], high[0]);
11059 emit_insn (gen_xorsi3 (low[0], low[0], GEN_INT (1)));
11062 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
11063 emit_insn (gen_ashlsi3 (high[0], high[0], operands[2]));
11064 return;
11067 if (operands[1] == constm1_rtx)
11069 /* For -1LL << N, we can avoid the shld instruction, because we
11070 know that we're shifting 0...31 ones into a -1. */
11071 emit_move_insn (low[0], constm1_rtx);
11072 if (optimize_size)
11073 emit_move_insn (high[0], low[0]);
11074 else
11075 emit_move_insn (high[0], constm1_rtx);
11077 else
11079 if (!rtx_equal_p (operands[0], operands[1]))
11080 emit_move_insn (operands[0], operands[1]);
11082 split_di (operands, 1, low, high);
11083 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
11086 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
11088 if (TARGET_CMOVE && scratch)
11090 ix86_expand_clear (scratch);
11091 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
11093 else
11094 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
11097 void
11098 ix86_split_ashrdi (rtx *operands, rtx scratch)
11100 rtx low[2], high[2];
11101 int count;
11103 if (GET_CODE (operands[2]) == CONST_INT)
11105 split_di (operands, 2, low, high);
11106 count = INTVAL (operands[2]) & 63;
11108 if (count == 63)
11110 emit_move_insn (high[0], high[1]);
11111 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
11112 emit_move_insn (low[0], high[0]);
11115 else if (count >= 32)
11117 emit_move_insn (low[0], high[1]);
11118 emit_move_insn (high[0], low[0]);
11119 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
11120 if (count > 32)
11121 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
11123 else
11125 if (!rtx_equal_p (operands[0], operands[1]))
11126 emit_move_insn (operands[0], operands[1]);
11127 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
11128 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
11131 else
11133 if (!rtx_equal_p (operands[0], operands[1]))
11134 emit_move_insn (operands[0], operands[1]);
11136 split_di (operands, 1, low, high);
11138 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
11139 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
11141 if (TARGET_CMOVE && scratch)
11143 emit_move_insn (scratch, high[0]);
11144 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
11145 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11146 scratch));
11148 else
11149 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
11153 void
11154 ix86_split_lshrdi (rtx *operands, rtx scratch)
11156 rtx low[2], high[2];
11157 int count;
11159 if (GET_CODE (operands[2]) == CONST_INT)
11161 split_di (operands, 2, low, high);
11162 count = INTVAL (operands[2]) & 63;
11164 if (count >= 32)
11166 emit_move_insn (low[0], high[1]);
11167 ix86_expand_clear (high[0]);
11169 if (count > 32)
11170 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
11172 else
11174 if (!rtx_equal_p (operands[0], operands[1]))
11175 emit_move_insn (operands[0], operands[1]);
11176 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
11177 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
11180 else
11182 if (!rtx_equal_p (operands[0], operands[1]))
11183 emit_move_insn (operands[0], operands[1]);
11185 split_di (operands, 1, low, high);
11187 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
11188 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
11190 /* Heh. By reversing the arguments, we can reuse this pattern. */
11191 if (TARGET_CMOVE && scratch)
11193 ix86_expand_clear (scratch);
11194 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11195 scratch));
11197 else
11198 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
11202 /* Helper function for the string operations below. Test whether (VARIABLE
11203 & VALUE) is zero, i.e. VARIABLE is suitably aligned; if so, jump to the returned label. */
11204 static rtx
11205 ix86_expand_aligntest (rtx variable, int value)
11207 rtx label = gen_label_rtx ();
11208 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
11209 if (GET_MODE (variable) == DImode)
11210 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
11211 else
11212 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
11213 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
11214 1, label);
11215 return label;
11218 /* Decrease COUNTREG by VALUE. */
11219 static void
11220 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
11222 if (GET_MODE (countreg) == DImode)
11223 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
11224 else
11225 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
11228 /* Zero extend possibly SImode EXP to Pmode register. */
11230 ix86_zero_extend_to_Pmode (rtx exp)
11232 rtx r;
11233 if (GET_MODE (exp) == VOIDmode)
11234 return force_reg (Pmode, exp);
11235 if (GET_MODE (exp) == Pmode)
11236 return copy_to_mode_reg (Pmode, exp);
11237 r = gen_reg_rtx (Pmode);
11238 emit_insn (gen_zero_extendsidi2 (r, exp));
11239 return r;
11242 /* Expand string move (memcpy) operation. Use i386 string operations when
11243 profitable. expand_clrmem contains similar code. */
11245 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
11247 rtx srcreg, destreg, countreg, srcexp, destexp;
11248 enum machine_mode counter_mode;
11249 HOST_WIDE_INT align = 0;
11250 unsigned HOST_WIDE_INT count = 0;
11252 if (GET_CODE (align_exp) == CONST_INT)
11253 align = INTVAL (align_exp);
11255 /* Can't use any of this if the user has appropriated esi or edi. */
11256 if (global_regs[4] || global_regs[5])
11257 return 0;
11259 /* This simple hack avoids all inlining code and simplifies code below. */
11260 if (!TARGET_ALIGN_STRINGOPS)
11261 align = 64;
11263 if (GET_CODE (count_exp) == CONST_INT)
11265 count = INTVAL (count_exp);
11266 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11267 return 0;
11270 /* Figure out proper mode for counter. For 32bits it is always SImode,
11271 for 64bits use SImode when possible, otherwise DImode.
11272 Set count to number of bytes copied when known at compile time. */
11273 if (!TARGET_64BIT
11274 || GET_MODE (count_exp) == SImode
11275 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
11276 counter_mode = SImode;
11277 else
11278 counter_mode = DImode;
11280 gcc_assert (counter_mode == SImode || counter_mode == DImode);
11282 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11283 if (destreg != XEXP (dst, 0))
11284 dst = replace_equiv_address_nv (dst, destreg);
11285 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11286 if (srcreg != XEXP (src, 0))
11287 src = replace_equiv_address_nv (src, srcreg);
11289 /* When optimizing for size emit simple rep ; movsb instruction for
11290 counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)?
11291 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
11292 Size of the (movsl;)*(movsw;)?(movsb;)? sequence is
11293 count / 4 + (count & 3), the other sequence is either 4 or 7 bytes,
11294 but we don't know whether upper 24 (resp. 56) bits of %ecx will be
11295 known to be zero or not. The rep; movsb sequence causes higher
11296 register pressure though, so take that into account. */
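/* E.g. when optimizing for size with count = 11: (11 & 3) + 11 / 4 = 5 <= 6
   and 11 <= 20, so neither threshold for falling back to rep movsb is met
   and the unrolled movsl/movsw/movsb expansion below is used instead of
   movl $11, %ecx; rep movsb.  */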
11298 if ((!optimize || optimize_size)
11299 && (count == 0
11300 || ((count & 0x03)
11301 && (!optimize_size
11302 || count > 5 * 4
11303 || (count & 3) + count / 4 > 6))))
11305 emit_insn (gen_cld ());
11306 countreg = ix86_zero_extend_to_Pmode (count_exp);
11307 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11308 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11309 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11310 destexp, srcexp));
11313 /* For constant aligned (or small unaligned) copies use rep movsl
11314 followed by code copying the rest. For PentiumPro ensure 8 byte
11315 alignment to allow rep movsl acceleration. */
11317 else if (count != 0
11318 && (align >= 8
11319 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11320 || optimize_size || count < (unsigned int) 64))
11322 unsigned HOST_WIDE_INT offset = 0;
11323 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11324 rtx srcmem, dstmem;
11326 emit_insn (gen_cld ());
11327 if (count & ~(size - 1))
11329 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
11331 enum machine_mode movs_mode = size == 4 ? SImode : DImode;
11333 while (offset < (count & ~(size - 1)))
11335 srcmem = adjust_automodify_address_nv (src, movs_mode,
11336 srcreg, offset);
11337 dstmem = adjust_automodify_address_nv (dst, movs_mode,
11338 destreg, offset);
11339 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11340 offset += size;
11343 else
11345 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
11346 & (TARGET_64BIT ? -1 : 0x3fffffff));
11347 countreg = copy_to_mode_reg (counter_mode, countreg);
11348 countreg = ix86_zero_extend_to_Pmode (countreg);
11350 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11351 GEN_INT (size == 4 ? 2 : 3));
11352 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11353 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11355 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11356 countreg, destexp, srcexp));
11357 offset = count & ~(size - 1);
11360 if (size == 8 && (count & 0x04))
11362 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11363 offset);
11364 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11365 offset);
11366 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11367 offset += 4;
11369 if (count & 0x02)
11371 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11372 offset);
11373 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11374 offset);
11375 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11376 offset += 2;
11378 if (count & 0x01)
11380 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11381 offset);
11382 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11383 offset);
11384 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11387 /* The generic code based on the glibc implementation:
11388 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11389 allowing accelerated copying there)
11390 - copy the data using rep movsl
11391 - copy the rest. */
11392 else
11394 rtx countreg2;
11395 rtx label = NULL;
11396 rtx srcmem, dstmem;
11397 int desired_alignment = (TARGET_PENTIUMPRO
11398 && (count == 0 || count >= (unsigned int) 260)
11399 ? 8 : UNITS_PER_WORD);
11400 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11401 dst = change_address (dst, BLKmode, destreg);
11402 src = change_address (src, BLKmode, srcreg);
11404 /* In case we don't know anything about the alignment, default to
11405 the library version, since it is usually equally fast and results in
11406 shorter code.
11408 Also emit call when we know that the count is large and call overhead
11409 will not be important. */
11410 if (!TARGET_INLINE_ALL_STRINGOPS
11411 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11412 return 0;
11414 if (TARGET_SINGLE_STRINGOP)
11415 emit_insn (gen_cld ());
11417 countreg2 = gen_reg_rtx (Pmode);
11418 countreg = copy_to_mode_reg (counter_mode, count_exp);
11420 /* We don't use loops to align destination and to copy parts smaller
11421 than 4 bytes, because gcc is able to optimize such code better (in
11422 the case the destination or the count really is aligned, gcc is often
11423 able to predict the branches) and also it is friendlier to the
11424 hardware branch prediction.
11426 Using loops is beneficial for the generic case, because we can
11427 handle small counts using the loops. Many CPUs (such as Athlon)
11428 have large REP prefix setup costs.
11430 This is quite costly. Maybe we can revisit this decision later or
11431 add some customizability to this code. */
11433 if (count == 0 && align < desired_alignment)
11435 label = gen_label_rtx ();
11436 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11437 LEU, 0, counter_mode, 1, label);
11439 if (align <= 1)
11441 rtx label = ix86_expand_aligntest (destreg, 1);
11442 srcmem = change_address (src, QImode, srcreg);
11443 dstmem = change_address (dst, QImode, destreg);
11444 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11445 ix86_adjust_counter (countreg, 1);
11446 emit_label (label);
11447 LABEL_NUSES (label) = 1;
11449 if (align <= 2)
11451 rtx label = ix86_expand_aligntest (destreg, 2);
11452 srcmem = change_address (src, HImode, srcreg);
11453 dstmem = change_address (dst, HImode, destreg);
11454 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11455 ix86_adjust_counter (countreg, 2);
11456 emit_label (label);
11457 LABEL_NUSES (label) = 1;
11459 if (align <= 4 && desired_alignment > 4)
11461 rtx label = ix86_expand_aligntest (destreg, 4);
11462 srcmem = change_address (src, SImode, srcreg);
11463 dstmem = change_address (dst, SImode, destreg);
11464 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11465 ix86_adjust_counter (countreg, 4);
11466 emit_label (label);
11467 LABEL_NUSES (label) = 1;
11470 if (label && desired_alignment > 4 && !TARGET_64BIT)
11472 emit_label (label);
11473 LABEL_NUSES (label) = 1;
11474 label = NULL_RTX;
11476 if (!TARGET_SINGLE_STRINGOP)
11477 emit_insn (gen_cld ());
11478 if (TARGET_64BIT)
11480 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11481 GEN_INT (3)));
11482 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11484 else
11486 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11487 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11489 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11490 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11491 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11492 countreg2, destexp, srcexp));
11494 if (label)
11496 emit_label (label);
11497 LABEL_NUSES (label) = 1;
11499 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11501 srcmem = change_address (src, SImode, srcreg);
11502 dstmem = change_address (dst, SImode, destreg);
11503 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11505 if ((align <= 4 || count == 0) && TARGET_64BIT)
11507 rtx label = ix86_expand_aligntest (countreg, 4);
11508 srcmem = change_address (src, SImode, srcreg);
11509 dstmem = change_address (dst, SImode, destreg);
11510 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11511 emit_label (label);
11512 LABEL_NUSES (label) = 1;
11514 if (align > 2 && count != 0 && (count & 2))
11516 srcmem = change_address (src, HImode, srcreg);
11517 dstmem = change_address (dst, HImode, destreg);
11518 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11520 if (align <= 2 || count == 0)
11522 rtx label = ix86_expand_aligntest (countreg, 2);
11523 srcmem = change_address (src, HImode, srcreg);
11524 dstmem = change_address (dst, HImode, destreg);
11525 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11526 emit_label (label);
11527 LABEL_NUSES (label) = 1;
11529 if (align > 1 && count != 0 && (count & 1))
11531 srcmem = change_address (src, QImode, srcreg);
11532 dstmem = change_address (dst, QImode, destreg);
11533 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11535 if (align <= 1 || count == 0)
11537 rtx label = ix86_expand_aligntest (countreg, 1);
11538 srcmem = change_address (src, QImode, srcreg);
11539 dstmem = change_address (dst, QImode, destreg);
11540 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11541 emit_label (label);
11542 LABEL_NUSES (label) = 1;
11546 return 1;
11549 /* Expand string clear operation (bzero). Use i386 string operations when
11550 profitable. expand_movmem contains similar code. */
11552 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
11554 rtx destreg, zeroreg, countreg, destexp;
11555 enum machine_mode counter_mode;
11556 HOST_WIDE_INT align = 0;
11557 unsigned HOST_WIDE_INT count = 0;
11559 if (GET_CODE (align_exp) == CONST_INT)
11560 align = INTVAL (align_exp);
11562 /* Can't use any of this if the user has appropriated esi. */
11563 if (global_regs[4])
11564 return 0;
11566 /* This simple hack avoids all inlining code and simplifies code below. */
11567 if (!TARGET_ALIGN_STRINGOPS)
11568 align = 32;
11570 if (GET_CODE (count_exp) == CONST_INT)
11572 count = INTVAL (count_exp);
11573 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11574 return 0;
11576 /* Figure out proper mode for counter. For 32bits it is always SImode,
11577 for 64bits use SImode when possible, otherwise DImode.
11578 Set count to number of bytes copied when known at compile time. */
11579 if (!TARGET_64BIT
11580 || GET_MODE (count_exp) == SImode
11581 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
11582 counter_mode = SImode;
11583 else
11584 counter_mode = DImode;
11586 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11587 if (destreg != XEXP (dst, 0))
11588 dst = replace_equiv_address_nv (dst, destreg);
11591 /* When optimizing for size emit simple rep ; movsb instruction for
11592 counts not divisible by 4. The movl $N, %ecx; rep; stosb
11593 sequence is 7 bytes long, so if optimizing for size and count is
11594 small enough that some stosl, stosw and stosb instructions without
11595 rep are shorter, fall back into the next if. */
11597 if ((!optimize || optimize_size)
11598 && (count == 0
11599 || ((count & 0x03)
11600 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
11602 emit_insn (gen_cld ());
11604 countreg = ix86_zero_extend_to_Pmode (count_exp);
11605 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11606 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11607 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11609 else if (count != 0
11610 && (align >= 8
11611 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11612 || optimize_size || count < (unsigned int) 64))
11614 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11615 unsigned HOST_WIDE_INT offset = 0;
11617 emit_insn (gen_cld ());
11619 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11620 if (count & ~(size - 1))
11622 unsigned HOST_WIDE_INT repcount;
11623 unsigned int max_nonrep;
11625 repcount = count >> (size == 4 ? 2 : 3);
11626 if (!TARGET_64BIT)
11627 repcount &= 0x3fffffff;
11629 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
11630 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
11631 bytes. In both cases the latter seems to be faster for small
11632 values of N. */
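/* E.g. clearing 16 bytes with 32-bit stores: repcount is 4, and four
   one-byte stosl insns (4 bytes) beat the 7-byte movl $4, %ecx; rep; stosl,
   so the unrolled form below is used (4 <= max_nonrep on most tunings).  */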
11633 max_nonrep = size == 4 ? 7 : 4;
11634 if (!optimize_size)
11635 switch (ix86_tune)
11637 case PROCESSOR_PENTIUM4:
11638 case PROCESSOR_NOCONA:
11639 max_nonrep = 3;
11640 break;
11641 default:
11642 break;
11645 if (repcount <= max_nonrep)
11646 while (repcount-- > 0)
11648 rtx mem = adjust_automodify_address_nv (dst,
11649 GET_MODE (zeroreg),
11650 destreg, offset);
11651 emit_insn (gen_strset (destreg, mem, zeroreg));
11652 offset += size;
11654 else
11656 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
11657 countreg = ix86_zero_extend_to_Pmode (countreg);
11658 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11659 GEN_INT (size == 4 ? 2 : 3));
11660 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11661 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
11662 destexp));
11663 offset = count & ~(size - 1);
11666 if (size == 8 && (count & 0x04))
11668 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11669 offset);
11670 emit_insn (gen_strset (destreg, mem,
11671 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11672 offset += 4;
11674 if (count & 0x02)
11676 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11677 offset);
11678 emit_insn (gen_strset (destreg, mem,
11679 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11680 offset += 2;
11682 if (count & 0x01)
11684 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11685 offset);
11686 emit_insn (gen_strset (destreg, mem,
11687 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11690 else
11692 rtx countreg2;
11693 rtx label = NULL;
11694 /* Compute desired alignment of the string operation. */
11695 int desired_alignment = (TARGET_PENTIUMPRO
11696 && (count == 0 || count >= (unsigned int) 260)
11697 ? 8 : UNITS_PER_WORD);
11699 /* In case we don't know anything about the alignment, default to
11700 the library version, since it is usually equally fast and results in
11701 shorter code.
11703 Also emit call when we know that the count is large and call overhead
11704 will not be important. */
11705 if (!TARGET_INLINE_ALL_STRINGOPS
11706 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11707 return 0;
11709 if (TARGET_SINGLE_STRINGOP)
11710 emit_insn (gen_cld ());
11712 countreg2 = gen_reg_rtx (Pmode);
11713 countreg = copy_to_mode_reg (counter_mode, count_exp);
11714 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11715 /* Get rid of MEM_OFFSET, it won't be accurate. */
11716 dst = change_address (dst, BLKmode, destreg);
11718 if (count == 0 && align < desired_alignment)
11720 label = gen_label_rtx ();
11721 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11722 LEU, 0, counter_mode, 1, label);
11724 if (align <= 1)
11726 rtx label = ix86_expand_aligntest (destreg, 1);
11727 emit_insn (gen_strset (destreg, dst,
11728 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11729 ix86_adjust_counter (countreg, 1);
11730 emit_label (label);
11731 LABEL_NUSES (label) = 1;
11733 if (align <= 2)
11735 rtx label = ix86_expand_aligntest (destreg, 2);
11736 emit_insn (gen_strset (destreg, dst,
11737 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11738 ix86_adjust_counter (countreg, 2);
11739 emit_label (label);
11740 LABEL_NUSES (label) = 1;
11742 if (align <= 4 && desired_alignment > 4)
11744 rtx label = ix86_expand_aligntest (destreg, 4);
11745 emit_insn (gen_strset (destreg, dst,
11746 (TARGET_64BIT
11747 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11748 : zeroreg)));
11749 ix86_adjust_counter (countreg, 4);
11750 emit_label (label);
11751 LABEL_NUSES (label) = 1;
11754 if (label && desired_alignment > 4 && !TARGET_64BIT)
11756 emit_label (label);
11757 LABEL_NUSES (label) = 1;
11758 label = NULL_RTX;
11761 if (!TARGET_SINGLE_STRINGOP)
11762 emit_insn (gen_cld ());
11763 if (TARGET_64BIT)
11765 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11766 GEN_INT (3)));
11767 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11769 else
11771 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11772 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11774 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11775 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11777 if (label)
11779 emit_label (label);
11780 LABEL_NUSES (label) = 1;
11783 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11784 emit_insn (gen_strset (destreg, dst,
11785 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11786 if (TARGET_64BIT && (align <= 4 || count == 0))
11788 rtx label = ix86_expand_aligntest (countreg, 4);
11789 emit_insn (gen_strset (destreg, dst,
11790 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11791 emit_label (label);
11792 LABEL_NUSES (label) = 1;
11794 if (align > 2 && count != 0 && (count & 2))
11795 emit_insn (gen_strset (destreg, dst,
11796 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11797 if (align <= 2 || count == 0)
11799 rtx label = ix86_expand_aligntest (countreg, 2);
11800 emit_insn (gen_strset (destreg, dst,
11801 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11802 emit_label (label);
11803 LABEL_NUSES (label) = 1;
11805 if (align > 1 && count != 0 && (count & 1))
11806 emit_insn (gen_strset (destreg, dst,
11807 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11808 if (align <= 1 || count == 0)
11810 rtx label = ix86_expand_aligntest (countreg, 1);
11811 emit_insn (gen_strset (destreg, dst,
11812 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11813 emit_label (label);
11814 LABEL_NUSES (label) = 1;
11817 return 1;
11820 /* Expand strlen. */
11822 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11824 rtx addr, scratch1, scratch2, scratch3, scratch4;
11826 /* The generic case of the strlen expander is long. Avoid its
11827 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
11829 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11830 && !TARGET_INLINE_ALL_STRINGOPS
11831 && !optimize_size
11832 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11833 return 0;
11835 addr = force_reg (Pmode, XEXP (src, 0));
11836 scratch1 = gen_reg_rtx (Pmode);
11838 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11839 && !optimize_size)
11841 /* It seems that the optimizer does not combine a call like
11842 foo(strlen(bar), strlen(bar));
11843 when the move and the subtraction are done here. It does compute
11844 the length just once when these instructions are emitted inside
11845 output_strlen_unroll(). But since &bar[strlen(bar)] is often
11846 used and this form needs one fewer register for the lifetime of
11847 output_strlen_unroll(), this seems the better choice. */
11849 emit_move_insn (out, addr);
11851 ix86_expand_strlensi_unroll_1 (out, src, align);
11853 /* strlensi_unroll_1 returns the address of the zero at the end of
11854 the string, like memchr(), so compute the length by subtracting
11855 the start address. */
11856 if (TARGET_64BIT)
11857 emit_insn (gen_subdi3 (out, out, addr));
11858 else
11859 emit_insn (gen_subsi3 (out, out, addr));
11861 else
11863 rtx unspec;
11864 scratch2 = gen_reg_rtx (Pmode);
11865 scratch3 = gen_reg_rtx (Pmode);
11866 scratch4 = force_reg (Pmode, constm1_rtx);
11868 emit_move_insn (scratch3, addr);
11869 eoschar = force_reg (QImode, eoschar);
11871 emit_insn (gen_cld ());
11872 src = replace_equiv_address_nv (src, scratch3);
11874 /* If .md starts supporting :P, this can be done in .md. */
11875 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11876 scratch4), UNSPEC_SCAS);
11877 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
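/* scratch1 now holds the count register left behind by repnz scasb: it
   starts at -1 and is decremented once for every byte examined, including
   the terminating zero, so it ends up as -(len + 2).  The complement below
   gives len + 1, and adding -1 leaves the length in OUT. */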
11878 if (TARGET_64BIT)
11880 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11881 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11883 else
11885 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11886 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11889 return 1;
11892 /* Expand the appropriate insns for doing strlen if not just doing
11893 repnz; scasb
11895 out = result, initialized with the start address
11896 align_rtx = alignment of the address.
11897 scratch = scratch register, initialized with the start address when
11898 not aligned, otherwise undefined
11900 This is just the body. It needs the initializations mentioned above and
11901 some address computation at the end. These things are done in i386.md. */
11903 static void
11904 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11906 int align;
11907 rtx tmp;
11908 rtx align_2_label = NULL_RTX;
11909 rtx align_3_label = NULL_RTX;
11910 rtx align_4_label = gen_label_rtx ();
11911 rtx end_0_label = gen_label_rtx ();
11912 rtx mem;
11913 rtx tmpreg = gen_reg_rtx (SImode);
11914 rtx scratch = gen_reg_rtx (SImode);
11915 rtx cmp;
11917 align = 0;
11918 if (GET_CODE (align_rtx) == CONST_INT)
11919 align = INTVAL (align_rtx);
11921 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11923 /* Is there a known alignment and is it less than 4? */
11924 if (align < 4)
11926 rtx scratch1 = gen_reg_rtx (Pmode);
11927 emit_move_insn (scratch1, out);
11928 /* Is there a known alignment and is it not 2? */
11929 if (align != 2)
11931 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11932 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11934 /* Leave just the 3 lower bits. */
11935 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11936 NULL_RTX, 0, OPTAB_WIDEN);
11938 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11939 Pmode, 1, align_4_label);
11940 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
11941 Pmode, 1, align_2_label);
11942 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
11943 Pmode, 1, align_3_label);
11945 else
11947 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11948 check whether it is aligned to a 4-byte boundary. */
11950 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
11951 NULL_RTX, 0, OPTAB_WIDEN);
11953 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11954 Pmode, 1, align_4_label);
11957 mem = change_address (src, QImode, out);
11959 /* Now compare the bytes. */
11961 /* Compare the first n unaligned bytes one byte at a time. */
11962 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11963 QImode, 1, end_0_label);
11965 /* Increment the address. */
11966 if (TARGET_64BIT)
11967 emit_insn (gen_adddi3 (out, out, const1_rtx));
11968 else
11969 emit_insn (gen_addsi3 (out, out, const1_rtx));
11971 /* Not needed with an alignment of 2 */
11972 if (align != 2)
11974 emit_label (align_2_label);
11976 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11977 end_0_label);
11979 if (TARGET_64BIT)
11980 emit_insn (gen_adddi3 (out, out, const1_rtx));
11981 else
11982 emit_insn (gen_addsi3 (out, out, const1_rtx));
11984 emit_label (align_3_label);
11987 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11988 end_0_label);
11990 if (TARGET_64BIT)
11991 emit_insn (gen_adddi3 (out, out, const1_rtx));
11992 else
11993 emit_insn (gen_addsi3 (out, out, const1_rtx));
11996 /* Generate a loop to check 4 bytes at a time. It is not worthwhile to
11997 align this loop; doing so only makes the program larger and does not
11998 speed it up. */
11999 emit_label (align_4_label);
12001 mem = change_address (src, SImode, out);
12002 emit_move_insn (scratch, mem);
12003 if (TARGET_64BIT)
12004 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
12005 else
12006 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
12008 /* This formula yields a nonzero result iff one of the bytes is zero.
12009 This saves three branches inside the loop and many cycles. */
12011 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
12012 emit_insn (gen_one_cmplsi2 (scratch, scratch));
12013 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
12014 emit_insn (gen_andsi3 (tmpreg, tmpreg,
12015 gen_int_mode (0x80808080, SImode)));
12016 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
12017 align_4_label);
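/* A worked example of the test above: for scratch = 0x12003456 (a zero in
   byte 2), scratch - 0x01010101 = 0x10ff3355, ~scratch = 0xedffcba9, and
   0x10ff3355 & 0xedffcba9 & 0x80808080 = 0x00800000, which is nonzero, so
   the loop exits.  When no byte is zero the 0x80 bit of every byte is
   cleared and the result is 0, so the loop continues. */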
12019 if (TARGET_CMOVE)
12021 rtx reg = gen_reg_rtx (SImode);
12022 rtx reg2 = gen_reg_rtx (Pmode);
12023 emit_move_insn (reg, tmpreg);
12024 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
12026 /* If zero is not in the first two bytes, move two bytes forward. */
12027 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
12028 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
12029 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
12030 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
12031 gen_rtx_IF_THEN_ELSE (SImode, tmp,
12032 reg,
12033 tmpreg)));
12034 /* Emit lea manually to avoid clobbering of flags. */
12035 emit_insn (gen_rtx_SET (SImode, reg2,
12036 gen_rtx_PLUS (Pmode, out, const2_rtx)));
12038 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
12039 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
12040 emit_insn (gen_rtx_SET (VOIDmode, out,
12041 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
12042 reg2,
12043 out)));
12046 else
12048 rtx end_2_label = gen_label_rtx ();
12049 /* Is zero in the first two bytes? */
12051 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
12052 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
12053 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
12054 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12055 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
12056 pc_rtx);
12057 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
12058 JUMP_LABEL (tmp) = end_2_label;
12060 /* Not in the first two. Move two bytes forward. */
12061 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
12062 if (TARGET_64BIT)
12063 emit_insn (gen_adddi3 (out, out, const2_rtx));
12064 else
12065 emit_insn (gen_addsi3 (out, out, const2_rtx));
12067 emit_label (end_2_label);
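/* At this point OUT points 4 (or, if the zero was not in the first two
   bytes, 6) bytes past the start of the word that holds the terminator, and
   bit 7 of TMPREG's low byte is set exactly when the lower of the two
   remaining candidate bytes is the zero one.  Adding that byte to itself
   moves the bit into the carry flag, so the subtract-with-borrow of 3 below
   turns OUT into the address of the zero byte without a branch. */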
12071 /* Avoid branch in fixing the byte. */
12072 tmpreg = gen_lowpart (QImode, tmpreg);
12073 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
12074 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
12075 if (TARGET_64BIT)
12076 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
12077 else
12078 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
12080 emit_label (end_0_label);
12083 void
12084 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
12085 rtx callarg2 ATTRIBUTE_UNUSED,
12086 rtx pop, int sibcall)
12088 rtx use = NULL, call;
12090 if (pop == const0_rtx)
12091 pop = NULL;
12092 gcc_assert (!TARGET_64BIT || !pop);
12094 #if TARGET_MACHO
12095 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
12096 fnaddr = machopic_indirect_call_target (fnaddr);
12097 #else
12098 /* Static functions and indirect calls don't need the pic register. */
12099 if (! TARGET_64BIT && flag_pic
12100 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
12101 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
12102 use_reg (&use, pic_offset_table_rtx);
12104 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
12106 rtx al = gen_rtx_REG (QImode, 0);
12107 emit_move_insn (al, callarg2);
12108 use_reg (&use, al);
12110 #endif /* TARGET_MACHO */
12112 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
12114 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
12115 fnaddr = gen_rtx_MEM (QImode, fnaddr);
12117 if (sibcall && TARGET_64BIT
12118 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
12120 rtx addr;
12121 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
12122 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
12123 emit_move_insn (fnaddr, addr);
12124 fnaddr = gen_rtx_MEM (QImode, fnaddr);
12127 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
12128 if (retval)
12129 call = gen_rtx_SET (VOIDmode, retval, call);
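/* For callee-pop conventions (stdcall and friends on ia32), POP is the
   number of argument bytes the callee removes; attach the corresponding
   stack pointer adjustment to the call in a PARALLEL so the rest of the
   compiler sees the stack update. */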
12130 if (pop)
12132 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
12133 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
12134 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
12137 call = emit_call_insn (call);
12138 if (use)
12139 CALL_INSN_FUNCTION_USAGE (call) = use;
12143 /* Clear stack slot assignments remembered from previous functions.
12144 This is called from INIT_EXPANDERS once before RTL is emitted for each
12145 function. */
12147 static struct machine_function *
12148 ix86_init_machine_status (void)
12150 struct machine_function *f;
12152 f = ggc_alloc_cleared (sizeof (struct machine_function));
12153 f->use_fast_prologue_epilogue_nregs = -1;
12155 return f;
12158 /* Return a MEM corresponding to a stack slot with mode MODE.
12159 Allocate a new slot if necessary.
12161 The RTL for a function can have several slots available: N is
12162 which slot to use. */
12165 assign_386_stack_local (enum machine_mode mode, int n)
12167 struct stack_local_entry *s;
12169 gcc_assert (n >= 0 && n < MAX_386_STACK_LOCALS);
12171 for (s = ix86_stack_locals; s; s = s->next)
12172 if (s->mode == mode && s->n == n)
12173 return s->rtl;
12175 s = (struct stack_local_entry *)
12176 ggc_alloc (sizeof (struct stack_local_entry));
12177 s->n = n;
12178 s->mode = mode;
12179 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
12181 s->next = ix86_stack_locals;
12182 ix86_stack_locals = s;
12183 return s->rtl;
12186 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12188 static GTY(()) rtx ix86_tls_symbol;
12190 ix86_tls_get_addr (void)
12193 if (!ix86_tls_symbol)
12195 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
12196 (TARGET_GNU_TLS && !TARGET_64BIT)
12197 ? "___tls_get_addr"
12198 : "__tls_get_addr");
12201 return ix86_tls_symbol;
12204 /* Calculate the length of the memory address in the instruction
12205 encoding. Does not include the one-byte modrm, opcode, or prefix. */
12208 memory_address_length (rtx addr)
12210 struct ix86_address parts;
12211 rtx base, index, disp;
12212 int len;
12213 int ok;
12215 if (GET_CODE (addr) == PRE_DEC
12216 || GET_CODE (addr) == POST_INC
12217 || GET_CODE (addr) == PRE_MODIFY
12218 || GET_CODE (addr) == POST_MODIFY)
12219 return 0;
12221 ok = ix86_decompose_address (addr, &parts);
12222 gcc_assert (ok);
12224 if (parts.base && GET_CODE (parts.base) == SUBREG)
12225 parts.base = SUBREG_REG (parts.base);
12226 if (parts.index && GET_CODE (parts.index) == SUBREG)
12227 parts.index = SUBREG_REG (parts.index);
12229 base = parts.base;
12230 index = parts.index;
12231 disp = parts.disp;
12232 len = 0;
12234 /* Rule of thumb:
12235 - esp as the base always wants an index,
12236 - ebp as the base always wants a displacement. */
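/* For example (32-bit addressing): (%eax) needs no bytes beyond the modrm,
   (%esp) needs a SIB byte and 4(%ebp) a disp8 (one extra byte each), a bare
   symbol reference costs a 4-byte displacement, and 8(%eax,%ebx,4) costs a
   SIB byte plus a disp8, i.e. 2. */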
12238 /* Register Indirect. */
12239 if (base && !index && !disp)
12241 /* esp (for its index) and ebp (for its displacement) need
12242 the two-byte modrm form. */
12243 if (addr == stack_pointer_rtx
12244 || addr == arg_pointer_rtx
12245 || addr == frame_pointer_rtx
12246 || addr == hard_frame_pointer_rtx)
12247 len = 1;
12250 /* Direct Addressing. */
12251 else if (disp && !base && !index)
12252 len = 4;
12254 else
12256 /* Find the length of the displacement constant. */
12257 if (disp)
12259 if (GET_CODE (disp) == CONST_INT
12260 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
12261 && base)
12262 len = 1;
12263 else
12264 len = 4;
12266 /* ebp always wants a displacement. */
12267 else if (base == hard_frame_pointer_rtx)
12268 len = 1;
12270 /* An index requires the two-byte modrm form.... */
12271 if (index
12272 /* ...like esp, which always wants an index. */
12273 || base == stack_pointer_rtx
12274 || base == arg_pointer_rtx
12275 || base == frame_pointer_rtx)
12276 len += 1;
12279 return len;
12282 /* Compute default value for "length_immediate" attribute. When SHORTFORM
12283 is set, expect that the insn has an 8-bit immediate alternative. */
12285 ix86_attr_length_immediate_default (rtx insn, int shortform)
12287 int len = 0;
12288 int i;
12289 extract_insn_cached (insn);
12290 for (i = recog_data.n_operands - 1; i >= 0; --i)
12291 if (CONSTANT_P (recog_data.operand[i]))
12293 gcc_assert (!len);
12294 if (shortform
12295 && GET_CODE (recog_data.operand[i]) == CONST_INT
12296 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
12297 len = 1;
12298 else
12300 switch (get_attr_mode (insn))
12302 case MODE_QI:
12303 len+=1;
12304 break;
12305 case MODE_HI:
12306 len+=2;
12307 break;
12308 case MODE_SI:
12309 len+=4;
12310 break;
12311 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
12312 case MODE_DI:
12313 len+=4;
12314 break;
12315 default:
12316 fatal_insn ("unknown insn mode", insn);
12320 return len;
12322 /* Compute default value for "length_address" attribute. */
12324 ix86_attr_length_address_default (rtx insn)
12326 int i;
12328 if (get_attr_type (insn) == TYPE_LEA)
12330 rtx set = PATTERN (insn);
12332 if (GET_CODE (set) == PARALLEL)
12333 set = XVECEXP (set, 0, 0);
12335 gcc_assert (GET_CODE (set) == SET);
12337 return memory_address_length (SET_SRC (set));
12340 extract_insn_cached (insn);
12341 for (i = recog_data.n_operands - 1; i >= 0; --i)
12342 if (GET_CODE (recog_data.operand[i]) == MEM)
12344 return memory_address_length (XEXP (recog_data.operand[i], 0));
12345 break;
12347 return 0;
12350 /* Return the maximum number of instructions a cpu can issue. */
12352 static int
12353 ix86_issue_rate (void)
12355 switch (ix86_tune)
12357 case PROCESSOR_PENTIUM:
12358 case PROCESSOR_K6:
12359 return 2;
12361 case PROCESSOR_PENTIUMPRO:
12362 case PROCESSOR_PENTIUM4:
12363 case PROCESSOR_ATHLON:
12364 case PROCESSOR_K8:
12365 case PROCESSOR_NOCONA:
12366 return 3;
12368 default:
12369 return 1;
12373 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
12374 by DEP_INSN and nothing else set by DEP_INSN. */
12376 static int
12377 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12379 rtx set, set2;
12381 /* Simplify the test for uninteresting insns. */
12382 if (insn_type != TYPE_SETCC
12383 && insn_type != TYPE_ICMOV
12384 && insn_type != TYPE_FCMOV
12385 && insn_type != TYPE_IBR)
12386 return 0;
12388 if ((set = single_set (dep_insn)) != 0)
12390 set = SET_DEST (set);
12391 set2 = NULL_RTX;
12393 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12394 && XVECLEN (PATTERN (dep_insn), 0) == 2
12395 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12396 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12398 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12399 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
12401 else
12402 return 0;
12404 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12405 return 0;
12407 /* This test is true if the dependent insn reads the flags but
12408 not any other potentially set register. */
12409 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12410 return 0;
12412 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12413 return 0;
12415 return 1;
12418 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12419 address with operands set by DEP_INSN. */
12421 static int
12422 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12424 rtx addr;
12426 if (insn_type == TYPE_LEA
12427 && TARGET_PENTIUM)
12429 addr = PATTERN (insn);
12431 if (GET_CODE (addr) == PARALLEL)
12432 addr = XVECEXP (addr, 0, 0);
12434 gcc_assert (GET_CODE (addr) == SET);
12436 addr = SET_SRC (addr);
12438 else
12440 int i;
12441 extract_insn_cached (insn);
12442 for (i = recog_data.n_operands - 1; i >= 0; --i)
12443 if (GET_CODE (recog_data.operand[i]) == MEM)
12445 addr = XEXP (recog_data.operand[i], 0);
12446 goto found;
12448 return 0;
12449 found:;
12452 return modified_in_p (addr, dep_insn);
12455 static int
12456 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12458 enum attr_type insn_type, dep_insn_type;
12459 enum attr_memory memory;
12460 rtx set, set2;
12461 int dep_insn_code_number;
12463 /* Anti and output dependencies have zero cost on all CPUs. */
12464 if (REG_NOTE_KIND (link) != 0)
12465 return 0;
12467 dep_insn_code_number = recog_memoized (dep_insn);
12469 /* If we can't recognize the insns, we can't really do anything. */
12470 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12471 return cost;
12473 insn_type = get_attr_type (insn);
12474 dep_insn_type = get_attr_type (dep_insn);
12476 switch (ix86_tune)
12478 case PROCESSOR_PENTIUM:
12479 /* Address Generation Interlock adds a cycle of latency. */
12480 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12481 cost += 1;
12483 /* ??? Compares pair with jump/setcc. */
12484 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12485 cost = 0;
12487 /* Floating point stores require value to be ready one cycle earlier. */
12488 if (insn_type == TYPE_FMOV
12489 && get_attr_memory (insn) == MEMORY_STORE
12490 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12491 cost += 1;
12492 break;
12494 case PROCESSOR_PENTIUMPRO:
12495 memory = get_attr_memory (insn);
12497 /* INT->FP conversion is expensive. */
12498 if (get_attr_fp_int_src (dep_insn))
12499 cost += 5;
12501 /* There is one cycle extra latency between an FP op and a store. */
12502 if (insn_type == TYPE_FMOV
12503 && (set = single_set (dep_insn)) != NULL_RTX
12504 && (set2 = single_set (insn)) != NULL_RTX
12505 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12506 && GET_CODE (SET_DEST (set2)) == MEM)
12507 cost += 1;
12509 /* Model the reorder buffer's ability to hide the latency of a load by
12510 executing it in parallel with the previous instruction, provided the
12511 previous instruction is not needed to compute the address. */
12512 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12513 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12515 /* Claim that moves take one cycle, as the core can issue one load
12516 at a time and the next load can start a cycle later. */
12517 if (dep_insn_type == TYPE_IMOV
12518 || dep_insn_type == TYPE_FMOV)
12519 cost = 1;
12520 else if (cost > 1)
12521 cost--;
12523 break;
12525 case PROCESSOR_K6:
12526 memory = get_attr_memory (insn);
12528 /* The esp dependency is resolved before the instruction is really
12529 finished. */
12530 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12531 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12532 return 1;
12534 /* INT->FP conversion is expensive. */
12535 if (get_attr_fp_int_src (dep_insn))
12536 cost += 5;
12538 /* Model the reorder buffer's ability to hide the latency of a load by
12539 executing it in parallel with the previous instruction, provided the
12540 previous instruction is not needed to compute the address. */
12541 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12542 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12544 /* Claim that moves take one cycle, as the core can issue one load
12545 at a time and the next load can start a cycle later. */
12546 if (dep_insn_type == TYPE_IMOV
12547 || dep_insn_type == TYPE_FMOV)
12548 cost = 1;
12549 else if (cost > 2)
12550 cost -= 2;
12551 else
12552 cost = 1;
12554 break;
12556 case PROCESSOR_ATHLON:
12557 case PROCESSOR_K8:
12558 memory = get_attr_memory (insn);
12560 /* Model the reorder buffer's ability to hide the latency of a load by
12561 executing it in parallel with the previous instruction, provided the
12562 previous instruction is not needed to compute the address. */
12563 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12564 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12566 enum attr_unit unit = get_attr_unit (insn);
12567 int loadcost = 3;
12569 /* Because of the difference between the length of integer and
12570 floating unit pipeline preparation stages, the memory operands
12571 for floating point are cheaper.
12573 ??? For Athlon the difference is most probably 2. */
12574 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12575 loadcost = 3;
12576 else
12577 loadcost = TARGET_ATHLON ? 2 : 0;
12579 if (cost >= loadcost)
12580 cost -= loadcost;
12581 else
12582 cost = 0;
12585 default:
12586 break;
12589 return cost;
12592 /* How many alternative schedules to try. This should be as wide as the
12593 scheduling freedom in the DFA, but no wider. Making this value too
12594 large results in extra work for the scheduler. */
12596 static int
12597 ia32_multipass_dfa_lookahead (void)
12599 if (ix86_tune == PROCESSOR_PENTIUM)
12600 return 2;
12602 if (ix86_tune == PROCESSOR_PENTIUMPRO
12603 || ix86_tune == PROCESSOR_K6)
12604 return 1;
12606 else
12607 return 0;
12611 /* Compute the alignment given to a constant that is being placed in memory.
12612 EXP is the constant and ALIGN is the alignment that the object would
12613 ordinarily have.
12614 The value of this function is used instead of that alignment to align
12615 the object. */
12618 ix86_constant_alignment (tree exp, int align)
12620 if (TREE_CODE (exp) == REAL_CST)
12622 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12623 return 64;
12624 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12625 return 128;
12627 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12628 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12629 return BITS_PER_WORD;
12631 return align;
12634 /* Compute the alignment for a static variable.
12635 TYPE is the data type, and ALIGN is the alignment that
12636 the object would ordinarily have. The value of this function is used
12637 instead of that alignment to align the object. */
12640 ix86_data_alignment (tree type, int align)
12642 if (AGGREGATE_TYPE_P (type)
12643 && TYPE_SIZE (type)
12644 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12645 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12646 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12647 return 256;
12649 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12650 to a 16-byte boundary. */
12651 if (TARGET_64BIT)
12653 if (AGGREGATE_TYPE_P (type)
12654 && TYPE_SIZE (type)
12655 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12656 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12657 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12658 return 128;
12661 if (TREE_CODE (type) == ARRAY_TYPE)
12663 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12664 return 64;
12665 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12666 return 128;
12668 else if (TREE_CODE (type) == COMPLEX_TYPE)
12671 if (TYPE_MODE (type) == DCmode && align < 64)
12672 return 64;
12673 if (TYPE_MODE (type) == XCmode && align < 128)
12674 return 128;
12676 else if ((TREE_CODE (type) == RECORD_TYPE
12677 || TREE_CODE (type) == UNION_TYPE
12678 || TREE_CODE (type) == QUAL_UNION_TYPE)
12679 && TYPE_FIELDS (type))
12681 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12682 return 64;
12683 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12684 return 128;
12686 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12687 || TREE_CODE (type) == INTEGER_TYPE)
12689 if (TYPE_MODE (type) == DFmode && align < 64)
12690 return 64;
12691 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12692 return 128;
12695 return align;
12698 /* Compute the alignment for a local variable.
12699 TYPE is the data type, and ALIGN is the alignment that
12700 the object would ordinarily have. The value of this macro is used
12701 instead of that alignment to align the object. */
12704 ix86_local_alignment (tree type, int align)
12706 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12707 to a 16-byte boundary. */
12708 if (TARGET_64BIT)
12710 if (AGGREGATE_TYPE_P (type)
12711 && TYPE_SIZE (type)
12712 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12713 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12714 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12715 return 128;
12717 if (TREE_CODE (type) == ARRAY_TYPE)
12719 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12720 return 64;
12721 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12722 return 128;
12724 else if (TREE_CODE (type) == COMPLEX_TYPE)
12726 if (TYPE_MODE (type) == DCmode && align < 64)
12727 return 64;
12728 if (TYPE_MODE (type) == XCmode && align < 128)
12729 return 128;
12731 else if ((TREE_CODE (type) == RECORD_TYPE
12732 || TREE_CODE (type) == UNION_TYPE
12733 || TREE_CODE (type) == QUAL_UNION_TYPE)
12734 && TYPE_FIELDS (type))
12736 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12737 return 64;
12738 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12739 return 128;
12741 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12742 || TREE_CODE (type) == INTEGER_TYPE)
12745 if (TYPE_MODE (type) == DFmode && align < 64)
12746 return 64;
12747 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12748 return 128;
12750 return align;
12753 /* Emit RTL insns to initialize the variable parts of a trampoline.
12754 FNADDR is an RTX for the address of the function's pure code.
12755 CXT is an RTX for the static chain value for the function. */
12756 void
12757 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12759 if (!TARGET_64BIT)
12761 /* Compute offset from the end of the jmp to the target function. */
12762 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12763 plus_constant (tramp, 10),
12764 NULL_RTX, 1, OPTAB_DIRECT);
12765 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12766 gen_int_mode (0xb9, QImode));
12767 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12768 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12769 gen_int_mode (0xe9, QImode));
12770 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
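/* The resulting 10-byte ia32 trampoline is:
     b9 <cxt:4>    movl  $CXT, %ecx      (static chain)
     e9 <disp:4>   jmp   FNADDR          (rel32, relative to the insn end) */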
12772 else
12774 int offset = 0;
12775 /* Try to load address using shorter movl instead of movabs.
12776 We may want to support movq for kernel mode, but the kernel does not use
12777 trampolines at the moment. */
12778 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
12780 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12781 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12782 gen_int_mode (0xbb41, HImode));
12783 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12784 gen_lowpart (SImode, fnaddr));
12785 offset += 6;
12787 else
12789 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12790 gen_int_mode (0xbb49, HImode));
12791 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12792 fnaddr);
12793 offset += 10;
12795 /* Load static chain using movabs to r10. */
12796 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12797 gen_int_mode (0xba49, HImode));
12798 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12799 cxt);
12800 offset += 10;
12801 /* Jump to r11. */
12802 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12803 gen_int_mode (0xff49, HImode));
12804 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12805 gen_int_mode (0xe3, QImode));
12806 offset += 3;
12807 gcc_assert (offset <= TRAMPOLINE_SIZE);
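/* Since the HImode constants above are stored little-endian, the emitted
   64-bit trampoline is either
     41 bb <imm32>   movl   $FNADDR, %r11d   (zero-extending form)
   or
     49 bb <imm64>   movabs $FNADDR, %r11
   followed by
     49 ba <imm64>   movabs $CXT, %r10       (static chain)
     49 ff e3        jmp    *%r11 */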
12810 #ifdef ENABLE_EXECUTE_STACK
12811 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
12812 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12813 #endif
12816 /* Codes for all the SSE/MMX builtins. */
12817 enum ix86_builtins
12819 IX86_BUILTIN_ADDPS,
12820 IX86_BUILTIN_ADDSS,
12821 IX86_BUILTIN_DIVPS,
12822 IX86_BUILTIN_DIVSS,
12823 IX86_BUILTIN_MULPS,
12824 IX86_BUILTIN_MULSS,
12825 IX86_BUILTIN_SUBPS,
12826 IX86_BUILTIN_SUBSS,
12828 IX86_BUILTIN_CMPEQPS,
12829 IX86_BUILTIN_CMPLTPS,
12830 IX86_BUILTIN_CMPLEPS,
12831 IX86_BUILTIN_CMPGTPS,
12832 IX86_BUILTIN_CMPGEPS,
12833 IX86_BUILTIN_CMPNEQPS,
12834 IX86_BUILTIN_CMPNLTPS,
12835 IX86_BUILTIN_CMPNLEPS,
12836 IX86_BUILTIN_CMPNGTPS,
12837 IX86_BUILTIN_CMPNGEPS,
12838 IX86_BUILTIN_CMPORDPS,
12839 IX86_BUILTIN_CMPUNORDPS,
12840 IX86_BUILTIN_CMPNEPS,
12841 IX86_BUILTIN_CMPEQSS,
12842 IX86_BUILTIN_CMPLTSS,
12843 IX86_BUILTIN_CMPLESS,
12844 IX86_BUILTIN_CMPNEQSS,
12845 IX86_BUILTIN_CMPNLTSS,
12846 IX86_BUILTIN_CMPNLESS,
12847 IX86_BUILTIN_CMPNGTSS,
12848 IX86_BUILTIN_CMPNGESS,
12849 IX86_BUILTIN_CMPORDSS,
12850 IX86_BUILTIN_CMPUNORDSS,
12851 IX86_BUILTIN_CMPNESS,
12853 IX86_BUILTIN_COMIEQSS,
12854 IX86_BUILTIN_COMILTSS,
12855 IX86_BUILTIN_COMILESS,
12856 IX86_BUILTIN_COMIGTSS,
12857 IX86_BUILTIN_COMIGESS,
12858 IX86_BUILTIN_COMINEQSS,
12859 IX86_BUILTIN_UCOMIEQSS,
12860 IX86_BUILTIN_UCOMILTSS,
12861 IX86_BUILTIN_UCOMILESS,
12862 IX86_BUILTIN_UCOMIGTSS,
12863 IX86_BUILTIN_UCOMIGESS,
12864 IX86_BUILTIN_UCOMINEQSS,
12866 IX86_BUILTIN_CVTPI2PS,
12867 IX86_BUILTIN_CVTPS2PI,
12868 IX86_BUILTIN_CVTSI2SS,
12869 IX86_BUILTIN_CVTSI642SS,
12870 IX86_BUILTIN_CVTSS2SI,
12871 IX86_BUILTIN_CVTSS2SI64,
12872 IX86_BUILTIN_CVTTPS2PI,
12873 IX86_BUILTIN_CVTTSS2SI,
12874 IX86_BUILTIN_CVTTSS2SI64,
12876 IX86_BUILTIN_MAXPS,
12877 IX86_BUILTIN_MAXSS,
12878 IX86_BUILTIN_MINPS,
12879 IX86_BUILTIN_MINSS,
12881 IX86_BUILTIN_LOADUPS,
12882 IX86_BUILTIN_STOREUPS,
12883 IX86_BUILTIN_MOVSS,
12885 IX86_BUILTIN_MOVHLPS,
12886 IX86_BUILTIN_MOVLHPS,
12887 IX86_BUILTIN_LOADHPS,
12888 IX86_BUILTIN_LOADLPS,
12889 IX86_BUILTIN_STOREHPS,
12890 IX86_BUILTIN_STORELPS,
12892 IX86_BUILTIN_MASKMOVQ,
12893 IX86_BUILTIN_MOVMSKPS,
12894 IX86_BUILTIN_PMOVMSKB,
12896 IX86_BUILTIN_MOVNTPS,
12897 IX86_BUILTIN_MOVNTQ,
12899 IX86_BUILTIN_LOADDQU,
12900 IX86_BUILTIN_STOREDQU,
12902 IX86_BUILTIN_PACKSSWB,
12903 IX86_BUILTIN_PACKSSDW,
12904 IX86_BUILTIN_PACKUSWB,
12906 IX86_BUILTIN_PADDB,
12907 IX86_BUILTIN_PADDW,
12908 IX86_BUILTIN_PADDD,
12909 IX86_BUILTIN_PADDQ,
12910 IX86_BUILTIN_PADDSB,
12911 IX86_BUILTIN_PADDSW,
12912 IX86_BUILTIN_PADDUSB,
12913 IX86_BUILTIN_PADDUSW,
12914 IX86_BUILTIN_PSUBB,
12915 IX86_BUILTIN_PSUBW,
12916 IX86_BUILTIN_PSUBD,
12917 IX86_BUILTIN_PSUBQ,
12918 IX86_BUILTIN_PSUBSB,
12919 IX86_BUILTIN_PSUBSW,
12920 IX86_BUILTIN_PSUBUSB,
12921 IX86_BUILTIN_PSUBUSW,
12923 IX86_BUILTIN_PAND,
12924 IX86_BUILTIN_PANDN,
12925 IX86_BUILTIN_POR,
12926 IX86_BUILTIN_PXOR,
12928 IX86_BUILTIN_PAVGB,
12929 IX86_BUILTIN_PAVGW,
12931 IX86_BUILTIN_PCMPEQB,
12932 IX86_BUILTIN_PCMPEQW,
12933 IX86_BUILTIN_PCMPEQD,
12934 IX86_BUILTIN_PCMPGTB,
12935 IX86_BUILTIN_PCMPGTW,
12936 IX86_BUILTIN_PCMPGTD,
12938 IX86_BUILTIN_PMADDWD,
12940 IX86_BUILTIN_PMAXSW,
12941 IX86_BUILTIN_PMAXUB,
12942 IX86_BUILTIN_PMINSW,
12943 IX86_BUILTIN_PMINUB,
12945 IX86_BUILTIN_PMULHUW,
12946 IX86_BUILTIN_PMULHW,
12947 IX86_BUILTIN_PMULLW,
12949 IX86_BUILTIN_PSADBW,
12950 IX86_BUILTIN_PSHUFW,
12952 IX86_BUILTIN_PSLLW,
12953 IX86_BUILTIN_PSLLD,
12954 IX86_BUILTIN_PSLLQ,
12955 IX86_BUILTIN_PSRAW,
12956 IX86_BUILTIN_PSRAD,
12957 IX86_BUILTIN_PSRLW,
12958 IX86_BUILTIN_PSRLD,
12959 IX86_BUILTIN_PSRLQ,
12960 IX86_BUILTIN_PSLLWI,
12961 IX86_BUILTIN_PSLLDI,
12962 IX86_BUILTIN_PSLLQI,
12963 IX86_BUILTIN_PSRAWI,
12964 IX86_BUILTIN_PSRADI,
12965 IX86_BUILTIN_PSRLWI,
12966 IX86_BUILTIN_PSRLDI,
12967 IX86_BUILTIN_PSRLQI,
12969 IX86_BUILTIN_PUNPCKHBW,
12970 IX86_BUILTIN_PUNPCKHWD,
12971 IX86_BUILTIN_PUNPCKHDQ,
12972 IX86_BUILTIN_PUNPCKLBW,
12973 IX86_BUILTIN_PUNPCKLWD,
12974 IX86_BUILTIN_PUNPCKLDQ,
12976 IX86_BUILTIN_SHUFPS,
12978 IX86_BUILTIN_RCPPS,
12979 IX86_BUILTIN_RCPSS,
12980 IX86_BUILTIN_RSQRTPS,
12981 IX86_BUILTIN_RSQRTSS,
12982 IX86_BUILTIN_SQRTPS,
12983 IX86_BUILTIN_SQRTSS,
12985 IX86_BUILTIN_UNPCKHPS,
12986 IX86_BUILTIN_UNPCKLPS,
12988 IX86_BUILTIN_ANDPS,
12989 IX86_BUILTIN_ANDNPS,
12990 IX86_BUILTIN_ORPS,
12991 IX86_BUILTIN_XORPS,
12993 IX86_BUILTIN_EMMS,
12994 IX86_BUILTIN_LDMXCSR,
12995 IX86_BUILTIN_STMXCSR,
12996 IX86_BUILTIN_SFENCE,
12998 /* 3DNow! Original */
12999 IX86_BUILTIN_FEMMS,
13000 IX86_BUILTIN_PAVGUSB,
13001 IX86_BUILTIN_PF2ID,
13002 IX86_BUILTIN_PFACC,
13003 IX86_BUILTIN_PFADD,
13004 IX86_BUILTIN_PFCMPEQ,
13005 IX86_BUILTIN_PFCMPGE,
13006 IX86_BUILTIN_PFCMPGT,
13007 IX86_BUILTIN_PFMAX,
13008 IX86_BUILTIN_PFMIN,
13009 IX86_BUILTIN_PFMUL,
13010 IX86_BUILTIN_PFRCP,
13011 IX86_BUILTIN_PFRCPIT1,
13012 IX86_BUILTIN_PFRCPIT2,
13013 IX86_BUILTIN_PFRSQIT1,
13014 IX86_BUILTIN_PFRSQRT,
13015 IX86_BUILTIN_PFSUB,
13016 IX86_BUILTIN_PFSUBR,
13017 IX86_BUILTIN_PI2FD,
13018 IX86_BUILTIN_PMULHRW,
13020 /* 3DNow! Athlon Extensions */
13021 IX86_BUILTIN_PF2IW,
13022 IX86_BUILTIN_PFNACC,
13023 IX86_BUILTIN_PFPNACC,
13024 IX86_BUILTIN_PI2FW,
13025 IX86_BUILTIN_PSWAPDSI,
13026 IX86_BUILTIN_PSWAPDSF,
13028 /* SSE2 */
13029 IX86_BUILTIN_ADDPD,
13030 IX86_BUILTIN_ADDSD,
13031 IX86_BUILTIN_DIVPD,
13032 IX86_BUILTIN_DIVSD,
13033 IX86_BUILTIN_MULPD,
13034 IX86_BUILTIN_MULSD,
13035 IX86_BUILTIN_SUBPD,
13036 IX86_BUILTIN_SUBSD,
13038 IX86_BUILTIN_CMPEQPD,
13039 IX86_BUILTIN_CMPLTPD,
13040 IX86_BUILTIN_CMPLEPD,
13041 IX86_BUILTIN_CMPGTPD,
13042 IX86_BUILTIN_CMPGEPD,
13043 IX86_BUILTIN_CMPNEQPD,
13044 IX86_BUILTIN_CMPNLTPD,
13045 IX86_BUILTIN_CMPNLEPD,
13046 IX86_BUILTIN_CMPNGTPD,
13047 IX86_BUILTIN_CMPNGEPD,
13048 IX86_BUILTIN_CMPORDPD,
13049 IX86_BUILTIN_CMPUNORDPD,
13050 IX86_BUILTIN_CMPNEPD,
13051 IX86_BUILTIN_CMPEQSD,
13052 IX86_BUILTIN_CMPLTSD,
13053 IX86_BUILTIN_CMPLESD,
13054 IX86_BUILTIN_CMPNEQSD,
13055 IX86_BUILTIN_CMPNLTSD,
13056 IX86_BUILTIN_CMPNLESD,
13057 IX86_BUILTIN_CMPORDSD,
13058 IX86_BUILTIN_CMPUNORDSD,
13059 IX86_BUILTIN_CMPNESD,
13061 IX86_BUILTIN_COMIEQSD,
13062 IX86_BUILTIN_COMILTSD,
13063 IX86_BUILTIN_COMILESD,
13064 IX86_BUILTIN_COMIGTSD,
13065 IX86_BUILTIN_COMIGESD,
13066 IX86_BUILTIN_COMINEQSD,
13067 IX86_BUILTIN_UCOMIEQSD,
13068 IX86_BUILTIN_UCOMILTSD,
13069 IX86_BUILTIN_UCOMILESD,
13070 IX86_BUILTIN_UCOMIGTSD,
13071 IX86_BUILTIN_UCOMIGESD,
13072 IX86_BUILTIN_UCOMINEQSD,
13074 IX86_BUILTIN_MAXPD,
13075 IX86_BUILTIN_MAXSD,
13076 IX86_BUILTIN_MINPD,
13077 IX86_BUILTIN_MINSD,
13079 IX86_BUILTIN_ANDPD,
13080 IX86_BUILTIN_ANDNPD,
13081 IX86_BUILTIN_ORPD,
13082 IX86_BUILTIN_XORPD,
13084 IX86_BUILTIN_SQRTPD,
13085 IX86_BUILTIN_SQRTSD,
13087 IX86_BUILTIN_UNPCKHPD,
13088 IX86_BUILTIN_UNPCKLPD,
13090 IX86_BUILTIN_SHUFPD,
13092 IX86_BUILTIN_LOADUPD,
13093 IX86_BUILTIN_STOREUPD,
13094 IX86_BUILTIN_MOVSD,
13096 IX86_BUILTIN_LOADHPD,
13097 IX86_BUILTIN_LOADLPD,
13099 IX86_BUILTIN_CVTDQ2PD,
13100 IX86_BUILTIN_CVTDQ2PS,
13102 IX86_BUILTIN_CVTPD2DQ,
13103 IX86_BUILTIN_CVTPD2PI,
13104 IX86_BUILTIN_CVTPD2PS,
13105 IX86_BUILTIN_CVTTPD2DQ,
13106 IX86_BUILTIN_CVTTPD2PI,
13108 IX86_BUILTIN_CVTPI2PD,
13109 IX86_BUILTIN_CVTSI2SD,
13110 IX86_BUILTIN_CVTSI642SD,
13112 IX86_BUILTIN_CVTSD2SI,
13113 IX86_BUILTIN_CVTSD2SI64,
13114 IX86_BUILTIN_CVTSD2SS,
13115 IX86_BUILTIN_CVTSS2SD,
13116 IX86_BUILTIN_CVTTSD2SI,
13117 IX86_BUILTIN_CVTTSD2SI64,
13119 IX86_BUILTIN_CVTPS2DQ,
13120 IX86_BUILTIN_CVTPS2PD,
13121 IX86_BUILTIN_CVTTPS2DQ,
13123 IX86_BUILTIN_MOVNTI,
13124 IX86_BUILTIN_MOVNTPD,
13125 IX86_BUILTIN_MOVNTDQ,
13127 /* SSE2 MMX */
13128 IX86_BUILTIN_MASKMOVDQU,
13129 IX86_BUILTIN_MOVMSKPD,
13130 IX86_BUILTIN_PMOVMSKB128,
13132 IX86_BUILTIN_PACKSSWB128,
13133 IX86_BUILTIN_PACKSSDW128,
13134 IX86_BUILTIN_PACKUSWB128,
13136 IX86_BUILTIN_PADDB128,
13137 IX86_BUILTIN_PADDW128,
13138 IX86_BUILTIN_PADDD128,
13139 IX86_BUILTIN_PADDQ128,
13140 IX86_BUILTIN_PADDSB128,
13141 IX86_BUILTIN_PADDSW128,
13142 IX86_BUILTIN_PADDUSB128,
13143 IX86_BUILTIN_PADDUSW128,
13144 IX86_BUILTIN_PSUBB128,
13145 IX86_BUILTIN_PSUBW128,
13146 IX86_BUILTIN_PSUBD128,
13147 IX86_BUILTIN_PSUBQ128,
13148 IX86_BUILTIN_PSUBSB128,
13149 IX86_BUILTIN_PSUBSW128,
13150 IX86_BUILTIN_PSUBUSB128,
13151 IX86_BUILTIN_PSUBUSW128,
13153 IX86_BUILTIN_PAND128,
13154 IX86_BUILTIN_PANDN128,
13155 IX86_BUILTIN_POR128,
13156 IX86_BUILTIN_PXOR128,
13158 IX86_BUILTIN_PAVGB128,
13159 IX86_BUILTIN_PAVGW128,
13161 IX86_BUILTIN_PCMPEQB128,
13162 IX86_BUILTIN_PCMPEQW128,
13163 IX86_BUILTIN_PCMPEQD128,
13164 IX86_BUILTIN_PCMPGTB128,
13165 IX86_BUILTIN_PCMPGTW128,
13166 IX86_BUILTIN_PCMPGTD128,
13168 IX86_BUILTIN_PMADDWD128,
13170 IX86_BUILTIN_PMAXSW128,
13171 IX86_BUILTIN_PMAXUB128,
13172 IX86_BUILTIN_PMINSW128,
13173 IX86_BUILTIN_PMINUB128,
13175 IX86_BUILTIN_PMULUDQ,
13176 IX86_BUILTIN_PMULUDQ128,
13177 IX86_BUILTIN_PMULHUW128,
13178 IX86_BUILTIN_PMULHW128,
13179 IX86_BUILTIN_PMULLW128,
13181 IX86_BUILTIN_PSADBW128,
13182 IX86_BUILTIN_PSHUFHW,
13183 IX86_BUILTIN_PSHUFLW,
13184 IX86_BUILTIN_PSHUFD,
13186 IX86_BUILTIN_PSLLW128,
13187 IX86_BUILTIN_PSLLD128,
13188 IX86_BUILTIN_PSLLQ128,
13189 IX86_BUILTIN_PSRAW128,
13190 IX86_BUILTIN_PSRAD128,
13191 IX86_BUILTIN_PSRLW128,
13192 IX86_BUILTIN_PSRLD128,
13193 IX86_BUILTIN_PSRLQ128,
13194 IX86_BUILTIN_PSLLDQI128,
13195 IX86_BUILTIN_PSLLWI128,
13196 IX86_BUILTIN_PSLLDI128,
13197 IX86_BUILTIN_PSLLQI128,
13198 IX86_BUILTIN_PSRAWI128,
13199 IX86_BUILTIN_PSRADI128,
13200 IX86_BUILTIN_PSRLDQI128,
13201 IX86_BUILTIN_PSRLWI128,
13202 IX86_BUILTIN_PSRLDI128,
13203 IX86_BUILTIN_PSRLQI128,
13205 IX86_BUILTIN_PUNPCKHBW128,
13206 IX86_BUILTIN_PUNPCKHWD128,
13207 IX86_BUILTIN_PUNPCKHDQ128,
13208 IX86_BUILTIN_PUNPCKHQDQ128,
13209 IX86_BUILTIN_PUNPCKLBW128,
13210 IX86_BUILTIN_PUNPCKLWD128,
13211 IX86_BUILTIN_PUNPCKLDQ128,
13212 IX86_BUILTIN_PUNPCKLQDQ128,
13214 IX86_BUILTIN_CLFLUSH,
13215 IX86_BUILTIN_MFENCE,
13216 IX86_BUILTIN_LFENCE,
13218 /* Prescott New Instructions. */
13219 IX86_BUILTIN_ADDSUBPS,
13220 IX86_BUILTIN_HADDPS,
13221 IX86_BUILTIN_HSUBPS,
13222 IX86_BUILTIN_MOVSHDUP,
13223 IX86_BUILTIN_MOVSLDUP,
13224 IX86_BUILTIN_ADDSUBPD,
13225 IX86_BUILTIN_HADDPD,
13226 IX86_BUILTIN_HSUBPD,
13227 IX86_BUILTIN_LDDQU,
13229 IX86_BUILTIN_MONITOR,
13230 IX86_BUILTIN_MWAIT,
13232 IX86_BUILTIN_VEC_INIT_V2SI,
13233 IX86_BUILTIN_VEC_INIT_V4HI,
13234 IX86_BUILTIN_VEC_INIT_V8QI,
13235 IX86_BUILTIN_VEC_EXT_V2DF,
13236 IX86_BUILTIN_VEC_EXT_V2DI,
13237 IX86_BUILTIN_VEC_EXT_V4SF,
13238 IX86_BUILTIN_VEC_EXT_V4SI,
13239 IX86_BUILTIN_VEC_EXT_V8HI,
13240 IX86_BUILTIN_VEC_EXT_V2SI,
13241 IX86_BUILTIN_VEC_EXT_V4HI,
13242 IX86_BUILTIN_VEC_SET_V8HI,
13243 IX86_BUILTIN_VEC_SET_V4HI,
13245 IX86_BUILTIN_MAX
13248 #define def_builtin(MASK, NAME, TYPE, CODE) \
13249 do { \
13250 if ((MASK) & target_flags \
13251 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
13252 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
13253 NULL, NULL_TREE); \
13254 } while (0)
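/* Illustrative use (the function type nodes are built further down, in
   ix86_init_mmx_sse_builtins); roughly:
     def_builtin (MASK_SSE, "__builtin_ia32_addps",
                  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);
   registers the builtin only when the SSE bit is set in target_flags. */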
13256 /* Bits for builtin_description.flag. */
13258 /* Set when we don't support the comparison natively, and should
13259 swap_comparison in order to support it. */
13260 #define BUILTIN_DESC_SWAP_OPERANDS 1
13262 struct builtin_description
13264 const unsigned int mask;
13265 const enum insn_code icode;
13266 const char *const name;
13267 const enum ix86_builtins code;
13268 const enum rtx_code comparison;
13269 const unsigned int flag;
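/* Each entry pairs the target_flags mask required for the builtin with the
   insn pattern used to expand it, its user-visible name (a zero name means
   the builtin is registered separately with an explicit type), its
   IX86_BUILTIN_* code and, for comparison builtins, the RTX comparison code
   plus flags such as BUILTIN_DESC_SWAP_OPERANDS (e.g. cmpgtps is expanded
   as an LT compare with swapped operands). */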
13272 static const struct builtin_description bdesc_comi[] =
13274 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
13275 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
13276 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
13277 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
13278 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
13279 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
13280 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
13281 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
13282 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
13283 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
13284 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
13285 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
13286 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
13287 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
13288 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
13289 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
13290 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
13291 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
13292 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
13293 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
13294 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
13295 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
13296 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
13297 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
13300 static const struct builtin_description bdesc_2arg[] =
13302 /* SSE */
13303 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
13304 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
13305 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
13306 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
13307 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
13308 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
13309 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
13310 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
13312 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
13313 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
13314 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
13315 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
13316 BUILTIN_DESC_SWAP_OPERANDS },
13317 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
13318 BUILTIN_DESC_SWAP_OPERANDS },
13319 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
13320 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
13321 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
13322 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
13323 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
13324 BUILTIN_DESC_SWAP_OPERANDS },
13325 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
13326 BUILTIN_DESC_SWAP_OPERANDS },
13327 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
13328 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
13329 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
13330 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
13331 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
13332 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
13333 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
13334 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
13335 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
13336 BUILTIN_DESC_SWAP_OPERANDS },
13337 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
13338 BUILTIN_DESC_SWAP_OPERANDS },
13339 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
13341 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
13342 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
13343 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
13344 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
13346 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
13347 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
13348 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
13349 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
13351 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
13352 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
13353 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
13354 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
13355 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
13357 /* MMX */
13358 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
13359 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
13360 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
13361 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
13362 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
13363 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
13364 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
13365 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
13367 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
13368 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
13369 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
13370 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
13371 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
13372 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
13373 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
13374 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
13376 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
13377 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
13378 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
13380 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
13381 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
13382 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
13383 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
13385 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
13386 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
13388 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
13389 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
13390 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
13391 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
13392 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
13393 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
13395 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
13396 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
13397 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
13398 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
13400 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
13401 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
13402 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
13403 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
13404 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
13405 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
13407 /* Special. */
13408 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
13409 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
13410 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
13412 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
13413 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
13414 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
13416 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
13417 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
13418 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
13419 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
13420 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
13421 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
13423 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
13424 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
13425 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
13426 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
13427 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
13428 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
13430 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
13431 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
13432 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
13433 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
13435 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
13436 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
13438 /* SSE2 */
13439 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
13440 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
13441 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
13442 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
13443 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
13444 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
13445 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
13446 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
13448 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
13449 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
13450 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
13451 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
13452 BUILTIN_DESC_SWAP_OPERANDS },
13453 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
13454 BUILTIN_DESC_SWAP_OPERANDS },
13455 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
13456 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
13457 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
13458 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
13459 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
13460 BUILTIN_DESC_SWAP_OPERANDS },
13461 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
13462 BUILTIN_DESC_SWAP_OPERANDS },
13463 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
13464 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
13465 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
13466 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
13467 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
13468 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
13469 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
13470 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
13471 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
13473 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
13474 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
13475 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
13476 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
13478 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
13479 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
13480 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
13481 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
13483 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
13484 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
13485 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
13487 /* SSE2 MMX */
13488 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
13489 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
13490 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
13491 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
13492 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
13493 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
13494 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
13495 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
13497 { MASK_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
13498 { MASK_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
13499 { MASK_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
13500 { MASK_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
13501 { MASK_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
13502 { MASK_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
13503 { MASK_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
13504 { MASK_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
13506 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
13507 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
13509 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
13510 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
13511 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
13512 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
13514 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
13515 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
13517 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
13518 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
13519 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
13520 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
13521 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
13522 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
13524 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
13525 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
13526 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
13527 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
13529 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
13530 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
13531 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
13532 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
13533 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
13534 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
13535 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
13536 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
13538 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
13539 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
13540 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
13542 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
13543 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
13545 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
13546 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
13548 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
13549 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
13550 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
13552 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
13553 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
13554 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
13556 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
13557 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
13559 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
13561 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
13562 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
13563 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
13564 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
13566 /* SSE3 */
13567 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
13568 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
13569 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
13570 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
13571 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
13572 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
13575 static const struct builtin_description bdesc_1arg[] =
13577 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
13578 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
13580 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
13581 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
13582 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
13584 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
13585 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
13586 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
13587 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
13588 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
13589 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
13591 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13592 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13594 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13596 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13597 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
13599 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13600 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13601 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13602 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13603 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
13605 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
13607 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13608 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
13609 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13610 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
13612 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13613 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13614 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13616 /* SSE3 */
13617 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
13618 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
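/* Illustrative sketch, not part of the original sources: each bdesc_*
   entry above ties a target mask and an insn pattern to a builtin code,
   and ix86_init_mmx_sse_builtins below registers the named ones as
   __builtin_ia32_* functions.  A user can reach a two-operand entry
   directly; the vector typedef is an assumption made for this example.

     typedef double v2df __attribute__ ((vector_size (16)));

     v2df add_pd (v2df a, v2df b)           -- compile with -msse2
     {
       return __builtin_ia32_addpd (a, b);  -- bdesc_2arg, CODE_FOR_addv2df3
     }
*/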
13621 static void
13622 ix86_init_builtins (void)
13624 if (TARGET_MMX)
13625 ix86_init_mmx_sse_builtins ();
13628 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13629 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
13630 builtins. */
13631 static void
13632 ix86_init_mmx_sse_builtins (void)
13634 const struct builtin_description * d;
13635 size_t i;
13637 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
13638 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
13639 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
13640 tree V2DI_type_node
13641 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
13642 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
13643 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
13644 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
13645 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
13646 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
13647 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
13649 tree pchar_type_node = build_pointer_type (char_type_node);
13650 tree pcchar_type_node = build_pointer_type (
13651 build_type_variant (char_type_node, 1, 0));
13652 tree pfloat_type_node = build_pointer_type (float_type_node);
13653 tree pcfloat_type_node = build_pointer_type (
13654 build_type_variant (float_type_node, 1, 0));
13655 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13656 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13657 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13659 /* Comparisons. */
13660 tree int_ftype_v4sf_v4sf
13661 = build_function_type_list (integer_type_node,
13662 V4SF_type_node, V4SF_type_node, NULL_TREE);
13663 tree v4si_ftype_v4sf_v4sf
13664 = build_function_type_list (V4SI_type_node,
13665 V4SF_type_node, V4SF_type_node, NULL_TREE);
13666 /* MMX/SSE/integer conversions. */
13667 tree int_ftype_v4sf
13668 = build_function_type_list (integer_type_node,
13669 V4SF_type_node, NULL_TREE);
13670 tree int64_ftype_v4sf
13671 = build_function_type_list (long_long_integer_type_node,
13672 V4SF_type_node, NULL_TREE);
13673 tree int_ftype_v8qi
13674 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13675 tree v4sf_ftype_v4sf_int
13676 = build_function_type_list (V4SF_type_node,
13677 V4SF_type_node, integer_type_node, NULL_TREE);
13678 tree v4sf_ftype_v4sf_int64
13679 = build_function_type_list (V4SF_type_node,
13680 V4SF_type_node, long_long_integer_type_node,
13681 NULL_TREE);
13682 tree v4sf_ftype_v4sf_v2si
13683 = build_function_type_list (V4SF_type_node,
13684 V4SF_type_node, V2SI_type_node, NULL_TREE);
13686 /* Miscellaneous. */
13687 tree v8qi_ftype_v4hi_v4hi
13688 = build_function_type_list (V8QI_type_node,
13689 V4HI_type_node, V4HI_type_node, NULL_TREE);
13690 tree v4hi_ftype_v2si_v2si
13691 = build_function_type_list (V4HI_type_node,
13692 V2SI_type_node, V2SI_type_node, NULL_TREE);
13693 tree v4sf_ftype_v4sf_v4sf_int
13694 = build_function_type_list (V4SF_type_node,
13695 V4SF_type_node, V4SF_type_node,
13696 integer_type_node, NULL_TREE);
13697 tree v2si_ftype_v4hi_v4hi
13698 = build_function_type_list (V2SI_type_node,
13699 V4HI_type_node, V4HI_type_node, NULL_TREE);
13700 tree v4hi_ftype_v4hi_int
13701 = build_function_type_list (V4HI_type_node,
13702 V4HI_type_node, integer_type_node, NULL_TREE);
13703 tree v4hi_ftype_v4hi_di
13704 = build_function_type_list (V4HI_type_node,
13705 V4HI_type_node, long_long_unsigned_type_node,
13706 NULL_TREE);
13707 tree v2si_ftype_v2si_di
13708 = build_function_type_list (V2SI_type_node,
13709 V2SI_type_node, long_long_unsigned_type_node,
13710 NULL_TREE);
13711 tree void_ftype_void
13712 = build_function_type (void_type_node, void_list_node);
13713 tree void_ftype_unsigned
13714 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13715 tree void_ftype_unsigned_unsigned
13716 = build_function_type_list (void_type_node, unsigned_type_node,
13717 unsigned_type_node, NULL_TREE);
13718 tree void_ftype_pcvoid_unsigned_unsigned
13719 = build_function_type_list (void_type_node, const_ptr_type_node,
13720 unsigned_type_node, unsigned_type_node,
13721 NULL_TREE);
13722 tree unsigned_ftype_void
13723 = build_function_type (unsigned_type_node, void_list_node);
13724 tree v2si_ftype_v4sf
13725 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13726 /* Loads/stores. */
13727 tree void_ftype_v8qi_v8qi_pchar
13728 = build_function_type_list (void_type_node,
13729 V8QI_type_node, V8QI_type_node,
13730 pchar_type_node, NULL_TREE);
13731 tree v4sf_ftype_pcfloat
13732 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13733 /* @@@ the type is bogus */
13734 tree v4sf_ftype_v4sf_pv2si
13735 = build_function_type_list (V4SF_type_node,
13736 V4SF_type_node, pv2si_type_node, NULL_TREE);
13737 tree void_ftype_pv2si_v4sf
13738 = build_function_type_list (void_type_node,
13739 pv2si_type_node, V4SF_type_node, NULL_TREE);
13740 tree void_ftype_pfloat_v4sf
13741 = build_function_type_list (void_type_node,
13742 pfloat_type_node, V4SF_type_node, NULL_TREE);
13743 tree void_ftype_pdi_di
13744 = build_function_type_list (void_type_node,
13745 pdi_type_node, long_long_unsigned_type_node,
13746 NULL_TREE);
13747 tree void_ftype_pv2di_v2di
13748 = build_function_type_list (void_type_node,
13749 pv2di_type_node, V2DI_type_node, NULL_TREE);
13750 /* Normal vector unops. */
13751 tree v4sf_ftype_v4sf
13752 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13754 /* Normal vector binops. */
13755 tree v4sf_ftype_v4sf_v4sf
13756 = build_function_type_list (V4SF_type_node,
13757 V4SF_type_node, V4SF_type_node, NULL_TREE);
13758 tree v8qi_ftype_v8qi_v8qi
13759 = build_function_type_list (V8QI_type_node,
13760 V8QI_type_node, V8QI_type_node, NULL_TREE);
13761 tree v4hi_ftype_v4hi_v4hi
13762 = build_function_type_list (V4HI_type_node,
13763 V4HI_type_node, V4HI_type_node, NULL_TREE);
13764 tree v2si_ftype_v2si_v2si
13765 = build_function_type_list (V2SI_type_node,
13766 V2SI_type_node, V2SI_type_node, NULL_TREE);
13767 tree di_ftype_di_di
13768 = build_function_type_list (long_long_unsigned_type_node,
13769 long_long_unsigned_type_node,
13770 long_long_unsigned_type_node, NULL_TREE);
13772 tree v2si_ftype_v2sf
13773 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13774 tree v2sf_ftype_v2si
13775 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13776 tree v2si_ftype_v2si
13777 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13778 tree v2sf_ftype_v2sf
13779 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13780 tree v2sf_ftype_v2sf_v2sf
13781 = build_function_type_list (V2SF_type_node,
13782 V2SF_type_node, V2SF_type_node, NULL_TREE);
13783 tree v2si_ftype_v2sf_v2sf
13784 = build_function_type_list (V2SI_type_node,
13785 V2SF_type_node, V2SF_type_node, NULL_TREE);
13786 tree pint_type_node = build_pointer_type (integer_type_node);
13787 tree pdouble_type_node = build_pointer_type (double_type_node);
13788 tree pcdouble_type_node = build_pointer_type (
13789 build_type_variant (double_type_node, 1, 0));
13790 tree int_ftype_v2df_v2df
13791 = build_function_type_list (integer_type_node,
13792 V2DF_type_node, V2DF_type_node, NULL_TREE);
13794 tree ti_ftype_ti_ti
13795 = build_function_type_list (intTI_type_node,
13796 intTI_type_node, intTI_type_node, NULL_TREE);
13797 tree void_ftype_pcvoid
13798 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13799 tree v4sf_ftype_v4si
13800 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13801 tree v4si_ftype_v4sf
13802 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13803 tree v2df_ftype_v4si
13804 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13805 tree v4si_ftype_v2df
13806 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13807 tree v2si_ftype_v2df
13808 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13809 tree v4sf_ftype_v2df
13810 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13811 tree v2df_ftype_v2si
13812 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13813 tree v2df_ftype_v4sf
13814 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13815 tree int_ftype_v2df
13816 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13817 tree int64_ftype_v2df
13818 = build_function_type_list (long_long_integer_type_node,
13819 V2DF_type_node, NULL_TREE);
13820 tree v2df_ftype_v2df_int
13821 = build_function_type_list (V2DF_type_node,
13822 V2DF_type_node, integer_type_node, NULL_TREE);
13823 tree v2df_ftype_v2df_int64
13824 = build_function_type_list (V2DF_type_node,
13825 V2DF_type_node, long_long_integer_type_node,
13826 NULL_TREE);
13827 tree v4sf_ftype_v4sf_v2df
13828 = build_function_type_list (V4SF_type_node,
13829 V4SF_type_node, V2DF_type_node, NULL_TREE);
13830 tree v2df_ftype_v2df_v4sf
13831 = build_function_type_list (V2DF_type_node,
13832 V2DF_type_node, V4SF_type_node, NULL_TREE);
13833 tree v2df_ftype_v2df_v2df_int
13834 = build_function_type_list (V2DF_type_node,
13835 V2DF_type_node, V2DF_type_node,
13836 integer_type_node,
13837 NULL_TREE);
13838 tree v2df_ftype_v2df_pcdouble
13839 = build_function_type_list (V2DF_type_node,
13840 V2DF_type_node, pcdouble_type_node, NULL_TREE);
13841 tree void_ftype_pdouble_v2df
13842 = build_function_type_list (void_type_node,
13843 pdouble_type_node, V2DF_type_node, NULL_TREE);
13844 tree void_ftype_pint_int
13845 = build_function_type_list (void_type_node,
13846 pint_type_node, integer_type_node, NULL_TREE);
13847 tree void_ftype_v16qi_v16qi_pchar
13848 = build_function_type_list (void_type_node,
13849 V16QI_type_node, V16QI_type_node,
13850 pchar_type_node, NULL_TREE);
13851 tree v2df_ftype_pcdouble
13852 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13853 tree v2df_ftype_v2df_v2df
13854 = build_function_type_list (V2DF_type_node,
13855 V2DF_type_node, V2DF_type_node, NULL_TREE);
13856 tree v16qi_ftype_v16qi_v16qi
13857 = build_function_type_list (V16QI_type_node,
13858 V16QI_type_node, V16QI_type_node, NULL_TREE);
13859 tree v8hi_ftype_v8hi_v8hi
13860 = build_function_type_list (V8HI_type_node,
13861 V8HI_type_node, V8HI_type_node, NULL_TREE);
13862 tree v4si_ftype_v4si_v4si
13863 = build_function_type_list (V4SI_type_node,
13864 V4SI_type_node, V4SI_type_node, NULL_TREE);
13865 tree v2di_ftype_v2di_v2di
13866 = build_function_type_list (V2DI_type_node,
13867 V2DI_type_node, V2DI_type_node, NULL_TREE);
13868 tree v2di_ftype_v2df_v2df
13869 = build_function_type_list (V2DI_type_node,
13870 V2DF_type_node, V2DF_type_node, NULL_TREE);
13871 tree v2df_ftype_v2df
13872 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13873 tree v2di_ftype_v2di_int
13874 = build_function_type_list (V2DI_type_node,
13875 V2DI_type_node, integer_type_node, NULL_TREE);
13876 tree v4si_ftype_v4si_int
13877 = build_function_type_list (V4SI_type_node,
13878 V4SI_type_node, integer_type_node, NULL_TREE);
13879 tree v8hi_ftype_v8hi_int
13880 = build_function_type_list (V8HI_type_node,
13881 V8HI_type_node, integer_type_node, NULL_TREE);
13882 tree v8hi_ftype_v8hi_v2di
13883 = build_function_type_list (V8HI_type_node,
13884 V8HI_type_node, V2DI_type_node, NULL_TREE);
13885 tree v4si_ftype_v4si_v2di
13886 = build_function_type_list (V4SI_type_node,
13887 V4SI_type_node, V2DI_type_node, NULL_TREE);
13888 tree v4si_ftype_v8hi_v8hi
13889 = build_function_type_list (V4SI_type_node,
13890 V8HI_type_node, V8HI_type_node, NULL_TREE);
13891 tree di_ftype_v8qi_v8qi
13892 = build_function_type_list (long_long_unsigned_type_node,
13893 V8QI_type_node, V8QI_type_node, NULL_TREE);
13894 tree di_ftype_v2si_v2si
13895 = build_function_type_list (long_long_unsigned_type_node,
13896 V2SI_type_node, V2SI_type_node, NULL_TREE);
13897 tree v2di_ftype_v16qi_v16qi
13898 = build_function_type_list (V2DI_type_node,
13899 V16QI_type_node, V16QI_type_node, NULL_TREE);
13900 tree v2di_ftype_v4si_v4si
13901 = build_function_type_list (V2DI_type_node,
13902 V4SI_type_node, V4SI_type_node, NULL_TREE);
13903 tree int_ftype_v16qi
13904 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13905 tree v16qi_ftype_pcchar
13906 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13907 tree void_ftype_pchar_v16qi
13908 = build_function_type_list (void_type_node,
13909 pchar_type_node, V16QI_type_node, NULL_TREE);
13911 tree float80_type;
13912 tree float128_type;
13913 tree ftype;
13915 /* The __float80 type. */
13916 if (TYPE_MODE (long_double_type_node) == XFmode)
13917 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13918 "__float80");
13919 else
13921 /* long double is not XFmode here; build a separate 80-bit type for __float80. */
13922 float80_type = make_node (REAL_TYPE);
13923 TYPE_PRECISION (float80_type) = 80;
13924 layout_type (float80_type);
13925 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13928 float128_type = make_node (REAL_TYPE);
13929 TYPE_PRECISION (float128_type) = 128;
13930 layout_type (float128_type);
13931 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
13933 /* Add all builtins that are more or less simple operations on two
13934 operands. */
13935 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13937 /* Use one of the operands; the target can have a different mode for
13938 mask-generating compares. */
13939 enum machine_mode mode;
13940 tree type;
13942 if (d->name == 0)
13943 continue;
13944 mode = insn_data[d->icode].operand[1].mode;
13946 switch (mode)
13948 case V16QImode:
13949 type = v16qi_ftype_v16qi_v16qi;
13950 break;
13951 case V8HImode:
13952 type = v8hi_ftype_v8hi_v8hi;
13953 break;
13954 case V4SImode:
13955 type = v4si_ftype_v4si_v4si;
13956 break;
13957 case V2DImode:
13958 type = v2di_ftype_v2di_v2di;
13959 break;
13960 case V2DFmode:
13961 type = v2df_ftype_v2df_v2df;
13962 break;
13963 case TImode:
13964 type = ti_ftype_ti_ti;
13965 break;
13966 case V4SFmode:
13967 type = v4sf_ftype_v4sf_v4sf;
13968 break;
13969 case V8QImode:
13970 type = v8qi_ftype_v8qi_v8qi;
13971 break;
13972 case V4HImode:
13973 type = v4hi_ftype_v4hi_v4hi;
13974 break;
13975 case V2SImode:
13976 type = v2si_ftype_v2si_v2si;
13977 break;
13978 case DImode:
13979 type = di_ftype_di_di;
13980 break;
13982 default:
13983 gcc_unreachable ();
13986 /* Override for comparisons. */
13987 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
13988 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
13989 type = v4si_ftype_v4sf_v4sf;
13991 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
13992 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
13993 type = v2di_ftype_v2df_v2df;
13995 def_builtin (d->mask, d->name, type, d->code);
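/* Worked example, for illustration only: the bdesc_2arg entry naming
   "__builtin_ia32_addpd" uses CODE_FOR_addv2df3, whose operand 1 has
   mode V2DFmode, so the switch above picks v2df_ftype_v2df_v2df.  The
   comparison override then gives "__builtin_ia32_cmpeqpd"
   (CODE_FOR_sse2_maskcmpv2df3) the v2di_ftype_v2df_v2df signature,
   since the mask compare produces an integer mask, not a V2DF value.  */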
13998 /* Add the remaining MMX insns with somewhat more complicated types. */
13999 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
14000 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
14001 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
14002 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
14004 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
14005 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
14006 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
14008 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
14009 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
14011 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
14012 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
14014 /* comi/ucomi insns. */
14015 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14016 if (d->mask == MASK_SSE2)
14017 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
14018 else
14019 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
14021 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
14022 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
14023 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
14025 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
14026 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
14027 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
14028 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
14029 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
14030 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
14031 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
14032 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
14033 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
14034 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
14035 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
14037 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
14039 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
14040 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
14042 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
14043 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
14044 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
14045 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
14047 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
14048 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
14049 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
14050 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
14052 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
14054 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
14056 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
14057 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
14058 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
14059 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
14060 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
14061 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
14063 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
14065 /* Original 3DNow! */
14066 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
14067 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
14068 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
14069 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
14070 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
14071 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
14072 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
14073 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
14074 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
14075 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
14076 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
14077 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
14078 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
14079 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
14080 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
14081 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
14082 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
14083 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
14084 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
14085 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
14087 /* 3DNow! extension as used in the Athlon CPU. */
14088 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
14089 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
14090 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
14091 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
14092 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
14093 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
14095 /* SSE2 */
14096 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
14098 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
14099 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
14101 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
14102 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
14104 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
14105 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
14106 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
14107 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
14108 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
14110 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
14111 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
14112 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
14113 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
14115 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
14116 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
14118 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
14120 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
14121 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
14123 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
14124 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
14125 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
14126 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
14127 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
14129 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
14131 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
14132 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
14133 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
14134 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
14136 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
14137 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
14138 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
14140 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
14141 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
14142 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
14143 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
14145 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
14146 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
14147 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
14149 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
14150 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
14152 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
14153 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
14155 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
14156 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
14157 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
14159 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
14160 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
14161 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
14163 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
14164 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
14166 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
14167 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
14168 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
14169 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
14171 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
14172 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
14173 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
14174 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
14176 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
14177 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
14179 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
14181 /* Prescott New Instructions. */
14182 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
14183 void_ftype_pcvoid_unsigned_unsigned,
14184 IX86_BUILTIN_MONITOR);
14185 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
14186 void_ftype_unsigned_unsigned,
14187 IX86_BUILTIN_MWAIT);
14188 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
14189 v4sf_ftype_v4sf,
14190 IX86_BUILTIN_MOVSHDUP);
14191 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
14192 v4sf_ftype_v4sf,
14193 IX86_BUILTIN_MOVSLDUP);
14194 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
14195 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
14197 /* Access to the vec_init patterns. */
14198 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
14199 integer_type_node, NULL_TREE);
14200 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
14201 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
14203 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
14204 short_integer_type_node,
14205 short_integer_type_node,
14206 short_integer_type_node, NULL_TREE);
14207 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
14208 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
14210 ftype = build_function_type_list (V8QI_type_node, char_type_node,
14211 char_type_node, char_type_node,
14212 char_type_node, char_type_node,
14213 char_type_node, char_type_node,
14214 char_type_node, NULL_TREE);
14215 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
14216 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
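/* Illustrative sketch; the mapping below is an assumption based on how
   mmintrin.h typically wraps these builtins -- verify against the
   installed header:

     #include <mmintrin.h>

     __m64 make_pair (int lo, int hi)       -- compile with -mmmx
     {
       return _mm_set_pi32 (hi, lo);        -- uses __builtin_ia32_vec_init_v2si
     }
*/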
14218 /* Access to the vec_extract patterns. */
14219 ftype = build_function_type_list (double_type_node, V2DF_type_node,
14220 integer_type_node, NULL_TREE);
14221 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
14222 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
14224 ftype = build_function_type_list (long_long_integer_type_node,
14225 V2DI_type_node, integer_type_node,
14226 NULL_TREE);
14227 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
14228 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
14230 ftype = build_function_type_list (float_type_node, V4SF_type_node,
14231 integer_type_node, NULL_TREE);
14232 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
14233 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
14235 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
14236 integer_type_node, NULL_TREE);
14237 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
14238 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
14240 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
14241 integer_type_node, NULL_TREE);
14242 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
14243 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
14245 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
14246 integer_type_node, NULL_TREE);
14247 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
14248 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
14250 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
14251 integer_type_node, NULL_TREE);
14252 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
14253 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
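/* Illustrative sketch, not part of the original sources: the vec_ext
   builtins take the vector and a constant selector, which is validated
   by get_element_number below.  The typedef is an assumption made for
   this example only.

     typedef double v2df __attribute__ ((vector_size (16)));

     double low_element (v2df x)            -- compile with -msse2
     {
       return __builtin_ia32_vec_ext_v2df (x, 0);
     }
*/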
14255 /* Access to the vec_set patterns. */
14256 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
14257 intHI_type_node,
14258 integer_type_node, NULL_TREE);
14259 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
14260 ftype, IX86_BUILTIN_VEC_SET_V8HI);
14262 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
14263 intHI_type_node,
14264 integer_type_node, NULL_TREE);
14265 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
14266 ftype, IX86_BUILTIN_VEC_SET_V4HI);
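/* Illustrative sketch; the wrapper shown is an assumption based on how
   xmmintrin.h typically uses the vec_set builtin -- verify against the
   installed header:

     #include <xmmintrin.h>

     __m64 put_word (__m64 v, int w)        -- compile with -mmmx -msse
     {
       return _mm_insert_pi16 (v, w, 2);    -- uses __builtin_ia32_vec_set_v4hi
     }
*/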
14269 /* Errors in the source file can cause expand_expr to return const0_rtx
14270 where we expect a vector. To avoid crashing, use one of the vector
14271 clear instructions. */
14272 static rtx
14273 safe_vector_operand (rtx x, enum machine_mode mode)
14275 if (x == const0_rtx)
14276 x = CONST0_RTX (mode);
14277 return x;
14280 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
14282 static rtx
14283 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
14285 rtx pat, xops[3];
14286 tree arg0 = TREE_VALUE (arglist);
14287 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14288 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14289 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14290 enum machine_mode tmode = insn_data[icode].operand[0].mode;
14291 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
14292 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
14294 if (VECTOR_MODE_P (mode0))
14295 op0 = safe_vector_operand (op0, mode0);
14296 if (VECTOR_MODE_P (mode1))
14297 op1 = safe_vector_operand (op1, mode1);
14299 if (optimize || !target
14300 || GET_MODE (target) != tmode
14301 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14302 target = gen_reg_rtx (tmode);
14304 if (GET_MODE (op1) == SImode && mode1 == TImode)
14306 rtx x = gen_reg_rtx (V4SImode);
14307 emit_insn (gen_sse2_loadd (x, op1));
14308 op1 = gen_lowpart (TImode, x);
14311 /* The insn must want input operands in the same modes as the
14312 result. */
14313 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
14314 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
14316 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
14317 op0 = copy_to_mode_reg (mode0, op0);
14318 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
14319 op1 = copy_to_mode_reg (mode1, op1);
14321 /* ??? Using ix86_fixup_binary_operands is problematic when
14322 we've got mismatched modes. Fake it. */
14324 xops[0] = target;
14325 xops[1] = op0;
14326 xops[2] = op1;
14328 if (tmode == mode0 && tmode == mode1)
14330 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
14331 op0 = xops[1];
14332 op1 = xops[2];
14334 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
14336 op0 = force_reg (mode0, op0);
14337 op1 = force_reg (mode1, op1);
14338 target = gen_reg_rtx (tmode);
14341 pat = GEN_FCN (icode) (target, op0, op1);
14342 if (! pat)
14343 return 0;
14344 emit_insn (pat);
14345 return target;
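/* Worked example, for illustration only: expanding
   __builtin_ia32_paddw128 arrives here with icode == CODE_FOR_addv8hi3;
   the target and both operands are V8HImode, so the tmode == mode0 ==
   mode1 path calls ix86_fixup_binary_operands and a single V8HImode
   add insn is emitted.  */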
14348 /* Subroutine of ix86_expand_builtin to take care of stores. */
14350 static rtx
14351 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
14353 rtx pat;
14354 tree arg0 = TREE_VALUE (arglist);
14355 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14356 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14357 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14358 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
14359 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
14361 if (VECTOR_MODE_P (mode1))
14362 op1 = safe_vector_operand (op1, mode1);
14364 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14365 op1 = copy_to_mode_reg (mode1, op1);
14367 pat = GEN_FCN (icode) (op0, op1);
14368 if (pat)
14369 emit_insn (pat);
14370 return 0;
14373 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
14375 static rtx
14376 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
14377 rtx target, int do_load)
14379 rtx pat;
14380 tree arg0 = TREE_VALUE (arglist);
14381 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14382 enum machine_mode tmode = insn_data[icode].operand[0].mode;
14383 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
14385 if (optimize || !target
14386 || GET_MODE (target) != tmode
14387 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14388 target = gen_reg_rtx (tmode);
14389 if (do_load)
14390 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14391 else
14393 if (VECTOR_MODE_P (mode0))
14394 op0 = safe_vector_operand (op0, mode0);
14396 if ((optimize && !register_operand (op0, mode0))
14397 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14398 op0 = copy_to_mode_reg (mode0, op0);
14401 pat = GEN_FCN (icode) (target, op0);
14402 if (! pat)
14403 return 0;
14404 emit_insn (pat);
14405 return target;
14408 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
14409 sqrtss, rsqrtss, rcpss. */
14411 static rtx
14412 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
14414 rtx pat;
14415 tree arg0 = TREE_VALUE (arglist);
14416 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14417 enum machine_mode tmode = insn_data[icode].operand[0].mode;
14418 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
14420 if (optimize || !target
14421 || GET_MODE (target) != tmode
14422 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14423 target = gen_reg_rtx (tmode);
14425 if (VECTOR_MODE_P (mode0))
14426 op0 = safe_vector_operand (op0, mode0);
14428 if ((optimize && !register_operand (op0, mode0))
14429 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14430 op0 = copy_to_mode_reg (mode0, op0);
14432 op1 = op0;
14433 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
14434 op1 = copy_to_mode_reg (mode0, op1);
14436 pat = GEN_FCN (icode) (target, op0, op1);
14437 if (! pat)
14438 return 0;
14439 emit_insn (pat);
14440 return target;
14443 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
14445 static rtx
14446 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
14447 rtx target)
14449 rtx pat;
14450 tree arg0 = TREE_VALUE (arglist);
14451 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14452 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14453 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14454 rtx op2;
14455 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
14456 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
14457 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
14458 enum rtx_code comparison = d->comparison;
14460 if (VECTOR_MODE_P (mode0))
14461 op0 = safe_vector_operand (op0, mode0);
14462 if (VECTOR_MODE_P (mode1))
14463 op1 = safe_vector_operand (op1, mode1);
14465 /* Swap operands if we have a comparison that isn't available in
14466 hardware. */
14467 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
14469 rtx tmp = gen_reg_rtx (mode1);
14470 emit_move_insn (tmp, op1);
14471 op1 = op0;
14472 op0 = tmp;
14475 if (optimize || !target
14476 || GET_MODE (target) != tmode
14477 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
14478 target = gen_reg_rtx (tmode);
14480 if ((optimize && !register_operand (op0, mode0))
14481 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
14482 op0 = copy_to_mode_reg (mode0, op0);
14483 if ((optimize && !register_operand (op1, mode1))
14484 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
14485 op1 = copy_to_mode_reg (mode1, op1);
14487 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
14488 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
14489 if (! pat)
14490 return 0;
14491 emit_insn (pat);
14492 return target;
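/* Worked example, for illustration only: __builtin_ia32_cmpgtpd is
   listed in bdesc_2arg with comparison code LT and
   BUILTIN_DESC_SWAP_OPERANDS, so the code above copies op1 into a fresh
   register, exchanges the operands and emits the LT mask compare:
   a > b is computed as b < a.  */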
14495 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
14497 static rtx
14498 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
14499 rtx target)
14501 rtx pat;
14502 tree arg0 = TREE_VALUE (arglist);
14503 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14504 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14505 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14506 rtx op2;
14507 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
14508 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
14509 enum rtx_code comparison = d->comparison;
14511 if (VECTOR_MODE_P (mode0))
14512 op0 = safe_vector_operand (op0, mode0);
14513 if (VECTOR_MODE_P (mode1))
14514 op1 = safe_vector_operand (op1, mode1);
14516 /* Swap operands if we have a comparison that isn't available in
14517 hardware. */
14518 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
14520 rtx tmp = op1;
14521 op1 = op0;
14522 op0 = tmp;
14525 target = gen_reg_rtx (SImode);
14526 emit_move_insn (target, const0_rtx);
14527 target = gen_rtx_SUBREG (QImode, target, 0);
14529 if ((optimize && !register_operand (op0, mode0))
14530 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14531 op0 = copy_to_mode_reg (mode0, op0);
14532 if ((optimize && !register_operand (op1, mode1))
14533 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14534 op1 = copy_to_mode_reg (mode1, op1);
14536 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
14537 pat = GEN_FCN (d->icode) (op0, op1);
14538 if (! pat)
14539 return 0;
14540 emit_insn (pat);
14541 emit_insn (gen_rtx_SET (VOIDmode,
14542 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
14543 gen_rtx_fmt_ee (comparison, QImode,
14544 SET_DEST (pat),
14545 const0_rtx)));
14547 return SUBREG_REG (target);
14550 /* Return the integer constant in ARG. Constrain it to be in the range
14551 of the subparts of VEC_TYPE; issue an error if not. */
14553 static int
14554 get_element_number (tree vec_type, tree arg)
14556 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
14558 if (!host_integerp (arg, 1)
14559 || (elt = tree_low_cst (arg, 1), elt > max))
14561 error ("selector must be an integer constant in the range 0..%i", max);
14562 return 0;
14565 return elt;
14568 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
14569 ix86_expand_vector_init. We DO have language-level syntax for this, in
14570 the form of (type){ init-list }. Except that since we can't place emms
14571 instructions from inside the compiler, we can't allow the use of MMX
14572 registers unless the user explicitly asks for it. So we do *not* define
14573 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
14574 we have builtins invoked by mmintrin.h that give us license to emit
14575 these sorts of instructions. */
14577 static rtx
14578 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
14580 enum machine_mode tmode = TYPE_MODE (type);
14581 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
14582 int i, n_elt = GET_MODE_NUNITS (tmode);
14583 rtvec v = rtvec_alloc (n_elt);
14585 gcc_assert (VECTOR_MODE_P (tmode));
14587 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
14589 rtx x = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14590 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
14593 gcc_assert (arglist == NULL);
14595 if (!target || !register_operand (target, tmode))
14596 target = gen_reg_rtx (tmode);
14598 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
14599 return target;
14602 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
14603 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
14604 had a language-level syntax for referencing vector elements. */
14606 static rtx
14607 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
14609 enum machine_mode tmode, mode0;
14610 tree arg0, arg1;
14611 int elt;
14612 rtx op0;
14614 arg0 = TREE_VALUE (arglist);
14615 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14617 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14618 elt = get_element_number (TREE_TYPE (arg0), arg1);
14620 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14621 mode0 = TYPE_MODE (TREE_TYPE (arg0));
14622 gcc_assert (VECTOR_MODE_P (mode0));
14624 op0 = force_reg (mode0, op0);
14626 if (optimize || !target || !register_operand (target, tmode))
14627 target = gen_reg_rtx (tmode);
14629 ix86_expand_vector_extract (true, target, op0, elt);
14631 return target;
14634 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
14635 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
14636 a language-level syntax for referencing vector elements. */
14638 static rtx
14639 ix86_expand_vec_set_builtin (tree arglist)
14641 enum machine_mode tmode, mode1;
14642 tree arg0, arg1, arg2;
14643 int elt;
14644 rtx op0, op1;
14646 arg0 = TREE_VALUE (arglist);
14647 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14648 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14650 tmode = TYPE_MODE (TREE_TYPE (arg0));
14651 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14652 gcc_assert (VECTOR_MODE_P (tmode));
14654 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
14655 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
14656 elt = get_element_number (TREE_TYPE (arg0), arg2);
14658 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
14659 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
14661 op0 = force_reg (tmode, op0);
14662 op1 = force_reg (mode1, op1);
14664 ix86_expand_vector_set (true, op0, op1, elt);
14666 return op0;
14669 /* Expand an expression EXP that calls a built-in function,
14670 with result going to TARGET if that's convenient
14671 (and in mode MODE if that's convenient).
14672 SUBTARGET may be used as the target for computing one of EXP's operands.
14673 IGNORE is nonzero if the value is to be ignored. */
14675 static rtx
14676 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14677 enum machine_mode mode ATTRIBUTE_UNUSED,
14678 int ignore ATTRIBUTE_UNUSED)
14680 const struct builtin_description *d;
14681 size_t i;
14682 enum insn_code icode;
14683 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
14684 tree arglist = TREE_OPERAND (exp, 1);
14685 tree arg0, arg1, arg2;
14686 rtx op0, op1, op2, pat;
14687 enum machine_mode tmode, mode0, mode1, mode2;
14688 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
14690 switch (fcode)
14692 case IX86_BUILTIN_EMMS:
14693 emit_insn (gen_mmx_emms ());
14694 return 0;
14696 case IX86_BUILTIN_SFENCE:
14697 emit_insn (gen_sse_sfence ());
14698 return 0;
14700 case IX86_BUILTIN_MASKMOVQ:
14701 case IX86_BUILTIN_MASKMOVDQU:
14702 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14703 ? CODE_FOR_mmx_maskmovq
14704 : CODE_FOR_sse2_maskmovdqu);
14705 /* Note the arg order is different from the operand order. */
14706 arg1 = TREE_VALUE (arglist);
14707 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14708 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14709 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14710 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14711 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14712 mode0 = insn_data[icode].operand[0].mode;
14713 mode1 = insn_data[icode].operand[1].mode;
14714 mode2 = insn_data[icode].operand[2].mode;
14716 op0 = force_reg (Pmode, op0);
14717 op0 = gen_rtx_MEM (mode1, op0);
14719 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14720 op0 = copy_to_mode_reg (mode0, op0);
14721 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14722 op1 = copy_to_mode_reg (mode1, op1);
14723 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14724 op2 = copy_to_mode_reg (mode2, op2);
14725 pat = GEN_FCN (icode) (op0, op1, op2);
14726 if (! pat)
14727 return 0;
14728 emit_insn (pat);
14729 return 0;
14731 case IX86_BUILTIN_SQRTSS:
14732 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
14733 case IX86_BUILTIN_RSQRTSS:
14734 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
14735 case IX86_BUILTIN_RCPSS:
14736 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
14738 case IX86_BUILTIN_LOADUPS:
14739 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14741 case IX86_BUILTIN_STOREUPS:
14742 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14744 case IX86_BUILTIN_LOADHPS:
14745 case IX86_BUILTIN_LOADLPS:
14746 case IX86_BUILTIN_LOADHPD:
14747 case IX86_BUILTIN_LOADLPD:
14748 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
14749 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
14750 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
14751 : CODE_FOR_sse2_loadlpd);
14752 arg0 = TREE_VALUE (arglist);
14753 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14754 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14755 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14756 tmode = insn_data[icode].operand[0].mode;
14757 mode0 = insn_data[icode].operand[1].mode;
14758 mode1 = insn_data[icode].operand[2].mode;
14760 op0 = force_reg (mode0, op0);
14761 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14762 if (optimize || target == 0
14763 || GET_MODE (target) != tmode
14764 || !register_operand (target, tmode))
14765 target = gen_reg_rtx (tmode);
14766 pat = GEN_FCN (icode) (target, op0, op1);
14767 if (! pat)
14768 return 0;
14769 emit_insn (pat);
14770 return target;
14772 case IX86_BUILTIN_STOREHPS:
14773 case IX86_BUILTIN_STORELPS:
14774 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
14775 : CODE_FOR_sse_storelps);
14776 arg0 = TREE_VALUE (arglist);
14777 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14778 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14779 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14780 mode0 = insn_data[icode].operand[0].mode;
14781 mode1 = insn_data[icode].operand[1].mode;
14783 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14784 op1 = force_reg (mode1, op1);
14786 pat = GEN_FCN (icode) (op0, op1);
14787 if (! pat)
14788 return 0;
14789 emit_insn (pat);
14790 return const0_rtx;
14792 case IX86_BUILTIN_MOVNTPS:
14793 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14794 case IX86_BUILTIN_MOVNTQ:
14795 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14797 case IX86_BUILTIN_LDMXCSR:
14798 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14799 target = assign_386_stack_local (SImode, 0);
14800 emit_move_insn (target, op0);
14801 emit_insn (gen_sse_ldmxcsr (target));
14802 return 0;
14804 case IX86_BUILTIN_STMXCSR:
14805 target = assign_386_stack_local (SImode, 0);
14806 emit_insn (gen_sse_stmxcsr (target));
14807 return copy_to_mode_reg (SImode, target);
14809 case IX86_BUILTIN_SHUFPS:
14810 case IX86_BUILTIN_SHUFPD:
14811 icode = (fcode == IX86_BUILTIN_SHUFPS
14812 ? CODE_FOR_sse_shufps
14813 : CODE_FOR_sse2_shufpd);
14814 arg0 = TREE_VALUE (arglist);
14815 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14816 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14817 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14818 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14819 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14820 tmode = insn_data[icode].operand[0].mode;
14821 mode0 = insn_data[icode].operand[1].mode;
14822 mode1 = insn_data[icode].operand[2].mode;
14823 mode2 = insn_data[icode].operand[3].mode;
14825 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14826 op0 = copy_to_mode_reg (mode0, op0);
14827 if ((optimize && !register_operand (op1, mode1))
14828 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
14829 op1 = copy_to_mode_reg (mode1, op1);
14830 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14832 /* @@@ better error message */
14833 error ("mask must be an immediate");
14834 return gen_reg_rtx (tmode);
14836 if (optimize || target == 0
14837 || GET_MODE (target) != tmode
14838 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14839 target = gen_reg_rtx (tmode);
14840 pat = GEN_FCN (icode) (target, op0, op1, op2);
14841 if (! pat)
14842 return 0;
14843 emit_insn (pat);
14844 return target;
14846 case IX86_BUILTIN_PSHUFW:
14847 case IX86_BUILTIN_PSHUFD:
14848 case IX86_BUILTIN_PSHUFHW:
14849 case IX86_BUILTIN_PSHUFLW:
14850 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14851 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14852 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14853 : CODE_FOR_mmx_pshufw);
14854 arg0 = TREE_VALUE (arglist);
14855 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14856 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14857 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14858 tmode = insn_data[icode].operand[0].mode;
14859 mode1 = insn_data[icode].operand[1].mode;
14860 mode2 = insn_data[icode].operand[2].mode;
14862 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14863 op0 = copy_to_mode_reg (mode1, op0);
14864 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14866 /* @@@ better error message */
14867 error ("mask must be an immediate");
14868 return const0_rtx;
14870 if (target == 0
14871 || GET_MODE (target) != tmode
14872 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14873 target = gen_reg_rtx (tmode);
14874 pat = GEN_FCN (icode) (target, op0, op1);
14875 if (! pat)
14876 return 0;
14877 emit_insn (pat);
14878 return target;
14880 case IX86_BUILTIN_PSLLDQI128:
14881 case IX86_BUILTIN_PSRLDQI128:
14882 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14883 : CODE_FOR_sse2_lshrti3);
14884 arg0 = TREE_VALUE (arglist);
14885 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14886 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14887 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14888 tmode = insn_data[icode].operand[0].mode;
14889 mode1 = insn_data[icode].operand[1].mode;
14890 mode2 = insn_data[icode].operand[2].mode;
14892 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14894 op0 = copy_to_reg (op0);
14895 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14897 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14899 error ("shift must be an immediate");
14900 return const0_rtx;
14902 target = gen_reg_rtx (V2DImode);
14903 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14904 if (! pat)
14905 return 0;
14906 emit_insn (pat);
14907 return target;
14909 case IX86_BUILTIN_FEMMS:
14910 emit_insn (gen_mmx_femms ());
14911 return NULL_RTX;
14913 case IX86_BUILTIN_PAVGUSB:
14914 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
14916 case IX86_BUILTIN_PF2ID:
14917 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
14919 case IX86_BUILTIN_PFACC:
14920 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
14922 case IX86_BUILTIN_PFADD:
14923 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
14925 case IX86_BUILTIN_PFCMPEQ:
14926 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
14928 case IX86_BUILTIN_PFCMPGE:
14929 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
14931 case IX86_BUILTIN_PFCMPGT:
14932 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
14934 case IX86_BUILTIN_PFMAX:
14935 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
14937 case IX86_BUILTIN_PFMIN:
14938 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
14940 case IX86_BUILTIN_PFMUL:
14941 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
14943 case IX86_BUILTIN_PFRCP:
14944 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
14946 case IX86_BUILTIN_PFRCPIT1:
14947 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
14949 case IX86_BUILTIN_PFRCPIT2:
14950 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
14952 case IX86_BUILTIN_PFRSQIT1:
14953 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
14955 case IX86_BUILTIN_PFRSQRT:
14956 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
14958 case IX86_BUILTIN_PFSUB:
14959 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
14961 case IX86_BUILTIN_PFSUBR:
14962 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
14964 case IX86_BUILTIN_PI2FD:
14965 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
14967 case IX86_BUILTIN_PMULHRW:
14968 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
14970 case IX86_BUILTIN_PF2IW:
14971 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
14973 case IX86_BUILTIN_PFNACC:
14974 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
14976 case IX86_BUILTIN_PFPNACC:
14977 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
14979 case IX86_BUILTIN_PI2FW:
14980 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
14982 case IX86_BUILTIN_PSWAPDSI:
14983 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
14985 case IX86_BUILTIN_PSWAPDSF:
14986 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
14988 case IX86_BUILTIN_SQRTSD:
14989 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
14990 case IX86_BUILTIN_LOADUPD:
14991 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14992 case IX86_BUILTIN_STOREUPD:
14993 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14995 case IX86_BUILTIN_MFENCE:
14996 emit_insn (gen_sse2_mfence ());
14997 return 0;
14998 case IX86_BUILTIN_LFENCE:
14999 emit_insn (gen_sse2_lfence ());
15000 return 0;
15002 case IX86_BUILTIN_CLFLUSH:
15003 arg0 = TREE_VALUE (arglist);
15004 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15005 icode = CODE_FOR_sse2_clflush;
15006 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
15007 op0 = copy_to_mode_reg (Pmode, op0);
15009 emit_insn (gen_sse2_clflush (op0));
15010 return 0;
15012 case IX86_BUILTIN_MOVNTPD:
15013 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
15014 case IX86_BUILTIN_MOVNTDQ:
15015 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
15016 case IX86_BUILTIN_MOVNTI:
15017 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
15019 case IX86_BUILTIN_LOADDQU:
15020 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
15021 case IX86_BUILTIN_STOREDQU:
15022 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
15024 case IX86_BUILTIN_MONITOR:
15025 arg0 = TREE_VALUE (arglist);
15026 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15027 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15028 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15029 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15030 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
15031 if (!REG_P (op0))
15032 op0 = copy_to_mode_reg (SImode, op0);
15033 if (!REG_P (op1))
15034 op1 = copy_to_mode_reg (SImode, op1);
15035 if (!REG_P (op2))
15036 op2 = copy_to_mode_reg (SImode, op2);
15037 emit_insn (gen_sse3_monitor (op0, op1, op2));
15038 return 0;
15040 case IX86_BUILTIN_MWAIT:
15041 arg0 = TREE_VALUE (arglist);
15042 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15043 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15044 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15045 if (!REG_P (op0))
15046 op0 = copy_to_mode_reg (SImode, op0);
15047 if (!REG_P (op1))
15048 op1 = copy_to_mode_reg (SImode, op1);
15049 emit_insn (gen_sse3_mwait (op0, op1));
15050 return 0;
15052 case IX86_BUILTIN_LDDQU:
15053 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
15054 target, 1);
15056 case IX86_BUILTIN_VEC_INIT_V2SI:
15057 case IX86_BUILTIN_VEC_INIT_V4HI:
15058 case IX86_BUILTIN_VEC_INIT_V8QI:
15059 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
15061 case IX86_BUILTIN_VEC_EXT_V2DF:
15062 case IX86_BUILTIN_VEC_EXT_V2DI:
15063 case IX86_BUILTIN_VEC_EXT_V4SF:
15064 case IX86_BUILTIN_VEC_EXT_V4SI:
15065 case IX86_BUILTIN_VEC_EXT_V8HI:
15066 case IX86_BUILTIN_VEC_EXT_V2SI:
15067 case IX86_BUILTIN_VEC_EXT_V4HI:
15068 return ix86_expand_vec_ext_builtin (arglist, target);
15070 case IX86_BUILTIN_VEC_SET_V8HI:
15071 case IX86_BUILTIN_VEC_SET_V4HI:
15072 return ix86_expand_vec_set_builtin (arglist);
15074 default:
15075 break;
15078 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15079 if (d->code == fcode)
15081 /* Compares are treated specially. */
15082 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15083 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
15084 || d->icode == CODE_FOR_sse2_maskcmpv2df3
15085 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15086 return ix86_expand_sse_compare (d, arglist, target);
15088 return ix86_expand_binop_builtin (d->icode, arglist, target);
15091 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15092 if (d->code == fcode)
15093 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
15095 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15096 if (d->code == fcode)
15097 return ix86_expand_sse_comi (d, arglist, target);
15099 gcc_unreachable ();
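/* Illustrative note (not from the original source): a builtin with no
   special case in the switch above, e.g. IX86_BUILTIN_PADDW for
   __builtin_ia32_paddw, is found in bdesc_2arg by the first loop and
   expanded via ix86_expand_binop_builtin with the icode recorded there;
   only the SSE mask-compare icodes listed above divert to
   ix86_expand_sse_compare instead.  */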
15102 /* Store OPERAND to memory after reload has completed. This means
15103 that we can't easily use assign_stack_local. */
15105 ix86_force_to_memory (enum machine_mode mode, rtx operand)
15107 rtx result;
15109 gcc_assert (reload_completed);
15110 if (TARGET_RED_ZONE)
15112 result = gen_rtx_MEM (mode,
15113 gen_rtx_PLUS (Pmode,
15114 stack_pointer_rtx,
15115 GEN_INT (-RED_ZONE_SIZE)));
15116 emit_move_insn (result, operand);
15118 else if (!TARGET_RED_ZONE && TARGET_64BIT)
15120 switch (mode)
15122 case HImode:
15123 case SImode:
15124 operand = gen_lowpart (DImode, operand);
15125 /* FALLTHRU */
15126 case DImode:
15127 emit_insn (
15128 gen_rtx_SET (VOIDmode,
15129 gen_rtx_MEM (DImode,
15130 gen_rtx_PRE_DEC (DImode,
15131 stack_pointer_rtx)),
15132 operand));
15133 break;
15134 default:
15135 gcc_unreachable ();
15137 result = gen_rtx_MEM (mode, stack_pointer_rtx);
15139 else
15141 switch (mode)
15143 case DImode:
15145 rtx operands[2];
15146 split_di (&operand, 1, operands, operands + 1);
15147 emit_insn (
15148 gen_rtx_SET (VOIDmode,
15149 gen_rtx_MEM (SImode,
15150 gen_rtx_PRE_DEC (Pmode,
15151 stack_pointer_rtx)),
15152 operands[1]));
15153 emit_insn (
15154 gen_rtx_SET (VOIDmode,
15155 gen_rtx_MEM (SImode,
15156 gen_rtx_PRE_DEC (Pmode,
15157 stack_pointer_rtx)),
15158 operands[0]));
15160 break;
15161 case HImode:
15162 /* It is better to store HImodes as SImodes. */
15163 if (!TARGET_PARTIAL_REG_STALL)
15164 operand = gen_lowpart (SImode, operand);
15165 /* FALLTHRU */
15166 case SImode:
15167 emit_insn (
15168 gen_rtx_SET (VOIDmode,
15169 gen_rtx_MEM (GET_MODE (operand),
15170 gen_rtx_PRE_DEC (SImode,
15171 stack_pointer_rtx)),
15172 operand));
15173 break;
15174 default:
15175 gcc_unreachable ();
15177 result = gen_rtx_MEM (mode, stack_pointer_rtx);
15179 return result;
15182 /* Free operand from the memory. */
15183 void
15184 ix86_free_from_memory (enum machine_mode mode)
15186 if (!TARGET_RED_ZONE)
15188 int size;
15190 if (mode == DImode || TARGET_64BIT)
15191 size = 8;
15192 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
15193 size = 2;
15194 else
15195 size = 4;
15196 /* Use LEA to deallocate stack space. In peephole2 it will be converted
15197 to pop or add instruction if registers are available. */
15198 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
15199 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
15200 GEN_INT (size))));
15204 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
15205 QImode must go into class Q_REGS.
15206 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
15207 movdf to do mem-to-mem moves through integer regs. */
15208 enum reg_class
15209 ix86_preferred_reload_class (rtx x, enum reg_class class)
15211 /* We're only allowed to return a subclass of CLASS. Many of the
15212 following checks fail for NO_REGS, so eliminate that early. */
15213 if (class == NO_REGS)
15214 return NO_REGS;
15216 /* All classes can load zeros. */
15217 if (x == CONST0_RTX (GET_MODE (x)))
15218 return class;
15220 /* Floating-point constants need more complex checks. */
15221 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
15223 /* General regs can load everything. */
15224 if (reg_class_subset_p (class, GENERAL_REGS))
15225 return class;
15227 /* Floats can load 0 and 1 plus some others. Note that we eliminated
15228 zero above. We only want to wind up preferring 80387 registers if
15229 we plan on doing computation with them. */
15230 if (TARGET_80387
15231 && (TARGET_MIX_SSE_I387
15232 || !(TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (x))))
15233 && standard_80387_constant_p (x))
15235 /* Limit class to non-sse. */
15236 if (class == FLOAT_SSE_REGS)
15237 return FLOAT_REGS;
15238 if (class == FP_TOP_SSE_REGS)
15239 return FP_TOP_REG;
15240 if (class == FP_SECOND_SSE_REGS)
15241 return FP_SECOND_REG;
15242 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
15243 return class;
15246 return NO_REGS;
15248 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
15249 return NO_REGS;
15250 if (MAYBE_SSE_CLASS_P (class) && CONSTANT_P (x))
15251 return NO_REGS;
15253 /* Generally when we see PLUS here, it's the function invariant
15254 (plus soft-fp const_int). Which can only be computed into general
15255 regs. */
15256 if (GET_CODE (x) == PLUS)
15257 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
15259 /* QImode constants are easy to load, but non-constant QImode data
15260 must go into Q_REGS. */
15261 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
15263 if (reg_class_subset_p (class, Q_REGS))
15264 return class;
15265 if (reg_class_subset_p (Q_REGS, class))
15266 return Q_REGS;
15267 return NO_REGS;
15270 return class;
15273 /* If we are copying between general and FP registers, we need a memory
15274 location. The same is true for SSE and MMX registers.
15276 The macro can't work reliably when one of the CLASSES is a class containing
15277 registers from multiple units (SSE, MMX, integer). We avoid this by never
15278 combining those units in a single alternative in the machine description.
15279 Ensure that this constraint holds to avoid unexpected surprises.
15281 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
15282 enforce these sanity checks. */
15285 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
15286 enum machine_mode mode, int strict)
15288 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
15289 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
15290 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
15291 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
15292 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
15293 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
15295 gcc_assert (!strict);
15296 return true;
15299 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
15300 return true;
15302 /* ??? This is a lie. We do have moves between mmx/general, and for
15303 mmx/sse2. But by saying we need secondary memory we discourage the
15304 register allocator from using the mmx registers unless needed. */
15305 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
15306 return true;
15308 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
15310 /* SSE1 doesn't have any direct moves from other classes. */
15311 if (!TARGET_SSE2)
15312 return true;
15314 /* If the target says that inter-unit moves are more expensive
15315 than moving through memory, then don't generate them. */
15316 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
15317 return true;
15319 /* Between SSE and general, we have moves no larger than word size. */
15320 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
15321 return true;
15323 /* ??? For the cost of one register reformat penalty, we could use
15324 the same instructions to move SFmode and DFmode data, but the
15325 relevant move patterns don't support those alternatives. */
15326 if (mode == SFmode || mode == DFmode)
15327 return true;
15330 return false;
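/* Example of the rules above (illustrative): with SSE2 and inter-unit
   moves enabled, an SImode copy between a general register and an SSE
   register needs no secondary memory, while a DFmode copy between the
   same classes always does - it is wider than a word on 32-bit targets
   and is excluded explicitly by the SFmode/DFmode check.  */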
15333 /* Return true if the registers in CLASS cannot represent the change from
15334 modes FROM to TO. */
15336 bool
15337 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
15338 enum reg_class class)
15340 if (from == to)
15341 return false;
15343 /* x87 registers can't do subreg at all, as all values are reformatted
15344 to extended precision. */
15345 if (MAYBE_FLOAT_CLASS_P (class))
15346 return true;
15348 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
15350 /* Vector registers do not support QI or HImode loads. If we don't
15351 disallow a change to these modes, reload will assume it's ok to
15352 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
15353 the vec_dupv4hi pattern. */
15354 if (GET_MODE_SIZE (from) < 4)
15355 return true;
15357 /* Vector registers do not support subreg with nonzero offsets, which
15358 are otherwise valid for integer registers. Since we can't see
15359 whether we have a nonzero offset from here, prohibit all
15360 nonparadoxical subregs changing size. */
15361 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
15362 return true;
15365 return false;
15368 /* Return the cost of moving data from a register in class CLASS1 to
15369 one in class CLASS2.
15371 It is not required that the cost always equal 2 when FROM is the same as TO;
15372 on some machines it is expensive to move between registers if they are not
15373 general registers. */
15376 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
15377 enum reg_class class2)
15379 /* In case we require secondary memory, compute cost of the store followed
15380 by load. In order to avoid bad register allocation choices, we need
15381 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
15383 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
15385 int cost = 1;
15387 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
15388 MEMORY_MOVE_COST (mode, class1, 1));
15389 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
15390 MEMORY_MOVE_COST (mode, class2, 1));
15392 /* When copying from a general-purpose register we may emit multiple
15393 stores followed by a single load, causing a memory size mismatch stall.
15394 Count this as an arbitrarily high cost of 20. */
15395 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
15396 cost += 20;
15398 /* In the case of FP/MMX moves, the registers actually overlap, and we
15399 have to switch modes in order to treat them differently. */
15400 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
15401 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
15402 cost += 20;
15404 return cost;
15407 /* Moves between SSE/MMX and integer unit are expensive. */
15408 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
15409 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
15410 return ix86_cost->mmxsse_to_integer;
15411 if (MAYBE_FLOAT_CLASS_P (class1))
15412 return ix86_cost->fp_move;
15413 if (MAYBE_SSE_CLASS_P (class1))
15414 return ix86_cost->sse_move;
15415 if (MAYBE_MMX_CLASS_P (class1))
15416 return ix86_cost->mmx_move;
15417 return 2;
15420 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
15422 bool
15423 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
15425 /* Flags and only flags can only hold CCmode values. */
15426 if (CC_REGNO_P (regno))
15427 return GET_MODE_CLASS (mode) == MODE_CC;
15428 if (GET_MODE_CLASS (mode) == MODE_CC
15429 || GET_MODE_CLASS (mode) == MODE_RANDOM
15430 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
15431 return 0;
15432 if (FP_REGNO_P (regno))
15433 return VALID_FP_MODE_P (mode);
15434 if (SSE_REGNO_P (regno))
15436 /* We implement the move patterns for all vector modes into and
15437 out of SSE registers, even when no operation instructions
15438 are available. */
15439 return (VALID_SSE_REG_MODE (mode)
15440 || VALID_SSE2_REG_MODE (mode)
15441 || VALID_MMX_REG_MODE (mode)
15442 || VALID_MMX_REG_MODE_3DNOW (mode));
15444 if (MMX_REGNO_P (regno))
15446 /* We implement the move patterns for 3DNOW modes even in MMX mode,
15447 so if the register is available at all, then we can move data of
15448 the given mode into or out of it. */
15449 return (VALID_MMX_REG_MODE (mode)
15450 || VALID_MMX_REG_MODE_3DNOW (mode));
15453 if (mode == QImode)
15455 /* Take care for QImode values - they can be in non-QI regs,
15456 but then they do cause partial register stalls. */
15457 if (regno < 4 || TARGET_64BIT)
15458 return 1;
15459 if (!TARGET_PARTIAL_REG_STALL)
15460 return 1;
15461 return reload_in_progress || reload_completed;
15463 /* We handle both integers and floats in the general purpose registers. */
15464 else if (VALID_INT_MODE_P (mode))
15465 return 1;
15466 else if (VALID_FP_MODE_P (mode))
15467 return 1;
15468 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
15469 on to use that value in smaller contexts, this can easily force a
15470 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
15471 supporting DImode, allow it. */
15472 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
15473 return 1;
15475 return 0;
15478 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
15479 tieable integer mode. */
15481 static bool
15482 ix86_tieable_integer_mode_p (enum machine_mode mode)
15484 switch (mode)
15486 case HImode:
15487 case SImode:
15488 return true;
15490 case QImode:
15491 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
15493 case DImode:
15494 return TARGET_64BIT;
15496 default:
15497 return false;
15501 /* Return true if MODE1 is accessible in a register that can hold MODE2
15502 without copying. That is, all register classes that can hold MODE2
15503 can also hold MODE1. */
15505 bool
15506 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
15508 if (mode1 == mode2)
15509 return true;
15511 if (ix86_tieable_integer_mode_p (mode1)
15512 && ix86_tieable_integer_mode_p (mode2))
15513 return true;
15515 /* MODE2 being XFmode implies fp stack or general regs, which means we
15516 can tie any smaller floating point modes to it. Note that we do not
15517 tie this with TFmode. */
15518 if (mode2 == XFmode)
15519 return mode1 == SFmode || mode1 == DFmode;
15521 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
15522 that we can tie it with SFmode. */
15523 if (mode2 == DFmode)
15524 return mode1 == SFmode;
15526 /* If MODE2 is only appropriate for an SSE register, then tie with
15527 any other mode acceptable to SSE registers. */
15528 if (GET_MODE_SIZE (mode2) >= 8
15529 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
15530 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
15532 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
15533 with any other mode acceptable to MMX registers. */
15534 if (GET_MODE_SIZE (mode2) == 8
15535 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
15536 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
15538 return false;
15541 /* Return the cost of moving data of mode M between a
15542 register and memory. A value of 2 is the default; this cost is
15543 relative to those in `REGISTER_MOVE_COST'.
15545 If moving between registers and memory is more expensive than
15546 between two registers, you should define this macro to express the
15547 relative cost.
15549 Also model the increased cost of moving QImode registers in non-Q_REGS
15550 classes.
15553 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
15555 if (FLOAT_CLASS_P (class))
15557 int index;
15558 switch (mode)
15560 case SFmode:
15561 index = 0;
15562 break;
15563 case DFmode:
15564 index = 1;
15565 break;
15566 case XFmode:
15567 index = 2;
15568 break;
15569 default:
15570 return 100;
15572 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
15574 if (SSE_CLASS_P (class))
15576 int index;
15577 switch (GET_MODE_SIZE (mode))
15579 case 4:
15580 index = 0;
15581 break;
15582 case 8:
15583 index = 1;
15584 break;
15585 case 16:
15586 index = 2;
15587 break;
15588 default:
15589 return 100;
15591 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
15593 if (MMX_CLASS_P (class))
15595 int index;
15596 switch (GET_MODE_SIZE (mode))
15598 case 4:
15599 index = 0;
15600 break;
15601 case 8:
15602 index = 1;
15603 break;
15604 default:
15605 return 100;
15607 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
15609 switch (GET_MODE_SIZE (mode))
15611 case 1:
15612 if (in)
15613 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
15614 : ix86_cost->movzbl_load);
15615 else
15616 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
15617 : ix86_cost->int_store[0] + 4);
15618 break;
15619 case 2:
15620 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
15621 default:
15622 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
15623 if (mode == TFmode)
15624 mode = XFmode;
15625 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
15626 * (((int) GET_MODE_SIZE (mode)
15627 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
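/* Worked example of the integer fallthrough above (assuming a 32-bit
   target, so UNITS_PER_WORD == 4): loading a DImode value into a general
   register costs ix86_cost->int_load[2] * ((8 + 3) / 4), i.e. two 32-bit
   loads' worth, while an SImode store is simply ix86_cost->int_store[2].  */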
15631 /* Compute a (partial) cost for rtx X. Return true if the complete
15632 cost has been computed, and false if subexpressions should be
15633 scanned. In either case, *TOTAL contains the cost result. */
15635 static bool
15636 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
15638 enum machine_mode mode = GET_MODE (x);
15640 switch (code)
15642 case CONST_INT:
15643 case CONST:
15644 case LABEL_REF:
15645 case SYMBOL_REF:
15646 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
15647 *total = 3;
15648 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
15649 *total = 2;
15650 else if (flag_pic && SYMBOLIC_CONST (x)
15651 && (!TARGET_64BIT
15652 || (GET_CODE (x) != LABEL_REF
15653 && (GET_CODE (x) != SYMBOL_REF
15654 || !SYMBOL_REF_LOCAL_P (x)))))
15655 *total = 1;
15656 else
15657 *total = 0;
15658 return true;
15660 case CONST_DOUBLE:
15661 if (mode == VOIDmode)
15662 *total = 0;
15663 else
15664 switch (standard_80387_constant_p (x))
15666 case 1: /* 0.0 */
15667 *total = 1;
15668 break;
15669 default: /* Other constants */
15670 *total = 2;
15671 break;
15672 case 0:
15673 case -1:
15674 /* Start with (MEM (SYMBOL_REF)), since that's where
15675 it'll probably end up. Add a penalty for size. */
15676 *total = (COSTS_N_INSNS (1)
15677 + (flag_pic != 0 && !TARGET_64BIT)
15678 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
15679 break;
15681 return true;
15683 case ZERO_EXTEND:
15684 /* The zero extension is often completely free on x86_64, so make
15685 it as cheap as possible. */
15686 if (TARGET_64BIT && mode == DImode
15687 && GET_MODE (XEXP (x, 0)) == SImode)
15688 *total = 1;
15689 else if (TARGET_ZERO_EXTEND_WITH_AND)
15690 *total = COSTS_N_INSNS (ix86_cost->add);
15691 else
15692 *total = COSTS_N_INSNS (ix86_cost->movzx);
15693 return false;
15695 case SIGN_EXTEND:
15696 *total = COSTS_N_INSNS (ix86_cost->movsx);
15697 return false;
15699 case ASHIFT:
15700 if (GET_CODE (XEXP (x, 1)) == CONST_INT
15701 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
15703 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15704 if (value == 1)
15706 *total = COSTS_N_INSNS (ix86_cost->add);
15707 return false;
15709 if ((value == 2 || value == 3)
15710 && ix86_cost->lea <= ix86_cost->shift_const)
15712 *total = COSTS_N_INSNS (ix86_cost->lea);
15713 return false;
15716 /* FALLTHRU */
15718 case ROTATE:
15719 case ASHIFTRT:
15720 case LSHIFTRT:
15721 case ROTATERT:
15722 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
15724 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15726 if (INTVAL (XEXP (x, 1)) > 32)
15727 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
15728 else
15729 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
15731 else
15733 if (GET_CODE (XEXP (x, 1)) == AND)
15734 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
15735 else
15736 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
15739 else
15741 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15742 *total = COSTS_N_INSNS (ix86_cost->shift_const);
15743 else
15744 *total = COSTS_N_INSNS (ix86_cost->shift_var);
15746 return false;
15748 case MULT:
15749 if (FLOAT_MODE_P (mode))
15751 *total = COSTS_N_INSNS (ix86_cost->fmul);
15752 return false;
15754 else
15756 rtx op0 = XEXP (x, 0);
15757 rtx op1 = XEXP (x, 1);
15758 int nbits;
15759 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15761 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15762 for (nbits = 0; value != 0; value &= value - 1)
15763 nbits++;
15765 else
15766 /* This is arbitrary. */
15767 nbits = 7;
15769 /* Compute costs correctly for widening multiplication. */
15770 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
15771 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
15772 == GET_MODE_SIZE (mode))
15774 int is_mulwiden = 0;
15775 enum machine_mode inner_mode = GET_MODE (op0);
15777 if (GET_CODE (op0) == GET_CODE (op1))
15778 is_mulwiden = 1, op1 = XEXP (op1, 0);
15779 else if (GET_CODE (op1) == CONST_INT)
15781 if (GET_CODE (op0) == SIGN_EXTEND)
15782 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
15783 == INTVAL (op1);
15784 else
15785 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
15788 if (is_mulwiden)
15789 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
15792 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15793 + nbits * ix86_cost->mult_bit)
15794 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
15796 return true;
15799 case DIV:
15800 case UDIV:
15801 case MOD:
15802 case UMOD:
15803 if (FLOAT_MODE_P (mode))
15804 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15805 else
15806 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15807 return false;
15809 case PLUS:
15810 if (FLOAT_MODE_P (mode))
15811 *total = COSTS_N_INSNS (ix86_cost->fadd);
15812 else if (GET_MODE_CLASS (mode) == MODE_INT
15813 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15815 if (GET_CODE (XEXP (x, 0)) == PLUS
15816 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15817 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15818 && CONSTANT_P (XEXP (x, 1)))
15820 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15821 if (val == 2 || val == 4 || val == 8)
15823 *total = COSTS_N_INSNS (ix86_cost->lea);
15824 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15825 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15826 outer_code);
15827 *total += rtx_cost (XEXP (x, 1), outer_code);
15828 return true;
15831 else if (GET_CODE (XEXP (x, 0)) == MULT
15832 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15834 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15835 if (val == 2 || val == 4 || val == 8)
15837 *total = COSTS_N_INSNS (ix86_cost->lea);
15838 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15839 *total += rtx_cost (XEXP (x, 1), outer_code);
15840 return true;
15843 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15845 *total = COSTS_N_INSNS (ix86_cost->lea);
15846 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15847 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15848 *total += rtx_cost (XEXP (x, 1), outer_code);
15849 return true;
15852 /* FALLTHRU */
15854 case MINUS:
15855 if (FLOAT_MODE_P (mode))
15857 *total = COSTS_N_INSNS (ix86_cost->fadd);
15858 return false;
15860 /* FALLTHRU */
15862 case AND:
15863 case IOR:
15864 case XOR:
15865 if (!TARGET_64BIT && mode == DImode)
15867 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15868 + (rtx_cost (XEXP (x, 0), outer_code)
15869 << (GET_MODE (XEXP (x, 0)) != DImode))
15870 + (rtx_cost (XEXP (x, 1), outer_code)
15871 << (GET_MODE (XEXP (x, 1)) != DImode)));
15872 return true;
15874 /* FALLTHRU */
15876 case NEG:
15877 if (FLOAT_MODE_P (mode))
15879 *total = COSTS_N_INSNS (ix86_cost->fchs);
15880 return false;
15882 /* FALLTHRU */
15884 case NOT:
15885 if (!TARGET_64BIT && mode == DImode)
15886 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15887 else
15888 *total = COSTS_N_INSNS (ix86_cost->add);
15889 return false;
15891 case COMPARE:
15892 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
15893 && XEXP (XEXP (x, 0), 1) == const1_rtx
15894 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
15895 && XEXP (x, 1) == const0_rtx)
15897 /* This kind of construct is implemented using test[bwl].
15898 Treat it as if we had an AND. */
15899 *total = (COSTS_N_INSNS (ix86_cost->add)
15900 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
15901 + rtx_cost (const1_rtx, outer_code));
15902 return true;
15904 return false;
15906 case FLOAT_EXTEND:
15907 if (!TARGET_SSE_MATH
15908 || mode == XFmode
15909 || (mode == DFmode && !TARGET_SSE2))
15910 *total = 0;
15911 return false;
15913 case ABS:
15914 if (FLOAT_MODE_P (mode))
15915 *total = COSTS_N_INSNS (ix86_cost->fabs);
15916 return false;
15918 case SQRT:
15919 if (FLOAT_MODE_P (mode))
15920 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15921 return false;
15923 case UNSPEC:
15924 if (XINT (x, 1) == UNSPEC_TP)
15925 *total = 0;
15926 return false;
15928 default:
15929 return false;
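/* Illustrative example of the PLUS handling above: an address-shaped
   expression such as (plus (plus (mult reg 4) reg) (const_int 12)) is
   costed as a single LEA (ix86_cost->lea) plus the costs of its inner
   operands, rather than being priced as a multiply and two additions.  */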
15933 #if TARGET_MACHO
15935 static int current_machopic_label_num;
15937 /* Given a symbol name and its associated stub, write out the
15938 definition of the stub. */
15940 void
15941 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15943 unsigned int length;
15944 char *binder_name, *symbol_name, lazy_ptr_name[32];
15945 int label = ++current_machopic_label_num;
15947 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15948 symb = (*targetm.strip_name_encoding) (symb);
15950 length = strlen (stub);
15951 binder_name = alloca (length + 32);
15952 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15954 length = strlen (symb);
15955 symbol_name = alloca (length + 32);
15956 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15958 sprintf (lazy_ptr_name, "L%d$lz", label);
15960 if (MACHOPIC_PURE)
15961 machopic_picsymbol_stub_section ();
15962 else
15963 machopic_symbol_stub_section ();
15965 fprintf (file, "%s:\n", stub);
15966 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15968 if (MACHOPIC_PURE)
15970 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15971 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15972 fprintf (file, "\tjmp %%edx\n");
15974 else
15975 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15977 fprintf (file, "%s:\n", binder_name);
15979 if (MACHOPIC_PURE)
15981 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15982 fprintf (file, "\tpushl %%eax\n");
15984 else
15985 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15987 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15989 machopic_lazy_symbol_ptr_section ();
15990 fprintf (file, "%s:\n", lazy_ptr_name);
15991 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15992 fprintf (file, "\t.long %s\n", binder_name);
15994 #endif /* TARGET_MACHO */
15996 /* Order the registers for register allocator. */
15998 void
15999 x86_order_regs_for_local_alloc (void)
16001 int pos = 0;
16002 int i;
16004 /* First allocate the local general purpose registers. */
16005 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
16006 if (GENERAL_REGNO_P (i) && call_used_regs[i])
16007 reg_alloc_order [pos++] = i;
16009 /* Global general purpose registers. */
16010 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
16011 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
16012 reg_alloc_order [pos++] = i;
16014 /* x87 registers come first in case we are doing FP math
16015 using them. */
16016 if (!TARGET_SSE_MATH)
16017 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
16018 reg_alloc_order [pos++] = i;
16020 /* SSE registers. */
16021 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16022 reg_alloc_order [pos++] = i;
16023 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16024 reg_alloc_order [pos++] = i;
16026 /* x87 registers. */
16027 if (TARGET_SSE_MATH)
16028 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
16029 reg_alloc_order [pos++] = i;
16031 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
16032 reg_alloc_order [pos++] = i;
16034 /* Initialize the rest of the array, as we do not allocate some registers
16035 at all. */
16036 while (pos < FIRST_PSEUDO_REGISTER)
16037 reg_alloc_order [pos++] = 0;
16040 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
16041 struct attribute_spec.handler. */
16042 static tree
16043 ix86_handle_struct_attribute (tree *node, tree name,
16044 tree args ATTRIBUTE_UNUSED,
16045 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
16047 tree *type = NULL;
16048 if (DECL_P (*node))
16050 if (TREE_CODE (*node) == TYPE_DECL)
16051 type = &TREE_TYPE (*node);
16053 else
16054 type = node;
16056 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
16057 || TREE_CODE (*type) == UNION_TYPE)))
16059 warning (OPT_Wattributes, "%qs attribute ignored",
16060 IDENTIFIER_POINTER (name));
16061 *no_add_attrs = true;
16064 else if ((is_attribute_p ("ms_struct", name)
16065 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
16066 || ((is_attribute_p ("gcc_struct", name)
16067 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
16069 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
16070 IDENTIFIER_POINTER (name));
16071 *no_add_attrs = true;
16074 return NULL_TREE;
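/* Hypothetical usage illustrating the checks above:

     struct __attribute__ ((ms_struct)) ok { int a : 3; char b; };
     struct __attribute__ ((gcc_struct, ms_struct)) bad { int a : 3; char b; };

   The first declaration is accepted; the second draws the "incompatible
   attribute ignored" warning because both layouts are requested for the
   same type.  Applying either attribute to something that is not a struct
   or union type triggers the plain "attribute ignored" warning instead.  */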
16077 static bool
16078 ix86_ms_bitfield_layout_p (tree record_type)
16080 return (TARGET_MS_BITFIELD_LAYOUT &&
16081 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
16082 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
16085 /* Returns an expression indicating where the this parameter is
16086 located on entry to the FUNCTION. */
16088 static rtx
16089 x86_this_parameter (tree function)
16091 tree type = TREE_TYPE (function);
16093 if (TARGET_64BIT)
16095 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
16096 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
16099 if (ix86_function_regparm (type, function) > 0)
16101 tree parm;
16103 parm = TYPE_ARG_TYPES (type);
16104 /* Figure out whether or not the function has a variable number of
16105 arguments. */
16106 for (; parm; parm = TREE_CHAIN (parm))
16107 if (TREE_VALUE (parm) == void_type_node)
16108 break;
16109 /* If not, the this parameter is in the first argument. */
16110 if (parm)
16112 int regno = 0;
16113 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
16114 regno = 2;
16115 return gen_rtx_REG (SImode, regno);
16119 if (aggregate_value_p (TREE_TYPE (type), type))
16120 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
16121 else
16122 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
16125 /* Determine whether x86_output_mi_thunk can succeed. */
16127 static bool
16128 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
16129 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
16130 HOST_WIDE_INT vcall_offset, tree function)
16132 /* 64-bit can handle anything. */
16133 if (TARGET_64BIT)
16134 return true;
16136 /* For 32-bit, everything's fine if we have one free register. */
16137 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
16138 return true;
16140 /* Need a free register for vcall_offset. */
16141 if (vcall_offset)
16142 return false;
16144 /* Need a free register for GOT references. */
16145 if (flag_pic && !(*targetm.binds_local_p) (function))
16146 return false;
16148 /* Otherwise ok. */
16149 return true;
16152 /* Output the assembler code for a thunk function. THUNK_DECL is the
16153 declaration for the thunk function itself, FUNCTION is the decl for
16154 the target function. DELTA is an immediate constant offset to be
16155 added to THIS. If VCALL_OFFSET is nonzero, the word at
16156 *(*this + vcall_offset) should be added to THIS. */
16158 static void
16159 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
16160 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
16161 HOST_WIDE_INT vcall_offset, tree function)
16163 rtx xops[3];
16164 rtx this = x86_this_parameter (function);
16165 rtx this_reg, tmp;
16167 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
16168 pull it in now and let DELTA benefit. */
16169 if (REG_P (this))
16170 this_reg = this;
16171 else if (vcall_offset)
16173 /* Put the this parameter into %eax. */
16174 xops[0] = this;
16175 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
16176 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
16178 else
16179 this_reg = NULL_RTX;
16181 /* Adjust the this parameter by a fixed constant. */
16182 if (delta)
16184 xops[0] = GEN_INT (delta);
16185 xops[1] = this_reg ? this_reg : this;
16186 if (TARGET_64BIT)
16188 if (!x86_64_general_operand (xops[0], DImode))
16190 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
16191 xops[1] = tmp;
16192 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
16193 xops[0] = tmp;
16194 xops[1] = this;
16196 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
16198 else
16199 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
16202 /* Adjust the this parameter by a value stored in the vtable. */
16203 if (vcall_offset)
16205 if (TARGET_64BIT)
16206 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
16207 else
16209 int tmp_regno = 2 /* ECX */;
16210 if (lookup_attribute ("fastcall",
16211 TYPE_ATTRIBUTES (TREE_TYPE (function))))
16212 tmp_regno = 0 /* EAX */;
16213 tmp = gen_rtx_REG (SImode, tmp_regno);
16216 xops[0] = gen_rtx_MEM (Pmode, this_reg);
16217 xops[1] = tmp;
16218 if (TARGET_64BIT)
16219 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
16220 else
16221 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
16223 /* Adjust the this parameter. */
16224 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
16225 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
16227 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
16228 xops[0] = GEN_INT (vcall_offset);
16229 xops[1] = tmp2;
16230 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
16231 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
16233 xops[1] = this_reg;
16234 if (TARGET_64BIT)
16235 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
16236 else
16237 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
16240 /* If necessary, drop THIS back to its stack slot. */
16241 if (this_reg && this_reg != this)
16243 xops[0] = this_reg;
16244 xops[1] = this;
16245 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
16248 xops[0] = XEXP (DECL_RTL (function), 0);
16249 if (TARGET_64BIT)
16251 if (!flag_pic || (*targetm.binds_local_p) (function))
16252 output_asm_insn ("jmp\t%P0", xops);
16253 else
16255 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
16256 tmp = gen_rtx_CONST (Pmode, tmp);
16257 tmp = gen_rtx_MEM (QImode, tmp);
16258 xops[0] = tmp;
16259 output_asm_insn ("jmp\t%A0", xops);
16262 else
16264 if (!flag_pic || (*targetm.binds_local_p) (function))
16265 output_asm_insn ("jmp\t%P0", xops);
16266 else
16267 #if TARGET_MACHO
16268 if (TARGET_MACHO)
16270 rtx sym_ref = XEXP (DECL_RTL (function), 0);
16271 tmp = (gen_rtx_SYMBOL_REF
16272 (Pmode,
16273 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
16274 tmp = gen_rtx_MEM (QImode, tmp);
16275 xops[0] = tmp;
16276 output_asm_insn ("jmp\t%0", xops);
16278 else
16279 #endif /* TARGET_MACHO */
16281 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
16282 output_set_got (tmp);
16284 xops[1] = tmp;
16285 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
16286 output_asm_insn ("jmp\t{*}%1", xops);
16291 static void
16292 x86_file_start (void)
16294 default_file_start ();
16295 if (X86_FILE_START_VERSION_DIRECTIVE)
16296 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
16297 if (X86_FILE_START_FLTUSED)
16298 fputs ("\t.global\t__fltused\n", asm_out_file);
16299 if (ix86_asm_dialect == ASM_INTEL)
16300 fputs ("\t.intel_syntax\n", asm_out_file);
16304 x86_field_alignment (tree field, int computed)
16306 enum machine_mode mode;
16307 tree type = TREE_TYPE (field);
16309 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
16310 return computed;
16311 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
16312 ? get_inner_array_type (type) : type);
16313 if (mode == DFmode || mode == DCmode
16314 || GET_MODE_CLASS (mode) == MODE_INT
16315 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
16316 return MIN (32, computed);
16317 return computed;
16320 /* Output assembler code to FILE to increment profiler label # LABELNO
16321 for profiling a function entry. */
16322 void
16323 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
16325 if (TARGET_64BIT)
16326 if (flag_pic)
16328 #ifndef NO_PROFILE_COUNTERS
16329 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
16330 #endif
16331 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
16333 else
16335 #ifndef NO_PROFILE_COUNTERS
16336 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
16337 #endif
16338 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
16340 else if (flag_pic)
16342 #ifndef NO_PROFILE_COUNTERS
16343 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
16344 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
16345 #endif
16346 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
16348 else
16350 #ifndef NO_PROFILE_COUNTERS
16351 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
16352 PROFILE_COUNT_REGISTER);
16353 #endif
16354 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
16358 /* We don't have exact information about the insn sizes, but we may assume
16359 quite safely that we are informed about all 1 byte insns and memory
16360 address sizes. This is enough to eliminate unnecessary padding in
16361 99% of cases. */
16363 static int
16364 min_insn_size (rtx insn)
16366 int l = 0;
16368 if (!INSN_P (insn) || !active_insn_p (insn))
16369 return 0;
16371 /* Discard alignments we've emitted, and jump instructions. */
16372 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
16373 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
16374 return 0;
16375 if (GET_CODE (insn) == JUMP_INSN
16376 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
16377 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
16378 return 0;
16380 /* Important case - calls are always 5 bytes.
16381 It is common to have many calls in a row. */
16382 if (GET_CODE (insn) == CALL_INSN
16383 && symbolic_reference_mentioned_p (PATTERN (insn))
16384 && !SIBLING_CALL_P (insn))
16385 return 5;
16386 if (get_attr_length (insn) <= 1)
16387 return 1;
16389 /* For normal instructions we may rely on the address sizes and on the
16390 presence of a symbol to require 4 bytes of encoding. This is not the
16391 case for jumps, where references are PC-relative. */
16392 if (GET_CODE (insn) != JUMP_INSN)
16394 l = get_attr_length_address (insn);
16395 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
16396 l = 4;
16398 if (l)
16399 return 1+l;
16400 else
16401 return 2;
16404 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
16405 16-byte window. */
16407 static void
16408 ix86_avoid_jump_misspredicts (void)
16410 rtx insn, start = get_insns ();
16411 int nbytes = 0, njumps = 0;
16412 int isjump = 0;
16414 /* Look for all minimal intervals of instructions containing 4 jumps.
16415 The intervals are bounded by START and INSN. NBYTES is the total
16416 size of instructions in the interval including INSN and not including
16417 START. When the NBYTES is smaller than 16 bytes, it is possible
16418 that the end of START and INSN ends up in the same 16byte page.
16420 The smallest offset in the page INSN can start is the case where START
16421 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
16422 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
16424 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
16427 nbytes += min_insn_size (insn);
16428 if (dump_file)
16429 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
16430 INSN_UID (insn), min_insn_size (insn));
16431 if ((GET_CODE (insn) == JUMP_INSN
16432 && GET_CODE (PATTERN (insn)) != ADDR_VEC
16433 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
16434 || GET_CODE (insn) == CALL_INSN)
16435 njumps++;
16436 else
16437 continue;
16439 while (njumps > 3)
16441 start = NEXT_INSN (start);
16442 if ((GET_CODE (start) == JUMP_INSN
16443 && GET_CODE (PATTERN (start)) != ADDR_VEC
16444 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
16445 || GET_CODE (start) == CALL_INSN)
16446 njumps--, isjump = 1;
16447 else
16448 isjump = 0;
16449 nbytes -= min_insn_size (start);
16451 gcc_assert (njumps >= 0);
16452 if (dump_file)
16453 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
16454 INSN_UID (start), INSN_UID (insn), nbytes);
16456 if (njumps == 3 && isjump && nbytes < 16)
16458 int padsize = 15 - nbytes + min_insn_size (insn);
16460 if (dump_file)
16461 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
16462 INSN_UID (insn), padsize);
16463 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
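/* Worked example for the padding above (assumed sizes, for illustration):
   four jumps packed into NBYTES == 14 with min_insn_size (INSN) == 2 give
   padsize = 15 - 14 + 2 = 3, i.e. three bytes of alignment padding emitted
   before INSN so that it no longer shares a 16-byte window with START.  */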
16468 /* AMD Athlon works faster
16469 when RET is not the destination of a conditional jump or directly preceded
16470 by another jump instruction. We avoid the penalty by inserting a NOP just
16471 before the RET instruction in such cases. */
16472 static void
16473 ix86_pad_returns (void)
16475 edge e;
16476 edge_iterator ei;
16478 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
16480 basic_block bb = e->src;
16481 rtx ret = BB_END (bb);
16482 rtx prev;
16483 bool replace = false;
16485 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
16486 || !maybe_hot_bb_p (bb))
16487 continue;
16488 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
16489 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
16490 break;
16491 if (prev && GET_CODE (prev) == CODE_LABEL)
16493 edge e;
16494 edge_iterator ei;
16496 FOR_EACH_EDGE (e, ei, bb->preds)
16497 if (EDGE_FREQUENCY (e) && e->src->index >= 0
16498 && !(e->flags & EDGE_FALLTHRU))
16499 replace = true;
16501 if (!replace)
16503 prev = prev_active_insn (ret);
16504 if (prev
16505 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
16506 || GET_CODE (prev) == CALL_INSN))
16507 replace = true;
16508 /* Empty functions get a branch mispredict even when the jump destination
16509 is not visible to us. */
16510 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
16511 replace = true;
16513 if (replace)
16515 emit_insn_before (gen_return_internal_long (), ret);
16516 delete_insn (ret);
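/* Note (based on the return_internal_long pattern in i386.md): the "NOP"
   mentioned above is realized by replacing the plain RET with a
   rep-prefixed return, roughly

       rep ; ret

   which the CPU executes as an ordinary return but which is long enough
   to avoid the branch-prediction penalty described in the comment.  */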
16521 /* Implement machine specific optimizations. We implement padding of returns
16522 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
16523 static void
16524 ix86_reorg (void)
16526 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
16527 ix86_pad_returns ();
16528 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
16529 ix86_avoid_jump_misspredicts ();
16532 /* Return nonzero when a QImode register that must be represented via a REX
16533 prefix is used. */
16534 bool
16535 x86_extended_QIreg_mentioned_p (rtx insn)
16537 int i;
16538 extract_insn_cached (insn);
16539 for (i = 0; i < recog_data.n_operands; i++)
16540 if (REG_P (recog_data.operand[i])
16541 && REGNO (recog_data.operand[i]) >= 4)
16542 return true;
16543 return false;
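/* Background note (x86-64 encoding, illustration only): only %al, %dl,
   %cl and %bl (hard registers 0-3) have byte forms encodable without a
   REX prefix.  Byte access to %spl, %bpl, %sil, %dil and to %r8b-%r15b
   requires REX, which is why any QImode operand with a hard register
   number of 4 or more forces the prefix above.  */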
16546 /* Return nonzero when P points to a register encoded via a REX prefix.
16547 Called via for_each_rtx. */
16548 static int
16549 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
16551 unsigned int regno;
16552 if (!REG_P (*p))
16553 return 0;
16554 regno = REGNO (*p);
16555 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
16558 /* Return true when INSN mentions a register that must be encoded using a
16559 REX prefix. */
16560 bool
16561 x86_extended_reg_mentioned_p (rtx insn)
16563 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
16566 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
16567 optabs would emit if we didn't have TFmode patterns. */
16569 void
16570 x86_emit_floatuns (rtx operands[2])
16572 rtx neglab, donelab, i0, i1, f0, in, out;
16573 enum machine_mode mode, inmode;
16575 inmode = GET_MODE (operands[1]);
16576 gcc_assert (inmode == SImode || inmode == DImode);
16578 out = operands[0];
16579 in = force_reg (inmode, operands[1]);
16580 mode = GET_MODE (out);
16581 neglab = gen_label_rtx ();
16582 donelab = gen_label_rtx ();
16583 i1 = gen_reg_rtx (Pmode);
16584 f0 = gen_reg_rtx (mode);
16586 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
16588 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
16589 emit_jump_insn (gen_jump (donelab));
16590 emit_barrier ();
16592 emit_label (neglab);
16594 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
16595 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
16596 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
16597 expand_float (f0, i0, 0);
16598 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
16600 emit_label (donelab);
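/* A minimal scalar sketch (hypothetical helper, assuming a 64-bit
   unsigned input) of the sequence emitted above: non-negative values
   convert directly; otherwise the value is halved, with the discarded
   low bit ORed back in so rounding is unaffected, converted as a signed
   number, and then doubled.  */
#if 0
static double
floatuns_sketch (unsigned long long x)
{
  if ((long long) x >= 0)
    return (double) (long long) x;		/* plain signed conversion */
  /* Fold the lost low bit back in, convert the halved value, double.  */
  return 2.0 * (double) (long long) ((x >> 1) | (x & 1));
}
#endif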
16603 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
16604 with all elements equal to VAR. Return true if successful. */
16606 static bool
16607 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
16608 rtx target, rtx val)
16610 enum machine_mode smode, wsmode, wvmode;
16611 rtx x;
16613 switch (mode)
16615 case V2SImode:
16616 case V2SFmode:
16617 if (!mmx_ok && !TARGET_SSE)
16618 return false;
16619 /* FALLTHRU */
16621 case V2DFmode:
16622 case V2DImode:
16623 case V4SFmode:
16624 case V4SImode:
16625 val = force_reg (GET_MODE_INNER (mode), val);
16626 x = gen_rtx_VEC_DUPLICATE (mode, val);
16627 emit_insn (gen_rtx_SET (VOIDmode, target, x));
16628 return true;
16630 case V4HImode:
16631 if (!mmx_ok)
16632 return false;
16633 if (TARGET_SSE || TARGET_3DNOW_A)
16635 val = gen_lowpart (SImode, val);
16636 x = gen_rtx_TRUNCATE (HImode, val);
16637 x = gen_rtx_VEC_DUPLICATE (mode, x);
16638 emit_insn (gen_rtx_SET (VOIDmode, target, x));
16639 return true;
16641 else
16643 smode = HImode;
16644 wsmode = SImode;
16645 wvmode = V2SImode;
16646 goto widen;
16649 case V8QImode:
16650 if (!mmx_ok)
16651 return false;
16652 smode = QImode;
16653 wsmode = HImode;
16654 wvmode = V4HImode;
16655 goto widen;
16656 case V8HImode:
16657 smode = HImode;
16658 wsmode = SImode;
16659 wvmode = V4SImode;
16660 goto widen;
16661 case V16QImode:
16662 smode = QImode;
16663 wsmode = HImode;
16664 wvmode = V8HImode;
16665 goto widen;
16666 widen:
16667 /* Replicate the value once into the next wider mode and recurse. */
16668 val = convert_modes (wsmode, smode, val, true);
16669 x = expand_simple_binop (wsmode, ASHIFT, val,
16670 GEN_INT (GET_MODE_BITSIZE (smode)),
16671 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16672 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
16674 x = gen_reg_rtx (wvmode);
16675 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
16676 gcc_unreachable ();
16677 emit_move_insn (target, gen_lowpart (mode, x));
16678 return true;
16680 default:
16681 return false;
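/* Worked example (illustration only) of the "widen" path above for
   V16QImode: the QImode value V is first replicated into an HImode
   scalar,

       hi = (V << 8) | V

   and the broadcast is then retried as a V8HImode duplicate of HI; the
   result is reinterpreted as V16QImode via gen_lowpart.  */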
16685 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
16686 whose low element is VAR, and other elements are zero. Return true
16687 if successful. */
16689 static bool
16690 ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode,
16691 rtx target, rtx var)
16693 enum machine_mode vsimode;
16694 rtx x;
16696 switch (mode)
16698 case V2SFmode:
16699 case V2SImode:
16700 if (!mmx_ok && !TARGET_SSE)
16701 return false;
16702 /* FALLTHRU */
16704 case V2DFmode:
16705 case V2DImode:
16706 var = force_reg (GET_MODE_INNER (mode), var);
16707 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
16708 emit_insn (gen_rtx_SET (VOIDmode, target, x));
16709 return true;
16711 case V4SFmode:
16712 case V4SImode:
16713 var = force_reg (GET_MODE_INNER (mode), var);
16714 x = gen_rtx_VEC_DUPLICATE (mode, var);
16715 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
16716 emit_insn (gen_rtx_SET (VOIDmode, target, x));
16717 return true;
16719 case V8HImode:
16720 case V16QImode:
16721 vsimode = V4SImode;
16722 goto widen;
16723 case V4HImode:
16724 case V8QImode:
16725 if (!mmx_ok)
16726 return false;
16727 vsimode = V2SImode;
16728 goto widen;
16729 widen:
16730 /* Zero extend the variable element to SImode and recurse. */
16731 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
16733 x = gen_reg_rtx (vsimode);
16734 if (!ix86_expand_vector_init_low_nonzero (mmx_ok, vsimode, x, var))
16735 gcc_unreachable ();
16737 emit_move_insn (target, gen_lowpart (mode, x));
16738 return true;
16740 default:
16741 return false;
16745 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
16746 consisting of the values in VALS. It is known that all elements
16747 except ONE_VAR are constants. Return true if successful. */
16749 static bool
16750 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
16751 rtx target, rtx vals, int one_var)
16753 rtx var = XVECEXP (vals, 0, one_var);
16754 enum machine_mode wmode;
16755 rtx const_vec, x;
16757 XVECEXP (vals, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
16758 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
16760 switch (mode)
16762 case V2DFmode:
16763 case V2DImode:
16764 case V2SFmode:
16765 case V2SImode:
16766 /* For the two element vectors, it's just as easy to use
16767 the general case. */
16768 return false;
16770 case V4SFmode:
16771 case V4SImode:
16772 case V8HImode:
16773 case V4HImode:
16774 break;
16776 case V16QImode:
16777 wmode = V8HImode;
16778 goto widen;
16779 case V8QImode:
16780 wmode = V4HImode;
16781 goto widen;
16782 widen:
16783 /* There's no way to set one QImode entry easily. Combine
16784 the variable value with its adjacent constant value, and
16785 promote to an HImode set. */
16786 x = XVECEXP (vals, 0, one_var ^ 1);
16787 if (one_var & 1)
16789 var = convert_modes (HImode, QImode, var, true);
16790 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
16791 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16792 x = GEN_INT (INTVAL (x) & 0xff);
16794 else
16796 var = convert_modes (HImode, QImode, var, true);
16797 x = gen_int_mode (INTVAL (x) << 8, HImode);
16799 if (x != const0_rtx)
16800 var = expand_simple_binop (HImode, IOR, var, x, var,
16801 1, OPTAB_LIB_WIDEN);
16803 x = gen_reg_rtx (wmode);
16804 emit_move_insn (x, gen_lowpart (wmode, const_vec));
16805 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
16807 emit_move_insn (target, gen_lowpart (mode, x));
16808 return true;
16810 default:
16811 return false;
16814 emit_move_insn (target, const_vec);
16815 ix86_expand_vector_set (mmx_ok, target, var, one_var);
16816 return true;
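/* Worked example (illustration only) of the QImode "widen" path above:
   for a V16QImode vector whose only variable element sits at odd index 5,
   the variable byte V and its even-indexed constant neighbour C4 are
   packed into a single HImode element,

       h = (V << 8) | (C4 & 0xff)

   and the insertion is then performed as a V8HImode vector set at
   element index 5 >> 1 == 2.  */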
16819 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
16820 all values variable, and none identical. */
16822 static void
16823 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
16824 rtx target, rtx vals)
16826 enum machine_mode half_mode = GET_MODE_INNER (mode);
16827 rtx op0 = NULL, op1 = NULL;
16828 bool use_vec_concat = false;
16830 switch (mode)
16832 case V2SFmode:
16833 case V2SImode:
16834 if (!mmx_ok && !TARGET_SSE)
16835 break;
16836 /* FALLTHRU */
16838 case V2DFmode:
16839 case V2DImode:
16840 /* For the two element vectors, we always implement VEC_CONCAT. */
16841 op0 = XVECEXP (vals, 0, 0);
16842 op1 = XVECEXP (vals, 0, 1);
16843 use_vec_concat = true;
16844 break;
16846 case V4SFmode:
16847 half_mode = V2SFmode;
16848 goto half;
16849 case V4SImode:
16850 half_mode = V2SImode;
16851 goto half;
16852 half:
16854 rtvec v;
16856 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
16857 Recurse to load the two halves. */
16859 op0 = gen_reg_rtx (half_mode);
16860 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
16861 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
16863 op1 = gen_reg_rtx (half_mode);
16864 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
16865 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
16867 use_vec_concat = true;
16869 break;
16871 case V8HImode:
16872 case V16QImode:
16873 case V4HImode:
16874 case V8QImode:
16875 break;
16877 default:
16878 gcc_unreachable ();
16881 if (use_vec_concat)
16883 if (!register_operand (op0, half_mode))
16884 op0 = force_reg (half_mode, op0);
16885 if (!register_operand (op1, half_mode))
16886 op1 = force_reg (half_mode, op1);
16888 emit_insn (gen_rtx_SET (VOIDmode, target,
16889 gen_rtx_VEC_CONCAT (mode, op0, op1)));
16891 else
16893 int i, j, n_elts, n_words, n_elt_per_word;
16894 enum machine_mode inner_mode;
16895 rtx words[4], shift;
16897 inner_mode = GET_MODE_INNER (mode);
16898 n_elts = GET_MODE_NUNITS (mode);
16899 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
16900 n_elt_per_word = n_elts / n_words;
16901 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
16903 for (i = 0; i < n_words; ++i)
16905 rtx word = NULL_RTX;
16907 for (j = 0; j < n_elt_per_word; ++j)
16909 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
16910 elt = convert_modes (word_mode, inner_mode, elt, true);
16912 if (j == 0)
16913 word = elt;
16914 else
16916 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
16917 word, 1, OPTAB_LIB_WIDEN);
16918 word = expand_simple_binop (word_mode, IOR, word, elt,
16919 word, 1, OPTAB_LIB_WIDEN);
16923 words[i] = word;
16926 if (n_words == 1)
16927 emit_move_insn (target, gen_lowpart (mode, words[0]));
16928 else if (n_words == 2)
16930 rtx tmp = gen_reg_rtx (mode);
16931 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
16932 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
16933 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
16934 emit_move_insn (target, tmp);
16936 else if (n_words == 4)
16938 rtx tmp = gen_reg_rtx (V4SImode);
16939 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
16940 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
16941 emit_move_insn (target, gen_lowpart (mode, tmp));
16943 else
16944 gcc_unreachable ();
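/* A minimal sketch (hypothetical helper, illustration only) of the
   element-packing loop above for the 32-bit word_mode / V4HImode case:
   two HImode elements are shifted and ORed into each SImode word,
   highest-indexed element first, so the lowest-indexed element ends up
   in the least significant bits.  */
#if 0
static unsigned int
pack_v4hi_word_sketch (const unsigned short *elt, int word_index)
{
  unsigned int word = elt[2 * word_index + 1];	/* j == 0 */
  word = (word << 16) | elt[2 * word_index];	/* j == 1 */
  return word;
}
#endif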
16948 /* Initialize vector TARGET via VALS. Suppress the use of MMX
16949 instructions unless MMX_OK is true. */
16951 void
16952 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
16954 enum machine_mode mode = GET_MODE (target);
16955 enum machine_mode inner_mode = GET_MODE_INNER (mode);
16956 int n_elts = GET_MODE_NUNITS (mode);
16957 int n_var = 0, one_var = -1;
16958 bool all_same = true, all_const_zero = true;
16959 int i;
16960 rtx x;
16962 for (i = 0; i < n_elts; ++i)
16964 x = XVECEXP (vals, 0, i);
16965 if (!CONSTANT_P (x))
16966 n_var++, one_var = i;
16967 else if (x != CONST0_RTX (inner_mode))
16968 all_const_zero = false;
16969 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
16970 all_same = false;
16973 /* Constants are best loaded from the constant pool. */
16974 if (n_var == 0)
16976 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
16977 return;
16980 /* If all values are identical, broadcast the value. */
16981 if (all_same
16982 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
16983 XVECEXP (vals, 0, 0)))
16984 return;
16986 /* Values where only one field is non-constant are best loaded from
16987 the pool and overwritten via a move later. */
16988 if (n_var == 1)
16990 if (all_const_zero && one_var == 0
16991 && ix86_expand_vector_init_low_nonzero (mmx_ok, mode, target,
16992 XVECEXP (vals, 0, 0)))
16993 return;
16995 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
16996 return;
16999 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
17002 void
17003 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
17005 enum machine_mode mode = GET_MODE (target);
17006 enum machine_mode inner_mode = GET_MODE_INNER (mode);
17007 bool use_vec_merge = false;
17008 rtx tmp;
17010 switch (mode)
17012 case V2SFmode:
17013 case V2SImode:
17014 if (mmx_ok)
17016 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
17017 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
17018 if (elt == 0)
17019 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
17020 else
17021 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
17022 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
17023 return;
17025 break;
17027 case V2DFmode:
17028 case V2DImode:
17030 rtx op0, op1;
17032 /* For the two element vectors, we implement a VEC_CONCAT with
17033 the extraction of the other element. */
17035 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
17036 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
17038 if (elt == 0)
17039 op0 = val, op1 = tmp;
17040 else
17041 op0 = tmp, op1 = val;
17043 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
17044 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
17046 return;
17048 case V4SFmode:
17049 switch (elt)
17051 case 0:
17052 use_vec_merge = true;
17053 break;
17055 case 1:
17056 /* tmp = target = A B C D */
17057 tmp = copy_to_reg (target);
17058 /* target = A A B B */
17059 emit_insn (gen_sse_unpcklps (target, target, target));
17060 /* target = X A B B */
17061 ix86_expand_vector_set (false, target, val, 0);
17062 /* target = A X C D */
17063 emit_insn (gen_sse_shufps_1 (target, target, tmp,
17064 GEN_INT (1), GEN_INT (0),
17065 GEN_INT (2+4), GEN_INT (3+4)));
17066 return;
17068 case 2:
17069 /* tmp = target = A B C D */
17070 tmp = copy_to_reg (target);
17071 /* tmp = X B C D */
17072 ix86_expand_vector_set (false, tmp, val, 0);
17073 /* target = A B X D */
17074 emit_insn (gen_sse_shufps_1 (target, target, tmp,
17075 GEN_INT (0), GEN_INT (1),
17076 GEN_INT (0+4), GEN_INT (3+4)));
17077 return;
17079 case 3:
17080 /* tmp = target = A B C D */
17081 tmp = copy_to_reg (target);
17082 /* tmp = X B C D */
17083 ix86_expand_vector_set (false, tmp, val, 0);
17084 /* target = A B C X */
17085 emit_insn (gen_sse_shufps_1 (target, target, tmp,
17086 GEN_INT (0), GEN_INT (1),
17087 GEN_INT (2+4), GEN_INT (0+4)));
17088 return;
17090 default:
17091 gcc_unreachable ();
17093 break;
17095 case V4SImode:
17096 /* Element 0 handled by vec_merge below. */
17097 if (elt == 0)
17099 use_vec_merge = true;
17100 break;
17103 if (TARGET_SSE2)
17105 /* With SSE2, use integer shuffles to swap element 0 and ELT,
17106 store into element 0, then shuffle them back. */
17108 rtx order[4];
17110 order[0] = GEN_INT (elt);
17111 order[1] = const1_rtx;
17112 order[2] = const2_rtx;
17113 order[3] = GEN_INT (3);
17114 order[elt] = const0_rtx;
17116 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
17117 order[1], order[2], order[3]));
17119 ix86_expand_vector_set (false, target, val, 0);
17121 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
17122 order[1], order[2], order[3]));
17124 else
17126 /* For SSE1, we have to reuse the V4SF code. */
17127 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
17128 gen_lowpart (SFmode, val), elt);
17130 return;
17132 case V8HImode:
17133 use_vec_merge = TARGET_SSE2;
17134 break;
17135 case V4HImode:
17136 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
17137 break;
17139 case V16QImode:
17140 case V8QImode:
17141 default:
17142 break;
17145 if (use_vec_merge)
17147 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
17148 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
17149 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
17151 else
17153 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
17155 emit_move_insn (mem, target);
17157 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
17158 emit_move_insn (tmp, val);
17160 emit_move_insn (target, mem);
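/* Background note (SHUFPS operand selection, illustration only): in the
   V4SFmode cases above, gen_sse_shufps_1 (dest, op1, op2, s0, s1, s2, s3)
   models the hardware SHUFPS, where selectors 0-3 index OP1, selectors
   4-7 index OP2, and the first two result elements come from OP1, the
   last two from OP2.  That is why the ELT == 2 case uses
   (0, 1, 0+4, 3+4) to build A B X D out of target = A B C D and
   tmp = X B C D.  */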
17164 void
17165 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
17167 enum machine_mode mode = GET_MODE (vec);
17168 enum machine_mode inner_mode = GET_MODE_INNER (mode);
17169 bool use_vec_extr = false;
17170 rtx tmp;
17172 switch (mode)
17174 case V2SImode:
17175 case V2SFmode:
17176 if (!mmx_ok)
17177 break;
17178 /* FALLTHRU */
17180 case V2DFmode:
17181 case V2DImode:
17182 use_vec_extr = true;
17183 break;
17185 case V4SFmode:
17186 switch (elt)
17188 case 0:
17189 tmp = vec;
17190 break;
17192 case 1:
17193 case 3:
17194 tmp = gen_reg_rtx (mode);
17195 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
17196 GEN_INT (elt), GEN_INT (elt),
17197 GEN_INT (elt+4), GEN_INT (elt+4)));
17198 break;
17200 case 2:
17201 tmp = gen_reg_rtx (mode);
17202 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
17203 break;
17205 default:
17206 gcc_unreachable ();
17208 vec = tmp;
17209 use_vec_extr = true;
17210 elt = 0;
17211 break;
17213 case V4SImode:
17214 if (TARGET_SSE2)
17216 switch (elt)
17218 case 0:
17219 tmp = vec;
17220 break;
17222 case 1:
17223 case 3:
17224 tmp = gen_reg_rtx (mode);
17225 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
17226 GEN_INT (elt), GEN_INT (elt),
17227 GEN_INT (elt), GEN_INT (elt)));
17228 break;
17230 case 2:
17231 tmp = gen_reg_rtx (mode);
17232 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
17233 break;
17235 default:
17236 gcc_unreachable ();
17238 vec = tmp;
17239 use_vec_extr = true;
17240 elt = 0;
17242 else
17244 /* For SSE1, we have to reuse the V4SF code. */
17245 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
17246 gen_lowpart (V4SFmode, vec), elt);
17247 return;
17249 break;
17251 case V8HImode:
17252 use_vec_extr = TARGET_SSE2;
17253 break;
17254 case V4HImode:
17255 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
17256 break;
17258 case V16QImode:
17259 case V8QImode:
17260 /* ??? Could extract the appropriate HImode element and shift. */
17261 default:
17262 break;
17265 if (use_vec_extr)
17267 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
17268 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
17270 /* Let the rtl optimizers know about the zero extension performed. */
17271 if (inner_mode == HImode)
17273 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
17274 target = gen_lowpart (SImode, target);
17277 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
17279 else
17281 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
17283 emit_move_insn (mem, vec);
17285 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
17286 emit_move_insn (target, tmp);
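/* Background note (illustration only): the HImode special case above
   reflects the fact that PEXTRW writes the extracted 16-bit element
   zero-extended into a 32-bit general register, so the store is
   expressed as an SImode zero_extend to let the rtl optimizers exploit
   the already-cleared upper bits.  */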
17290 /* Implements target hook vector_mode_supported_p. */
17291 static bool
17292 ix86_vector_mode_supported_p (enum machine_mode mode)
17294 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
17295 return true;
17296 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
17297 return true;
17298 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
17299 return true;
17300 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
17301 return true;
17302 return false;
17305 /* Worker function for TARGET_MD_ASM_CLOBBERS.
17307 We do this in the new i386 backend to maintain source compatibility
17308 with the old cc0-based compiler. */
17310 static tree
17311 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
17312 tree inputs ATTRIBUTE_UNUSED,
17313 tree clobbers)
17315 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
17316 clobbers);
17317 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
17318 clobbers);
17319 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
17320 clobbers);
17321 return clobbers;
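/* Usage note (illustration only): the effect of the hook above is that
   every inline asm on i386 behaves as if the user had written the extra
   clobbers explicitly, e.g.

       asm ("..." : outputs : inputs
            : "flags", "fpsr", "dirflag");

   where the three register clobbers are appended automatically, matching
   the old cc0-based backend, which never tracked these registers across
   asm statements.  */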
17324 /* Worker function for REVERSE_CONDITION. */
17326 enum rtx_code
17327 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
17329 return (mode != CCFPmode && mode != CCFPUmode
17330 ? reverse_condition (code)
17331 : reverse_condition_maybe_unordered (code));
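/* Worked example (illustration only): for integer flag modes the plain
   reversal is used, e.g. LT becomes GE.  For CCFPmode/CCFPUmode the
   unordered-aware reversal is used instead, e.g. LT becomes UNGE, so a
   comparison involving a NaN still lands on exactly one side of the
   reversed branch.  */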
17334 /* Output code to perform an x87 FP register move, from OPERANDS[1]
17335 to OPERANDS[0]. */
17337 const char *
17338 output_387_reg_move (rtx insn, rtx *operands)
17340 if (REG_P (operands[1])
17341 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
17343 if (REGNO (operands[0]) == FIRST_STACK_REG
17344 && TARGET_USE_FFREEP)
17345 return "ffreep\t%y0";
17346 return "fstp\t%y0";
17348 if (STACK_TOP_P (operands[0]))
17349 return "fld%z1\t%y1";
17350 return "fst\t%y0";
17353 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
17354 the FP status register is set. */
17356 void
17357 ix86_emit_fp_unordered_jump (rtx label)
17359 rtx reg = gen_reg_rtx (HImode);
17360 rtx temp;
17362 emit_insn (gen_x86_fnstsw_1 (reg));
17364 if (TARGET_USE_SAHF)
17366 emit_insn (gen_x86_sahf_1 (reg));
17368 temp = gen_rtx_REG (CCmode, FLAGS_REG);
17369 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
17371 else
17373 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
17375 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
17376 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
17379 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
17380 gen_rtx_LABEL_REF (VOIDmode, label),
17381 pc_rtx);
17382 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
17383 emit_jump_insn (temp);
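/* Illustrative note (assumed assembly forms, not quoted from the
   patterns): C2 is bit 10 of the x87 status word, i.e. bit 2 of its high
   byte.  With TARGET_USE_SAHF the emitted sequence is roughly

       fnstsw	%ax
       sahf
       jp	label

   since SAHF maps C2 to the parity flag; otherwise it is roughly

       fnstsw	%ax
       testb	$0x04, %ah
       jne	label

   which tests the C2 bit directly.  */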
17386 /* Output code to perform a log1p XFmode calculation. */
17388 void ix86_emit_i387_log1p (rtx op0, rtx op1)
17390 rtx label1 = gen_label_rtx ();
17391 rtx label2 = gen_label_rtx ();
17393 rtx tmp = gen_reg_rtx (XFmode);
17394 rtx tmp2 = gen_reg_rtx (XFmode);
17396 emit_insn (gen_absxf2 (tmp, op1));
17397 emit_insn (gen_cmpxf (tmp,
17398 CONST_DOUBLE_FROM_REAL_VALUE (
17399 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
17400 XFmode)));
17401 emit_jump_insn (gen_bge (label1));
17403 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
17404 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
17405 emit_jump (label2);
17407 emit_label (label1);
17408 emit_move_insn (tmp, CONST1_RTX (XFmode));
17409 emit_insn (gen_addxf3 (tmp, op1, tmp));
17410 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
17411 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
17413 emit_label (label2);
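/* Math note (illustration only): FYL2XP1 computes Y * log2 (X + 1) and is
   only specified for |X| < 1 - sqrt(2)/2 ~= 0.2928932188, which is exactly
   the threshold tested above.  In both branches Y is loaded with ln(2)
   (standard_80387_constant_rtx (4), the FLDLN2 constant), so

       log1p (x) = ln(2) * log2 (1 + x)

   is computed via FYL2XP1 for small |x| and via FYL2X on x + 1.0
   otherwise.  */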
17416 /* Solaris named-section hook. Parameters are as for
17417 named_section_real. */
17419 static void
17420 i386_solaris_elf_named_section (const char *name, unsigned int flags,
17421 tree decl)
17423 /* With Binutils 2.15, the "@unwind" marker must be specified on
17424 every occurrence of the ".eh_frame" section, not just the first
17425 one. */
17426 if (TARGET_64BIT
17427 && strcmp (name, ".eh_frame") == 0)
17429 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
17430 flags & SECTION_WRITE ? "aw" : "a");
17431 return;
17433 default_elf_asm_named_section (name, flags, decl);
17436 #include "gt-i386.h"