1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
52 #ifndef CHECK_STACK_LIMIT
53 #define CHECK_STACK_LIMIT (-1)
54 #endif
56 /* Return index of given mode in mult and division cost tables. */
57 #define MODE_INDEX(mode) \
58 ((mode) == QImode ? 0 \
59 : (mode) == HImode ? 1 \
60 : (mode) == SImode ? 2 \
61 : (mode) == DImode ? 3 \
62 : 4)
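/* Worked example: MODE_INDEX (SImode) == 2, so an SImode multiply or divide
   picks the third entry of the five-element multiply/divide cost arrays
   below, while any wider mode (e.g. TImode) falls into the final slot.
   Illustrative use, assuming the mult_init field name from struct
   processor_costs in i386.h: ix86_cost->mult_init[MODE_INDEX (mode)].  */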
64 /* Processor costs (relative to an add) */
65 static const
 66 struct processor_costs size_cost = { /* costs for tuning for size */
67 2, /* cost of an add instruction */
68 3, /* cost of a lea instruction */
69 2, /* variable shift costs */
70 3, /* constant shift costs */
71 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
72 0, /* cost of multiply per each bit set */
73 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
74 3, /* cost of movsx */
75 3, /* cost of movzx */
76 0, /* "large" insn */
77 2, /* MOVE_RATIO */
78 2, /* cost for loading QImode using movzbl */
79 {2, 2, 2}, /* cost of loading integer registers
80 in QImode, HImode and SImode.
81 Relative to reg-reg move (2). */
82 {2, 2, 2}, /* cost of storing integer registers */
83 2, /* cost of reg,reg fld/fst */
84 {2, 2, 2}, /* cost of loading fp registers
85 in SFmode, DFmode and XFmode */
86 {2, 2, 2}, /* cost of loading integer registers */
87 3, /* cost of moving MMX register */
88 {3, 3}, /* cost of loading MMX registers
89 in SImode and DImode */
90 {3, 3}, /* cost of storing MMX registers
91 in SImode and DImode */
92 3, /* cost of moving SSE register */
93 {3, 3, 3}, /* cost of loading SSE registers
94 in SImode, DImode and TImode */
95 {3, 3, 3}, /* cost of storing SSE registers
96 in SImode, DImode and TImode */
97 3, /* MMX or SSE register to integer */
98 0, /* size of prefetch block */
99 0, /* number of parallel prefetches */
100 1, /* Branch cost */
101 2, /* cost of FADD and FSUB insns. */
102 2, /* cost of FMUL instruction. */
103 2, /* cost of FDIV instruction. */
104 2, /* cost of FABS instruction. */
105 2, /* cost of FCHS instruction. */
106 2, /* cost of FSQRT instruction. */
109 /* Processor costs (relative to an add) */
110 static const
111 struct processor_costs i386_cost = { /* 386 specific costs */
112 1, /* cost of an add instruction */
113 1, /* cost of a lea instruction */
114 3, /* variable shift costs */
115 2, /* constant shift costs */
116 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
117 1, /* cost of multiply per each bit set */
118 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
119 3, /* cost of movsx */
120 2, /* cost of movzx */
121 15, /* "large" insn */
122 3, /* MOVE_RATIO */
123 4, /* cost for loading QImode using movzbl */
124 {2, 4, 2}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {8, 8, 8}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
131 {8, 8, 8}, /* cost of loading integer registers */
132 2, /* cost of moving MMX register */
133 {4, 8}, /* cost of loading MMX registers
134 in SImode and DImode */
135 {4, 8}, /* cost of storing MMX registers
136 in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
139 in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
141 in SImode, DImode and TImode */
142 3, /* MMX or SSE register to integer */
143 0, /* size of prefetch block */
144 0, /* number of parallel prefetches */
145 1, /* Branch cost */
146 23, /* cost of FADD and FSUB insns. */
147 27, /* cost of FMUL instruction. */
148 88, /* cost of FDIV instruction. */
149 22, /* cost of FABS instruction. */
150 24, /* cost of FCHS instruction. */
151 122, /* cost of FSQRT instruction. */
154 static const
155 struct processor_costs i486_cost = { /* 486 specific costs */
156 1, /* cost of an add instruction */
157 1, /* cost of a lea instruction */
158 3, /* variable shift costs */
159 2, /* constant shift costs */
160 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
161 1, /* cost of multiply per each bit set */
162 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
163 3, /* cost of movsx */
164 2, /* cost of movzx */
165 15, /* "large" insn */
166 3, /* MOVE_RATIO */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of loading integer registers */
176 2, /* cost of moving MMX register */
177 {4, 8}, /* cost of loading MMX registers
178 in SImode and DImode */
179 {4, 8}, /* cost of storing MMX registers
180 in SImode and DImode */
181 2, /* cost of moving SSE register */
182 {4, 8, 16}, /* cost of loading SSE registers
183 in SImode, DImode and TImode */
184 {4, 8, 16}, /* cost of storing SSE registers
185 in SImode, DImode and TImode */
186 3, /* MMX or SSE register to integer */
187 0, /* size of prefetch block */
188 0, /* number of parallel prefetches */
189 1, /* Branch cost */
190 8, /* cost of FADD and FSUB insns. */
191 16, /* cost of FMUL instruction. */
192 73, /* cost of FDIV instruction. */
193 3, /* cost of FABS instruction. */
194 3, /* cost of FCHS instruction. */
195 83, /* cost of FSQRT instruction. */
198 static const
199 struct processor_costs pentium_cost = {
200 1, /* cost of an add instruction */
201 1, /* cost of a lea instruction */
202 4, /* variable shift costs */
203 1, /* constant shift costs */
204 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
205 0, /* cost of multiply per each bit set */
206 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
207 3, /* cost of movsx */
208 2, /* cost of movzx */
209 8, /* "large" insn */
210 6, /* MOVE_RATIO */
211 6, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {2, 2, 6}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
219 {4, 4, 6}, /* cost of loading integer registers */
220 8, /* cost of moving MMX register */
221 {8, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {8, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
233 2, /* Branch cost */
234 3, /* cost of FADD and FSUB insns. */
235 3, /* cost of FMUL instruction. */
236 39, /* cost of FDIV instruction. */
237 1, /* cost of FABS instruction. */
238 1, /* cost of FCHS instruction. */
239 70, /* cost of FSQRT instruction. */
242 static const
243 struct processor_costs pentiumpro_cost = {
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
246 1, /* variable shift costs */
247 1, /* constant shift costs */
248 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
249 0, /* cost of multiply per each bit set */
250 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
251 1, /* cost of movsx */
252 1, /* cost of movzx */
253 8, /* "large" insn */
254 6, /* MOVE_RATIO */
255 2, /* cost for loading QImode using movzbl */
256 {4, 4, 4}, /* cost of loading integer registers
257 in QImode, HImode and SImode.
258 Relative to reg-reg move (2). */
259 {2, 2, 2}, /* cost of storing integer registers */
260 2, /* cost of reg,reg fld/fst */
261 {2, 2, 6}, /* cost of loading fp registers
262 in SFmode, DFmode and XFmode */
263 {4, 4, 6}, /* cost of loading integer registers */
264 2, /* cost of moving MMX register */
265 {2, 2}, /* cost of loading MMX registers
266 in SImode and DImode */
267 {2, 2}, /* cost of storing MMX registers
268 in SImode and DImode */
269 2, /* cost of moving SSE register */
270 {2, 2, 8}, /* cost of loading SSE registers
271 in SImode, DImode and TImode */
272 {2, 2, 8}, /* cost of storing SSE registers
273 in SImode, DImode and TImode */
274 3, /* MMX or SSE register to integer */
275 32, /* size of prefetch block */
276 6, /* number of parallel prefetches */
277 2, /* Branch cost */
278 3, /* cost of FADD and FSUB insns. */
279 5, /* cost of FMUL instruction. */
280 56, /* cost of FDIV instruction. */
281 2, /* cost of FABS instruction. */
282 2, /* cost of FCHS instruction. */
283 56, /* cost of FSQRT instruction. */
286 static const
287 struct processor_costs k6_cost = {
288 1, /* cost of an add instruction */
289 2, /* cost of a lea instruction */
290 1, /* variable shift costs */
291 1, /* constant shift costs */
292 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
293 0, /* cost of multiply per each bit set */
294 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
295 2, /* cost of movsx */
296 2, /* cost of movzx */
297 8, /* "large" insn */
298 4, /* MOVE_RATIO */
299 3, /* cost for loading QImode using movzbl */
300 {4, 5, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 3, 2}, /* cost of storing integer registers */
304 4, /* cost of reg,reg fld/fst */
305 {6, 6, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
307 {4, 4, 4}, /* cost of loading integer registers */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 6, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 1, /* number of parallel prefetches */
321 1, /* Branch cost */
322 2, /* cost of FADD and FSUB insns. */
323 2, /* cost of FMUL instruction. */
324 56, /* cost of FDIV instruction. */
325 2, /* cost of FABS instruction. */
326 2, /* cost of FCHS instruction. */
327 56, /* cost of FSQRT instruction. */
330 static const
331 struct processor_costs athlon_cost = {
332 1, /* cost of an add instruction */
333 2, /* cost of a lea instruction */
334 1, /* variable shift costs */
335 1, /* constant shift costs */
336 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
337 0, /* cost of multiply per each bit set */
338 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
339 1, /* cost of movsx */
340 1, /* cost of movzx */
341 8, /* "large" insn */
342 9, /* MOVE_RATIO */
343 4, /* cost for loading QImode using movzbl */
344 {3, 4, 3}, /* cost of loading integer registers
345 in QImode, HImode and SImode.
346 Relative to reg-reg move (2). */
347 {3, 4, 3}, /* cost of storing integer registers */
348 4, /* cost of reg,reg fld/fst */
349 {4, 4, 12}, /* cost of loading fp registers
350 in SFmode, DFmode and XFmode */
351 {6, 6, 8}, /* cost of loading integer registers */
352 2, /* cost of moving MMX register */
353 {4, 4}, /* cost of loading MMX registers
354 in SImode and DImode */
355 {4, 4}, /* cost of storing MMX registers
356 in SImode and DImode */
357 2, /* cost of moving SSE register */
358 {4, 4, 6}, /* cost of loading SSE registers
359 in SImode, DImode and TImode */
360 {4, 4, 5}, /* cost of storing SSE registers
361 in SImode, DImode and TImode */
362 5, /* MMX or SSE register to integer */
363 64, /* size of prefetch block */
364 6, /* number of parallel prefetches */
365 2, /* Branch cost */
366 4, /* cost of FADD and FSUB insns. */
367 4, /* cost of FMUL instruction. */
368 24, /* cost of FDIV instruction. */
369 2, /* cost of FABS instruction. */
370 2, /* cost of FCHS instruction. */
371 35, /* cost of FSQRT instruction. */
374 static const
375 struct processor_costs k8_cost = {
376 1, /* cost of an add instruction */
377 2, /* cost of a lea instruction */
378 1, /* variable shift costs */
379 1, /* constant shift costs */
380 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
381 0, /* cost of multiply per each bit set */
382 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
383 1, /* cost of movsx */
384 1, /* cost of movzx */
385 8, /* "large" insn */
386 9, /* MOVE_RATIO */
387 4, /* cost for loading QImode using movzbl */
388 {3, 4, 3}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {3, 4, 3}, /* cost of storing integer registers */
392 4, /* cost of reg,reg fld/fst */
393 {4, 4, 12}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {6, 6, 8}, /* cost of loading integer registers */
396 2, /* cost of moving MMX register */
397 {3, 3}, /* cost of loading MMX registers
398 in SImode and DImode */
399 {4, 4}, /* cost of storing MMX registers
400 in SImode and DImode */
401 2, /* cost of moving SSE register */
402 {4, 3, 6}, /* cost of loading SSE registers
403 in SImode, DImode and TImode */
404 {4, 4, 5}, /* cost of storing SSE registers
405 in SImode, DImode and TImode */
406 5, /* MMX or SSE register to integer */
407 64, /* size of prefetch block */
408 6, /* number of parallel prefetches */
409 2, /* Branch cost */
410 4, /* cost of FADD and FSUB insns. */
411 4, /* cost of FMUL instruction. */
412 19, /* cost of FDIV instruction. */
413 2, /* cost of FABS instruction. */
414 2, /* cost of FCHS instruction. */
415 35, /* cost of FSQRT instruction. */
418 static const
419 struct processor_costs pentium4_cost = {
420 1, /* cost of an add instruction */
421 3, /* cost of a lea instruction */
422 4, /* variable shift costs */
423 4, /* constant shift costs */
424 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
425 0, /* cost of multiply per each bit set */
426 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
427 1, /* cost of movsx */
428 1, /* cost of movzx */
429 16, /* "large" insn */
430 6, /* MOVE_RATIO */
431 2, /* cost for loading QImode using movzbl */
432 {4, 5, 4}, /* cost of loading integer registers
433 in QImode, HImode and SImode.
434 Relative to reg-reg move (2). */
435 {2, 3, 2}, /* cost of storing integer registers */
436 2, /* cost of reg,reg fld/fst */
437 {2, 2, 6}, /* cost of loading fp registers
438 in SFmode, DFmode and XFmode */
439 {4, 4, 6}, /* cost of loading integer registers */
440 2, /* cost of moving MMX register */
441 {2, 2}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {2, 2}, /* cost of storing MMX registers
444 in SImode and DImode */
445 12, /* cost of moving SSE register */
446 {12, 12, 12}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {2, 2, 8}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 10, /* MMX or SSE register to integer */
451 64, /* size of prefetch block */
452 6, /* number of parallel prefetches */
453 2, /* Branch cost */
454 5, /* cost of FADD and FSUB insns. */
455 7, /* cost of FMUL instruction. */
456 43, /* cost of FDIV instruction. */
457 2, /* cost of FABS instruction. */
458 2, /* cost of FCHS instruction. */
459 43, /* cost of FSQRT instruction. */
462 static const
463 struct processor_costs nocona_cost = {
464 1, /* cost of an add instruction */
465 1, /* cost of a lea instruction */
466 1, /* variable shift costs */
467 1, /* constant shift costs */
468 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
469 0, /* cost of multiply per each bit set */
470 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
471 1, /* cost of movsx */
472 1, /* cost of movzx */
473 16, /* "large" insn */
474 9, /* MOVE_RATIO */
475 4, /* cost for loading QImode using movzbl */
476 {4, 4, 4}, /* cost of loading integer registers
477 in QImode, HImode and SImode.
478 Relative to reg-reg move (2). */
479 {4, 4, 4}, /* cost of storing integer registers */
480 3, /* cost of reg,reg fld/fst */
481 {12, 12, 12}, /* cost of loading fp registers
482 in SFmode, DFmode and XFmode */
483 {4, 4, 4}, /* cost of loading integer registers */
484 6, /* cost of moving MMX register */
485 {12, 12}, /* cost of loading MMX registers
486 in SImode and DImode */
487 {12, 12}, /* cost of storing MMX registers
488 in SImode and DImode */
489 6, /* cost of moving SSE register */
490 {12, 12, 12}, /* cost of loading SSE registers
491 in SImode, DImode and TImode */
492 {12, 12, 12}, /* cost of storing SSE registers
493 in SImode, DImode and TImode */
494 8, /* MMX or SSE register to integer */
495 128, /* size of prefetch block */
496 8, /* number of parallel prefetches */
497 1, /* Branch cost */
498 6, /* cost of FADD and FSUB insns. */
499 8, /* cost of FMUL instruction. */
500 40, /* cost of FDIV instruction. */
501 3, /* cost of FABS instruction. */
502 3, /* cost of FCHS instruction. */
503 44, /* cost of FSQRT instruction. */
506 const struct processor_costs *ix86_cost = &pentium_cost;
508 /* Processor feature/optimization bitmasks. */
509 #define m_386 (1<<PROCESSOR_I386)
510 #define m_486 (1<<PROCESSOR_I486)
511 #define m_PENT (1<<PROCESSOR_PENTIUM)
512 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
513 #define m_K6 (1<<PROCESSOR_K6)
514 #define m_ATHLON (1<<PROCESSOR_ATHLON)
515 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
516 #define m_K8 (1<<PROCESSOR_K8)
517 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
518 #define m_NOCONA (1<<PROCESSOR_NOCONA)
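/* Each mask below is tested against the bit of the CPU currently being
   tuned for; this file uses (x86_accumulate_outgoing_args & TUNEMASK)
   further down, where TUNEMASK is (1 << ix86_tune).  The TARGET_* wrappers
   that i386.h is assumed to build around these constants follow the same
   pattern.  */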
520 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
521 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
522 const int x86_zero_extend_with_and = m_486 | m_PENT;
523 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
524 const int x86_double_with_add = ~m_386;
525 const int x86_use_bit_test = m_386;
526 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
527 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
528 const int x86_3dnow_a = m_ATHLON_K8;
529 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
530 const int x86_branch_hints = m_PENT4 | m_NOCONA;
531 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
532 const int x86_partial_reg_stall = m_PPRO;
533 const int x86_use_loop = m_K6;
534 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
535 const int x86_use_mov0 = m_K6;
536 const int x86_use_cltd = ~(m_PENT | m_K6);
537 const int x86_read_modify_write = ~m_PENT;
538 const int x86_read_modify = ~(m_PENT | m_PPRO);
539 const int x86_split_long_moves = m_PPRO;
540 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
541 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
542 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
543 const int x86_qimode_math = ~(0);
544 const int x86_promote_qi_regs = 0;
545 const int x86_himode_math = ~(m_PPRO);
546 const int x86_promote_hi_regs = m_PPRO;
547 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
548 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
549 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
550 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
551 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
552 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
553 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
554 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
555 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
556 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
557 const int x86_decompose_lea = m_PENT4 | m_NOCONA;
558 const int x86_shift1 = ~m_486;
559 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
560 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
 561 /* Set for machines where the type and dependencies are resolved on SSE register
 562 parts instead of whole registers, so we may maintain just the lower part of
 563 scalar values in the proper format, leaving the upper part undefined. */
564 const int x86_sse_partial_regs = m_ATHLON_K8;
 565 /* Athlon optimizes the partial-register FP special case (cvtsd2ss), thus
 566 avoiding the need for extra instructions beforehand. */
567 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
568 const int x86_sse_typeless_stores = m_ATHLON_K8;
569 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
570 const int x86_use_ffreep = m_ATHLON_K8;
571 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
572 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
573 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
574 /* Some CPU cores are not able to predict more than 4 branch instructions in
575 the 16 byte window. */
576 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
 578 /* If the average insn count for a single function invocation is
 579 lower than this constant, emit fast (but longer) prologue and
 580 epilogue code. */
581 #define FAST_PROLOGUE_INSN_COUNT 20
583 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
584 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
585 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
586 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
588 /* Array of the smallest class containing reg number REGNO, indexed by
589 REGNO. Used by REGNO_REG_CLASS in i386.h. */
591 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
593 /* ax, dx, cx, bx */
594 AREG, DREG, CREG, BREG,
595 /* si, di, bp, sp */
596 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
597 /* FP registers */
598 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
599 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
600 /* arg pointer */
601 NON_Q_REGS,
602 /* flags, fpsr, dirflag, frame */
603 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
604 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
605 SSE_REGS, SSE_REGS,
606 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
607 MMX_REGS, MMX_REGS,
608 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
609 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
610 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
611 SSE_REGS, SSE_REGS,
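/* Read off the table above: REGNO_REG_CLASS (0) is AREG (%eax) and
   REGNO_REG_CLASS (1) is DREG (%edx), while %ebp and %esp land in
   NON_Q_REGS because they have no addressable QImode low part.  */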
614 /* The "default" register map used in 32bit mode. */
616 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
618 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
619 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
620 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
621 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
622 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
623 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
624 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
627 static int const x86_64_int_parameter_registers[6] =
629 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
630 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
633 static int const x86_64_int_return_registers[4] =
 635 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
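/* Worked example of the convention encoded above: a 64-bit call f (a, b, c)
   passes a in %rdi (gcc regno 5), b in %rsi (regno 4) and c in %rdx
   (regno 1), and a scalar integer result is returned in %rax (regno 0).  */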
638 /* The "default" register map used in 64bit mode. */
639 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
641 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
642 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
643 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
644 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
645 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
646 8,9,10,11,12,13,14,15, /* extended integer registers */
647 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
650 /* Define the register numbers to be used in Dwarf debugging information.
651 The SVR4 reference port C compiler uses the following register numbers
652 in its Dwarf output code:
653 0 for %eax (gcc regno = 0)
654 1 for %ecx (gcc regno = 2)
655 2 for %edx (gcc regno = 1)
656 3 for %ebx (gcc regno = 3)
657 4 for %esp (gcc regno = 7)
658 5 for %ebp (gcc regno = 6)
659 6 for %esi (gcc regno = 4)
660 7 for %edi (gcc regno = 5)
661 The following three DWARF register numbers are never generated by
662 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
663 believes these numbers have these meanings.
664 8 for %eip (no gcc equivalent)
665 9 for %eflags (gcc regno = 17)
666 10 for %trapno (no gcc equivalent)
667 It is not at all clear how we should number the FP stack registers
668 for the x86 architecture. If the version of SDB on x86/svr4 were
669 a bit less brain dead with respect to floating-point then we would
670 have a precedent to follow with respect to DWARF register numbers
671 for x86 FP registers, but the SDB on x86/svr4 is so completely
672 broken with respect to FP registers that it is hardly worth thinking
673 of it as something to strive for compatibility with.
674 The version of x86/svr4 SDB I have at the moment does (partially)
675 seem to believe that DWARF register number 11 is associated with
676 the x86 register %st(0), but that's about all. Higher DWARF
677 register numbers don't seem to be associated with anything in
678 particular, and even for DWARF regno 11, SDB only seems to under-
679 stand that it should say that a variable lives in %st(0) (when
680 asked via an `=' command) if we said it was in DWARF regno 11,
681 but SDB still prints garbage when asked for the value of the
682 variable in question (via a `/' command).
683 (Also note that the labels SDB prints for various FP stack regs
684 when doing an `x' command are all wrong.)
685 Note that these problems generally don't affect the native SVR4
686 C compiler because it doesn't allow the use of -O with -g and
687 because when it is *not* optimizing, it allocates a memory
688 location for each floating-point variable, and the memory
689 location is what gets described in the DWARF AT_location
690 attribute for the variable in question.
691 Regardless of the severe mental illness of the x86/svr4 SDB, we
692 do something sensible here and we use the following DWARF
693 register numbers. Note that these are all stack-top-relative
694 numbers.
695 11 for %st(0) (gcc regno = 8)
696 12 for %st(1) (gcc regno = 9)
697 13 for %st(2) (gcc regno = 10)
698 14 for %st(3) (gcc regno = 11)
699 15 for %st(4) (gcc regno = 12)
700 16 for %st(5) (gcc regno = 13)
701 17 for %st(6) (gcc regno = 14)
702 18 for %st(7) (gcc regno = 15)
704 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
706 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
707 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
708 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
709 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
710 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
711 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
712 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
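/* Cross-checking the numbering described above: %ecx is gcc regno 2 and
   DWARF regno 1, so svr4_dbx_register_map[2] == 1; %st(0) is gcc regno 8
   and maps to DWARF regno 11.  */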
715 /* Test and compare insns in i386.md store the information needed to
716 generate branch and scc insns here. */
718 rtx ix86_compare_op0 = NULL_RTX;
719 rtx ix86_compare_op1 = NULL_RTX;
721 #define MAX_386_STACK_LOCALS 3
722 /* Size of the register save area. */
723 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
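/* Assuming the 64-bit values REGPARM_MAX == 6 and SSE_REGPARM_MAX == 8 from
   i386.h, this works out to 6*8 + 8*16 = 176 bytes -- the register save
   area that va_start spills the argument registers into.  */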
725 /* Define the structure for the machine field in struct function. */
727 struct stack_local_entry GTY(())
729 unsigned short mode;
730 unsigned short n;
731 rtx rtl;
732 struct stack_local_entry *next;
735 /* Structure describing stack frame layout.
736 Stack grows downward:
738 [arguments]
739 <- ARG_POINTER
740 saved pc
742 saved frame pointer if frame_pointer_needed
743 <- HARD_FRAME_POINTER
744 [saved regs]
746 [padding1] \
748 [va_arg registers] (
749 > to_allocate <- FRAME_POINTER
750 [frame] (
752 [padding2] /
754 struct ix86_frame
756 int nregs;
757 int padding1;
758 int va_arg_size;
759 HOST_WIDE_INT frame;
760 int padding2;
761 int outgoing_arguments_size;
762 int red_zone_size;
764 HOST_WIDE_INT to_allocate;
765 /* The offsets relative to ARG_POINTER. */
766 HOST_WIDE_INT frame_pointer_offset;
767 HOST_WIDE_INT hard_frame_pointer_offset;
768 HOST_WIDE_INT stack_pointer_offset;
770 /* When save_regs_using_mov is set, emit prologue using
771 move instead of push instructions. */
772 bool save_regs_using_mov;
775 /* Used to enable/disable debugging features. */
776 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
777 /* Code model option as passed by user. */
778 const char *ix86_cmodel_string;
779 /* Parsed value. */
780 enum cmodel ix86_cmodel;
781 /* Asm dialect. */
782 const char *ix86_asm_string;
783 enum asm_dialect ix86_asm_dialect = ASM_ATT;
 784 /* TLS dialect. */
785 const char *ix86_tls_dialect_string;
786 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
788 /* Which unit we are generating floating point math for. */
789 enum fpmath_unit ix86_fpmath;
791 /* Which cpu are we scheduling for. */
792 enum processor_type ix86_tune;
793 /* Which instruction set architecture to use. */
794 enum processor_type ix86_arch;
796 /* Strings to hold which cpu and instruction set architecture to use. */
797 const char *ix86_tune_string; /* for -mtune=<xxx> */
798 const char *ix86_arch_string; /* for -march=<xxx> */
799 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
801 /* # of registers to use to pass arguments. */
802 const char *ix86_regparm_string;
804 /* true if sse prefetch instruction is not NOOP. */
805 int x86_prefetch_sse;
807 /* ix86_regparm_string as a number */
808 int ix86_regparm;
810 /* Alignment to use for loops and jumps: */
812 /* Power of two alignment for loops. */
813 const char *ix86_align_loops_string;
815 /* Power of two alignment for non-loop jumps. */
816 const char *ix86_align_jumps_string;
818 /* Power of two alignment for stack boundary in bytes. */
819 const char *ix86_preferred_stack_boundary_string;
821 /* Preferred alignment for stack boundary in bits. */
822 unsigned int ix86_preferred_stack_boundary;
824 /* Values 1-5: see jump.c */
825 int ix86_branch_cost;
826 const char *ix86_branch_cost_string;
828 /* Power of two alignment for functions. */
829 const char *ix86_align_funcs_string;
831 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
832 char internal_label_prefix[16];
833 int internal_label_prefix_len;
835 static void output_pic_addr_const (FILE *, rtx, int);
836 static void put_condition_code (enum rtx_code, enum machine_mode,
837 int, int, FILE *);
838 static const char *get_some_local_dynamic_name (void);
839 static int get_some_local_dynamic_name_1 (rtx *, void *);
840 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
841 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
842 rtx *);
843 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
844 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
845 enum machine_mode);
846 static rtx get_thread_pointer (int);
847 static rtx legitimize_tls_address (rtx, enum tls_model, int);
848 static void get_pc_thunk_name (char [32], unsigned int);
849 static rtx gen_push (rtx);
850 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
851 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
852 static struct machine_function * ix86_init_machine_status (void);
853 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
854 static int ix86_nsaved_regs (void);
855 static void ix86_emit_save_regs (void);
856 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
857 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
858 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
859 static HOST_WIDE_INT ix86_GOT_alias_set (void);
860 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
861 static rtx ix86_expand_aligntest (rtx, int);
862 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
863 static int ix86_issue_rate (void);
864 static int ix86_adjust_cost (rtx, rtx, rtx, int);
865 static int ia32_multipass_dfa_lookahead (void);
866 static void ix86_init_mmx_sse_builtins (void);
867 static rtx x86_this_parameter (tree);
868 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
869 HOST_WIDE_INT, tree);
870 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
871 static void x86_file_start (void);
872 static void ix86_reorg (void);
873 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
874 static tree ix86_build_builtin_va_list (void);
875 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
876 tree, int *, int);
877 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
878 static bool ix86_vector_mode_supported_p (enum machine_mode);
880 static int ix86_address_cost (rtx);
881 static bool ix86_cannot_force_const_mem (rtx);
882 static rtx ix86_delegitimize_address (rtx);
884 struct builtin_description;
885 static rtx ix86_expand_sse_comi (const struct builtin_description *,
886 tree, rtx);
887 static rtx ix86_expand_sse_compare (const struct builtin_description *,
888 tree, rtx);
889 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
890 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
891 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
892 static rtx ix86_expand_store_builtin (enum insn_code, tree);
893 static rtx safe_vector_operand (rtx, enum machine_mode);
894 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
895 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
896 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
897 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
898 static int ix86_fp_comparison_cost (enum rtx_code code);
899 static unsigned int ix86_select_alt_pic_regnum (void);
900 static int ix86_save_reg (unsigned int, int);
901 static void ix86_compute_frame_layout (struct ix86_frame *);
902 static int ix86_comp_type_attributes (tree, tree);
903 static int ix86_function_regparm (tree, tree);
904 const struct attribute_spec ix86_attribute_table[];
905 static bool ix86_function_ok_for_sibcall (tree, tree);
906 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
907 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
908 static int ix86_value_regno (enum machine_mode);
909 static bool contains_128bit_aligned_vector_p (tree);
910 static rtx ix86_struct_value_rtx (tree, int);
911 static bool ix86_ms_bitfield_layout_p (tree);
912 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
913 static int extended_reg_mentioned_1 (rtx *, void *);
914 static bool ix86_rtx_costs (rtx, int, int, int *);
915 static int min_insn_size (rtx);
916 static tree ix86_md_asm_clobbers (tree clobbers);
917 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
918 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
919 tree, bool);
921 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
922 static void ix86_svr3_asm_out_constructor (rtx, int);
923 #endif
 925 /* Register class used for passing a given 64-bit part of an argument.
 926 These represent classes as documented by the psABI, with the exception
 927 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
 928 uses an SFmode or DFmode move instead of a DImode one to avoid reformatting penalties.
 930 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
 931 whenever possible (the upper half is just padding). */
933 enum x86_64_reg_class
935 X86_64_NO_CLASS,
936 X86_64_INTEGER_CLASS,
937 X86_64_INTEGERSI_CLASS,
938 X86_64_SSE_CLASS,
939 X86_64_SSESF_CLASS,
940 X86_64_SSEDF_CLASS,
941 X86_64_SSEUP_CLASS,
942 X86_64_X87_CLASS,
943 X86_64_X87UP_CLASS,
944 X86_64_MEMORY_CLASS
946 static const char * const x86_64_reg_class_name[] =
947 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
949 #define MAX_CLASSES 4
950 static int classify_argument (enum machine_mode, tree,
951 enum x86_64_reg_class [MAX_CLASSES], int);
952 static int examine_argument (enum machine_mode, tree, int, int *, int *);
953 static rtx construct_container (enum machine_mode, tree, int, int, int,
954 const int *, int);
955 static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
956 enum x86_64_reg_class);
958 /* Table of constants used by fldpi, fldln2, etc.... */
959 static REAL_VALUE_TYPE ext_80387_constants_table [5];
960 static bool ext_80387_constants_init = 0;
961 static void init_ext_80387_constants (void);
963 /* Initialize the GCC target structure. */
964 #undef TARGET_ATTRIBUTE_TABLE
965 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
966 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
967 # undef TARGET_MERGE_DECL_ATTRIBUTES
968 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
969 #endif
971 #undef TARGET_COMP_TYPE_ATTRIBUTES
972 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
974 #undef TARGET_INIT_BUILTINS
975 #define TARGET_INIT_BUILTINS ix86_init_builtins
977 #undef TARGET_EXPAND_BUILTIN
978 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
980 #undef TARGET_ASM_FUNCTION_EPILOGUE
981 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
983 #undef TARGET_ASM_OPEN_PAREN
984 #define TARGET_ASM_OPEN_PAREN ""
985 #undef TARGET_ASM_CLOSE_PAREN
986 #define TARGET_ASM_CLOSE_PAREN ""
988 #undef TARGET_ASM_ALIGNED_HI_OP
989 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
990 #undef TARGET_ASM_ALIGNED_SI_OP
991 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
992 #ifdef ASM_QUAD
993 #undef TARGET_ASM_ALIGNED_DI_OP
994 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
995 #endif
997 #undef TARGET_ASM_UNALIGNED_HI_OP
998 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
999 #undef TARGET_ASM_UNALIGNED_SI_OP
1000 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1001 #undef TARGET_ASM_UNALIGNED_DI_OP
1002 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1004 #undef TARGET_SCHED_ADJUST_COST
1005 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1006 #undef TARGET_SCHED_ISSUE_RATE
1007 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1008 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1009 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1010 ia32_multipass_dfa_lookahead
1012 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1013 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1015 #ifdef HAVE_AS_TLS
1016 #undef TARGET_HAVE_TLS
1017 #define TARGET_HAVE_TLS true
1018 #endif
1019 #undef TARGET_CANNOT_FORCE_CONST_MEM
1020 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1022 #undef TARGET_DELEGITIMIZE_ADDRESS
1023 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1025 #undef TARGET_MS_BITFIELD_LAYOUT_P
1026 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1028 #undef TARGET_ASM_OUTPUT_MI_THUNK
1029 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1030 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1031 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1033 #undef TARGET_ASM_FILE_START
1034 #define TARGET_ASM_FILE_START x86_file_start
1036 #undef TARGET_RTX_COSTS
1037 #define TARGET_RTX_COSTS ix86_rtx_costs
1038 #undef TARGET_ADDRESS_COST
1039 #define TARGET_ADDRESS_COST ix86_address_cost
1041 #undef TARGET_FIXED_CONDITION_CODE_REGS
1042 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1043 #undef TARGET_CC_MODES_COMPATIBLE
1044 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1046 #undef TARGET_MACHINE_DEPENDENT_REORG
1047 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1049 #undef TARGET_BUILD_BUILTIN_VA_LIST
1050 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1052 #undef TARGET_MD_ASM_CLOBBERS
1053 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1055 #undef TARGET_PROMOTE_PROTOTYPES
1056 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1057 #undef TARGET_STRUCT_VALUE_RTX
1058 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1059 #undef TARGET_SETUP_INCOMING_VARARGS
1060 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1061 #undef TARGET_MUST_PASS_IN_STACK
1062 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1063 #undef TARGET_PASS_BY_REFERENCE
1064 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1066 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1067 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1069 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1070 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1072 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1073 #undef TARGET_INSERT_ATTRIBUTES
1074 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1075 #endif
1077 struct gcc_target targetm = TARGET_INITIALIZER;
1080 /* The svr4 ABI for the i386 says that records and unions are returned
1081 in memory. */
1082 #ifndef DEFAULT_PCC_STRUCT_RETURN
1083 #define DEFAULT_PCC_STRUCT_RETURN 1
1084 #endif
1086 /* Sometimes certain combinations of command options do not make
1087 sense on a particular target machine. You can define a macro
1088 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1089 defined, is executed once just after all the command options have
1090 been parsed.
1092 Don't use this macro to turn on various extra optimizations for
1093 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1095 void
1096 override_options (void)
1098 int i;
1099 int ix86_tune_defaulted = 0;
1101 /* Comes from final.c -- no real reason to change it. */
1102 #define MAX_CODE_ALIGN 16
1104 static struct ptt
1106 const struct processor_costs *cost; /* Processor costs */
1107 const int target_enable; /* Target flags to enable. */
1108 const int target_disable; /* Target flags to disable. */
1109 const int align_loop; /* Default alignments. */
1110 const int align_loop_max_skip;
1111 const int align_jump;
1112 const int align_jump_max_skip;
1113 const int align_func;
1115 const processor_target_table[PROCESSOR_max] =
1117 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1118 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1119 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1120 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1121 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1122 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1123 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1124 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1125 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
1128 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1129 static struct pta
1131 const char *const name; /* processor name or nickname. */
1132 const enum processor_type processor;
1133 const enum pta_flags
1135 PTA_SSE = 1,
1136 PTA_SSE2 = 2,
1137 PTA_SSE3 = 4,
1138 PTA_MMX = 8,
1139 PTA_PREFETCH_SSE = 16,
1140 PTA_3DNOW = 32,
1141 PTA_3DNOW_A = 64,
1142 PTA_64BIT = 128
1143 } flags;
1145 const processor_alias_table[] =
1147 {"i386", PROCESSOR_I386, 0},
1148 {"i486", PROCESSOR_I486, 0},
1149 {"i586", PROCESSOR_PENTIUM, 0},
1150 {"pentium", PROCESSOR_PENTIUM, 0},
1151 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1152 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1153 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1154 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1155 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1156 {"i686", PROCESSOR_PENTIUMPRO, 0},
1157 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1158 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1159 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1160 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1161 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1162 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1163 | PTA_MMX | PTA_PREFETCH_SSE},
1164 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1165 | PTA_MMX | PTA_PREFETCH_SSE},
1166 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1167 | PTA_MMX | PTA_PREFETCH_SSE},
1168 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1169 | PTA_MMX | PTA_PREFETCH_SSE},
1170 {"k6", PROCESSOR_K6, PTA_MMX},
1171 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1172 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1173 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1174 | PTA_3DNOW_A},
1175 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1176 | PTA_3DNOW | PTA_3DNOW_A},
1177 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1178 | PTA_3DNOW_A | PTA_SSE},
1179 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1180 | PTA_3DNOW_A | PTA_SSE},
1181 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1182 | PTA_3DNOW_A | PTA_SSE},
1183 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1184 | PTA_SSE | PTA_SSE2 },
1185 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1186 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1187 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1188 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1189 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1190 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1191 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1192 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1195 int const pta_size = ARRAY_SIZE (processor_alias_table);
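/* Reading the table above: -march=pentium3, for example, selects
   PROCESSOR_PENTIUMPRO with PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE, so the
   loop below enables MASK_MMX and MASK_SSE (unless the user overrode them
   explicitly) and sets x86_prefetch_sse.  */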
1197 /* Set the default values for switches whose default depends on TARGET_64BIT
1198 in case they weren't overwritten by command line options. */
1199 if (TARGET_64BIT)
1201 if (flag_omit_frame_pointer == 2)
1202 flag_omit_frame_pointer = 1;
1203 if (flag_asynchronous_unwind_tables == 2)
1204 flag_asynchronous_unwind_tables = 1;
1205 if (flag_pcc_struct_return == 2)
1206 flag_pcc_struct_return = 0;
1208 else
1210 if (flag_omit_frame_pointer == 2)
1211 flag_omit_frame_pointer = 0;
1212 if (flag_asynchronous_unwind_tables == 2)
1213 flag_asynchronous_unwind_tables = 0;
1214 if (flag_pcc_struct_return == 2)
1215 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1218 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1219 SUBTARGET_OVERRIDE_OPTIONS;
1220 #endif
1222 if (!ix86_tune_string && ix86_arch_string)
1223 ix86_tune_string = ix86_arch_string;
1224 if (!ix86_tune_string)
1226 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1227 ix86_tune_defaulted = 1;
1229 if (!ix86_arch_string)
1230 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1232 if (ix86_cmodel_string != 0)
1234 if (!strcmp (ix86_cmodel_string, "small"))
1235 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1236 else if (flag_pic)
1237 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1238 else if (!strcmp (ix86_cmodel_string, "32"))
1239 ix86_cmodel = CM_32;
1240 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1241 ix86_cmodel = CM_KERNEL;
1242 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1243 ix86_cmodel = CM_MEDIUM;
1244 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1245 ix86_cmodel = CM_LARGE;
1246 else
1247 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1249 else
1251 ix86_cmodel = CM_32;
1252 if (TARGET_64BIT)
1253 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1255 if (ix86_asm_string != 0)
1257 if (!strcmp (ix86_asm_string, "intel"))
1258 ix86_asm_dialect = ASM_INTEL;
1259 else if (!strcmp (ix86_asm_string, "att"))
1260 ix86_asm_dialect = ASM_ATT;
1261 else
1262 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1264 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1265 error ("code model `%s' not supported in the %s bit mode",
1266 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1267 if (ix86_cmodel == CM_LARGE)
1268 sorry ("code model `large' not supported yet");
1269 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1270 sorry ("%i-bit mode not compiled in",
1271 (target_flags & MASK_64BIT) ? 64 : 32);
1273 for (i = 0; i < pta_size; i++)
1274 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1276 ix86_arch = processor_alias_table[i].processor;
1277 /* Default cpu tuning to the architecture. */
1278 ix86_tune = ix86_arch;
1279 if (processor_alias_table[i].flags & PTA_MMX
1280 && !(target_flags_explicit & MASK_MMX))
1281 target_flags |= MASK_MMX;
1282 if (processor_alias_table[i].flags & PTA_3DNOW
1283 && !(target_flags_explicit & MASK_3DNOW))
1284 target_flags |= MASK_3DNOW;
1285 if (processor_alias_table[i].flags & PTA_3DNOW_A
1286 && !(target_flags_explicit & MASK_3DNOW_A))
1287 target_flags |= MASK_3DNOW_A;
1288 if (processor_alias_table[i].flags & PTA_SSE
1289 && !(target_flags_explicit & MASK_SSE))
1290 target_flags |= MASK_SSE;
1291 if (processor_alias_table[i].flags & PTA_SSE2
1292 && !(target_flags_explicit & MASK_SSE2))
1293 target_flags |= MASK_SSE2;
1294 if (processor_alias_table[i].flags & PTA_SSE3
1295 && !(target_flags_explicit & MASK_SSE3))
1296 target_flags |= MASK_SSE3;
1297 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1298 x86_prefetch_sse = true;
1299 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1301 if (ix86_tune_defaulted)
1303 ix86_tune_string = "x86-64";
1304 for (i = 0; i < pta_size; i++)
1305 if (! strcmp (ix86_tune_string,
1306 processor_alias_table[i].name))
1307 break;
1308 ix86_tune = processor_alias_table[i].processor;
1310 else
1311 error ("CPU you selected does not support x86-64 "
1312 "instruction set");
1314 break;
1317 if (i == pta_size)
1318 error ("bad value (%s) for -march= switch", ix86_arch_string);
1320 for (i = 0; i < pta_size; i++)
1321 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1323 ix86_tune = processor_alias_table[i].processor;
1324 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1325 error ("CPU you selected does not support x86-64 instruction set");
1327 /* Intel CPUs have always interpreted SSE prefetch instructions as
1328 NOPs; so, we can enable SSE prefetch instructions even when
1329 -mtune (rather than -march) points us to a processor that has them.
1330 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1331 higher processors. */
1332 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1333 x86_prefetch_sse = true;
1334 break;
1336 if (i == pta_size)
1337 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1339 if (optimize_size)
1340 ix86_cost = &size_cost;
1341 else
1342 ix86_cost = processor_target_table[ix86_tune].cost;
1343 target_flags |= processor_target_table[ix86_tune].target_enable;
1344 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1346 /* Arrange to set up i386_stack_locals for all functions. */
1347 init_machine_status = ix86_init_machine_status;
1349 /* Validate -mregparm= value. */
1350 if (ix86_regparm_string)
1352 i = atoi (ix86_regparm_string);
1353 if (i < 0 || i > REGPARM_MAX)
1354 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1355 else
1356 ix86_regparm = i;
1358 else
1359 if (TARGET_64BIT)
1360 ix86_regparm = REGPARM_MAX;
1362 /* If the user has provided any of the -malign-* options,
1363 warn and use that value only if -falign-* is not set.
1364 Remove this code in GCC 3.2 or later. */
1365 if (ix86_align_loops_string)
1367 warning ("-malign-loops is obsolete, use -falign-loops");
1368 if (align_loops == 0)
1370 i = atoi (ix86_align_loops_string);
1371 if (i < 0 || i > MAX_CODE_ALIGN)
1372 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1373 else
1374 align_loops = 1 << i;
1378 if (ix86_align_jumps_string)
1380 warning ("-malign-jumps is obsolete, use -falign-jumps");
1381 if (align_jumps == 0)
1383 i = atoi (ix86_align_jumps_string);
1384 if (i < 0 || i > MAX_CODE_ALIGN)
1385 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1386 else
1387 align_jumps = 1 << i;
1391 if (ix86_align_funcs_string)
1393 warning ("-malign-functions is obsolete, use -falign-functions");
1394 if (align_functions == 0)
1396 i = atoi (ix86_align_funcs_string);
1397 if (i < 0 || i > MAX_CODE_ALIGN)
1398 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1399 else
1400 align_functions = 1 << i;
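/* Worked example of the conversion above: -malign-functions=4 yields
   align_functions = 1 << 4 = 16, i.e. 16-byte alignment; -malign-loops
   and -malign-jumps are interpreted the same way.  */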
1404 /* Default align_* from the processor table. */
1405 if (align_loops == 0)
1407 align_loops = processor_target_table[ix86_tune].align_loop;
1408 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1410 if (align_jumps == 0)
1412 align_jumps = processor_target_table[ix86_tune].align_jump;
1413 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1415 if (align_functions == 0)
1417 align_functions = processor_target_table[ix86_tune].align_func;
1420 /* Validate -mpreferred-stack-boundary= value, or provide default.
1421 The default of 128 bits is for Pentium III's SSE __m128, but we
1422 don't want additional code to keep the stack aligned when
1423 optimizing for code size. */
1424 ix86_preferred_stack_boundary = (optimize_size
1425 ? TARGET_64BIT ? 128 : 32
1426 : 128);
1427 if (ix86_preferred_stack_boundary_string)
1429 i = atoi (ix86_preferred_stack_boundary_string);
1430 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1431 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1432 TARGET_64BIT ? 4 : 2);
1433 else
1434 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
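/* E.g. -mpreferred-stack-boundary=4 gives (1 << 4) * 8 = 128 bits
   (16 bytes), matching the __m128 default mentioned above; BITS_PER_UNIT
   is 8 on this target.  */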
1437 /* Validate -mbranch-cost= value, or provide default. */
1438 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1439 if (ix86_branch_cost_string)
1441 i = atoi (ix86_branch_cost_string);
1442 if (i < 0 || i > 5)
1443 error ("-mbranch-cost=%d is not between 0 and 5", i);
1444 else
1445 ix86_branch_cost = i;
1448 if (ix86_tls_dialect_string)
1450 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1451 ix86_tls_dialect = TLS_DIALECT_GNU;
1452 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1453 ix86_tls_dialect = TLS_DIALECT_SUN;
1454 else
1455 error ("bad value (%s) for -mtls-dialect= switch",
1456 ix86_tls_dialect_string);
1459 /* Keep nonleaf frame pointers. */
1460 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1461 flag_omit_frame_pointer = 1;
1463 /* If we're doing fast math, we don't care about comparison order
1464 wrt NaNs. This lets us use a shorter comparison sequence. */
1465 if (flag_unsafe_math_optimizations)
1466 target_flags &= ~MASK_IEEE_FP;
1468 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1469 since the insns won't need emulation. */
1470 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1471 target_flags &= ~MASK_NO_FANCY_MATH_387;
1473 /* Turn on SSE2 builtins for -msse3. */
1474 if (TARGET_SSE3)
1475 target_flags |= MASK_SSE2;
1477 /* Turn on SSE builtins for -msse2. */
1478 if (TARGET_SSE2)
1479 target_flags |= MASK_SSE;
1481 if (TARGET_64BIT)
1483 if (TARGET_ALIGN_DOUBLE)
1484 error ("-malign-double makes no sense in the 64bit mode");
1485 if (TARGET_RTD)
1486 error ("-mrtd calling convention not supported in the 64bit mode");
1487 /* Enable by default the SSE and MMX builtins. */
1488 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1489 ix86_fpmath = FPMATH_SSE;
1491 else
1493 ix86_fpmath = FPMATH_387;
1494 /* The i386 ABI does not specify a red zone. It still makes sense to use one
1495 when the programmer takes care to keep the stack from being destroyed. */
1496 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1497 target_flags |= MASK_NO_RED_ZONE;
1500 if (ix86_fpmath_string != 0)
1502 if (! strcmp (ix86_fpmath_string, "387"))
1503 ix86_fpmath = FPMATH_387;
1504 else if (! strcmp (ix86_fpmath_string, "sse"))
1506 if (!TARGET_SSE)
1508 warning ("SSE instruction set disabled, using 387 arithmetics");
1509 ix86_fpmath = FPMATH_387;
1511 else
1512 ix86_fpmath = FPMATH_SSE;
1514 else if (! strcmp (ix86_fpmath_string, "387,sse")
1515 || ! strcmp (ix86_fpmath_string, "sse,387"))
1517 if (!TARGET_SSE)
1519 warning ("SSE instruction set disabled, using 387 arithmetics");
1520 ix86_fpmath = FPMATH_387;
1522 else if (!TARGET_80387)
1524 warning ("387 instruction set disabled, using SSE arithmetics");
1525 ix86_fpmath = FPMATH_SSE;
1527 else
1528 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1530 else
1531 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1534 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1535 on by -msse. */
1536 if (TARGET_SSE)
1538 target_flags |= MASK_MMX;
1539 x86_prefetch_sse = true;
1542 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1543 if (TARGET_3DNOW)
1545 target_flags |= MASK_MMX;
1546 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1547 extensions it adds. */
1548 if (x86_3dnow_a & (1 << ix86_arch))
1549 target_flags |= MASK_3DNOW_A;
1551 if ((x86_accumulate_outgoing_args & TUNEMASK)
1552 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1553 && !optimize_size)
1554 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1556 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1558 char *p;
1559 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1560 p = strchr (internal_label_prefix, 'X');
1561 internal_label_prefix_len = p - internal_label_prefix;
1562 *p = '\0';
1566 void
1567 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1569 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1570 make the problem with not enough registers even worse. */
1571 #ifdef INSN_SCHEDULING
1572 if (level > 1)
1573 flag_schedule_insns = 0;
1574 #endif
1576 /* The default values of these switches depend on TARGET_64BIT,
1577 which is not known at this moment. Mark these values with 2 and
1578 let the user override them. In case there is no command line option
1579 specifying them, we will set the defaults in override_options. */
1580 if (optimize >= 1)
1581 flag_omit_frame_pointer = 2;
1582 flag_pcc_struct_return = 2;
1583 flag_asynchronous_unwind_tables = 2;
1586 /* Table of valid machine attributes. */
1587 const struct attribute_spec ix86_attribute_table[] =
1589 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1590 /* Stdcall attribute says callee is responsible for popping arguments
1591 if they are not variable. */
1592 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1593 /* Fastcall attribute says callee is responsible for popping arguments
1594 if they are not variable. */
1595 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1596 /* Cdecl attribute says the callee is a normal C declaration */
1597 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1598 /* Regparm attribute specifies how many integer arguments are to be
1599 passed in registers. */
1600 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1601 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1602 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
1603 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
1604 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1605 #endif
1606 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1607 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1608 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1609 SUBTARGET_ATTRIBUTE_TABLE,
1610 #endif
1611 { NULL, 0, 0, false, false, false, NULL }
1614 /* Decide whether we can make a sibling call to a function. DECL is the
1615 declaration of the function being targeted by the call and EXP is the
1616 CALL_EXPR representing the call. */
1618 static bool
1619 ix86_function_ok_for_sibcall (tree decl, tree exp)
1621 /* If we are generating position-independent code, we cannot sibcall
1622 optimize any indirect call, or a direct call to a global function,
1623 as the PLT requires %ebx be live. */
1624 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1625 return false;
1627 /* If we are returning floats on the 80387 register stack, we cannot
1628 make a sibcall from a function that doesn't return a float to a
1629 function that does or, conversely, from a function that does return
1630 a float to a function that doesn't; the necessary stack adjustment
1631 would not be executed. */
1632 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1633 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1634 return false;
1636 /* If this call is indirect, we'll need to be able to use a call-clobbered
1637 register for the address of the target function. Make sure that all
1638 such registers are not used for passing parameters. */
1639 if (!decl && !TARGET_64BIT)
1641 tree type;
1643 /* We're looking at the CALL_EXPR, we need the type of the function. */
1644 type = TREE_OPERAND (exp, 0); /* pointer expression */
1645 type = TREE_TYPE (type); /* pointer type */
1646 type = TREE_TYPE (type); /* function type */
1648 if (ix86_function_regparm (type, NULL) >= 3)
1650 /* ??? Need to count the actual number of registers to be used,
1651 not the possible number of registers. Fix later. */
1652 return false;
1656 /* Otherwise okay. That also includes certain types of indirect calls. */
1657 return true;
1660 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1661 arguments as in struct attribute_spec.handler. */
1662 static tree
1663 ix86_handle_cdecl_attribute (tree *node, tree name,
1664 tree args ATTRIBUTE_UNUSED,
1665 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1667 if (TREE_CODE (*node) != FUNCTION_TYPE
1668 && TREE_CODE (*node) != METHOD_TYPE
1669 && TREE_CODE (*node) != FIELD_DECL
1670 && TREE_CODE (*node) != TYPE_DECL)
1672 warning ("`%s' attribute only applies to functions",
1673 IDENTIFIER_POINTER (name));
1674 *no_add_attrs = true;
1676 else
1678 if (is_attribute_p ("fastcall", name))
1680 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1682 error ("fastcall and stdcall attributes are not compatible");
1684 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1686 error ("fastcall and regparm attributes are not compatible");
1689 else if (is_attribute_p ("stdcall", name))
1691 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1693 error ("fastcall and stdcall attributes are not compatible");
1698 if (TARGET_64BIT)
1700 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1701 *no_add_attrs = true;
1704 return NULL_TREE;
1707 /* Handle a "regparm" attribute;
1708 arguments as in struct attribute_spec.handler. */
1709 static tree
1710 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1711 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1713 if (TREE_CODE (*node) != FUNCTION_TYPE
1714 && TREE_CODE (*node) != METHOD_TYPE
1715 && TREE_CODE (*node) != FIELD_DECL
1716 && TREE_CODE (*node) != TYPE_DECL)
1718 warning ("`%s' attribute only applies to functions",
1719 IDENTIFIER_POINTER (name));
1720 *no_add_attrs = true;
1722 else
1724 tree cst;
1726 cst = TREE_VALUE (args);
1727 if (TREE_CODE (cst) != INTEGER_CST)
1729 warning ("`%s' attribute requires an integer constant argument",
1730 IDENTIFIER_POINTER (name));
1731 *no_add_attrs = true;
1733 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1735 warning ("argument to `%s' attribute larger than %d",
1736 IDENTIFIER_POINTER (name), REGPARM_MAX);
1737 *no_add_attrs = true;
1740 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1742 error ("fastcall and regparm attributes are not compatible");
1746 return NULL_TREE;
1749 /* Return 0 if the attributes for two types are incompatible, 1 if they
1750 are compatible, and 2 if they are nearly compatible (which causes a
1751 warning to be generated). */
1753 static int
1754 ix86_comp_type_attributes (tree type1, tree type2)
1756 /* Check for mismatch of non-default calling convention. */
1757 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1759 if (TREE_CODE (type1) != FUNCTION_TYPE)
1760 return 1;
1762 /* Check for mismatched fastcall types */
1763 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1764 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1765 return 0;
1767 /* Check for mismatched return types (cdecl vs stdcall). */
1768 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1769 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1770 return 0;
1771 if (ix86_function_regparm (type1, NULL)
1772 != ix86_function_regparm (type2, NULL))
1773 return 0;
1774 return 1;
1777 /* Return the regparm value for a function with the indicated TYPE and DECL.
1778 DECL may be NULL when calling the function indirectly
1779 or considering a libcall. */
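/* A rough example of the convention this computes, assuming the usual ia32
regparm register order of EAX, EDX, ECX:

int __attribute__((regparm(3))) f (int a, int b, int c);

receives A in %eax, B in %edx and C in %ecx, so this returns 3 for its type;
the fastcall case below yields 2 (ECX and EDX) instead.  */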
1781 static int
1782 ix86_function_regparm (tree type, tree decl)
1784 tree attr;
1785 int regparm = ix86_regparm;
1786 bool user_convention = false;
1788 if (!TARGET_64BIT)
1790 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1791 if (attr)
1793 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1794 user_convention = true;
1797 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1799 regparm = 2;
1800 user_convention = true;
1803 /* Use register calling convention for local functions when possible. */
1804 if (!TARGET_64BIT && !user_convention && decl
1805 && flag_unit_at_a_time && !profile_flag)
1807 struct cgraph_local_info *i = cgraph_local_info (decl);
1808 if (i && i->local)
1810 /* We can't use regparm(3) for nested functions as these use
1811 the static chain pointer in the third argument. */
1812 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1813 regparm = 2;
1814 else
1815 regparm = 3;
1819 return regparm;
1822 /* Return true if EAX is live at the start of the function. Used by
1823 ix86_expand_prologue to determine if we need special help before
1824 calling allocate_stack_worker. */
1826 static bool
1827 ix86_eax_live_at_start_p (void)
1829 /* Cheat. Don't bother working forward from ix86_function_regparm
1830 to the function type to whether an actual argument is located in
1831 eax. Instead just look at cfg info, which is still close enough
1832 to correct at this point. This gives false positives for broken
1833 functions that might use uninitialized data that happens to be
1834 allocated in eax, but who cares? */
1835 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1838 /* Value is the number of bytes of arguments automatically
1839 popped when returning from a subroutine call.
1840 FUNDECL is the declaration node of the function (as a tree),
1841 FUNTYPE is the data type of the function (as a tree),
1842 or for a library call it is an identifier node for the subroutine name.
1843 SIZE is the number of bytes of arguments passed on the stack.
1845 On the 80386, the RTD insn may be used to pop them if the number
1846 of args is fixed, but if the number is variable then the caller
1847 must pop them all. RTD can't be used for library calls now
1848 because the library is compiled with the Unix compiler.
1849 Use of RTD is a selectable option, since it is incompatible with
1850 standard Unix calling sequences. If the option is not selected,
1851 the caller must always pop the args.
1853 The attribute stdcall is equivalent to RTD on a per module basis. */
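/* A rough illustration, assuming each int argument occupies 4 bytes on ia32:

void __attribute__((stdcall)) f (int a, int b, int c);

has three fixed arguments, so this function returns 12 and the callee is
expected to pop them with "ret $12"; a cdecl or varargs function yields 0
and the caller pops the arguments itself.  */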
1856 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1858 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1860 /* Cdecl functions override -mrtd, and never pop the stack. */
1861 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1863 /* Stdcall and fastcall functions will pop the stack if they do not
1864 take variable args. */
1865 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1866 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1867 rtd = 1;
1869 if (rtd
1870 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1871 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1872 == void_type_node)))
1873 return size;
1876 /* Lose any fake structure return argument if it is passed on the stack. */
1877 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1878 && !TARGET_64BIT)
1880 int nregs = ix86_function_regparm (funtype, fundecl);
1882 if (!nregs)
1883 return GET_MODE_SIZE (Pmode);
1886 return 0;
1889 /* Argument support functions. */
1891 /* Return true when register may be used to pass function parameters. */
1892 bool
1893 ix86_function_arg_regno_p (int regno)
1895 int i;
1896 if (!TARGET_64BIT)
1897 return (regno < REGPARM_MAX
1898 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1899 if (SSE_REGNO_P (regno) && TARGET_SSE)
1900 return true;
1901 /* RAX is used as hidden argument to va_arg functions. */
1902 if (!regno)
1903 return true;
1904 for (i = 0; i < REGPARM_MAX; i++)
1905 if (regno == x86_64_int_parameter_registers[i])
1906 return true;
1907 return false;
1910 /* Return true if we do not know how to pass TYPE solely in registers. */
1912 static bool
1913 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
1915 if (must_pass_in_stack_var_size_or_pad (mode, type))
1916 return true;
1917 return (!TARGET_64BIT && type && mode == TImode);
1920 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1921 for a call to a function whose data type is FNTYPE.
1922 For a library call, FNTYPE is 0. */
1924 void
1925 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1926 tree fntype, /* tree ptr for function decl */
1927 rtx libname, /* SYMBOL_REF of library name or 0 */
1928 tree fndecl)
1930 static CUMULATIVE_ARGS zero_cum;
1931 tree param, next_param;
1933 if (TARGET_DEBUG_ARG)
1935 fprintf (stderr, "\ninit_cumulative_args (");
1936 if (fntype)
1937 fprintf (stderr, "fntype code = %s, ret code = %s",
1938 tree_code_name[(int) TREE_CODE (fntype)],
1939 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1940 else
1941 fprintf (stderr, "no fntype");
1943 if (libname)
1944 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1947 *cum = zero_cum;
1949 /* Set up the number of registers to use for passing arguments. */
1950 if (fntype)
1951 cum->nregs = ix86_function_regparm (fntype, fndecl);
1952 else
1953 cum->nregs = ix86_regparm;
1954 if (TARGET_SSE)
1955 cum->sse_nregs = SSE_REGPARM_MAX;
1956 if (TARGET_MMX)
1957 cum->mmx_nregs = MMX_REGPARM_MAX;
1958 cum->warn_sse = true;
1959 cum->warn_mmx = true;
1960 cum->maybe_vaarg = false;
1962 /* Use ecx and edx registers if function has fastcall attribute */
1963 if (fntype && !TARGET_64BIT)
1965 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1967 cum->nregs = 2;
1968 cum->fastcall = 1;
1972 /* Determine if this function has variable arguments. This is
1973 indicated by the last argument being 'void_type_node' if there
1974 are no variable arguments. If there are variable arguments, then
1975 we won't pass anything in registers in 32-bit mode. */
1977 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
1979 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1980 param != 0; param = next_param)
1982 next_param = TREE_CHAIN (param);
1983 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1985 if (!TARGET_64BIT)
1987 cum->nregs = 0;
1988 cum->sse_nregs = 0;
1989 cum->mmx_nregs = 0;
1990 cum->warn_sse = 0;
1991 cum->warn_mmx = 0;
1992 cum->fastcall = 0;
1994 cum->maybe_vaarg = true;
1998 if ((!fntype && !libname)
1999 || (fntype && !TYPE_ARG_TYPES (fntype)))
2000 cum->maybe_vaarg = 1;
2002 if (TARGET_DEBUG_ARG)
2003 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2005 return;
2008 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
2009 of this code is to classify each 8 bytes of an incoming argument by the register
2010 class and assign registers accordingly. */
2012 /* Return the union class of CLASS1 and CLASS2.
2013 See the x86-64 PS ABI for details. */
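/* A small worked example of the rules below, as we read them: merging
X86_64_INTEGERSI_CLASS with X86_64_SSESF_CLASS yields X86_64_INTEGERSI_CLASS
(rule #4), so e.g. a union of "float" and "int" is classified as an integer
eightbyte and ends up in a general-purpose register rather than an SSE
register.  */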
2015 static enum x86_64_reg_class
2016 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2018 /* Rule #1: If both classes are equal, this is the resulting class. */
2019 if (class1 == class2)
2020 return class1;
2022 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2023 the other class. */
2024 if (class1 == X86_64_NO_CLASS)
2025 return class2;
2026 if (class2 == X86_64_NO_CLASS)
2027 return class1;
2029 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2030 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2031 return X86_64_MEMORY_CLASS;
2033 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2034 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2035 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2036 return X86_64_INTEGERSI_CLASS;
2037 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2038 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2039 return X86_64_INTEGER_CLASS;
2041 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
2042 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
2043 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
2044 return X86_64_MEMORY_CLASS;
2046 /* Rule #6: Otherwise class SSE is used. */
2047 return X86_64_SSE_CLASS;
2050 /* Classify the argument of type TYPE and mode MODE.
2051 CLASSES will be filled by the register class used to pass each word
2052 of the operand. The number of words is returned. In case the parameter
2053 should be passed in memory, 0 is returned. As a special case for zero
2054 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2056 BIT_OFFSET is used internally for handling records and specifies the
2057 offset in bits modulo 256 to avoid overflow cases.
2059 See the x86-64 PS ABI for details.
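/* As a rough example of the classification performed here: for

struct s { double d; int i; };

the first eightbyte (the double) gets X86_64_SSEDF_CLASS and the second (the
int at bit offset 64) gets X86_64_INTEGER_CLASS, so 2 is returned and the
struct travels in one SSE register plus one general-purpose register.  */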
2062 static int
2063 classify_argument (enum machine_mode mode, tree type,
2064 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2066 HOST_WIDE_INT bytes =
2067 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2068 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2070 /* Variable sized entities are always passed/returned in memory. */
2071 if (bytes < 0)
2072 return 0;
2074 if (mode != VOIDmode
2075 && targetm.calls.must_pass_in_stack (mode, type))
2076 return 0;
2078 if (type && AGGREGATE_TYPE_P (type))
2080 int i;
2081 tree field;
2082 enum x86_64_reg_class subclasses[MAX_CLASSES];
2084 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2085 if (bytes > 16)
2086 return 0;
2088 for (i = 0; i < words; i++)
2089 classes[i] = X86_64_NO_CLASS;
2091 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2092 signal the memory class, so handle this as a special case. */
2093 if (!words)
2095 classes[0] = X86_64_NO_CLASS;
2096 return 1;
2099 /* Classify each field of record and merge classes. */
2100 if (TREE_CODE (type) == RECORD_TYPE)
2102 /* For classes first merge in the field of the subclasses. */
2103 if (TYPE_BINFO (type))
2105 tree binfo, base_binfo;
2106 int i;
2108 for (binfo = TYPE_BINFO (type), i = 0;
2109 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2111 int num;
2112 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2113 tree type = BINFO_TYPE (base_binfo);
2115 num = classify_argument (TYPE_MODE (type),
2116 type, subclasses,
2117 (offset + bit_offset) % 256);
2118 if (!num)
2119 return 0;
2120 for (i = 0; i < num; i++)
2122 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2123 classes[i + pos] =
2124 merge_classes (subclasses[i], classes[i + pos]);
2128 /* And now merge the fields of structure. */
2129 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2131 if (TREE_CODE (field) == FIELD_DECL)
2133 int num;
2135 /* Bitfields are always classified as integer. Handle them
2136 early, since later code would consider them to be
2137 misaligned integers. */
2138 if (DECL_BIT_FIELD (field))
2140 for (i = int_bit_position (field) / 8 / 8;
2141 i < (int_bit_position (field)
2142 + tree_low_cst (DECL_SIZE (field), 0)
2143 + 63) / 8 / 8; i++)
2144 classes[i] =
2145 merge_classes (X86_64_INTEGER_CLASS,
2146 classes[i]);
2148 else
2150 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2151 TREE_TYPE (field), subclasses,
2152 (int_bit_position (field)
2153 + bit_offset) % 256);
2154 if (!num)
2155 return 0;
2156 for (i = 0; i < num; i++)
2158 int pos =
2159 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2160 classes[i + pos] =
2161 merge_classes (subclasses[i], classes[i + pos]);
2167 /* Arrays are handled as small records. */
2168 else if (TREE_CODE (type) == ARRAY_TYPE)
2170 int num;
2171 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2172 TREE_TYPE (type), subclasses, bit_offset);
2173 if (!num)
2174 return 0;
2176 /* The partial classes are now full classes. */
2177 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2178 subclasses[0] = X86_64_SSE_CLASS;
2179 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2180 subclasses[0] = X86_64_INTEGER_CLASS;
2182 for (i = 0; i < words; i++)
2183 classes[i] = subclasses[i % num];
2185 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2186 else if (TREE_CODE (type) == UNION_TYPE
2187 || TREE_CODE (type) == QUAL_UNION_TYPE)
2189 /* For classes first merge in the field of the subclasses. */
2190 if (TYPE_BINFO (type))
2192 tree binfo, base_binfo;
2193 int i;
2195 for (binfo = TYPE_BINFO (type), i = 0;
2196 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2198 int num;
2199 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2200 tree type = BINFO_TYPE (base_binfo);
2202 num = classify_argument (TYPE_MODE (type),
2203 type, subclasses,
2204 (offset + (bit_offset % 64)) % 256);
2205 if (!num)
2206 return 0;
2207 for (i = 0; i < num; i++)
2209 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2210 classes[i + pos] =
2211 merge_classes (subclasses[i], classes[i + pos]);
2215 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2217 if (TREE_CODE (field) == FIELD_DECL)
2219 int num;
2220 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2221 TREE_TYPE (field), subclasses,
2222 bit_offset);
2223 if (!num)
2224 return 0;
2225 for (i = 0; i < num; i++)
2226 classes[i] = merge_classes (subclasses[i], classes[i]);
2230 else if (TREE_CODE (type) == SET_TYPE)
2232 if (bytes <= 4)
2234 classes[0] = X86_64_INTEGERSI_CLASS;
2235 return 1;
2237 else if (bytes <= 8)
2239 classes[0] = X86_64_INTEGER_CLASS;
2240 return 1;
2242 else if (bytes <= 12)
2244 classes[0] = X86_64_INTEGER_CLASS;
2245 classes[1] = X86_64_INTEGERSI_CLASS;
2246 return 2;
2248 else
2250 classes[0] = X86_64_INTEGER_CLASS;
2251 classes[1] = X86_64_INTEGER_CLASS;
2252 return 2;
2255 else
2256 abort ();
2258 /* Final merger cleanup. */
2259 for (i = 0; i < words; i++)
2261 /* If one class is MEMORY, everything should be passed in
2262 memory. */
2263 if (classes[i] == X86_64_MEMORY_CLASS)
2264 return 0;
2266 /* The X86_64_SSEUP_CLASS should be always preceded by
2267 X86_64_SSE_CLASS. */
2268 if (classes[i] == X86_64_SSEUP_CLASS
2269 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2270 classes[i] = X86_64_SSE_CLASS;
2272 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2273 if (classes[i] == X86_64_X87UP_CLASS
2274 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2275 classes[i] = X86_64_SSE_CLASS;
2277 return words;
2280 /* Compute the alignment needed. We align all types to natural boundaries with
2281 the exception of XFmode, which is aligned to 64 bits. */
2282 if (mode != VOIDmode && mode != BLKmode)
2284 int mode_alignment = GET_MODE_BITSIZE (mode);
2286 if (mode == XFmode)
2287 mode_alignment = 128;
2288 else if (mode == XCmode)
2289 mode_alignment = 256;
2290 if (COMPLEX_MODE_P (mode))
2291 mode_alignment /= 2;
2292 /* Misaligned fields are always returned in memory. */
2293 if (bit_offset % mode_alignment)
2294 return 0;
2297 /* for V1xx modes, just use the base mode */
2298 if (VECTOR_MODE_P (mode)
2299 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
2300 mode = GET_MODE_INNER (mode);
2302 /* Classification of atomic types. */
2303 switch (mode)
2305 case DImode:
2306 case SImode:
2307 case HImode:
2308 case QImode:
2309 case CSImode:
2310 case CHImode:
2311 case CQImode:
2312 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2313 classes[0] = X86_64_INTEGERSI_CLASS;
2314 else
2315 classes[0] = X86_64_INTEGER_CLASS;
2316 return 1;
2317 case CDImode:
2318 case TImode:
2319 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2320 return 2;
2321 case CTImode:
2322 return 0;
2323 case SFmode:
2324 if (!(bit_offset % 64))
2325 classes[0] = X86_64_SSESF_CLASS;
2326 else
2327 classes[0] = X86_64_SSE_CLASS;
2328 return 1;
2329 case DFmode:
2330 classes[0] = X86_64_SSEDF_CLASS;
2331 return 1;
2332 case XFmode:
2333 classes[0] = X86_64_X87_CLASS;
2334 classes[1] = X86_64_X87UP_CLASS;
2335 return 2;
2336 case TFmode:
2337 classes[0] = X86_64_SSE_CLASS;
2338 classes[1] = X86_64_SSEUP_CLASS;
2339 return 2;
2340 case SCmode:
2341 classes[0] = X86_64_SSE_CLASS;
2342 return 1;
2343 case DCmode:
2344 classes[0] = X86_64_SSEDF_CLASS;
2345 classes[1] = X86_64_SSEDF_CLASS;
2346 return 2;
2347 case XCmode:
2348 case TCmode:
2349 /* These modes are larger than 16 bytes. */
2350 return 0;
2351 case V4SFmode:
2352 case V4SImode:
2353 case V16QImode:
2354 case V8HImode:
2355 case V2DFmode:
2356 case V2DImode:
2357 classes[0] = X86_64_SSE_CLASS;
2358 classes[1] = X86_64_SSEUP_CLASS;
2359 return 2;
2360 case V2SFmode:
2361 case V2SImode:
2362 case V4HImode:
2363 case V8QImode:
2364 classes[0] = X86_64_SSE_CLASS;
2365 return 1;
2366 case BLKmode:
2367 case VOIDmode:
2368 return 0;
2369 default:
2370 if (VECTOR_MODE_P (mode))
2372 if (bytes > 16)
2373 return 0;
2374 if (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT)
2376 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2377 classes[0] = X86_64_INTEGERSI_CLASS;
2378 else
2379 classes[0] = X86_64_INTEGER_CLASS;
2380 classes[1] = X86_64_INTEGER_CLASS;
2381 return 1 + (bytes > 8);
2384 abort ();
2388 /* Examine the argument and return the number of registers required in each
2389 class. Return 0 iff the parameter should be passed in memory. */
2390 static int
2391 examine_argument (enum machine_mode mode, tree type, int in_return,
2392 int *int_nregs, int *sse_nregs)
2394 enum x86_64_reg_class class[MAX_CLASSES];
2395 int n = classify_argument (mode, type, class, 0);
2397 *int_nregs = 0;
2398 *sse_nregs = 0;
2399 if (!n)
2400 return 0;
2401 for (n--; n >= 0; n--)
2402 switch (class[n])
2404 case X86_64_INTEGER_CLASS:
2405 case X86_64_INTEGERSI_CLASS:
2406 (*int_nregs)++;
2407 break;
2408 case X86_64_SSE_CLASS:
2409 case X86_64_SSESF_CLASS:
2410 case X86_64_SSEDF_CLASS:
2411 (*sse_nregs)++;
2412 break;
2413 case X86_64_NO_CLASS:
2414 case X86_64_SSEUP_CLASS:
2415 break;
2416 case X86_64_X87_CLASS:
2417 case X86_64_X87UP_CLASS:
2418 if (!in_return)
2419 return 0;
2420 break;
2421 case X86_64_MEMORY_CLASS:
2422 abort ();
2424 return 1;
2426 /* Construct container for the argument used by GCC interface. See
2427 FUNCTION_ARG for the detailed description. */
2428 static rtx
2429 construct_container (enum machine_mode mode, tree type, int in_return,
2430 int nintregs, int nsseregs, const int * intreg,
2431 int sse_regno)
2433 enum machine_mode tmpmode;
2434 int bytes =
2435 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2436 enum x86_64_reg_class class[MAX_CLASSES];
2437 int n;
2438 int i;
2439 int nexps = 0;
2440 int needed_sseregs, needed_intregs;
2441 rtx exp[MAX_CLASSES];
2442 rtx ret;
2444 n = classify_argument (mode, type, class, 0);
2445 if (TARGET_DEBUG_ARG)
2447 if (!n)
2448 fprintf (stderr, "Memory class\n");
2449 else
2451 fprintf (stderr, "Classes:");
2452 for (i = 0; i < n; i++)
2454 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2456 fprintf (stderr, "\n");
2459 if (!n)
2460 return NULL;
2461 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2462 return NULL;
2463 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2464 return NULL;
2466 /* First construct simple cases. Avoid SCmode, since we want to use
2467 single register to pass this type. */
2468 if (n == 1 && mode != SCmode)
2469 switch (class[0])
2471 case X86_64_INTEGER_CLASS:
2472 case X86_64_INTEGERSI_CLASS:
2473 return gen_rtx_REG (mode, intreg[0]);
2474 case X86_64_SSE_CLASS:
2475 case X86_64_SSESF_CLASS:
2476 case X86_64_SSEDF_CLASS:
2477 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2478 case X86_64_X87_CLASS:
2479 return gen_rtx_REG (mode, FIRST_STACK_REG);
2480 case X86_64_NO_CLASS:
2481 /* Zero sized array, struct or class. */
2482 return NULL;
2483 default:
2484 abort ();
2486 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2487 && mode != BLKmode)
2488 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2489 if (n == 2
2490 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2491 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2492 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2493 && class[1] == X86_64_INTEGER_CLASS
2494 && (mode == CDImode || mode == TImode || mode == TFmode)
2495 && intreg[0] + 1 == intreg[1])
2496 return gen_rtx_REG (mode, intreg[0]);
2497 if (n == 4
2498 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2499 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS
2500 && mode != BLKmode)
2501 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2503 /* Otherwise figure out the entries of the PARALLEL. */
2504 for (i = 0; i < n; i++)
2506 switch (class[i])
2508 case X86_64_NO_CLASS:
2509 break;
2510 case X86_64_INTEGER_CLASS:
2511 case X86_64_INTEGERSI_CLASS:
2512 /* Merge TImodes on aligned occasions here too. */
2513 if (i * 8 + 8 > bytes)
2514 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2515 else if (class[i] == X86_64_INTEGERSI_CLASS)
2516 tmpmode = SImode;
2517 else
2518 tmpmode = DImode;
2519 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2520 if (tmpmode == BLKmode)
2521 tmpmode = DImode;
2522 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2523 gen_rtx_REG (tmpmode, *intreg),
2524 GEN_INT (i*8));
2525 intreg++;
2526 break;
2527 case X86_64_SSESF_CLASS:
2528 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2529 gen_rtx_REG (SFmode,
2530 SSE_REGNO (sse_regno)),
2531 GEN_INT (i*8));
2532 sse_regno++;
2533 break;
2534 case X86_64_SSEDF_CLASS:
2535 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2536 gen_rtx_REG (DFmode,
2537 SSE_REGNO (sse_regno)),
2538 GEN_INT (i*8));
2539 sse_regno++;
2540 break;
2541 case X86_64_SSE_CLASS:
2542 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2543 tmpmode = TImode;
2544 else
2545 tmpmode = DImode;
2546 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2547 gen_rtx_REG (tmpmode,
2548 SSE_REGNO (sse_regno)),
2549 GEN_INT (i*8));
2550 if (tmpmode == TImode)
2551 i++;
2552 sse_regno++;
2553 break;
2554 default:
2555 abort ();
2558 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2559 for (i = 0; i < nexps; i++)
2560 XVECEXP (ret, 0, i) = exp [i];
2561 return ret;
2564 /* Update the data in CUM to advance over an argument
2565 of mode MODE and data type TYPE.
2566 (TYPE is null for libcalls where that information may not be available.) */
2568 void
2569 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2570 enum machine_mode mode, /* current arg mode */
2571 tree type, /* type of the argument or 0 if lib support */
2572 int named) /* whether or not the argument was named */
2574 int bytes =
2575 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2576 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2578 if (TARGET_DEBUG_ARG)
2579 fprintf (stderr,
2580 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2581 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
2582 if (TARGET_64BIT)
2584 int int_nregs, sse_nregs;
2585 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2586 cum->words += words;
2587 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2589 cum->nregs -= int_nregs;
2590 cum->sse_nregs -= sse_nregs;
2591 cum->regno += int_nregs;
2592 cum->sse_regno += sse_nregs;
2594 else
2595 cum->words += words;
2597 else
2599 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2600 && (!type || !AGGREGATE_TYPE_P (type)))
2602 cum->sse_words += words;
2603 cum->sse_nregs -= 1;
2604 cum->sse_regno += 1;
2605 if (cum->sse_nregs <= 0)
2607 cum->sse_nregs = 0;
2608 cum->sse_regno = 0;
2611 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2612 && (!type || !AGGREGATE_TYPE_P (type)))
2614 cum->mmx_words += words;
2615 cum->mmx_nregs -= 1;
2616 cum->mmx_regno += 1;
2617 if (cum->mmx_nregs <= 0)
2619 cum->mmx_nregs = 0;
2620 cum->mmx_regno = 0;
2623 else
2625 cum->words += words;
2626 cum->nregs -= words;
2627 cum->regno += words;
2629 if (cum->nregs <= 0)
2631 cum->nregs = 0;
2632 cum->regno = 0;
2636 return;
2639 /* Define where to put the arguments to a function.
2640 Value is zero to push the argument on the stack,
2641 or a hard register in which to store the argument.
2643 MODE is the argument's machine mode.
2644 TYPE is the data type of the argument (as a tree).
2645 This is null for libcalls where that information may
2646 not be available.
2647 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2648 the preceding args and about the function being called.
2649 NAMED is nonzero if this argument is a named parameter
2650 (otherwise it is an extra parameter matching an ellipsis). */
2653 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2654 enum machine_mode mode, /* current arg mode */
2655 tree type, /* type of the argument or 0 if lib support */
2656 int named) /* != 0 for normal args, == 0 for ... args */
2658 rtx ret = NULL_RTX;
2659 int bytes =
2660 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2661 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2662 static bool warnedsse, warnedmmx;
2664 /* To simplify the code below, represent vector types with a vector mode
2665 even if MMX/SSE are not active. */
2666 if (type
2667 && TREE_CODE (type) == VECTOR_TYPE
2668 && (bytes == 8 || bytes == 16)
2669 && GET_MODE_CLASS (TYPE_MODE (type)) != MODE_VECTOR_INT
2670 && GET_MODE_CLASS (TYPE_MODE (type)) != MODE_VECTOR_FLOAT)
2672 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2673 enum machine_mode newmode
2674 = TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
2675 ? MIN_MODE_VECTOR_FLOAT : MIN_MODE_VECTOR_INT;
2677 /* Get the mode which has this inner mode and number of units. */
2678 for (; newmode != VOIDmode; newmode = GET_MODE_WIDER_MODE (newmode))
2679 if (GET_MODE_NUNITS (newmode) == TYPE_VECTOR_SUBPARTS (type)
2680 && GET_MODE_INNER (newmode) == innermode)
2682 mode = newmode;
2683 break;
2687 /* Handle a hidden AL argument containing number of registers for varargs
2688 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2689 any AL settings. */
2690 if (mode == VOIDmode)
2692 if (TARGET_64BIT)
2693 return GEN_INT (cum->maybe_vaarg
2694 ? (cum->sse_nregs < 0
2695 ? SSE_REGPARM_MAX
2696 : cum->sse_regno)
2697 : -1);
2698 else
2699 return constm1_rtx;
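/* As a rough example: for an x86-64 varargs call the caller materializes this
value in %al as an upper bound on the number of SSE registers actually used,
so something like printf ("%f\n", x) is called with %al set to 1; ia32
callers never set AL, hence constm1_rtx above.  */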
2701 if (TARGET_64BIT)
2702 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2703 &x86_64_int_parameter_registers [cum->regno],
2704 cum->sse_regno);
2705 else
2706 switch (mode)
2708 /* For now, pass fp/complex values on the stack. */
2709 default:
2710 break;
2712 case BLKmode:
2713 if (bytes < 0)
2714 break;
2715 /* FALLTHRU */
2716 case DImode:
2717 case SImode:
2718 case HImode:
2719 case QImode:
2720 if (words <= cum->nregs)
2722 int regno = cum->regno;
2724 /* Fastcall allocates the first two DWORD (SImode) or
2725 smaller arguments to ECX and EDX. */
2726 if (cum->fastcall)
2728 if (mode == BLKmode || mode == DImode)
2729 break;
2731 /* ECX not EAX is the first allocated register. */
2732 if (regno == 0)
2733 regno = 2;
2735 ret = gen_rtx_REG (mode, regno);
2737 break;
2738 case TImode:
2739 case V16QImode:
2740 case V8HImode:
2741 case V4SImode:
2742 case V2DImode:
2743 case V4SFmode:
2744 case V2DFmode:
2745 if (!type || !AGGREGATE_TYPE_P (type))
2747 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2749 warnedsse = true;
2750 warning ("SSE vector argument without SSE enabled "
2751 "changes the ABI");
2753 if (cum->sse_nregs)
2754 ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
2756 break;
2757 case V8QImode:
2758 case V4HImode:
2759 case V2SImode:
2760 case V2SFmode:
2761 if (!type || !AGGREGATE_TYPE_P (type))
2763 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2765 warnedmmx = true;
2766 warning ("MMX vector argument without MMX enabled "
2767 "changes the ABI");
2769 if (cum->mmx_nregs)
2770 ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
2772 break;
2775 if (TARGET_DEBUG_ARG)
2777 fprintf (stderr,
2778 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2779 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2781 if (ret)
2782 print_simple_rtl (stderr, ret);
2783 else
2784 fprintf (stderr, ", stack");
2786 fprintf (stderr, " )\n");
2789 return ret;
2792 /* A C expression that indicates when an argument must be passed by
2793 reference. If nonzero for an argument, a copy of that argument is
2794 made in memory and a pointer to the argument is passed instead of
2795 the argument itself. The pointer is passed in whatever way is
2796 appropriate for passing a pointer to that type. */
2798 static bool
2799 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2800 enum machine_mode mode ATTRIBUTE_UNUSED,
2801 tree type, bool named ATTRIBUTE_UNUSED)
2803 if (!TARGET_64BIT)
2804 return 0;
2806 if (type && int_size_in_bytes (type) == -1)
2808 if (TARGET_DEBUG_ARG)
2809 fprintf (stderr, "function_arg_pass_by_reference\n");
2810 return 1;
2813 return 0;
2816 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2817 ABI. Only called if TARGET_SSE. */
2818 static bool
2819 contains_128bit_aligned_vector_p (tree type)
2821 enum machine_mode mode = TYPE_MODE (type);
2822 if (SSE_REG_MODE_P (mode)
2823 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2824 return true;
2825 if (TYPE_ALIGN (type) < 128)
2826 return false;
2828 if (AGGREGATE_TYPE_P (type))
2830 /* Walk the aggregates recursively. */
2831 if (TREE_CODE (type) == RECORD_TYPE
2832 || TREE_CODE (type) == UNION_TYPE
2833 || TREE_CODE (type) == QUAL_UNION_TYPE)
2835 tree field;
2837 if (TYPE_BINFO (type))
2839 tree binfo, base_binfo;
2840 int i;
2842 for (binfo = TYPE_BINFO (type), i = 0;
2843 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2844 if (contains_128bit_aligned_vector_p (BINFO_TYPE (base_binfo)))
2845 return true;
2847 /* And now merge the fields of structure. */
2848 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2850 if (TREE_CODE (field) == FIELD_DECL
2851 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2852 return true;
2855 /* Just for use in case some languages pass arrays by value. */
2856 else if (TREE_CODE (type) == ARRAY_TYPE)
2858 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2859 return true;
2861 else
2862 abort ();
2864 return false;
2867 /* Gives the alignment boundary, in bits, of an argument with the
2868 specified mode and type. */
2871 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2873 int align;
2874 if (type)
2875 align = TYPE_ALIGN (type);
2876 else
2877 align = GET_MODE_ALIGNMENT (mode);
2878 if (align < PARM_BOUNDARY)
2879 align = PARM_BOUNDARY;
2880 if (!TARGET_64BIT)
2882 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2883 make an exception for SSE modes since these require 128bit
2884 alignment.
2886 The handling here differs from field_alignment. ICC aligns MMX
2887 arguments to 4 byte boundaries, while structure fields are aligned
2888 to 8 byte boundaries. */
2889 if (!TARGET_SSE)
2890 align = PARM_BOUNDARY;
2891 else if (!type)
2893 if (!SSE_REG_MODE_P (mode))
2894 align = PARM_BOUNDARY;
2896 else
2898 if (!contains_128bit_aligned_vector_p (type))
2899 align = PARM_BOUNDARY;
2902 if (align > 128)
2903 align = 128;
2904 return align;
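/* Rough examples of the result, assuming PARM_BOUNDARY is 32 on ia32: a plain
"int" argument keeps 32-bit alignment, an SSE __m128 argument gets the full
128-bit alignment when SSE is enabled, and on x86-64 every argument simply
uses its natural alignment capped at 128 bits.  */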
2907 /* Return true if N is a possible register number of function value. */
2908 bool
2909 ix86_function_value_regno_p (int regno)
2911 if (!TARGET_64BIT)
2913 return ((regno) == 0
2914 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2915 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2917 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2918 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2919 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2922 /* Define how to find the value returned by a function.
2923 VALTYPE is the data type of the value (as a tree).
2924 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2925 otherwise, FUNC is 0. */
2927 ix86_function_value (tree valtype)
2929 if (TARGET_64BIT)
2931 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2932 REGPARM_MAX, SSE_REGPARM_MAX,
2933 x86_64_int_return_registers, 0);
2934 /* For zero sized structures, construct_container returns NULL, but we need
2935 to keep the rest of the compiler happy by returning a meaningful value. */
2936 if (!ret)
2937 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2938 return ret;
2940 else
2941 return gen_rtx_REG (TYPE_MODE (valtype),
2942 ix86_value_regno (TYPE_MODE (valtype)));
2945 /* Return nonzero iff type is returned in memory. */
2947 ix86_return_in_memory (tree type)
2949 int needed_intregs, needed_sseregs, size;
2950 enum machine_mode mode = TYPE_MODE (type);
2952 if (TARGET_64BIT)
2953 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2955 if (mode == BLKmode)
2956 return 1;
2958 size = int_size_in_bytes (type);
2960 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2961 return 0;
2963 if (VECTOR_MODE_P (mode) || mode == TImode)
2965 /* User-created vectors small enough to fit in EAX. */
2966 if (size < 8)
2967 return 0;
2969 /* MMX/3dNow values are returned on the stack, since we've
2970 got to EMMS/FEMMS before returning. */
2971 if (size == 8)
2972 return 1;
2974 /* SSE values are returned in XMM0, except when it doesn't exist. */
2975 if (size == 16)
2976 return (TARGET_SSE ? 0 : 1);
2979 if (mode == XFmode)
2980 return 0;
2982 if (size > 12)
2983 return 1;
2984 return 0;
2987 /* When returning SSE vector types, we have a choice of either
2988 (1) being abi incompatible with a -march switch, or
2989 (2) generating an error.
2990 Given no good solution, I think the safest thing is one warning.
2991 The user won't be able to use -Werror, but....
2993 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
2994 called in response to actually generating a caller or callee that
2995 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
2996 via aggregate_value_p for general type probing from tree-ssa. */
2998 static rtx
2999 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3001 static bool warned;
3003 if (!TARGET_SSE && type && !warned)
3005 /* Look at the return type of the function, not the function type. */
3006 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3008 if (mode == TImode
3009 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3011 warned = true;
3012 warning ("SSE vector return without SSE enabled changes the ABI");
3016 return NULL;
3019 /* Define how to find the value returned by a library function
3020 assuming the value has mode MODE. */
3022 ix86_libcall_value (enum machine_mode mode)
3024 if (TARGET_64BIT)
3026 switch (mode)
3028 case SFmode:
3029 case SCmode:
3030 case DFmode:
3031 case DCmode:
3032 case TFmode:
3033 return gen_rtx_REG (mode, FIRST_SSE_REG);
3034 case XFmode:
3035 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3036 case XCmode:
3037 case TCmode:
3038 return NULL;
3039 default:
3040 return gen_rtx_REG (mode, 0);
3043 else
3044 return gen_rtx_REG (mode, ix86_value_regno (mode));
3047 /* Given a mode, return the register to use for a return value. */
3049 static int
3050 ix86_value_regno (enum machine_mode mode)
3052 /* Floating point return values in %st(0). */
3053 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
3054 return FIRST_FLOAT_REG;
3055 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3056 we prevent this case when sse is not available. */
3057 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3058 return FIRST_SSE_REG;
3059 /* Everything else in %eax. */
3060 return 0;
3063 /* Create the va_list data type. */
3065 static tree
3066 ix86_build_builtin_va_list (void)
3068 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3070 /* For i386 we use plain pointer to argument area. */
3071 if (!TARGET_64BIT)
3072 return build_pointer_type (char_type_node);
3074 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3075 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3077 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3078 unsigned_type_node);
3079 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3080 unsigned_type_node);
3081 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3082 ptr_type_node);
3083 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3084 ptr_type_node);
3086 DECL_FIELD_CONTEXT (f_gpr) = record;
3087 DECL_FIELD_CONTEXT (f_fpr) = record;
3088 DECL_FIELD_CONTEXT (f_ovf) = record;
3089 DECL_FIELD_CONTEXT (f_sav) = record;
3091 TREE_CHAIN (record) = type_decl;
3092 TYPE_NAME (record) = type_decl;
3093 TYPE_FIELDS (record) = f_gpr;
3094 TREE_CHAIN (f_gpr) = f_fpr;
3095 TREE_CHAIN (f_fpr) = f_ovf;
3096 TREE_CHAIN (f_ovf) = f_sav;
3098 layout_type (record);
3100 /* The correct type is an array type of one element. */
3101 return build_array_type (record, build_index_type (size_zero_node));
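/* Roughly, the record built above corresponds to the C declaration

typedef struct {
unsigned int gp_offset;
unsigned int fp_offset;
void *overflow_arg_area;
void *reg_save_area;
} __va_list_tag;

with va_list being a one-element array of it, as the x86-64 psABI expects.  */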
3104 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3106 static void
3107 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3108 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3109 int no_rtl)
3111 CUMULATIVE_ARGS next_cum;
3112 rtx save_area = NULL_RTX, mem;
3113 rtx label;
3114 rtx label_ref;
3115 rtx tmp_reg;
3116 rtx nsse_reg;
3117 int set;
3118 tree fntype;
3119 int stdarg_p;
3120 int i;
3122 if (!TARGET_64BIT)
3123 return;
3125 /* Indicate to allocate space on the stack for varargs save area. */
3126 ix86_save_varrargs_registers = 1;
3128 cfun->stack_alignment_needed = 128;
3130 fntype = TREE_TYPE (current_function_decl);
3131 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3132 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3133 != void_type_node));
3135 /* For varargs, we do not want to skip the dummy va_dcl argument.
3136 For stdargs, we do want to skip the last named argument. */
3137 next_cum = *cum;
3138 if (stdarg_p)
3139 function_arg_advance (&next_cum, mode, type, 1);
3141 if (!no_rtl)
3142 save_area = frame_pointer_rtx;
3144 set = get_varargs_alias_set ();
3146 for (i = next_cum.regno; i < ix86_regparm; i++)
3148 mem = gen_rtx_MEM (Pmode,
3149 plus_constant (save_area, i * UNITS_PER_WORD));
3150 set_mem_alias_set (mem, set);
3151 emit_move_insn (mem, gen_rtx_REG (Pmode,
3152 x86_64_int_parameter_registers[i]));
3155 if (next_cum.sse_nregs)
3157 /* Now emit code to save SSE registers. The AX parameter contains the number
3158 of SSE parameter registers used to call this function. We use the
3159 sse_prologue_save insn template, which produces a computed jump across
3160 the SSE saves. We need some preparation work to get this working. */
3162 label = gen_label_rtx ();
3163 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3165 /* Compute address to jump to :
3166 label - 5*eax + nnamed_sse_arguments*5 */
3167 tmp_reg = gen_reg_rtx (Pmode);
3168 nsse_reg = gen_reg_rtx (Pmode);
3169 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3170 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3171 gen_rtx_MULT (Pmode, nsse_reg,
3172 GEN_INT (4))));
3173 if (next_cum.sse_regno)
3174 emit_move_insn
3175 (nsse_reg,
3176 gen_rtx_CONST (DImode,
3177 gen_rtx_PLUS (DImode,
3178 label_ref,
3179 GEN_INT (next_cum.sse_regno * 4))));
3180 else
3181 emit_move_insn (nsse_reg, label_ref);
3182 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3184 /* Compute the address of the memory block we save into. We always use a pointer
3185 pointing 127 bytes after the first byte to store - this is needed to keep
3186 the instruction size limited to 4 bytes. */
3187 tmp_reg = gen_reg_rtx (Pmode);
3188 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3189 plus_constant (save_area,
3190 8 * REGPARM_MAX + 127)));
3191 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3192 set_mem_alias_set (mem, set);
3193 set_mem_align (mem, BITS_PER_WORD);
3195 /* And finally do the dirty job! */
3196 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3197 GEN_INT (next_cum.sse_regno), label));
3202 /* Implement va_start. */
3204 void
3205 ix86_va_start (tree valist, rtx nextarg)
3207 HOST_WIDE_INT words, n_gpr, n_fpr;
3208 tree f_gpr, f_fpr, f_ovf, f_sav;
3209 tree gpr, fpr, ovf, sav, t;
3211 /* Only 64bit target needs something special. */
3212 if (!TARGET_64BIT)
3214 std_expand_builtin_va_start (valist, nextarg);
3215 return;
3218 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3219 f_fpr = TREE_CHAIN (f_gpr);
3220 f_ovf = TREE_CHAIN (f_fpr);
3221 f_sav = TREE_CHAIN (f_ovf);
3223 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3224 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3225 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3226 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3227 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3229 /* Count number of gp and fp argument registers used. */
3230 words = current_function_args_info.words;
3231 n_gpr = current_function_args_info.regno;
3232 n_fpr = current_function_args_info.sse_regno;
3234 if (TARGET_DEBUG_ARG)
3235 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3236 (int) words, (int) n_gpr, (int) n_fpr);
3238 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3239 build_int_cst (NULL_TREE, n_gpr * 8, 0));
3240 TREE_SIDE_EFFECTS (t) = 1;
3241 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3243 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3244 build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX, 0));
3245 TREE_SIDE_EFFECTS (t) = 1;
3246 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3248 /* Find the overflow area. */
3249 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3250 if (words != 0)
3251 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3252 build_int_cst (NULL_TREE, words * UNITS_PER_WORD, 0));
3253 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3254 TREE_SIDE_EFFECTS (t) = 1;
3255 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3257 /* Find the register save area.
3258 The prologue of the function saves it right above the stack frame. */
3259 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3260 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3261 TREE_SIDE_EFFECTS (t) = 1;
3262 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
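/* With the above, and assuming REGPARM_MAX is 6 on x86-64, a function such as
"int f (int a, ...)" that consumed one named GP register starts out with
gp_offset = 8 and fp_offset = 48, i.e. both offsets point just past the named
arguments within reg_save_area.  */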
3265 /* Implement va_arg. */
3267 tree
3268 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3270 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3271 tree f_gpr, f_fpr, f_ovf, f_sav;
3272 tree gpr, fpr, ovf, sav, t;
3273 int size, rsize;
3274 tree lab_false, lab_over = NULL_TREE;
3275 tree addr, t2;
3276 rtx container;
3277 int indirect_p = 0;
3278 tree ptrtype;
3280 /* Only 64bit target needs something special. */
3281 if (!TARGET_64BIT)
3282 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3284 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3285 f_fpr = TREE_CHAIN (f_gpr);
3286 f_ovf = TREE_CHAIN (f_fpr);
3287 f_sav = TREE_CHAIN (f_ovf);
3289 valist = build_fold_indirect_ref (valist);
3290 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3291 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3292 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3293 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3295 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
3296 if (indirect_p)
3297 type = build_pointer_type (type);
3298 size = int_size_in_bytes (type);
3299 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3301 container = construct_container (TYPE_MODE (type), type, 0,
3302 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3304 /* Pull the value out of the saved registers ... */
3307 addr = create_tmp_var (ptr_type_node, "addr");
3308 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3310 if (container)
3312 int needed_intregs, needed_sseregs;
3313 bool need_temp;
3314 tree int_addr, sse_addr;
3316 lab_false = create_artificial_label ();
3317 lab_over = create_artificial_label ();
3319 examine_argument (TYPE_MODE (type), type, 0,
3320 &needed_intregs, &needed_sseregs);
3322 need_temp = (!REG_P (container)
3323 && ((needed_intregs && TYPE_ALIGN (type) > 64)
3324 || TYPE_ALIGN (type) > 128));
3326 /* In case we are passing a structure, verify that it is a consecutive block
3327 in the register save area. If not, we need to do moves. */
3328 if (!need_temp && !REG_P (container))
3330 /* Verify that all registers are strictly consecutive */
3331 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3333 int i;
3335 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3337 rtx slot = XVECEXP (container, 0, i);
3338 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3339 || INTVAL (XEXP (slot, 1)) != i * 16)
3340 need_temp = 1;
3343 else
3345 int i;
3347 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3349 rtx slot = XVECEXP (container, 0, i);
3350 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3351 || INTVAL (XEXP (slot, 1)) != i * 8)
3352 need_temp = 1;
3356 if (!need_temp)
3358 int_addr = addr;
3359 sse_addr = addr;
3361 else
3363 int_addr = create_tmp_var (ptr_type_node, "int_addr");
3364 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3365 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3366 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3368 /* First ensure that we fit completely in registers. */
3369 if (needed_intregs)
3371 t = build_int_cst (TREE_TYPE (gpr),
3372 (REGPARM_MAX - needed_intregs + 1) * 8, 0);
3373 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
3374 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3375 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3376 gimplify_and_add (t, pre_p);
3378 if (needed_sseregs)
3380 t = build_int_cst (TREE_TYPE (fpr),
3381 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
3382 + REGPARM_MAX * 8, 0);
3383 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
3384 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3385 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3386 gimplify_and_add (t, pre_p);
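/* As a sanity check on the arithmetic above, assuming REGPARM_MAX is 6: if
the argument needs one GP register, the overflow path is taken once
gpr >= (6 - 1 + 1) * 8 = 48, i.e. when all six GP slots of the register save
area are already spoken for.  */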
3389 /* Compute index to start of area used for integer regs. */
3390 if (needed_intregs)
3392 /* int_addr = gpr + sav; */
3393 t = build2 (PLUS_EXPR, ptr_type_node, sav, gpr);
3394 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
3395 gimplify_and_add (t, pre_p);
3397 if (needed_sseregs)
3399 /* sse_addr = fpr + sav; */
3400 t = build2 (PLUS_EXPR, ptr_type_node, sav, fpr);
3401 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
3402 gimplify_and_add (t, pre_p);
3404 if (need_temp)
3406 int i;
3407 tree temp = create_tmp_var (type, "va_arg_tmp");
3409 /* addr = &temp; */
3410 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
3411 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
3412 gimplify_and_add (t, pre_p);
3414 for (i = 0; i < XVECLEN (container, 0); i++)
3416 rtx slot = XVECEXP (container, 0, i);
3417 rtx reg = XEXP (slot, 0);
3418 enum machine_mode mode = GET_MODE (reg);
3419 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
3420 tree addr_type = build_pointer_type (piece_type);
3421 tree src_addr, src;
3422 int src_offset;
3423 tree dest_addr, dest;
3425 if (SSE_REGNO_P (REGNO (reg)))
3427 src_addr = sse_addr;
3428 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3430 else
3432 src_addr = int_addr;
3433 src_offset = REGNO (reg) * 8;
3435 src_addr = fold_convert (addr_type, src_addr);
3436 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
3437 size_int (src_offset)));
3438 src = build_fold_indirect_ref (src_addr);
3440 dest_addr = fold_convert (addr_type, addr);
3441 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
3442 size_int (INTVAL (XEXP (slot, 1)))));
3443 dest = build_fold_indirect_ref (dest_addr);
3445 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
3446 gimplify_and_add (t, pre_p);
3450 if (needed_intregs)
3452 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3453 build_int_cst (NULL_TREE, needed_intregs * 8, 0));
3454 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3455 gimplify_and_add (t, pre_p);
3457 if (needed_sseregs)
3459 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3460 build_int_cst (NULL_TREE, needed_sseregs * 16, 0));
3461 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3462 gimplify_and_add (t, pre_p);
3465 t = build1 (GOTO_EXPR, void_type_node, lab_over);
3466 gimplify_and_add (t, pre_p);
3468 t = build1 (LABEL_EXPR, void_type_node, lab_false);
3469 append_to_statement_list (t, pre_p);
3472 /* ... otherwise out of the overflow area. */
3474 /* Care for on-stack alignment if needed. */
3475 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3476 t = ovf;
3477 else
3479 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3480 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf,
3481 build_int_cst (NULL_TREE, align - 1, 0));
3482 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3483 build_int_cst (NULL_TREE, -align, -1));
3485 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
3487 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
3488 gimplify_and_add (t2, pre_p);
3490 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
3491 build_int_cst (NULL_TREE, rsize * UNITS_PER_WORD, 0));
3492 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3493 gimplify_and_add (t, pre_p);
3495 if (container)
3497 t = build1 (LABEL_EXPR, void_type_node, lab_over);
3498 append_to_statement_list (t, pre_p);
3501 ptrtype = build_pointer_type (type);
3502 addr = fold_convert (ptrtype, addr);
3504 if (indirect_p)
3505 addr = build_fold_indirect_ref (addr);
3506 return build_fold_indirect_ref (addr);
3509 /* Return nonzero if OPNUM's MEM should be matched
3510 in movabs* patterns. */
3513 ix86_check_movabs (rtx insn, int opnum)
3515 rtx set, mem;
3517 set = PATTERN (insn);
3518 if (GET_CODE (set) == PARALLEL)
3519 set = XVECEXP (set, 0, 0);
3520 if (GET_CODE (set) != SET)
3521 abort ();
3522 mem = XEXP (set, opnum);
3523 while (GET_CODE (mem) == SUBREG)
3524 mem = SUBREG_REG (mem);
3525 if (GET_CODE (mem) != MEM)
3526 abort ();
3527 return (volatile_ok || !MEM_VOLATILE_P (mem));
3530 /* Initialize the table of extra 80387 mathematical constants. */
3532 static void
3533 init_ext_80387_constants (void)
3535 static const char * cst[5] =
3537 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
3538 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
3539 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
3540 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
3541 "3.1415926535897932385128089594061862044", /* 4: fldpi */
3543 int i;
3545 for (i = 0; i < 5; i++)
3547 real_from_string (&ext_80387_constants_table[i], cst[i]);
3548 /* Ensure each constant is rounded to XFmode precision. */
3549 real_convert (&ext_80387_constants_table[i],
3550 XFmode, &ext_80387_constants_table[i]);
3553 ext_80387_constants_init = 1;
3556 /* Return nonzero if the constant is something that can be loaded with
3557 a special 80387 instruction; the value selects which one. */
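/* The return value feeds standard_80387_constant_opcode below:
   1 -> fldz, 2 -> fld1, 3 -> fldlg2, 4 -> fldln2, 5 -> fldl2e,
   6 -> fldl2t, 7 -> fldpi; 0 means no special instruction applies and
   -1 means X is not a floating-point CONST_DOUBLE at all.  */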
3560 standard_80387_constant_p (rtx x)
3562 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3563 return -1;
3565 if (x == CONST0_RTX (GET_MODE (x)))
3566 return 1;
3567 if (x == CONST1_RTX (GET_MODE (x)))
3568 return 2;
3570 /* For XFmode constants, try to find a special 80387 instruction when
3571 optimizing for size or on those CPUs that benefit from them. */
3572 if (GET_MODE (x) == XFmode
3573 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
3575 REAL_VALUE_TYPE r;
3576 int i;
3578 if (! ext_80387_constants_init)
3579 init_ext_80387_constants ();
3581 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3582 for (i = 0; i < 5; i++)
3583 if (real_identical (&r, &ext_80387_constants_table[i]))
3584 return i + 3;
3587 return 0;
3590 /* Return the opcode of the special instruction to be used to load
3591 the constant X. */
3593 const char *
3594 standard_80387_constant_opcode (rtx x)
3596 switch (standard_80387_constant_p (x))
3598 case 1:
3599 return "fldz";
3600 case 2:
3601 return "fld1";
3602 case 3:
3603 return "fldlg2";
3604 case 4:
3605 return "fldln2";
3606 case 5:
3607 return "fldl2e";
3608 case 6:
3609 return "fldl2t";
3610 case 7:
3611 return "fldpi";
3613 abort ();
3616 /* Return the CONST_DOUBLE representing the 80387 constant that is
3617 loaded by the specified special instruction. The argument IDX
3618 matches the return value from standard_80387_constant_p. */
3621 standard_80387_constant_rtx (int idx)
3623 int i;
3625 if (! ext_80387_constants_init)
3626 init_ext_80387_constants ();
3628 switch (idx)
3630 case 3:
3631 case 4:
3632 case 5:
3633 case 6:
3634 case 7:
3635 i = idx - 3;
3636 break;
3638 default:
3639 abort ();
3642 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
3643 XFmode);
3646 /* Return 1 if X is an FP constant that we can load into an SSE register without using memory. */
3649 standard_sse_constant_p (rtx x)
3651 if (x == const0_rtx)
3652 return 1;
3653 return (x == CONST0_RTX (GET_MODE (x)));
3656 /* Returns 1 if OP contains a symbol reference */
3659 symbolic_reference_mentioned_p (rtx op)
3661 const char *fmt;
3662 int i;
3664 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3665 return 1;
3667 fmt = GET_RTX_FORMAT (GET_CODE (op));
3668 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3670 if (fmt[i] == 'E')
3672 int j;
3674 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3675 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3676 return 1;
3679 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3680 return 1;
3683 return 0;
3686 /* Return 1 if it is appropriate to emit `ret' instructions in the
3687 body of a function. Do this only if the epilogue is simple, needing a
3688 couple of insns. Prior to reloading, we can't tell how many registers
3689 must be saved, so return 0 then. Return 0 if there is no frame
3690 marker to de-allocate.
3692 If NON_SAVING_SETJMP is defined and true, then it is not possible
3693 for the epilogue to be simple, so return 0. This is a special case
3694 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3695 until final, but jump_optimize may need to know sooner if a
3696 `return' is OK. */
3699 ix86_can_use_return_insn_p (void)
3701 struct ix86_frame frame;
3703 #ifdef NON_SAVING_SETJMP
3704 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3705 return 0;
3706 #endif
3708 if (! reload_completed || frame_pointer_needed)
3709 return 0;
3711 /* Don't allow more than 32k bytes of pop, since that's all we can do
3712 with one instruction. */
3713 if (current_function_pops_args
3714 && current_function_args_size >= 32768)
3715 return 0;
3717 ix86_compute_frame_layout (&frame);
3718 return frame.to_allocate == 0 && frame.nregs == 0;
3721 /* Value should be nonzero if functions must have frame pointers.
3722 Zero means the frame pointer need not be set up (and parms may
3723 be accessed via the stack pointer) in functions that seem suitable. */
3726 ix86_frame_pointer_required (void)
3728 /* If we accessed previous frames, then the generated code expects
3729 to be able to access the saved ebp value in our frame. */
3730 if (cfun->machine->accesses_prev_frame)
3731 return 1;
3733 /* Several x86 OSes need a frame pointer for other reasons,
3734 usually pertaining to setjmp. */
3735 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3736 return 1;
3738 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3739 the frame pointer by default. Turn it back on now if we've not
3740 got a leaf function. */
3741 if (TARGET_OMIT_LEAF_FRAME_POINTER
3742 && (!current_function_is_leaf))
3743 return 1;
3745 if (current_function_profile)
3746 return 1;
3748 return 0;
3751 /* Record that the current function accesses previous call frames. */
3753 void
3754 ix86_setup_frame_addresses (void)
3756 cfun->machine->accesses_prev_frame = 1;
3759 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3760 # define USE_HIDDEN_LINKONCE 1
3761 #else
3762 # define USE_HIDDEN_LINKONCE 0
3763 #endif
3765 static int pic_labels_used;
3767 /* Fills in the label name that should be used for a pc thunk for
3768 the given register. */
3770 static void
3771 get_pc_thunk_name (char name[32], unsigned int regno)
3773 if (USE_HIDDEN_LINKONCE)
3774 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3775 else
3776 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
3780 /* This function emits the per-register pc thunks used by -fpic: each thunk
3781 loads its register with the return address of the caller and then returns. */
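/* For reference, a sketch (not verbatim assembler output) of the thunk
   emitted for %ebx when USE_HIDDEN_LINKONCE is set:

        __i686.get_pc_thunk.bx:
                movl    (%esp), %ebx
                ret

   The thunk copies its own return address -- the address of the
   instruction following the call -- into the register, which the caller
   then adjusts to the GOT base (see output_set_got below).  */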
3783 void
3784 ix86_file_end (void)
3786 rtx xops[2];
3787 int regno;
3789 for (regno = 0; regno < 8; ++regno)
3791 char name[32];
3793 if (! ((pic_labels_used >> regno) & 1))
3794 continue;
3796 get_pc_thunk_name (name, regno);
3798 if (USE_HIDDEN_LINKONCE)
3800 tree decl;
3802 decl = build_decl (FUNCTION_DECL, get_identifier (name),
3803 error_mark_node);
3804 TREE_PUBLIC (decl) = 1;
3805 TREE_STATIC (decl) = 1;
3806 DECL_ONE_ONLY (decl) = 1;
3808 (*targetm.asm_out.unique_section) (decl, 0);
3809 named_section (decl, NULL, 0);
3811 (*targetm.asm_out.globalize_label) (asm_out_file, name);
3812 fputs ("\t.hidden\t", asm_out_file);
3813 assemble_name (asm_out_file, name);
3814 fputc ('\n', asm_out_file);
3815 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
3817 else
3819 text_section ();
3820 ASM_OUTPUT_LABEL (asm_out_file, name);
3823 xops[0] = gen_rtx_REG (SImode, regno);
3824 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3825 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3826 output_asm_insn ("ret", xops);
3829 if (NEED_INDICATE_EXEC_STACK)
3830 file_end_indicate_exec_stack ();
3833 /* Emit code for the SET_GOT patterns. */
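/* Roughly (a sketch of the -fpic output, modulo exact label and thunk
   names), the two flavours emitted below for loading the GOT base into
   %ebx are:

     with TARGET_DEEP_BRANCH_PREDICTION:
        call    __i686.get_pc_thunk.bx
        addl    $_GLOBAL_OFFSET_TABLE_, %ebx

     without it:
        call    .L2
   .L2: popl    %ebx
        addl    $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx  */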
3835 const char *
3836 output_set_got (rtx dest)
3838 rtx xops[3];
3840 xops[0] = dest;
3841 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
3843 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
3845 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
3847 if (!flag_pic)
3848 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3849 else
3850 output_asm_insn ("call\t%a2", xops);
3852 #if TARGET_MACHO
3853 /* Output the "canonical" label name ("Lxx$pb") here too. This
3854 is what will be referred to by the Mach-O PIC subsystem. */
3855 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
3856 #endif
3857 (*targetm.asm_out.internal_label) (asm_out_file, "L",
3858 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
3860 if (flag_pic)
3861 output_asm_insn ("pop{l}\t%0", xops);
3863 else
3865 char name[32];
3866 get_pc_thunk_name (name, REGNO (dest));
3867 pic_labels_used |= 1 << REGNO (dest);
3869 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
3870 xops[2] = gen_rtx_MEM (QImode, xops[2]);
3871 output_asm_insn ("call\t%X2", xops);
3874 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
3875 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
3876 else if (!TARGET_MACHO)
3877 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
3879 return "";
3882 /* Generate a "push" pattern for input ARG. */
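/* A sketch of the RTL this produces on a 32-bit target:
        (set (mem:SI (pre_dec:SI (reg:SI sp)))
             (reg:SI <arg>))
   which the move patterns emit as "pushl <arg>".  */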
3884 static rtx
3885 gen_push (rtx arg)
3887 return gen_rtx_SET (VOIDmode,
3888 gen_rtx_MEM (Pmode,
3889 gen_rtx_PRE_DEC (Pmode,
3890 stack_pointer_rtx)),
3891 arg);
3894 /* Return >= 0 if there is an unused call-clobbered register available
3895 for the entire function. */
3897 static unsigned int
3898 ix86_select_alt_pic_regnum (void)
3900 if (current_function_is_leaf && !current_function_profile)
3902 int i;
3903 for (i = 2; i >= 0; --i)
3904 if (!regs_ever_live[i])
3905 return i;
3908 return INVALID_REGNUM;
3911 /* Return 1 if we need to save REGNO. */
3912 static int
3913 ix86_save_reg (unsigned int regno, int maybe_eh_return)
3915 if (pic_offset_table_rtx
3916 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
3917 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
3918 || current_function_profile
3919 || current_function_calls_eh_return
3920 || current_function_uses_const_pool))
3922 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
3923 return 0;
3924 return 1;
3927 if (current_function_calls_eh_return && maybe_eh_return)
3929 unsigned i;
3930 for (i = 0; ; i++)
3932 unsigned test = EH_RETURN_DATA_REGNO (i);
3933 if (test == INVALID_REGNUM)
3934 break;
3935 if (test == regno)
3936 return 1;
3940 return (regs_ever_live[regno]
3941 && !call_used_regs[regno]
3942 && !fixed_regs[regno]
3943 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
3946 /* Return number of registers to be saved on the stack. */
3948 static int
3949 ix86_nsaved_regs (void)
3951 int nregs = 0;
3952 int regno;
3954 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
3955 if (ix86_save_reg (regno, true))
3956 nregs++;
3957 return nregs;
3960 /* Return the offset between two registers, one to be eliminated, and the other
3961 its replacement, at the start of a routine. */
3963 HOST_WIDE_INT
3964 ix86_initial_elimination_offset (int from, int to)
3966 struct ix86_frame frame;
3967 ix86_compute_frame_layout (&frame);
3969 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3970 return frame.hard_frame_pointer_offset;
3971 else if (from == FRAME_POINTER_REGNUM
3972 && to == HARD_FRAME_POINTER_REGNUM)
3973 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
3974 else
3976 if (to != STACK_POINTER_REGNUM)
3977 abort ();
3978 else if (from == ARG_POINTER_REGNUM)
3979 return frame.stack_pointer_offset;
3980 else if (from != FRAME_POINTER_REGNUM)
3981 abort ();
3982 else
3983 return frame.stack_pointer_offset - frame.frame_pointer_offset;
3987 /* Fill the ix86_frame structure describing the frame of the currently compiled function. */
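/* A sketch (not authoritative) of the layout computed below, going from
   the incoming stack pointer toward lower addresses:

        return address
        saved %ebp (if frame_pointer_needed)     <- hard_frame_pointer_offset
        saved registers (nregs words)
        va_arg register save area (va_arg_size)
        padding1                                 <- frame_pointer_offset
        local variables (get_frame_size ())
        outgoing arguments area
        padding2                                 <- stack_pointer_offset  */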
3989 static void
3990 ix86_compute_frame_layout (struct ix86_frame *frame)
3992 HOST_WIDE_INT total_size;
3993 unsigned int stack_alignment_needed;
3994 HOST_WIDE_INT offset;
3995 unsigned int preferred_alignment;
3996 HOST_WIDE_INT size = get_frame_size ();
3998 frame->nregs = ix86_nsaved_regs ();
3999 total_size = size;
4001 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4002 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4004 /* During reload iterations the number of registers saved can change.
4005 Recompute the value as needed. Do not recompute when the number of registers
4006 didn't change, as reload makes multiple calls to this function and does not
4007 expect the decision to change within a single iteration. */
4008 if (!optimize_size
4009 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4011 int count = frame->nregs;
4013 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4014 /* The fast prologue uses moves instead of pushes to save registers. This
4015 is significantly longer, but also executes faster, as modern hardware
4016 can execute the moves in parallel but cannot do so for push/pop.
4018 Be careful about choosing which prologue to emit: when the function takes
4019 many instructions to execute, we may as well use the slow version; likewise
4020 when the function is known to be outside a hot spot (which is known with
4021 profile feedback only). Weight the size of the function by the number of
4022 registers to save, since it is cheap to use one or two push instructions
4023 but very slow to use many of them. */
4024 if (count)
4025 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4026 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4027 || (flag_branch_probabilities
4028 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4029 cfun->machine->use_fast_prologue_epilogue = false;
4030 else
4031 cfun->machine->use_fast_prologue_epilogue
4032 = !expensive_function_p (count);
4034 if (TARGET_PROLOGUE_USING_MOVE
4035 && cfun->machine->use_fast_prologue_epilogue)
4036 frame->save_regs_using_mov = true;
4037 else
4038 frame->save_regs_using_mov = false;
4041 /* Skip return address and saved base pointer. */
4042 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4044 frame->hard_frame_pointer_offset = offset;
4046 /* Do some sanity checking of stack_alignment_needed and
4047 preferred_alignment, since the i386 port is the only one using these
4048 features, and they may break easily. */
4050 if (size && !stack_alignment_needed)
4051 abort ();
4052 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4053 abort ();
4054 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4055 abort ();
4056 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4057 abort ();
4059 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4060 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4062 /* Register save area */
4063 offset += frame->nregs * UNITS_PER_WORD;
4065 /* Va-arg area */
4066 if (ix86_save_varrargs_registers)
4068 offset += X86_64_VARARGS_SIZE;
4069 frame->va_arg_size = X86_64_VARARGS_SIZE;
4071 else
4072 frame->va_arg_size = 0;
4074 /* Align start of frame for local function. */
4075 frame->padding1 = ((offset + stack_alignment_needed - 1)
4076 & -stack_alignment_needed) - offset;
4078 offset += frame->padding1;
4080 /* Frame pointer points here. */
4081 frame->frame_pointer_offset = offset;
4083 offset += size;
4085 /* Add the outgoing arguments area. It can be skipped if we eliminated
4086 all the function calls as dead code.
4087 Skipping is, however, impossible when the function calls alloca: the alloca
4088 expander assumes that the last current_function_outgoing_args_size bytes
4089 of the stack frame are unused. */
4090 if (ACCUMULATE_OUTGOING_ARGS
4091 && (!current_function_is_leaf || current_function_calls_alloca))
4093 offset += current_function_outgoing_args_size;
4094 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4096 else
4097 frame->outgoing_arguments_size = 0;
4099 /* Align stack boundary. Only needed if we're calling another function
4100 or using alloca. */
4101 if (!current_function_is_leaf || current_function_calls_alloca)
4102 frame->padding2 = ((offset + preferred_alignment - 1)
4103 & -preferred_alignment) - offset;
4104 else
4105 frame->padding2 = 0;
4107 offset += frame->padding2;
4109 /* We've reached end of stack frame. */
4110 frame->stack_pointer_offset = offset;
4112 /* Size prologue needs to allocate. */
4113 frame->to_allocate =
4114 (size + frame->padding1 + frame->padding2
4115 + frame->outgoing_arguments_size + frame->va_arg_size);
4117 if ((!frame->to_allocate && frame->nregs <= 1)
4118 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
4119 frame->save_regs_using_mov = false;
4121 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
4122 && current_function_is_leaf)
4124 frame->red_zone_size = frame->to_allocate;
4125 if (frame->save_regs_using_mov)
4126 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
4127 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4128 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4130 else
4131 frame->red_zone_size = 0;
4132 frame->to_allocate -= frame->red_zone_size;
4133 frame->stack_pointer_offset -= frame->red_zone_size;
4134 #if 0
4135 fprintf (stderr, "nregs: %i\n", frame->nregs);
4136 fprintf (stderr, "size: %i\n", size);
4137 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4138 fprintf (stderr, "padding1: %i\n", frame->padding1);
4139 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4140 fprintf (stderr, "padding2: %i\n", frame->padding2);
4141 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4142 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4143 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4144 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4145 frame->hard_frame_pointer_offset);
4146 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4147 #endif
4150 /* Emit code to save registers in the prologue. */
4152 static void
4153 ix86_emit_save_regs (void)
4155 int regno;
4156 rtx insn;
4158 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4159 if (ix86_save_reg (regno, true))
4161 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4162 RTX_FRAME_RELATED_P (insn) = 1;
4166 /* Emit code to save registers using MOV insns. The first register
4167 is saved at POINTER + OFFSET. */
4168 static void
4169 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
4171 int regno;
4172 rtx insn;
4174 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4175 if (ix86_save_reg (regno, true))
4177 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4178 Pmode, offset),
4179 gen_rtx_REG (Pmode, regno));
4180 RTX_FRAME_RELATED_P (insn) = 1;
4181 offset += UNITS_PER_WORD;
4185 /* Expand a prologue or epilogue stack adjustment.
4186 The pattern exists to put a dependency on all ebp-based memory accesses.
4187 STYLE should be negative if instructions should be marked as frame related,
4188 zero if the %r11 register is live and cannot be freely used, and positive
4189 otherwise. */
4191 static void
4192 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
4194 rtx insn;
4196 if (! TARGET_64BIT)
4197 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
4198 else if (x86_64_immediate_operand (offset, DImode))
4199 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
4200 else
4202 rtx r11;
4203 /* r11 is used by indirect sibcall return as well, set before the
4204 epilogue and used after the epilogue. ATM indirect sibcall
4205 shouldn't be used together with huge frame sizes in one
4206 function because of the frame_size check in sibcall.c. */
4207 if (style == 0)
4208 abort ();
4209 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4210 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
4211 if (style < 0)
4212 RTX_FRAME_RELATED_P (insn) = 1;
4213 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
4214 offset));
4216 if (style < 0)
4217 RTX_FRAME_RELATED_P (insn) = 1;
4220 /* Expand the prologue into a bunch of separate insns. */
4222 void
4223 ix86_expand_prologue (void)
4225 rtx insn;
4226 bool pic_reg_used;
4227 struct ix86_frame frame;
4228 HOST_WIDE_INT allocate;
4230 ix86_compute_frame_layout (&frame);
4232 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4233 slower on all targets. Also sdb doesn't like it. */
4235 if (frame_pointer_needed)
4237 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4238 RTX_FRAME_RELATED_P (insn) = 1;
4240 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4241 RTX_FRAME_RELATED_P (insn) = 1;
4244 allocate = frame.to_allocate;
4246 if (!frame.save_regs_using_mov)
4247 ix86_emit_save_regs ();
4248 else
4249 allocate += frame.nregs * UNITS_PER_WORD;
4251 /* When using the red zone we may start saving registers before allocating
4252 the stack frame, saving one cycle of the prologue. */
4253 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
4254 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
4255 : stack_pointer_rtx,
4256 -frame.nregs * UNITS_PER_WORD);
4258 if (allocate == 0)
4260 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4261 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4262 GEN_INT (-allocate), -1);
4263 else
4265 /* Only valid for Win32. */
4266 rtx eax = gen_rtx_REG (SImode, 0);
4267 bool eax_live = ix86_eax_live_at_start_p ();
4269 if (TARGET_64BIT)
4270 abort ();
4272 if (eax_live)
4274 emit_insn (gen_push (eax));
4275 allocate -= 4;
4278 insn = emit_move_insn (eax, GEN_INT (allocate));
4279 RTX_FRAME_RELATED_P (insn) = 1;
4281 insn = emit_insn (gen_allocate_stack_worker (eax));
4282 RTX_FRAME_RELATED_P (insn) = 1;
4284 if (eax_live)
4286 rtx t;
4287 if (frame_pointer_needed)
4288 t = plus_constant (hard_frame_pointer_rtx,
4289 allocate
4290 - frame.to_allocate
4291 - frame.nregs * UNITS_PER_WORD);
4292 else
4293 t = plus_constant (stack_pointer_rtx, allocate);
4294 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
4298 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
4300 if (!frame_pointer_needed || !frame.to_allocate)
4301 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4302 else
4303 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4304 -frame.nregs * UNITS_PER_WORD);
4307 pic_reg_used = false;
4308 if (pic_offset_table_rtx
4309 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4310 || current_function_profile))
4312 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4314 if (alt_pic_reg_used != INVALID_REGNUM)
4315 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4317 pic_reg_used = true;
4320 if (pic_reg_used)
4322 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4324 /* Even with accurate pre-reload life analysis, we can wind up
4325 deleting all references to the pic register after reload.
4326 Consider if cross-jumping unifies two sides of a branch
4327 controlled by a comparison vs the only read from a global.
4328 In which case, allow the set_got to be deleted, though we're
4329 too late to do anything about the ebx save in the prologue. */
4330 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4333 /* Prevent function calls from being scheduled before the call to mcount.
4334 In the pic_reg_used case, make sure that the GOT load isn't deleted. */
4335 if (current_function_profile)
4336 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4339 /* Emit code to restore saved registers using MOV insns. First register
4340 is restored from POINTER + OFFSET. */
4341 static void
4342 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
4343 int maybe_eh_return)
4345 int regno;
4346 rtx base_address = gen_rtx_MEM (Pmode, pointer);
4348 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4349 if (ix86_save_reg (regno, maybe_eh_return))
4351 /* Ensure that adjust_address won't be forced to produce a pointer
4352 outside the range allowed by the x86-64 instruction set. */
4353 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
4355 rtx r11;
4357 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4358 emit_move_insn (r11, GEN_INT (offset));
4359 emit_insn (gen_adddi3 (r11, r11, pointer));
4360 base_address = gen_rtx_MEM (Pmode, r11);
4361 offset = 0;
4363 emit_move_insn (gen_rtx_REG (Pmode, regno),
4364 adjust_address (base_address, Pmode, offset));
4365 offset += UNITS_PER_WORD;
4369 /* Restore function stack, frame, and registers. */
4371 void
4372 ix86_expand_epilogue (int style)
4374 int regno;
4375 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4376 struct ix86_frame frame;
4377 HOST_WIDE_INT offset;
4379 ix86_compute_frame_layout (&frame);
4381 /* Calculate start of saved registers relative to ebp. Special care
4382 must be taken for the normal return case of a function using
4383 eh_return: the eax and edx registers are marked as saved, but not
4384 restored along this path. */
4385 offset = frame.nregs;
4386 if (current_function_calls_eh_return && style != 2)
4387 offset -= 2;
4388 offset *= -UNITS_PER_WORD;
4390 /* If we're only restoring one register and sp is not valid, then
4391 use a move instruction to restore the register, since it's
4392 less work than reloading sp and popping the register.
4394 The default code results in a stack adjustment using an add/lea instruction,
4395 while this code results in a LEAVE instruction (or discrete equivalent),
4396 so it is profitable in some other cases as well, especially when there
4397 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4398 is set and there is exactly one register to pop. This heuristic may need
4399 some tuning in the future. */
4400 if ((!sp_valid && frame.nregs <= 1)
4401 || (TARGET_EPILOGUE_USING_MOVE
4402 && cfun->machine->use_fast_prologue_epilogue
4403 && (frame.nregs > 1 || frame.to_allocate))
4404 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4405 || (frame_pointer_needed && TARGET_USE_LEAVE
4406 && cfun->machine->use_fast_prologue_epilogue
4407 && frame.nregs == 1)
4408 || current_function_calls_eh_return)
4410 /* Restore registers. We can use ebp or esp to address the memory
4411 locations. If both are available, default to ebp, since offsets
4412 are known to be small. The only exception is when esp points directly
4413 to the end of the block of saved registers, where we may simplify the
4414 addressing mode. */
4416 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4417 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4418 frame.to_allocate, style == 2);
4419 else
4420 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4421 offset, style == 2);
4423 /* eh_return epilogues need %ecx added to the stack pointer. */
4424 if (style == 2)
4426 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4428 if (frame_pointer_needed)
4430 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4431 tmp = plus_constant (tmp, UNITS_PER_WORD);
4432 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4434 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4435 emit_move_insn (hard_frame_pointer_rtx, tmp);
4437 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
4438 const0_rtx, style);
4440 else
4442 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4443 tmp = plus_constant (tmp, (frame.to_allocate
4444 + frame.nregs * UNITS_PER_WORD));
4445 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4448 else if (!frame_pointer_needed)
4449 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4450 GEN_INT (frame.to_allocate
4451 + frame.nregs * UNITS_PER_WORD),
4452 style);
4453 /* If not an i386, mov & pop is faster than "leave". */
4454 else if (TARGET_USE_LEAVE || optimize_size
4455 || !cfun->machine->use_fast_prologue_epilogue)
4456 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4457 else
4459 pro_epilogue_adjust_stack (stack_pointer_rtx,
4460 hard_frame_pointer_rtx,
4461 const0_rtx, style);
4462 if (TARGET_64BIT)
4463 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4464 else
4465 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4468 else
4470 /* First step is to deallocate the stack frame so that we can
4471 pop the registers. */
4472 if (!sp_valid)
4474 if (!frame_pointer_needed)
4475 abort ();
4476 pro_epilogue_adjust_stack (stack_pointer_rtx,
4477 hard_frame_pointer_rtx,
4478 GEN_INT (offset), style);
4480 else if (frame.to_allocate)
4481 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4482 GEN_INT (frame.to_allocate), style);
4484 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4485 if (ix86_save_reg (regno, false))
4487 if (TARGET_64BIT)
4488 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4489 else
4490 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4492 if (frame_pointer_needed)
4494 /* Leave results in shorter dependency chains on CPUs that are
4495 able to grok it fast. */
4496 if (TARGET_USE_LEAVE)
4497 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4498 else if (TARGET_64BIT)
4499 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4500 else
4501 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4505 /* Sibcall epilogues don't want a return instruction. */
4506 if (style == 0)
4507 return;
4509 if (current_function_pops_args && current_function_args_size)
4511 rtx popc = GEN_INT (current_function_pops_args);
4513 /* i386 can only pop 64K bytes. If asked to pop more, pop
4514 return address, do explicit add, and jump indirectly to the
4515 caller. */
4517 if (current_function_pops_args >= 65536)
4519 rtx ecx = gen_rtx_REG (SImode, 2);
4521 /* There is no "pascal" calling convention in the 64-bit ABI. */
4522 if (TARGET_64BIT)
4523 abort ();
4525 emit_insn (gen_popsi1 (ecx));
4526 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4527 emit_jump_insn (gen_return_indirect_internal (ecx));
4529 else
4530 emit_jump_insn (gen_return_pop_internal (popc));
4532 else
4533 emit_jump_insn (gen_return_internal ());
4536 /* Undo the function's potential modification of the PIC register number. */
4538 static void
4539 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4540 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4542 if (pic_offset_table_rtx)
4543 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4546 /* Extract the parts of an RTL expression that is a valid memory address
4547 for an instruction. Return 0 if the structure of the address is
4548 grossly off. Return -1 if the address contains ASHIFT, so it is not
4549 strictly valid, but is still used for computing the length of a lea instruction. */
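/* Worked example (a sketch): the AT&T-syntax address 12(%ebx,%esi,4),
   i.e. (plus (plus (mult (reg %esi) (const_int 4)) (reg %ebx))
              (const_int 12)),
   decomposes into base = %ebx, index = %esi, scale = 4, disp = 12 and
   seg = SEG_DEFAULT.  */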
4552 ix86_decompose_address (rtx addr, struct ix86_address *out)
4554 rtx base = NULL_RTX;
4555 rtx index = NULL_RTX;
4556 rtx disp = NULL_RTX;
4557 HOST_WIDE_INT scale = 1;
4558 rtx scale_rtx = NULL_RTX;
4559 int retval = 1;
4560 enum ix86_address_seg seg = SEG_DEFAULT;
4562 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
4563 base = addr;
4564 else if (GET_CODE (addr) == PLUS)
4566 rtx addends[4], op;
4567 int n = 0, i;
4569 op = addr;
4572 if (n >= 4)
4573 return 0;
4574 addends[n++] = XEXP (op, 1);
4575 op = XEXP (op, 0);
4577 while (GET_CODE (op) == PLUS);
4578 if (n >= 4)
4579 return 0;
4580 addends[n] = op;
4582 for (i = n; i >= 0; --i)
4584 op = addends[i];
4585 switch (GET_CODE (op))
4587 case MULT:
4588 if (index)
4589 return 0;
4590 index = XEXP (op, 0);
4591 scale_rtx = XEXP (op, 1);
4592 break;
4594 case UNSPEC:
4595 if (XINT (op, 1) == UNSPEC_TP
4596 && TARGET_TLS_DIRECT_SEG_REFS
4597 && seg == SEG_DEFAULT)
4598 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
4599 else
4600 return 0;
4601 break;
4603 case REG:
4604 case SUBREG:
4605 if (!base)
4606 base = op;
4607 else if (!index)
4608 index = op;
4609 else
4610 return 0;
4611 break;
4613 case CONST:
4614 case CONST_INT:
4615 case SYMBOL_REF:
4616 case LABEL_REF:
4617 if (disp)
4618 return 0;
4619 disp = op;
4620 break;
4622 default:
4623 return 0;
4627 else if (GET_CODE (addr) == MULT)
4629 index = XEXP (addr, 0); /* index*scale */
4630 scale_rtx = XEXP (addr, 1);
4632 else if (GET_CODE (addr) == ASHIFT)
4634 rtx tmp;
4636 /* We're called for lea too, which implements ashift on occasion. */
4637 index = XEXP (addr, 0);
4638 tmp = XEXP (addr, 1);
4639 if (GET_CODE (tmp) != CONST_INT)
4640 return 0;
4641 scale = INTVAL (tmp);
4642 if ((unsigned HOST_WIDE_INT) scale > 3)
4643 return 0;
4644 scale = 1 << scale;
4645 retval = -1;
4647 else
4648 disp = addr; /* displacement */
4650 /* Extract the integral value of scale. */
4651 if (scale_rtx)
4653 if (GET_CODE (scale_rtx) != CONST_INT)
4654 return 0;
4655 scale = INTVAL (scale_rtx);
4658 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
4659 if (base && index && scale == 1
4660 && (index == arg_pointer_rtx
4661 || index == frame_pointer_rtx
4662 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
4664 rtx tmp = base;
4665 base = index;
4666 index = tmp;
4669 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4670 if ((base == hard_frame_pointer_rtx
4671 || base == frame_pointer_rtx
4672 || base == arg_pointer_rtx) && !disp)
4673 disp = const0_rtx;
4675 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4676 Avoid this by transforming to [%esi+0]. */
4677 if (ix86_tune == PROCESSOR_K6 && !optimize_size
4678 && base && !index && !disp
4679 && REG_P (base)
4680 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4681 disp = const0_rtx;
4683 /* Special case: encode reg+reg instead of reg*2. */
4684 if (!base && index && scale && scale == 2)
4685 base = index, scale = 1;
4687 /* Special case: scaling cannot be encoded without base or displacement. */
4688 if (!base && !disp && index && scale != 1)
4689 disp = const0_rtx;
4691 out->base = base;
4692 out->index = index;
4693 out->disp = disp;
4694 out->scale = scale;
4695 out->seg = seg;
4697 return retval;
4700 /* Return the cost of the memory address X.
4701 For i386, it is better to use a complex address than to let gcc copy
4702 the address into a reg and make a new pseudo. But not if the address
4703 requires two regs - that would mean more pseudos with longer
4704 lifetimes. */
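/* For instance (assuming a hard base register and no K6 tuning): the bare
   address (reg %ebx) costs 1 while (plus (reg %ebx) (const_int 12)) costs
   0, so the more complex address is the cheaper, preferred one.  */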
4705 static int
4706 ix86_address_cost (rtx x)
4708 struct ix86_address parts;
4709 int cost = 1;
4711 if (!ix86_decompose_address (x, &parts))
4712 abort ();
4714 /* More complex memory references are better. */
4715 if (parts.disp && parts.disp != const0_rtx)
4716 cost--;
4717 if (parts.seg != SEG_DEFAULT)
4718 cost--;
4720 /* Attempt to minimize number of registers in the address. */
4721 if ((parts.base
4722 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4723 || (parts.index
4724 && (!REG_P (parts.index)
4725 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4726 cost++;
4728 if (parts.base
4729 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4730 && parts.index
4731 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4732 && parts.base != parts.index)
4733 cost++;
4735 /* AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
4736 since its predecode logic can't detect the length of such instructions
4737 and they degenerate to vector decoding. Increase the cost of such
4738 addresses here. The penalty is a minimum of 2 cycles. It may be worthwhile
4739 to split such addresses or even to refuse them entirely.
4741 The following addressing modes are affected:
4742 [base+scale*index]
4743 [scale*index+disp]
4744 [base+index]
4746 The first and last cases may be avoidable by explicitly coding the zero into
4747 the memory address, but I don't have an AMD-K6 machine handy to check this
4748 theory. */
4750 if (TARGET_K6
4751 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4752 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4753 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4754 cost += 10;
4756 return cost;
4759 /* If X is a machine specific address (i.e. a symbol or label being
4760 referenced as a displacement from the GOT implemented using an
4761 UNSPEC), then return the base term. Otherwise return X. */
4764 ix86_find_base_term (rtx x)
4766 rtx term;
4768 if (TARGET_64BIT)
4770 if (GET_CODE (x) != CONST)
4771 return x;
4772 term = XEXP (x, 0);
4773 if (GET_CODE (term) == PLUS
4774 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4775 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4776 term = XEXP (term, 0);
4777 if (GET_CODE (term) != UNSPEC
4778 || XINT (term, 1) != UNSPEC_GOTPCREL)
4779 return x;
4781 term = XVECEXP (term, 0, 0);
4783 if (GET_CODE (term) != SYMBOL_REF
4784 && GET_CODE (term) != LABEL_REF)
4785 return x;
4787 return term;
4790 term = ix86_delegitimize_address (x);
4792 if (GET_CODE (term) != SYMBOL_REF
4793 && GET_CODE (term) != LABEL_REF)
4794 return x;
4796 return term;
4799 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
4800 this is used to form addresses to local data when -fPIC is in
4801 use. */
4803 static bool
4804 darwin_local_data_pic (rtx disp)
4806 if (GET_CODE (disp) == MINUS)
4808 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
4809 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
4810 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
4812 const char *sym_name = XSTR (XEXP (disp, 1), 0);
4813 if (! strcmp (sym_name, "<pic base>"))
4814 return true;
4818 return false;
4821 /* Determine if a given RTX is a valid constant. We already know this
4822 satisfies CONSTANT_P. */
4824 bool
4825 legitimate_constant_p (rtx x)
4827 rtx inner;
4829 switch (GET_CODE (x))
4831 case SYMBOL_REF:
4832 /* TLS symbols are not constant. */
4833 if (tls_symbolic_operand (x, Pmode))
4834 return false;
4835 break;
4837 case CONST:
4838 inner = XEXP (x, 0);
4840 /* Offsets of TLS symbols are never valid.
4841 Discourage CSE from creating them. */
4842 if (GET_CODE (inner) == PLUS
4843 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
4844 return false;
4846 if (GET_CODE (inner) == PLUS)
4848 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
4849 return false;
4850 inner = XEXP (inner, 0);
4853 if (TARGET_MACHO && darwin_local_data_pic (inner))
4854 return true;
4856 if (GET_CODE (inner) == MINUS)
4858 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
4859 return false;
4860 inner = XEXP (inner, 0);
4863 /* Only some unspecs are valid as "constants". */
4864 if (GET_CODE (inner) == UNSPEC)
4865 switch (XINT (inner, 1))
4867 case UNSPEC_TPOFF:
4868 case UNSPEC_NTPOFF:
4869 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4870 case UNSPEC_DTPOFF:
4871 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4872 default:
4873 return false;
4875 break;
4877 default:
4878 break;
4881 /* Otherwise we handle everything else in the move patterns. */
4882 return true;
4885 /* Determine if it's legal to put X into the constant pool. This
4886 is not possible for the address of thread-local symbols, which
4887 is checked above. */
4889 static bool
4890 ix86_cannot_force_const_mem (rtx x)
4892 return !legitimate_constant_p (x);
4895 /* Determine if a given RTX is a valid constant address. */
4897 bool
4898 constant_address_p (rtx x)
4900 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
4903 /* Nonzero if the constant value X is a legitimate general operand
4904 when generating PIC code. It is given that flag_pic is on and
4905 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
4907 bool
4908 legitimate_pic_operand_p (rtx x)
4910 rtx inner;
4912 switch (GET_CODE (x))
4914 case CONST:
4915 inner = XEXP (x, 0);
4917 /* Only some unspecs are valid as "constants". */
4918 if (GET_CODE (inner) == UNSPEC)
4919 switch (XINT (inner, 1))
4921 case UNSPEC_TPOFF:
4922 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4923 default:
4924 return false;
4926 /* FALLTHRU */
4928 case SYMBOL_REF:
4929 case LABEL_REF:
4930 return legitimate_pic_address_disp_p (x);
4932 default:
4933 return true;
4937 /* Determine if a given CONST RTX is a valid memory displacement
4938 in PIC mode. */
4941 legitimate_pic_address_disp_p (rtx disp)
4943 bool saw_plus;
4945 /* In 64-bit mode we can allow direct addresses of symbols and labels
4946 when they are not dynamic symbols. */
4947 if (TARGET_64BIT)
4949 /* TLS references should always be enclosed in UNSPEC. */
4950 if (tls_symbolic_operand (disp, GET_MODE (disp)))
4951 return 0;
4952 if (GET_CODE (disp) == SYMBOL_REF
4953 && ix86_cmodel == CM_SMALL_PIC
4954 && SYMBOL_REF_LOCAL_P (disp))
4955 return 1;
4956 if (GET_CODE (disp) == LABEL_REF)
4957 return 1;
4958 if (GET_CODE (disp) == CONST
4959 && GET_CODE (XEXP (disp, 0)) == PLUS)
4961 rtx op0 = XEXP (XEXP (disp, 0), 0);
4962 rtx op1 = XEXP (XEXP (disp, 0), 1);
4964 /* TLS references should always be enclosed in UNSPEC. */
4965 if (tls_symbolic_operand (op0, GET_MODE (op0)))
4966 return 0;
4967 if (((GET_CODE (op0) == SYMBOL_REF
4968 && ix86_cmodel == CM_SMALL_PIC
4969 && SYMBOL_REF_LOCAL_P (op0))
4970 || GET_CODE (op0) == LABEL_REF)
4971 && GET_CODE (op1) == CONST_INT
4972 && INTVAL (op1) < 16*1024*1024
4973 && INTVAL (op1) >= -16*1024*1024)
4974 return 1;
4977 if (GET_CODE (disp) != CONST)
4978 return 0;
4979 disp = XEXP (disp, 0);
4981 if (TARGET_64BIT)
4983 /* It is unsafe to allow PLUS expressions here; doing so would defeat the
4984 limit on the allowed distance to the GOT. We should not need these anyway. */
4985 if (GET_CODE (disp) != UNSPEC
4986 || XINT (disp, 1) != UNSPEC_GOTPCREL)
4987 return 0;
4989 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4990 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
4991 return 0;
4992 return 1;
4995 saw_plus = false;
4996 if (GET_CODE (disp) == PLUS)
4998 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
4999 return 0;
5000 disp = XEXP (disp, 0);
5001 saw_plus = true;
5004 if (TARGET_MACHO && darwin_local_data_pic (disp))
5005 return 1;
5007 if (GET_CODE (disp) != UNSPEC)
5008 return 0;
5010 switch (XINT (disp, 1))
5012 case UNSPEC_GOT:
5013 if (saw_plus)
5014 return false;
5015 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5016 case UNSPEC_GOTOFF:
5017 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5018 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5019 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5020 return false;
5021 case UNSPEC_GOTTPOFF:
5022 case UNSPEC_GOTNTPOFF:
5023 case UNSPEC_INDNTPOFF:
5024 if (saw_plus)
5025 return false;
5026 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5027 case UNSPEC_NTPOFF:
5028 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5029 case UNSPEC_DTPOFF:
5030 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5033 return 0;
5036 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5037 memory address for an instruction. The MODE argument is the machine mode
5038 for the MEM expression that wants to use this address.
5040 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5041 convert common non-canonical forms to canonical form so that they will
5042 be recognized. */
5045 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5047 struct ix86_address parts;
5048 rtx base, index, disp;
5049 HOST_WIDE_INT scale;
5050 const char *reason = NULL;
5051 rtx reason_rtx = NULL_RTX;
5053 if (TARGET_DEBUG_ADDR)
5055 fprintf (stderr,
5056 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5057 GET_MODE_NAME (mode), strict);
5058 debug_rtx (addr);
5061 if (ix86_decompose_address (addr, &parts) <= 0)
5063 reason = "decomposition failed";
5064 goto report_error;
5067 base = parts.base;
5068 index = parts.index;
5069 disp = parts.disp;
5070 scale = parts.scale;
5072 /* Validate base register.
5074 Don't allow SUBREGs here; they can lead to spill failures when the base
5075 is one word out of a two-word structure, which is represented internally
5076 as a DImode int. */
5078 if (base)
5080 reason_rtx = base;
5082 if (GET_CODE (base) != REG)
5084 reason = "base is not a register";
5085 goto report_error;
5088 if (GET_MODE (base) != Pmode)
5090 reason = "base is not in Pmode";
5091 goto report_error;
5094 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
5095 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
5097 reason = "base is not valid";
5098 goto report_error;
5102 /* Validate index register.
5104 Don't allow SUBREGs here; they can lead to spill failures when the index
5105 is one word out of a two-word structure, which is represented internally
5106 as a DImode int. */
5108 if (index)
5110 reason_rtx = index;
5112 if (GET_CODE (index) != REG)
5114 reason = "index is not a register";
5115 goto report_error;
5118 if (GET_MODE (index) != Pmode)
5120 reason = "index is not in Pmode";
5121 goto report_error;
5124 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
5125 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
5127 reason = "index is not valid";
5128 goto report_error;
5132 /* Validate scale factor. */
5133 if (scale != 1)
5135 reason_rtx = GEN_INT (scale);
5136 if (!index)
5138 reason = "scale without index";
5139 goto report_error;
5142 if (scale != 2 && scale != 4 && scale != 8)
5144 reason = "scale is not a valid multiplier";
5145 goto report_error;
5149 /* Validate displacement. */
5150 if (disp)
5152 reason_rtx = disp;
5154 if (GET_CODE (disp) == CONST
5155 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5156 switch (XINT (XEXP (disp, 0), 1))
5158 case UNSPEC_GOT:
5159 case UNSPEC_GOTOFF:
5160 case UNSPEC_GOTPCREL:
5161 if (!flag_pic)
5162 abort ();
5163 goto is_legitimate_pic;
5165 case UNSPEC_GOTTPOFF:
5166 case UNSPEC_GOTNTPOFF:
5167 case UNSPEC_INDNTPOFF:
5168 case UNSPEC_NTPOFF:
5169 case UNSPEC_DTPOFF:
5170 break;
5172 default:
5173 reason = "invalid address unspec";
5174 goto report_error;
5177 else if (flag_pic && (SYMBOLIC_CONST (disp)
5178 #if TARGET_MACHO
5179 && !machopic_operand_p (disp)
5180 #endif
5183 is_legitimate_pic:
5184 if (TARGET_64BIT && (index || base))
5186 /* foo@dtpoff(%rX) is ok. */
5187 if (GET_CODE (disp) != CONST
5188 || GET_CODE (XEXP (disp, 0)) != PLUS
5189 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5190 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5191 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5192 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5194 reason = "non-constant pic memory reference";
5195 goto report_error;
5198 else if (! legitimate_pic_address_disp_p (disp))
5200 reason = "displacement is an invalid pic construct";
5201 goto report_error;
5204 /* This code used to verify that a symbolic pic displacement
5205 includes the pic_offset_table_rtx register.
5207 While this is a good idea, unfortunately these constructs may
5208 be created by the "adds using lea" optimization for incorrect
5209 code like:
5211 int a;
5212 int foo(int i)
5214 return *(&a+i);
5217 This code is nonsensical, but results in addressing the
5218 GOT table with a pic_offset_table_rtx base. We can't
5219 just refuse it easily, since it gets matched by the
5220 "addsi3" pattern, which later gets split to lea when the
5221 output register differs from the input. While this
5222 could be handled by a separate addsi pattern for this case
5223 that never results in lea, disabling this test seems to be
5224 the easier and correct fix for the crash. */
5226 else if (GET_CODE (disp) != LABEL_REF
5227 && GET_CODE (disp) != CONST_INT
5228 && (GET_CODE (disp) != CONST
5229 || !legitimate_constant_p (disp))
5230 && (GET_CODE (disp) != SYMBOL_REF
5231 || !legitimate_constant_p (disp)))
5233 reason = "displacement is not constant";
5234 goto report_error;
5236 else if (TARGET_64BIT
5237 && !x86_64_immediate_operand (disp, VOIDmode))
5239 reason = "displacement is out of range";
5240 goto report_error;
5244 /* Everything looks valid. */
5245 if (TARGET_DEBUG_ADDR)
5246 fprintf (stderr, "Success.\n");
5247 return TRUE;
5249 report_error:
5250 if (TARGET_DEBUG_ADDR)
5252 fprintf (stderr, "Error: %s\n", reason);
5253 debug_rtx (reason_rtx);
5255 return FALSE;
5258 /* Return a unique alias set for the GOT. */
5260 static HOST_WIDE_INT
5261 ix86_GOT_alias_set (void)
5263 static HOST_WIDE_INT set = -1;
5264 if (set == -1)
5265 set = new_alias_set ();
5266 return set;
5269 /* Return a legitimate reference for ORIG (an address) using the
5270 register REG. If REG is 0, a new pseudo is generated.
5272 There are two types of references that must be handled:
5274 1. Global data references must load the address from the GOT, via
5275 the PIC reg. An insn is emitted to do this load, and the reg is
5276 returned.
5278 2. Static data references, constant pool addresses, and code labels
5279 compute the address as an offset from the GOT, whose base is in
5280 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
5281 differentiate them from global data objects. The returned
5282 address is the PIC reg + an unspec constant.
5284 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5285 reg also appears in the address. */
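/* A sketch of the two 32-bit cases handled below: a local symbol "foo"
   is rewritten to
        (plus (reg pic) (const (unspec [foo] UNSPEC_GOTOFF)))
   i.e. an address of the form foo@GOTOFF(%ebx), while a global symbol
   becomes a load from the GOT,
        (mem (plus (reg pic) (const (unspec [foo] UNSPEC_GOT))))
   i.e. movl foo@GOT(%ebx), <reg>.  */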
5287 static rtx
5288 legitimize_pic_address (rtx orig, rtx reg)
5290 rtx addr = orig;
5291 rtx new = orig;
5292 rtx base;
5294 #if TARGET_MACHO
5295 if (reg == 0)
5296 reg = gen_reg_rtx (Pmode);
5297 /* Use the generic Mach-O PIC machinery. */
5298 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5299 #endif
5301 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5302 new = addr;
5303 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5305 /* This symbol may be referenced via a displacement from the PIC
5306 base address (@GOTOFF). */
5308 if (reload_in_progress)
5309 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5310 if (GET_CODE (addr) == CONST)
5311 addr = XEXP (addr, 0);
5312 if (GET_CODE (addr) == PLUS)
5314 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
5315 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
5317 else
5318 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5319 new = gen_rtx_CONST (Pmode, new);
5320 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5322 if (reg != 0)
5324 emit_move_insn (reg, new);
5325 new = reg;
5328 else if (GET_CODE (addr) == SYMBOL_REF)
5330 if (TARGET_64BIT)
5332 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5333 new = gen_rtx_CONST (Pmode, new);
5334 new = gen_const_mem (Pmode, new);
5335 set_mem_alias_set (new, ix86_GOT_alias_set ());
5337 if (reg == 0)
5338 reg = gen_reg_rtx (Pmode);
5339 /* Use gen_movsi directly; otherwise the address is loaded
5340 into a register for CSE. We don't want to CSE these addresses;
5341 instead we CSE addresses from the GOT table, so skip this. */
5342 emit_insn (gen_movsi (reg, new));
5343 new = reg;
5345 else
5347 /* This symbol must be referenced via a load from the
5348 Global Offset Table (@GOT). */
5350 if (reload_in_progress)
5351 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5352 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5353 new = gen_rtx_CONST (Pmode, new);
5354 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5355 new = gen_const_mem (Pmode, new);
5356 set_mem_alias_set (new, ix86_GOT_alias_set ());
5358 if (reg == 0)
5359 reg = gen_reg_rtx (Pmode);
5360 emit_move_insn (reg, new);
5361 new = reg;
5364 else
5366 if (GET_CODE (addr) == CONST)
5368 addr = XEXP (addr, 0);
5370 /* We must match stuff we generate before. Assume the only
5371 unspecs that can get here are ours. Not that we could do
5372 anything with them anyway.... */
5373 if (GET_CODE (addr) == UNSPEC
5374 || (GET_CODE (addr) == PLUS
5375 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5376 return orig;
5377 if (GET_CODE (addr) != PLUS)
5378 abort ();
5380 if (GET_CODE (addr) == PLUS)
5382 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5384 /* Check first to see if this is a constant offset from a @GOTOFF
5385 symbol reference. */
5386 if (local_symbolic_operand (op0, Pmode)
5387 && GET_CODE (op1) == CONST_INT)
5389 if (!TARGET_64BIT)
5391 if (reload_in_progress)
5392 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5393 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5394 UNSPEC_GOTOFF);
5395 new = gen_rtx_PLUS (Pmode, new, op1);
5396 new = gen_rtx_CONST (Pmode, new);
5397 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5399 if (reg != 0)
5401 emit_move_insn (reg, new);
5402 new = reg;
5405 else
5407 if (INTVAL (op1) < -16*1024*1024
5408 || INTVAL (op1) >= 16*1024*1024)
5409 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
5412 else
5414 base = legitimize_pic_address (XEXP (addr, 0), reg);
5415 new = legitimize_pic_address (XEXP (addr, 1),
5416 base == reg ? NULL_RTX : reg);
5418 if (GET_CODE (new) == CONST_INT)
5419 new = plus_constant (base, INTVAL (new));
5420 else
5422 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5424 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5425 new = XEXP (new, 1);
5427 new = gen_rtx_PLUS (Pmode, base, new);
5432 return new;
5435 /* Load the thread pointer. If TO_REG is true, force it into a register. */
5437 static rtx
5438 get_thread_pointer (int to_reg)
5440 rtx tp, reg, insn;
5442 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5443 if (!to_reg)
5444 return tp;
5446 reg = gen_reg_rtx (Pmode);
5447 insn = gen_rtx_SET (VOIDmode, reg, tp);
5448 insn = emit_insn (insn);
5450 return reg;
5453 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
5454 false if we expect this to be used for a memory address and true if
5455 we expect to load the address into a register. */
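/* A sketch of the local exec case under TARGET_GNU_TLS: the address
   returned below has the shape
        (plus <thread pointer> (const (unspec [x] UNSPEC_NTPOFF)))
   and the UNSPEC_TP thread pointer is later turned into a segment-relative
   access by ix86_decompose_address above (SEG_GS on ia32, SEG_FS in 64-bit
   mode) when TARGET_TLS_DIRECT_SEG_REFS allows it.  */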
5457 static rtx
5458 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
5460 rtx dest, base, off, pic;
5461 int type;
5463 switch (model)
5465 case TLS_MODEL_GLOBAL_DYNAMIC:
5466 dest = gen_reg_rtx (Pmode);
5467 if (TARGET_64BIT)
5469 rtx rax = gen_rtx_REG (Pmode, 0), insns;
5471 start_sequence ();
5472 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
5473 insns = get_insns ();
5474 end_sequence ();
5476 emit_libcall_block (insns, dest, rax, x);
5478 else
5479 emit_insn (gen_tls_global_dynamic_32 (dest, x));
5480 break;
5482 case TLS_MODEL_LOCAL_DYNAMIC:
5483 base = gen_reg_rtx (Pmode);
5484 if (TARGET_64BIT)
5486 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
5488 start_sequence ();
5489 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
5490 insns = get_insns ();
5491 end_sequence ();
5493 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
5494 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
5495 emit_libcall_block (insns, base, rax, note);
5497 else
5498 emit_insn (gen_tls_local_dynamic_base_32 (base));
5500 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5501 off = gen_rtx_CONST (Pmode, off);
5503 return gen_rtx_PLUS (Pmode, base, off);
5505 case TLS_MODEL_INITIAL_EXEC:
5506 if (TARGET_64BIT)
5508 pic = NULL;
5509 type = UNSPEC_GOTNTPOFF;
5511 else if (flag_pic)
5513 if (reload_in_progress)
5514 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5515 pic = pic_offset_table_rtx;
5516 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
5518 else if (!TARGET_GNU_TLS)
5520 pic = gen_reg_rtx (Pmode);
5521 emit_insn (gen_set_got (pic));
5522 type = UNSPEC_GOTTPOFF;
5524 else
5526 pic = NULL;
5527 type = UNSPEC_INDNTPOFF;
5530 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
5531 off = gen_rtx_CONST (Pmode, off);
5532 if (pic)
5533 off = gen_rtx_PLUS (Pmode, pic, off);
5534 off = gen_const_mem (Pmode, off);
5535 set_mem_alias_set (off, ix86_GOT_alias_set ());
5537 if (TARGET_64BIT || TARGET_GNU_TLS)
5539 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5540 off = force_reg (Pmode, off);
5541 return gen_rtx_PLUS (Pmode, base, off);
5543 else
5545 base = get_thread_pointer (true);
5546 dest = gen_reg_rtx (Pmode);
5547 emit_insn (gen_subsi3 (dest, base, off));
5549 break;
5551 case TLS_MODEL_LOCAL_EXEC:
5552 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5553 (TARGET_64BIT || TARGET_GNU_TLS)
5554 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5555 off = gen_rtx_CONST (Pmode, off);
5557 if (TARGET_64BIT || TARGET_GNU_TLS)
5559 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5560 return gen_rtx_PLUS (Pmode, base, off);
5562 else
5564 base = get_thread_pointer (true);
5565 dest = gen_reg_rtx (Pmode);
5566 emit_insn (gen_subsi3 (dest, base, off));
5568 break;
5570 default:
5571 abort ();
5574 return dest;
5577 /* Try machine-dependent ways of modifying an illegitimate address
5578 to be legitimate. If we find one, return the new, valid address.
5579 This macro is used in only one place: `memory_address' in explow.c.
5581 OLDX is the address as it was before break_out_memory_refs was called.
5582 In some cases it is useful to look at this to decide what needs to be done.
5584 MODE and WIN are passed so that this macro can use
5585 GO_IF_LEGITIMATE_ADDRESS.
5587 It is always safe for this macro to do nothing. It exists to recognize
5588 opportunities to optimize the output.
5590 For the 80386, we handle X+REG by loading X into a register R and
5591 using R+REG. R will go in a general reg and indexing will be used.
5592 However, if REG is a broken-out memory address or multiplication,
5593 nothing needs to be done because REG can certainly go in a general reg.
5595 When -fpic is used, special handling is needed for symbolic references.
5596 See comments by legitimize_pic_address in i386.c for details. */
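/* For example, an index expression such as
(plus (ashift (reg) (const_int 2)) (reg)) is canonicalized below into
(plus (mult (reg) (const_int 4)) (reg)), which matches the scaled-index
addressing mode directly.  */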
5599 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
5601 int changed = 0;
5602 unsigned log;
5604 if (TARGET_DEBUG_ADDR)
5606 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5607 GET_MODE_NAME (mode));
5608 debug_rtx (x);
5611 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
5612 if (log)
5613 return legitimize_tls_address (x, log, false);
5614 if (GET_CODE (x) == CONST
5615 && GET_CODE (XEXP (x, 0)) == PLUS
5616 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
5617 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
5619 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
5620 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
5623 if (flag_pic && SYMBOLIC_CONST (x))
5624 return legitimize_pic_address (x, 0);
5626 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5627 if (GET_CODE (x) == ASHIFT
5628 && GET_CODE (XEXP (x, 1)) == CONST_INT
5629 && (log = (unsigned) INTVAL (XEXP (x, 1))) < 4)
5631 changed = 1;
5632 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5633 GEN_INT (1 << log));
5636 if (GET_CODE (x) == PLUS)
5638 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5640 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5641 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5642 && (log = (unsigned) INTVAL (XEXP (XEXP (x, 0), 1))) < 4)
5644 changed = 1;
5645 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5646 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5647 GEN_INT (1 << log));
5650 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5651 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5652 && (log = (unsigned) INTVAL (XEXP (XEXP (x, 1), 1))) < 4)
5654 changed = 1;
5655 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5656 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5657 GEN_INT (1 << log));
5660 /* Put multiply first if it isn't already. */
5661 if (GET_CODE (XEXP (x, 1)) == MULT)
5663 rtx tmp = XEXP (x, 0);
5664 XEXP (x, 0) = XEXP (x, 1);
5665 XEXP (x, 1) = tmp;
5666 changed = 1;
5669 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5670 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5671 created by virtual register instantiation, register elimination, and
5672 similar optimizations. */
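/* E.g. (plus (mult (reg) (const_int 4)) (plus (reg) (const_int 8)))
becomes (plus (plus (mult (reg) (const_int 4)) (reg)) (const_int 8)),
i.e. "base + index*4 + 8", which the addressing modes accept as is.  */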
5673 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5675 changed = 1;
5676 x = gen_rtx_PLUS (Pmode,
5677 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5678 XEXP (XEXP (x, 1), 0)),
5679 XEXP (XEXP (x, 1), 1));
5682 /* Canonicalize
5683 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5684 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5685 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5686 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5687 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5688 && CONSTANT_P (XEXP (x, 1)))
5690 rtx constant;
5691 rtx other = NULL_RTX;
5693 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5695 constant = XEXP (x, 1);
5696 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5698 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5700 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5701 other = XEXP (x, 1);
5703 else
5704 constant = 0;
5706 if (constant)
5708 changed = 1;
5709 x = gen_rtx_PLUS (Pmode,
5710 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5711 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5712 plus_constant (other, INTVAL (constant)));
5716 if (changed && legitimate_address_p (mode, x, FALSE))
5717 return x;
5719 if (GET_CODE (XEXP (x, 0)) == MULT)
5721 changed = 1;
5722 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5725 if (GET_CODE (XEXP (x, 1)) == MULT)
5727 changed = 1;
5728 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5731 if (changed
5732 && GET_CODE (XEXP (x, 1)) == REG
5733 && GET_CODE (XEXP (x, 0)) == REG)
5734 return x;
5736 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5738 changed = 1;
5739 x = legitimize_pic_address (x, 0);
5742 if (changed && legitimate_address_p (mode, x, FALSE))
5743 return x;
5745 if (GET_CODE (XEXP (x, 0)) == REG)
5747 rtx temp = gen_reg_rtx (Pmode);
5748 rtx val = force_operand (XEXP (x, 1), temp);
5749 if (val != temp)
5750 emit_move_insn (temp, val);
5752 XEXP (x, 1) = temp;
5753 return x;
5756 else if (GET_CODE (XEXP (x, 1)) == REG)
5758 rtx temp = gen_reg_rtx (Pmode);
5759 rtx val = force_operand (XEXP (x, 0), temp);
5760 if (val != temp)
5761 emit_move_insn (temp, val);
5763 XEXP (x, 0) = temp;
5764 return x;
5768 return x;
5771 /* Print an integer constant expression in assembler syntax. Addition
5772 and subtraction are the only arithmetic that may appear in these
5773 expressions. FILE is the stdio stream to write to, X is the rtx, and
5774 CODE is the operand print code from the output string. */
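/* For instance, (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)) is
printed as "foo@GOTOFF", while (const (plus (symbol_ref "foo")
(const_int 4))) comes out as "4+foo", the integer constant first.  */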
5776 static void
5777 output_pic_addr_const (FILE *file, rtx x, int code)
5779 char buf[256];
5781 switch (GET_CODE (x))
5783 case PC:
5784 if (flag_pic)
5785 putc ('.', file);
5786 else
5787 abort ();
5788 break;
5790 case SYMBOL_REF:
5791 /* Mark the decl as referenced so that cgraph will output the function. */
5792 if (SYMBOL_REF_DECL (x))
5793 mark_decl_referenced (SYMBOL_REF_DECL (x));
5795 assemble_name (file, XSTR (x, 0));
5796 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
5797 fputs ("@PLT", file);
5798 break;
5800 case LABEL_REF:
5801 x = XEXP (x, 0);
5802 /* FALLTHRU */
5803 case CODE_LABEL:
5804 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5805 assemble_name (asm_out_file, buf);
5806 break;
5808 case CONST_INT:
5809 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5810 break;
5812 case CONST:
5813 /* This used to output parentheses around the expression,
5814 but that does not work on the 386 (either ATT or BSD assembler). */
5815 output_pic_addr_const (file, XEXP (x, 0), code);
5816 break;
5818 case CONST_DOUBLE:
5819 if (GET_MODE (x) == VOIDmode)
5821 /* We can use %d if the number is <32 bits and positive. */
5822 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5823 fprintf (file, "0x%lx%08lx",
5824 (unsigned long) CONST_DOUBLE_HIGH (x),
5825 (unsigned long) CONST_DOUBLE_LOW (x));
5826 else
5827 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5829 else
5830 /* We can't handle floating point constants;
5831 PRINT_OPERAND must handle them. */
5832 output_operand_lossage ("floating constant misused");
5833 break;
5835 case PLUS:
5836 /* Some assemblers need integer constants to appear first. */
5837 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5839 output_pic_addr_const (file, XEXP (x, 0), code);
5840 putc ('+', file);
5841 output_pic_addr_const (file, XEXP (x, 1), code);
5843 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5845 output_pic_addr_const (file, XEXP (x, 1), code);
5846 putc ('+', file);
5847 output_pic_addr_const (file, XEXP (x, 0), code);
5849 else
5850 abort ();
5851 break;
5853 case MINUS:
5854 if (!TARGET_MACHO)
5855 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5856 output_pic_addr_const (file, XEXP (x, 0), code);
5857 putc ('-', file);
5858 output_pic_addr_const (file, XEXP (x, 1), code);
5859 if (!TARGET_MACHO)
5860 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5861 break;
5863 case UNSPEC:
5864 if (XVECLEN (x, 0) != 1)
5865 abort ();
5866 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5867 switch (XINT (x, 1))
5869 case UNSPEC_GOT:
5870 fputs ("@GOT", file);
5871 break;
5872 case UNSPEC_GOTOFF:
5873 fputs ("@GOTOFF", file);
5874 break;
5875 case UNSPEC_GOTPCREL:
5876 fputs ("@GOTPCREL(%rip)", file);
5877 break;
5878 case UNSPEC_GOTTPOFF:
5879 /* FIXME: This might be @TPOFF in Sun ld too. */
5880 fputs ("@GOTTPOFF", file);
5881 break;
5882 case UNSPEC_TPOFF:
5883 fputs ("@TPOFF", file);
5884 break;
5885 case UNSPEC_NTPOFF:
5886 if (TARGET_64BIT)
5887 fputs ("@TPOFF", file);
5888 else
5889 fputs ("@NTPOFF", file);
5890 break;
5891 case UNSPEC_DTPOFF:
5892 fputs ("@DTPOFF", file);
5893 break;
5894 case UNSPEC_GOTNTPOFF:
5895 if (TARGET_64BIT)
5896 fputs ("@GOTTPOFF(%rip)", file);
5897 else
5898 fputs ("@GOTNTPOFF", file);
5899 break;
5900 case UNSPEC_INDNTPOFF:
5901 fputs ("@INDNTPOFF", file);
5902 break;
5903 default:
5904 output_operand_lossage ("invalid UNSPEC as operand");
5905 break;
5907 break;
5909 default:
5910 output_operand_lossage ("invalid expression as operand");
5914 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5915 We need to handle our special PIC relocations. */
5917 void
5918 i386_dwarf_output_addr_const (FILE *file, rtx x)
5920 #ifdef ASM_QUAD
5921 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
5922 #else
5923 if (TARGET_64BIT)
5924 abort ();
5925 fprintf (file, "%s", ASM_LONG);
5926 #endif
5927 if (flag_pic)
5928 output_pic_addr_const (file, x, '\0');
5929 else
5930 output_addr_const (file, x);
5931 fputc ('\n', file);
5934 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
5935 We need to emit DTP-relative relocations. */
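/* E.g. for SIZE 4 this emits roughly ".long foo@DTPOFF", and for SIZE 8
it appends ", 0" so the upper 32 bits of the entry are zero.  */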
5937 void
5938 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
5940 fputs (ASM_LONG, file);
5941 output_addr_const (file, x);
5942 fputs ("@DTPOFF", file);
5943 switch (size)
5945 case 4:
5946 break;
5947 case 8:
5948 fputs (", 0", file);
5949 break;
5950 default:
5951 abort ();
5955 /* In the name of slightly smaller debug output, and to cater to
5956 general assembler lossage, recognize PIC+GOTOFF and turn it back
5957 into a direct symbol reference. */
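/* E.g. (plus (reg %ebx) (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)))
is turned back into the bare (symbol_ref "x").  */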
5959 static rtx
5960 ix86_delegitimize_address (rtx orig_x)
5962 rtx x = orig_x, y;
5964 if (GET_CODE (x) == MEM)
5965 x = XEXP (x, 0);
5967 if (TARGET_64BIT)
5969 if (GET_CODE (x) != CONST
5970 || GET_CODE (XEXP (x, 0)) != UNSPEC
5971 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
5972 || GET_CODE (orig_x) != MEM)
5973 return orig_x;
5974 return XVECEXP (XEXP (x, 0), 0, 0);
5977 if (GET_CODE (x) != PLUS
5978 || GET_CODE (XEXP (x, 1)) != CONST)
5979 return orig_x;
5981 if (GET_CODE (XEXP (x, 0)) == REG
5982 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5983 /* %ebx + GOT/GOTOFF */
5984 y = NULL;
5985 else if (GET_CODE (XEXP (x, 0)) == PLUS)
5987 /* %ebx + %reg * scale + GOT/GOTOFF */
5988 y = XEXP (x, 0);
5989 if (GET_CODE (XEXP (y, 0)) == REG
5990 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
5991 y = XEXP (y, 1);
5992 else if (GET_CODE (XEXP (y, 1)) == REG
5993 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
5994 y = XEXP (y, 0);
5995 else
5996 return orig_x;
5997 if (GET_CODE (y) != REG
5998 && GET_CODE (y) != MULT
5999 && GET_CODE (y) != ASHIFT)
6000 return orig_x;
6002 else
6003 return orig_x;
6005 x = XEXP (XEXP (x, 1), 0);
6006 if (GET_CODE (x) == UNSPEC
6007 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6008 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6010 if (y)
6011 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6012 return XVECEXP (x, 0, 0);
6015 if (GET_CODE (x) == PLUS
6016 && GET_CODE (XEXP (x, 0)) == UNSPEC
6017 && GET_CODE (XEXP (x, 1)) == CONST_INT
6018 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6019 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6020 && GET_CODE (orig_x) != MEM)))
6022 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6023 if (y)
6024 return gen_rtx_PLUS (Pmode, y, x);
6025 return x;
6028 return orig_x;
6031 static void
6032 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6033 int fp, FILE *file)
6035 const char *suffix;
6037 if (mode == CCFPmode || mode == CCFPUmode)
6039 enum rtx_code second_code, bypass_code;
6040 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6041 if (bypass_code != UNKNOWN || second_code != UNKNOWN)
6042 abort ();
6043 code = ix86_fp_compare_code_to_integer (code);
6044 mode = CCmode;
6046 if (reverse)
6047 code = reverse_condition (code);
6049 switch (code)
6051 case EQ:
6052 suffix = "e";
6053 break;
6054 case NE:
6055 suffix = "ne";
6056 break;
6057 case GT:
6058 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6059 abort ();
6060 suffix = "g";
6061 break;
6062 case GTU:
6063 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6064 Those same assemblers have the same but opposite lossage on cmov. */
6065 if (mode != CCmode)
6066 abort ();
6067 suffix = fp ? "nbe" : "a";
6068 break;
6069 case LT:
6070 if (mode == CCNOmode || mode == CCGOCmode)
6071 suffix = "s";
6072 else if (mode == CCmode || mode == CCGCmode)
6073 suffix = "l";
6074 else
6075 abort ();
6076 break;
6077 case LTU:
6078 if (mode != CCmode)
6079 abort ();
6080 suffix = "b";
6081 break;
6082 case GE:
6083 if (mode == CCNOmode || mode == CCGOCmode)
6084 suffix = "ns";
6085 else if (mode == CCmode || mode == CCGCmode)
6086 suffix = "ge";
6087 else
6088 abort ();
6089 break;
6090 case GEU:
6091 /* ??? As above. */
6092 if (mode != CCmode)
6093 abort ();
6094 suffix = fp ? "nb" : "ae";
6095 break;
6096 case LE:
6097 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6098 abort ();
6099 suffix = "le";
6100 break;
6101 case LEU:
6102 if (mode != CCmode)
6103 abort ();
6104 suffix = "be";
6105 break;
6106 case UNORDERED:
6107 suffix = fp ? "u" : "p";
6108 break;
6109 case ORDERED:
6110 suffix = fp ? "nu" : "np";
6111 break;
6112 default:
6113 abort ();
6115 fputs (suffix, file);
6118 /* Print the name of register X to FILE based on its machine mode and number.
6119 If CODE is 'w', pretend the mode is HImode.
6120 If CODE is 'b', pretend the mode is QImode.
6121 If CODE is 'k', pretend the mode is SImode.
6122 If CODE is 'q', pretend the mode is DImode.
6123 If CODE is 'h', pretend the reg is the `high' byte register.
6124 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
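/* For example, given the %eax register: code 'b' prints "al", 'w' prints
"ax", 'k' prints "eax", 'q' prints "rax" when generating 64-bit code, and
'h' prints "ah", each with a leading '%' in AT&T syntax.  */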
6126 void
6127 print_reg (rtx x, int code, FILE *file)
6129 if (REGNO (x) == ARG_POINTER_REGNUM
6130 || REGNO (x) == FRAME_POINTER_REGNUM
6131 || REGNO (x) == FLAGS_REG
6132 || REGNO (x) == FPSR_REG)
6133 abort ();
6135 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6136 putc ('%', file);
6138 if (code == 'w' || MMX_REG_P (x))
6139 code = 2;
6140 else if (code == 'b')
6141 code = 1;
6142 else if (code == 'k')
6143 code = 4;
6144 else if (code == 'q')
6145 code = 8;
6146 else if (code == 'y')
6147 code = 3;
6148 else if (code == 'h')
6149 code = 0;
6150 else
6151 code = GET_MODE_SIZE (GET_MODE (x));
6153 /* Irritatingly, AMD extended registers use a different naming convention
6154 from the normal registers. */
6155 if (REX_INT_REG_P (x))
6157 if (!TARGET_64BIT)
6158 abort ();
6159 switch (code)
6161 case 0:
6162 error ("extended registers have no high halves");
6163 break;
6164 case 1:
6165 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6166 break;
6167 case 2:
6168 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6169 break;
6170 case 4:
6171 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6172 break;
6173 case 8:
6174 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6175 break;
6176 default:
6177 error ("unsupported operand size for extended register");
6178 break;
6180 return;
6182 switch (code)
6184 case 3:
6185 if (STACK_TOP_P (x))
6187 fputs ("st(0)", file);
6188 break;
6190 /* FALLTHRU */
6191 case 8:
6192 case 4:
6193 case 12:
6194 if (! ANY_FP_REG_P (x))
6195 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6196 /* FALLTHRU */
6197 case 16:
6198 case 2:
6199 normal:
6200 fputs (hi_reg_name[REGNO (x)], file);
6201 break;
6202 case 1:
6203 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
6204 goto normal;
6205 fputs (qi_reg_name[REGNO (x)], file);
6206 break;
6207 case 0:
6208 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
6209 goto normal;
6210 fputs (qi_high_reg_name[REGNO (x)], file);
6211 break;
6212 default:
6213 abort ();
6217 /* Locate some local-dynamic symbol still in use by this function
6218 so that we can print its name in some tls_local_dynamic_base
6219 pattern. */
6221 static const char *
6222 get_some_local_dynamic_name (void)
6224 rtx insn;
6226 if (cfun->machine->some_ld_name)
6227 return cfun->machine->some_ld_name;
6229 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6230 if (INSN_P (insn)
6231 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6232 return cfun->machine->some_ld_name;
6234 abort ();
6237 static int
6238 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
6240 rtx x = *px;
6242 if (GET_CODE (x) == SYMBOL_REF
6243 && local_dynamic_symbolic_operand (x, Pmode))
6245 cfun->machine->some_ld_name = XSTR (x, 0);
6246 return 1;
6249 return 0;
6252 /* Meaning of CODE:
6253 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6254 C -- print opcode suffix for set/cmov insn.
6255 c -- like C, but print reversed condition
6256 F,f -- likewise, but for floating-point.
6257 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
6258 otherwise nothing
6259 R -- print the prefix for register names.
6260 z -- print the opcode suffix for the size of the current operand.
6261 * -- print a star (in certain assembler syntax)
6262 A -- print an absolute memory reference.
6263 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6264 s -- print a shift double count, followed by the assembler's argument
6265 delimiter.
6266 b -- print the QImode name of the register for the indicated operand.
6267 %b0 would print %al if operands[0] is reg 0.
6268 w -- likewise, print the HImode name of the register.
6269 k -- likewise, print the SImode name of the register.
6270 q -- likewise, print the DImode name of the register.
6271 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6272 y -- print "st(0)" instead of "st" as a register.
6273 D -- print condition for SSE cmp instruction.
6274 P -- if PIC, print an @PLT suffix.
6275 X -- don't print any sort of PIC '@' suffix for a symbol.
6276 & -- print some in-use local-dynamic symbol name.
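For example, "%z0" on an SImode register operand prints the AT&T size
suffix "l", and "%b1" prints the QImode name of the register in
operand 1, e.g. "cl".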
6279 void
6280 print_operand (FILE *file, rtx x, int code)
6282 if (code)
6284 switch (code)
6286 case '*':
6287 if (ASSEMBLER_DIALECT == ASM_ATT)
6288 putc ('*', file);
6289 return;
6291 case '&':
6292 assemble_name (file, get_some_local_dynamic_name ());
6293 return;
6295 case 'A':
6296 if (ASSEMBLER_DIALECT == ASM_ATT)
6297 putc ('*', file);
6298 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6300 /* Intel syntax. For absolute addresses, registers should not
6301 be surrounded by brackets. */
6302 if (GET_CODE (x) != REG)
6304 putc ('[', file);
6305 PRINT_OPERAND (file, x, 0);
6306 putc (']', file);
6307 return;
6310 else
6311 abort ();
6313 PRINT_OPERAND (file, x, 0);
6314 return;
6317 case 'L':
6318 if (ASSEMBLER_DIALECT == ASM_ATT)
6319 putc ('l', file);
6320 return;
6322 case 'W':
6323 if (ASSEMBLER_DIALECT == ASM_ATT)
6324 putc ('w', file);
6325 return;
6327 case 'B':
6328 if (ASSEMBLER_DIALECT == ASM_ATT)
6329 putc ('b', file);
6330 return;
6332 case 'Q':
6333 if (ASSEMBLER_DIALECT == ASM_ATT)
6334 putc ('l', file);
6335 return;
6337 case 'S':
6338 if (ASSEMBLER_DIALECT == ASM_ATT)
6339 putc ('s', file);
6340 return;
6342 case 'T':
6343 if (ASSEMBLER_DIALECT == ASM_ATT)
6344 putc ('t', file);
6345 return;
6347 case 'z':
6348 /* 387 opcodes don't get size suffixes if the operands are
6349 registers. */
6350 if (STACK_REG_P (x))
6351 return;
6353 /* Likewise if using Intel opcodes. */
6354 if (ASSEMBLER_DIALECT == ASM_INTEL)
6355 return;
6357 /* This is the size of op from size of operand. */
6358 switch (GET_MODE_SIZE (GET_MODE (x)))
6360 case 2:
6361 #ifdef HAVE_GAS_FILDS_FISTS
6362 putc ('s', file);
6363 #endif
6364 return;
6366 case 4:
6367 if (GET_MODE (x) == SFmode)
6369 putc ('s', file);
6370 return;
6372 else
6373 putc ('l', file);
6374 return;
6376 case 12:
6377 case 16:
6378 putc ('t', file);
6379 return;
6381 case 8:
6382 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6384 #ifdef GAS_MNEMONICS
6385 putc ('q', file);
6386 #else
6387 putc ('l', file);
6388 putc ('l', file);
6389 #endif
6391 else
6392 putc ('l', file);
6393 return;
6395 default:
6396 abort ();
6399 case 'b':
6400 case 'w':
6401 case 'k':
6402 case 'q':
6403 case 'h':
6404 case 'y':
6405 case 'X':
6406 case 'P':
6407 break;
6409 case 's':
6410 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6412 PRINT_OPERAND (file, x, 0);
6413 putc (',', file);
6415 return;
6417 case 'D':
6418 /* A little bit of braindamage here. The SSE compare instructions
6419 use completely different names for the comparisons than the
6420 fp conditional moves do. */
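/* E.g. %D prints "lt" for an LT comparison, so an SSE compare template
can produce mnemonics such as cmpltss.  */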
6421 switch (GET_CODE (x))
6423 case EQ:
6424 case UNEQ:
6425 fputs ("eq", file);
6426 break;
6427 case LT:
6428 case UNLT:
6429 fputs ("lt", file);
6430 break;
6431 case LE:
6432 case UNLE:
6433 fputs ("le", file);
6434 break;
6435 case UNORDERED:
6436 fputs ("unord", file);
6437 break;
6438 case NE:
6439 case LTGT:
6440 fputs ("neq", file);
6441 break;
6442 case UNGE:
6443 case GE:
6444 fputs ("nlt", file);
6445 break;
6446 case UNGT:
6447 case GT:
6448 fputs ("nle", file);
6449 break;
6450 case ORDERED:
6451 fputs ("ord", file);
6452 break;
6453 default:
6454 abort ();
6455 break;
6457 return;
6458 case 'O':
6459 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6460 if (ASSEMBLER_DIALECT == ASM_ATT)
6462 switch (GET_MODE (x))
6464 case HImode: putc ('w', file); break;
6465 case SImode:
6466 case SFmode: putc ('l', file); break;
6467 case DImode:
6468 case DFmode: putc ('q', file); break;
6469 default: abort ();
6471 putc ('.', file);
6473 #endif
6474 return;
6475 case 'C':
6476 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6477 return;
6478 case 'F':
6479 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6480 if (ASSEMBLER_DIALECT == ASM_ATT)
6481 putc ('.', file);
6482 #endif
6483 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6484 return;
6486 /* Like above, but reverse condition */
6487 case 'c':
6488 /* Check to see if argument to %c is really a constant
6489 and not a condition code which needs to be reversed. */
6490 if (!COMPARISON_P (x))
6492 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6493 return;
6495 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6496 return;
6497 case 'f':
6498 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6499 if (ASSEMBLER_DIALECT == ASM_ATT)
6500 putc ('.', file);
6501 #endif
6502 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6503 return;
6504 case '+':
6506 rtx x;
6508 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6509 return;
6511 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6512 if (x)
6514 int pred_val = INTVAL (XEXP (x, 0));
6516 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6517 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6519 int taken = pred_val > REG_BR_PROB_BASE / 2;
6520 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6522 /* Emit hints only in the cases where the default branch prediction
6523 heuristics would fail. */
6524 if (taken != cputaken)
6526 /* We use 3e (DS) prefix for taken branches and
6527 2e (CS) prefix for not taken branches. */
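/* E.g. "ds ; jne .L3" marks a branch as taken where the CPU's static
rule (backward taken, forward not taken) would predict otherwise.  */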
6528 if (taken)
6529 fputs ("ds ; ", file);
6530 else
6531 fputs ("cs ; ", file);
6535 return;
6537 default:
6538 output_operand_lossage ("invalid operand code `%c'", code);
6542 if (GET_CODE (x) == REG)
6543 print_reg (x, code, file);
6545 else if (GET_CODE (x) == MEM)
6547 /* No `byte ptr' prefix for call instructions. */
6548 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6550 const char * size;
6551 switch (GET_MODE_SIZE (GET_MODE (x)))
6553 case 1: size = "BYTE"; break;
6554 case 2: size = "WORD"; break;
6555 case 4: size = "DWORD"; break;
6556 case 8: size = "QWORD"; break;
6557 case 12: size = "XWORD"; break;
6558 case 16: size = "XMMWORD"; break;
6559 default:
6560 abort ();
6563 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6564 if (code == 'b')
6565 size = "BYTE";
6566 else if (code == 'w')
6567 size = "WORD";
6568 else if (code == 'k')
6569 size = "DWORD";
6571 fputs (size, file);
6572 fputs (" PTR ", file);
6575 x = XEXP (x, 0);
6576 /* Avoid (%rip) for call operands. */
6577 if (CONSTANT_ADDRESS_P (x) && code == 'P'
6578 && GET_CODE (x) != CONST_INT)
6579 output_addr_const (file, x);
6580 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6581 output_operand_lossage ("invalid constraints for operand");
6582 else
6583 output_address (x);
6586 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6588 REAL_VALUE_TYPE r;
6589 long l;
6591 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6592 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6594 if (ASSEMBLER_DIALECT == ASM_ATT)
6595 putc ('$', file);
6596 fprintf (file, "0x%08lx", l);
6599 /* These float cases don't actually occur as immediate operands. */
6600 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6602 char dstr[30];
6604 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6605 fprintf (file, "%s", dstr);
6608 else if (GET_CODE (x) == CONST_DOUBLE
6609 && GET_MODE (x) == XFmode)
6611 char dstr[30];
6613 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6614 fprintf (file, "%s", dstr);
6617 else
6619 if (code != 'P')
6621 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6623 if (ASSEMBLER_DIALECT == ASM_ATT)
6624 putc ('$', file);
6626 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6627 || GET_CODE (x) == LABEL_REF)
6629 if (ASSEMBLER_DIALECT == ASM_ATT)
6630 putc ('$', file);
6631 else
6632 fputs ("OFFSET FLAT:", file);
6635 if (GET_CODE (x) == CONST_INT)
6636 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6637 else if (flag_pic)
6638 output_pic_addr_const (file, x, code);
6639 else
6640 output_addr_const (file, x);
6644 /* Print a memory operand whose address is ADDR. */
6646 void
6647 print_operand_address (FILE *file, rtx addr)
6649 struct ix86_address parts;
6650 rtx base, index, disp;
6651 int scale;
6653 if (! ix86_decompose_address (addr, &parts))
6654 abort ();
6656 base = parts.base;
6657 index = parts.index;
6658 disp = parts.disp;
6659 scale = parts.scale;
6661 switch (parts.seg)
6663 case SEG_DEFAULT:
6664 break;
6665 case SEG_FS:
6666 case SEG_GS:
6667 if (USER_LABEL_PREFIX[0] == 0)
6668 putc ('%', file);
6669 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
6670 break;
6671 default:
6672 abort ();
6675 if (!base && !index)
6677 /* Displacement only requires special attention. */
6679 if (GET_CODE (disp) == CONST_INT)
6681 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
6683 if (USER_LABEL_PREFIX[0] == 0)
6684 putc ('%', file);
6685 fputs ("ds:", file);
6687 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
6689 else if (flag_pic)
6690 output_pic_addr_const (file, disp, 0);
6691 else
6692 output_addr_const (file, disp);
6694 /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode. */
6695 if (TARGET_64BIT
6696 && ((GET_CODE (disp) == SYMBOL_REF
6697 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
6698 || GET_CODE (disp) == LABEL_REF
6699 || (GET_CODE (disp) == CONST
6700 && GET_CODE (XEXP (disp, 0)) == PLUS
6701 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
6702 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
6703 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
6704 fputs ("(%rip)", file);
6706 else
6708 if (ASSEMBLER_DIALECT == ASM_ATT)
6710 if (disp)
6712 if (flag_pic)
6713 output_pic_addr_const (file, disp, 0);
6714 else if (GET_CODE (disp) == LABEL_REF)
6715 output_asm_label (disp);
6716 else
6717 output_addr_const (file, disp);
6720 putc ('(', file);
6721 if (base)
6722 print_reg (base, 0, file);
6723 if (index)
6725 putc (',', file);
6726 print_reg (index, 0, file);
6727 if (scale != 1)
6728 fprintf (file, ",%d", scale);
6730 putc (')', file);
6732 else
6734 rtx offset = NULL_RTX;
6736 if (disp)
6738 /* Pull out the offset of a symbol; print any symbol itself. */
6739 if (GET_CODE (disp) == CONST
6740 && GET_CODE (XEXP (disp, 0)) == PLUS
6741 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6743 offset = XEXP (XEXP (disp, 0), 1);
6744 disp = gen_rtx_CONST (VOIDmode,
6745 XEXP (XEXP (disp, 0), 0));
6748 if (flag_pic)
6749 output_pic_addr_const (file, disp, 0);
6750 else if (GET_CODE (disp) == LABEL_REF)
6751 output_asm_label (disp);
6752 else if (GET_CODE (disp) == CONST_INT)
6753 offset = disp;
6754 else
6755 output_addr_const (file, disp);
6758 putc ('[', file);
6759 if (base)
6761 print_reg (base, 0, file);
6762 if (offset)
6764 if (INTVAL (offset) >= 0)
6765 putc ('+', file);
6766 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6769 else if (offset)
6770 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6771 else
6772 putc ('0', file);
6774 if (index)
6776 putc ('+', file);
6777 print_reg (index, 0, file);
6778 if (scale != 1)
6779 fprintf (file, "*%d", scale);
6781 putc (']', file);
6786 bool
6787 output_addr_const_extra (FILE *file, rtx x)
6789 rtx op;
6791 if (GET_CODE (x) != UNSPEC)
6792 return false;
6794 op = XVECEXP (x, 0, 0);
6795 switch (XINT (x, 1))
6797 case UNSPEC_GOTTPOFF:
6798 output_addr_const (file, op);
6799 /* FIXME: This might be @TPOFF in Sun ld. */
6800 fputs ("@GOTTPOFF", file);
6801 break;
6802 case UNSPEC_TPOFF:
6803 output_addr_const (file, op);
6804 fputs ("@TPOFF", file);
6805 break;
6806 case UNSPEC_NTPOFF:
6807 output_addr_const (file, op);
6808 if (TARGET_64BIT)
6809 fputs ("@TPOFF", file);
6810 else
6811 fputs ("@NTPOFF", file);
6812 break;
6813 case UNSPEC_DTPOFF:
6814 output_addr_const (file, op);
6815 fputs ("@DTPOFF", file);
6816 break;
6817 case UNSPEC_GOTNTPOFF:
6818 output_addr_const (file, op);
6819 if (TARGET_64BIT)
6820 fputs ("@GOTTPOFF(%rip)", file);
6821 else
6822 fputs ("@GOTNTPOFF", file);
6823 break;
6824 case UNSPEC_INDNTPOFF:
6825 output_addr_const (file, op);
6826 fputs ("@INDNTPOFF", file);
6827 break;
6829 default:
6830 return false;
6833 return true;
6836 /* Split one or more DImode RTL references into pairs of SImode
6837 references. The RTL can be REG, offsettable MEM, integer constant, or
6838 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6839 split and "num" is its length. lo_half and hi_half are output arrays
6840 that parallel "operands". */
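/* E.g. a DImode stack slot (mem:DI (reg ebp)) splits into
(mem:SI (reg ebp)) and (mem:SI (plus (reg ebp) (const_int 4))), while a
DImode pseudo splits into its two SImode subregs.  */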
6842 void
6843 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
6845 while (num--)
6847 rtx op = operands[num];
6849 /* simplify_subreg refuses to split volatile memory addresses,
6850 but we still have to handle them. */
6851 if (GET_CODE (op) == MEM)
6853 lo_half[num] = adjust_address (op, SImode, 0);
6854 hi_half[num] = adjust_address (op, SImode, 4);
6856 else
6858 lo_half[num] = simplify_gen_subreg (SImode, op,
6859 GET_MODE (op) == VOIDmode
6860 ? DImode : GET_MODE (op), 0);
6861 hi_half[num] = simplify_gen_subreg (SImode, op,
6862 GET_MODE (op) == VOIDmode
6863 ? DImode : GET_MODE (op), 4);
6867 /* Split one or more TImode RTL references into pairs of DImode
6868 references. The RTL can be REG, offsettable MEM, integer constant, or
6869 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
6870 split and "num" is its length. lo_half and hi_half are output arrays
6871 that parallel "operands". */
6873 void
6874 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
6876 while (num--)
6878 rtx op = operands[num];
6880 /* simplify_subreg refuses to split volatile memory addresses, but we
6881 still have to handle them. */
6882 if (GET_CODE (op) == MEM)
6884 lo_half[num] = adjust_address (op, DImode, 0);
6885 hi_half[num] = adjust_address (op, DImode, 8);
6887 else
6889 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6890 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6895 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6896 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6897 is the expression of the binary operation. The output may either be
6898 emitted here, or returned to the caller, like all output_* functions.
6900 There is no guarantee that the operands are the same mode, as they
6901 might be within FLOAT or FLOAT_EXTEND expressions. */
6903 #ifndef SYSV386_COMPAT
6904 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6905 wants to fix the assemblers because that causes incompatibility
6906 with gcc. No-one wants to fix gcc because that causes
6907 incompatibility with assemblers... You can use the option of
6908 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6909 #define SYSV386_COMPAT 1
6910 #endif
6912 const char *
6913 output_387_binary_op (rtx insn, rtx *operands)
6915 static char buf[30];
6916 const char *p;
6917 const char *ssep;
6918 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
6920 #ifdef ENABLE_CHECKING
6921 /* Even if we do not want to check the inputs, this documents the input
6922 constraints, which helps in understanding the following code. */
6923 if (STACK_REG_P (operands[0])
6924 && ((REG_P (operands[1])
6925 && REGNO (operands[0]) == REGNO (operands[1])
6926 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6927 || (REG_P (operands[2])
6928 && REGNO (operands[0]) == REGNO (operands[2])
6929 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6930 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6931 ; /* ok */
6932 else if (!is_sse)
6933 abort ();
6934 #endif
6936 switch (GET_CODE (operands[3]))
6938 case PLUS:
6939 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6940 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6941 p = "fiadd";
6942 else
6943 p = "fadd";
6944 ssep = "add";
6945 break;
6947 case MINUS:
6948 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6949 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6950 p = "fisub";
6951 else
6952 p = "fsub";
6953 ssep = "sub";
6954 break;
6956 case MULT:
6957 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6958 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6959 p = "fimul";
6960 else
6961 p = "fmul";
6962 ssep = "mul";
6963 break;
6965 case DIV:
6966 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6967 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6968 p = "fidiv";
6969 else
6970 p = "fdiv";
6971 ssep = "div";
6972 break;
6974 default:
6975 abort ();
6978 if (is_sse)
6980 strcpy (buf, ssep);
6981 if (GET_MODE (operands[0]) == SFmode)
6982 strcat (buf, "ss\t{%2, %0|%0, %2}");
6983 else
6984 strcat (buf, "sd\t{%2, %0|%0, %2}");
6985 return buf;
6987 strcpy (buf, p);
6989 switch (GET_CODE (operands[3]))
6991 case MULT:
6992 case PLUS:
6993 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6995 rtx temp = operands[2];
6996 operands[2] = operands[1];
6997 operands[1] = temp;
7000 /* know operands[0] == operands[1]. */
7002 if (GET_CODE (operands[2]) == MEM)
7004 p = "%z2\t%2";
7005 break;
7008 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7010 if (STACK_TOP_P (operands[0]))
7011 /* How is it that we are storing to a dead operand[2]?
7012 Well, presumably operands[1] is dead too. We can't
7013 store the result to st(0) as st(0) gets popped on this
7014 instruction. Instead store to operands[2] (which I
7015 think has to be st(1)). st(1) will be popped later.
7016 gcc <= 2.8.1 didn't have this check and generated
7017 assembly code that the Unixware assembler rejected. */
7018 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7019 else
7020 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7021 break;
7024 if (STACK_TOP_P (operands[0]))
7025 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7026 else
7027 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7028 break;
7030 case MINUS:
7031 case DIV:
7032 if (GET_CODE (operands[1]) == MEM)
7034 p = "r%z1\t%1";
7035 break;
7038 if (GET_CODE (operands[2]) == MEM)
7040 p = "%z2\t%2";
7041 break;
7044 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7046 #if SYSV386_COMPAT
7047 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7048 derived assemblers, confusingly reverse the direction of
7049 the operation for fsub{r} and fdiv{r} when the
7050 destination register is not st(0). The Intel assembler
7051 doesn't have this brain damage. Read !SYSV386_COMPAT to
7052 figure out what the hardware really does. */
7053 if (STACK_TOP_P (operands[0]))
7054 p = "{p\t%0, %2|rp\t%2, %0}";
7055 else
7056 p = "{rp\t%2, %0|p\t%0, %2}";
7057 #else
7058 if (STACK_TOP_P (operands[0]))
7059 /* As above for fmul/fadd, we can't store to st(0). */
7060 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7061 else
7062 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7063 #endif
7064 break;
7067 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7069 #if SYSV386_COMPAT
7070 if (STACK_TOP_P (operands[0]))
7071 p = "{rp\t%0, %1|p\t%1, %0}";
7072 else
7073 p = "{p\t%1, %0|rp\t%0, %1}";
7074 #else
7075 if (STACK_TOP_P (operands[0]))
7076 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7077 else
7078 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7079 #endif
7080 break;
7083 if (STACK_TOP_P (operands[0]))
7085 if (STACK_TOP_P (operands[1]))
7086 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7087 else
7088 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7089 break;
7091 else if (STACK_TOP_P (operands[1]))
7093 #if SYSV386_COMPAT
7094 p = "{\t%1, %0|r\t%0, %1}";
7095 #else
7096 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7097 #endif
7099 else
7101 #if SYSV386_COMPAT
7102 p = "{r\t%2, %0|\t%0, %2}";
7103 #else
7104 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7105 #endif
7107 break;
7109 default:
7110 abort ();
7113 strcat (buf, p);
7114 return buf;
7117 /* Output code to initialize control word copies used by
7118 trunc?f?i patterns. NORMAL is set to the current control word, while
7119 ROUND_DOWN is set to a control word with rounding forced toward zero. */
7120 void
7121 emit_i387_cw_initialization (rtx normal, rtx round_down)
7123 rtx reg = gen_reg_rtx (HImode);
7125 emit_insn (gen_x86_fnstcw_1 (normal));
7126 emit_move_insn (reg, normal);
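/* Force the rounding control field (bits 10 and 11 of the i387 control
word) to 11, i.e. round toward zero: either rewrite the whole high byte
with 0x0c, or OR in 0xc00.  */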
7127 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7128 && !TARGET_64BIT)
7129 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7130 else
7131 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7132 emit_move_insn (round_down, reg);
7135 /* Output code for INSN to convert a float to a signed int. OPERANDS
7136 are the insn operands. The output may be [HSD]Imode and the input
7137 operand may be [SDX]Fmode. */
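/* For a DImode result the sequence is roughly "fldcw %3" (switch to the
truncating control word), "fistpll %0" (store and pop) and "fldcw %2"
(restore the original control word).  */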
7139 const char *
7140 output_fix_trunc (rtx insn, rtx *operands)
7142 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7143 int dimode_p = GET_MODE (operands[0]) == DImode;
7145 /* Jump through a hoop or two for DImode, since the hardware has no
7146 non-popping instruction. We used to do this a different way, but
7147 that was somewhat fragile and broke with post-reload splitters. */
7148 if (dimode_p && !stack_top_dies)
7149 output_asm_insn ("fld\t%y1", operands);
7151 if (!STACK_TOP_P (operands[1]))
7152 abort ();
7154 if (GET_CODE (operands[0]) != MEM)
7155 abort ();
7157 output_asm_insn ("fldcw\t%3", operands);
7158 if (stack_top_dies || dimode_p)
7159 output_asm_insn ("fistp%z0\t%0", operands);
7160 else
7161 output_asm_insn ("fist%z0\t%0", operands);
7162 output_asm_insn ("fldcw\t%2", operands);
7164 return "";
7167 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7168 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7169 when fucom should be used. */
7171 const char *
7172 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
7174 int stack_top_dies;
7175 rtx cmp_op0 = operands[0];
7176 rtx cmp_op1 = operands[1];
7177 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7179 if (eflags_p == 2)
7181 cmp_op0 = cmp_op1;
7182 cmp_op1 = operands[2];
7184 if (is_sse)
7186 if (GET_MODE (operands[0]) == SFmode)
7187 if (unordered_p)
7188 return "ucomiss\t{%1, %0|%0, %1}";
7189 else
7190 return "comiss\t{%1, %0|%0, %1}";
7191 else
7192 if (unordered_p)
7193 return "ucomisd\t{%1, %0|%0, %1}";
7194 else
7195 return "comisd\t{%1, %0|%0, %1}";
7198 if (! STACK_TOP_P (cmp_op0))
7199 abort ();
7201 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7203 if (STACK_REG_P (cmp_op1)
7204 && stack_top_dies
7205 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7206 && REGNO (cmp_op1) != FIRST_STACK_REG)
7208 /* If the top of the 387 stack dies, and the other operand
7209 is also a stack register that dies, then this must be an
7210 `fcompp' float compare. */
7212 if (eflags_p == 1)
7214 /* There is no double-popping fcomi variant. Fortunately,
7215 eflags is immune to the fstp's cc clobbering. */
7216 if (unordered_p)
7217 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7218 else
7219 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7220 return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
7222 else
7224 if (eflags_p == 2)
7226 if (unordered_p)
7227 return "fucompp\n\tfnstsw\t%0";
7228 else
7229 return "fcompp\n\tfnstsw\t%0";
7231 else
7233 if (unordered_p)
7234 return "fucompp";
7235 else
7236 return "fcompp";
7240 else
7242 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
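/* The index is (eflags_p << 3) | (intmode << 2) | (unordered_p << 1)
| stack_top_dies; e.g. eflags_p == 0, an fp operand, unordered_p == 1
and a dying stack top select entry 3, "fucomp%z1\t%y1".  */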
7244 static const char * const alt[24] =
7246 "fcom%z1\t%y1",
7247 "fcomp%z1\t%y1",
7248 "fucom%z1\t%y1",
7249 "fucomp%z1\t%y1",
7251 "ficom%z1\t%y1",
7252 "ficomp%z1\t%y1",
7253 NULL,
7254 NULL,
7256 "fcomi\t{%y1, %0|%0, %y1}",
7257 "fcomip\t{%y1, %0|%0, %y1}",
7258 "fucomi\t{%y1, %0|%0, %y1}",
7259 "fucomip\t{%y1, %0|%0, %y1}",
7261 NULL,
7262 NULL,
7263 NULL,
7264 NULL,
7266 "fcom%z2\t%y2\n\tfnstsw\t%0",
7267 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7268 "fucom%z2\t%y2\n\tfnstsw\t%0",
7269 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7271 "ficom%z2\t%y2\n\tfnstsw\t%0",
7272 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7273 NULL,
7274 NULL
7277 int mask;
7278 const char *ret;
7280 mask = eflags_p << 3;
7281 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7282 mask |= unordered_p << 1;
7283 mask |= stack_top_dies;
7285 if (mask >= 24)
7286 abort ();
7287 ret = alt[mask];
7288 if (ret == NULL)
7289 abort ();
7291 return ret;
7295 void
7296 ix86_output_addr_vec_elt (FILE *file, int value)
7298 const char *directive = ASM_LONG;
7300 if (TARGET_64BIT)
7302 #ifdef ASM_QUAD
7303 directive = ASM_QUAD;
7304 #else
7305 abort ();
7306 #endif
7309 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7312 void
7313 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
7315 if (TARGET_64BIT)
7316 fprintf (file, "%s%s%d-%s%d\n",
7317 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7318 else if (HAVE_AS_GOTOFF_IN_DATA)
7319 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7320 #if TARGET_MACHO
7321 else if (TARGET_MACHO)
7323 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
7324 machopic_output_function_base_name (file);
7325 fprintf(file, "\n");
7327 #endif
7328 else
7329 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7330 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7333 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7334 for the target. */
7336 void
7337 ix86_expand_clear (rtx dest)
7339 rtx tmp;
7341 /* We play register width games, which are only valid after reload. */
7342 if (!reload_completed)
7343 abort ();
7345 /* Avoid HImode and its attendant prefix byte. */
7346 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7347 dest = gen_rtx_REG (SImode, REGNO (dest));
7349 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7351 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7352 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7354 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7355 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7358 emit_insn (tmp);
7361 /* X is an unchanging MEM. If it is a constant pool reference, return
7362 the constant pool rtx, else NULL. */
7365 maybe_get_pool_constant (rtx x)
7367 x = ix86_delegitimize_address (XEXP (x, 0));
7369 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7370 return get_pool_constant (x);
7372 return NULL_RTX;
7375 void
7376 ix86_expand_move (enum machine_mode mode, rtx operands[])
7378 int strict = (reload_in_progress || reload_completed);
7379 rtx op0, op1;
7380 enum tls_model model;
7382 op0 = operands[0];
7383 op1 = operands[1];
7385 model = GET_CODE (op1) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (op1) : 0;
7386 if (model)
7388 op1 = legitimize_tls_address (op1, model, true);
7389 op1 = force_operand (op1, op0);
7390 if (op1 == op0)
7391 return;
7394 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7396 #if TARGET_MACHO
7397 if (MACHOPIC_PURE)
7399 rtx temp = ((reload_in_progress
7400 || ((op0 && GET_CODE (op0) == REG)
7401 && mode == Pmode))
7402 ? op0 : gen_reg_rtx (Pmode));
7403 op1 = machopic_indirect_data_reference (op1, temp);
7404 op1 = machopic_legitimize_pic_address (op1, mode,
7405 temp == op1 ? 0 : temp);
7407 else if (MACHOPIC_INDIRECT)
7408 op1 = machopic_indirect_data_reference (op1, 0);
7409 if (op0 == op1)
7410 return;
7411 #else
7412 if (GET_CODE (op0) == MEM)
7413 op1 = force_reg (Pmode, op1);
7414 else
7415 op1 = legitimize_address (op1, op1, Pmode);
7416 #endif /* TARGET_MACHO */
7418 else
7420 if (GET_CODE (op0) == MEM
7421 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7422 || !push_operand (op0, mode))
7423 && GET_CODE (op1) == MEM)
7424 op1 = force_reg (mode, op1);
7426 if (push_operand (op0, mode)
7427 && ! general_no_elim_operand (op1, mode))
7428 op1 = copy_to_mode_reg (mode, op1);
7430 /* Force large constants in 64-bit compilation into a register
7431 to get them CSEed. */
7432 if (TARGET_64BIT && mode == DImode
7433 && immediate_operand (op1, mode)
7434 && !x86_64_zext_immediate_operand (op1, VOIDmode)
7435 && !register_operand (op0, mode)
7436 && optimize && !reload_completed && !reload_in_progress)
7437 op1 = copy_to_mode_reg (mode, op1);
7439 if (FLOAT_MODE_P (mode))
7441 /* If we are loading a floating point constant to a register,
7442 force the value to memory now, since we'll get better code
7443 out of the back end. */
7445 if (strict)
7447 else if (GET_CODE (op1) == CONST_DOUBLE)
7449 op1 = validize_mem (force_const_mem (mode, op1));
7450 if (!register_operand (op0, mode))
7452 rtx temp = gen_reg_rtx (mode);
7453 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
7454 emit_move_insn (op0, temp);
7455 return;
7461 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
7464 void
7465 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
7467 /* Force constants other than zero into memory. We do not know how
7468 the instructions used to build constants modify the upper 64 bits
7469 of the register; once we have that information we may be able
7470 to handle some of them more efficiently. */
7471 if ((reload_in_progress | reload_completed) == 0
7472 && register_operand (operands[0], mode)
7473 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
7474 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
7476 /* Make operand1 a register if it isn't already. */
7477 if (!no_new_pseudos
7478 && !register_operand (operands[0], mode)
7479 && !register_operand (operands[1], mode))
7481 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
7482 emit_move_insn (operands[0], temp);
7483 return;
7486 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
7489 /* Attempt to expand a binary operator. Make the expansion closer to the
7490 actual machine than just general_operand, which would allow 3 separate
7491 memory references (one output, two input) in a single insn. */
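/* E.g. for "mem1 = mem1 + mem2" the second memory source is forced into
a register, and for "mem1 = reg + mem1" the sources are swapped first so
that the destination still matches one source operand.  */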
7493 void
7494 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
7495 rtx operands[])
7497 int matching_memory;
7498 rtx src1, src2, dst, op, clob;
7500 dst = operands[0];
7501 src1 = operands[1];
7502 src2 = operands[2];
7504 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7505 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7506 && (rtx_equal_p (dst, src2)
7507 || immediate_operand (src1, mode)))
7509 rtx temp = src1;
7510 src1 = src2;
7511 src2 = temp;
7514 /* If the destination is memory, and we do not have matching source
7515 operands, do things in registers. */
7516 matching_memory = 0;
7517 if (GET_CODE (dst) == MEM)
7519 if (rtx_equal_p (dst, src1))
7520 matching_memory = 1;
7521 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7522 && rtx_equal_p (dst, src2))
7523 matching_memory = 2;
7524 else
7525 dst = gen_reg_rtx (mode);
7528 /* Both source operands cannot be in memory. */
7529 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7531 if (matching_memory != 2)
7532 src2 = force_reg (mode, src2);
7533 else
7534 src1 = force_reg (mode, src1);
7537 /* If the operation is not commutative, source 1 cannot be a constant
7538 or non-matching memory. */
7539 if ((CONSTANT_P (src1)
7540 || (!matching_memory && GET_CODE (src1) == MEM))
7541 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
7542 src1 = force_reg (mode, src1);
7544 /* If optimizing, copy to regs to improve CSE */
7545 if (optimize && ! no_new_pseudos)
7547 if (GET_CODE (dst) == MEM)
7548 dst = gen_reg_rtx (mode);
7549 if (GET_CODE (src1) == MEM)
7550 src1 = force_reg (mode, src1);
7551 if (GET_CODE (src2) == MEM)
7552 src2 = force_reg (mode, src2);
7555 /* Emit the instruction. */
7557 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7558 if (reload_in_progress)
7560 /* Reload doesn't know about the flags register, and doesn't know that
7561 it doesn't want to clobber it. We can only do this with PLUS. */
7562 if (code != PLUS)
7563 abort ();
7564 emit_insn (op);
7566 else
7568 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7569 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7572 /* Fix up the destination if needed. */
7573 if (dst != operands[0])
7574 emit_move_insn (operands[0], dst);
7577 /* Return TRUE or FALSE depending on whether the binary operator meets the
7578 appropriate constraints. */
7581 ix86_binary_operator_ok (enum rtx_code code,
7582 enum machine_mode mode ATTRIBUTE_UNUSED,
7583 rtx operands[3])
7585 /* Both source operands cannot be in memory. */
7586 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7587 return 0;
7588 /* If the operation is not commutative, source 1 cannot be a constant. */
7589 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
7590 return 0;
7591 /* If the destination is memory, we must have a matching source operand. */
7592 if (GET_CODE (operands[0]) == MEM
7593 && ! (rtx_equal_p (operands[0], operands[1])
7594 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7595 && rtx_equal_p (operands[0], operands[2]))))
7596 return 0;
7597 /* If the operation is not commutative and source 1 is memory, we must
7598 have a matching destination. */
7599 if (GET_CODE (operands[1]) == MEM
7600 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
7601 && ! rtx_equal_p (operands[0], operands[1]))
7602 return 0;
7603 return 1;
7606 /* Attempt to expand a unary operator. Make the expansion closer to the
7607 actual machine than just general_operand, which would allow 2 separate
7608 memory references (one output, one input) in a single insn. */
7610 void
7611 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
7612 rtx operands[])
7614 int matching_memory;
7615 rtx src, dst, op, clob;
7617 dst = operands[0];
7618 src = operands[1];
7620 /* If the destination is memory, and we do not have matching source
7621 operands, do things in registers. */
7622 matching_memory = 0;
7623 if (GET_CODE (dst) == MEM)
7625 if (rtx_equal_p (dst, src))
7626 matching_memory = 1;
7627 else
7628 dst = gen_reg_rtx (mode);
7631 /* When source operand is memory, destination must match. */
7632 if (!matching_memory && GET_CODE (src) == MEM)
7633 src = force_reg (mode, src);
7635 /* If optimizing, copy to regs to improve CSE */
7636 if (optimize && ! no_new_pseudos)
7638 if (GET_CODE (dst) == MEM)
7639 dst = gen_reg_rtx (mode);
7640 if (GET_CODE (src) == MEM)
7641 src = force_reg (mode, src);
7644 /* Emit the instruction. */
7646 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7647 if (reload_in_progress || code == NOT)
7649 /* Reload doesn't know about the flags register, and doesn't know that
7650 it doesn't want to clobber it. */
7651 if (code != NOT)
7652 abort ();
7653 emit_insn (op);
7655 else
7657 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7658 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7661 /* Fix up the destination if needed. */
7662 if (dst != operands[0])
7663 emit_move_insn (operands[0], dst);
7666 /* Return TRUE or FALSE depending on whether the unary operator meets the
7667 appropriate constraints. */
7670 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
7671 enum machine_mode mode ATTRIBUTE_UNUSED,
7672 rtx operands[2] ATTRIBUTE_UNUSED)
7674 /* If one of operands is memory, source and destination must match. */
7675 if ((GET_CODE (operands[0]) == MEM
7676 || GET_CODE (operands[1]) == MEM)
7677 && ! rtx_equal_p (operands[0], operands[1]))
7678 return FALSE;
7679 return TRUE;
7682 /* Return TRUE or FALSE depending on whether the first SET in INSN
7683 has source and destination with matching CC modes, and that the
7684 CC mode is at least as constrained as REQ_MODE. */
7687 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
7689 rtx set;
7690 enum machine_mode set_mode;
7692 set = PATTERN (insn);
7693 if (GET_CODE (set) == PARALLEL)
7694 set = XVECEXP (set, 0, 0);
7695 if (GET_CODE (set) != SET)
7696 abort ();
7697 if (GET_CODE (SET_SRC (set)) != COMPARE)
7698 abort ();
7700 set_mode = GET_MODE (SET_DEST (set));
7701 switch (set_mode)
7703 case CCNOmode:
7704 if (req_mode != CCNOmode
7705 && (req_mode != CCmode
7706 || XEXP (SET_SRC (set), 1) != const0_rtx))
7707 return 0;
7708 break;
7709 case CCmode:
7710 if (req_mode == CCGCmode)
7711 return 0;
7712 /* FALLTHRU */
7713 case CCGCmode:
7714 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7715 return 0;
7716 /* FALLTHRU */
7717 case CCGOCmode:
7718 if (req_mode == CCZmode)
7719 return 0;
7720 /* FALLTHRU */
7721 case CCZmode:
7722 break;
7724 default:
7725 abort ();
7728 return (GET_MODE (SET_SRC (set)) == set_mode);
7731 /* Generate insn patterns to do an integer compare of OPERANDS. */
7733 static rtx
7734 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
7736 enum machine_mode cmpmode;
7737 rtx tmp, flags;
7739 cmpmode = SELECT_CC_MODE (code, op0, op1);
7740 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7742 /* This is very simple, but making the interface the same as in the
7743 FP case makes the rest of the code easier. */
7744 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7745 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7747 /* Return the test that should be put into the flags user, i.e.
7748 the bcc, scc, or cmov instruction. */
7749 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7752 /* Figure out whether to use ordered or unordered fp comparisons.
7753 Return the appropriate mode to use. */
7755 enum machine_mode
7756 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
7758 /* ??? In order to make all comparisons reversible, we do all comparisons
7759 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7760 all forms of trapping and nontrapping comparisons, we can make inequality
7761 comparisons trapping again, since it results in better code when using
7762 FCOM based compares. */
7763 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
7766 enum machine_mode
7767 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
7769 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7770 return ix86_fp_compare_mode (code);
7771 switch (code)
7773 /* Only zero flag is needed. */
7774 case EQ: /* ZF=0 */
7775 case NE: /* ZF!=0 */
7776 return CCZmode;
7777 /* Codes needing carry flag. */
7778 case GEU: /* CF=0 */
7779 case GTU: /* CF=0 & ZF=0 */
7780 case LTU: /* CF=1 */
7781 case LEU: /* CF=1 | ZF=1 */
7782 return CCmode;
7783 /* Codes possibly doable only with sign flag when
7784 comparing against zero. */
7785 case GE: /* SF=OF or SF=0 */
7786 case LT: /* SF<>OF or SF=1 */
7787 if (op1 == const0_rtx)
7788 return CCGOCmode;
7789 else
7790 /* For other cases Carry flag is not required. */
7791 return CCGCmode;
7792 /* Codes doable only with the sign flag when comparing
7793 against zero, but we miss the jump instruction for it,
7794 so we need to use relational tests against overflow
7795 that thus needs to be zero. */
7796 case GT: /* ZF=0 & SF=OF */
7797 case LE: /* ZF=1 | SF<>OF */
7798 if (op1 == const0_rtx)
7799 return CCNOmode;
7800 else
7801 return CCGCmode;
7802 /* The strcmp pattern does (use flags), and combine may ask us for the
7803 proper mode. */
7804 case USE:
7805 return CCmode;
7806 default:
7807 abort ();
7811 /* Return the fixed registers used for condition codes. */
7813 static bool
7814 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
7816 *p1 = FLAGS_REG;
7817 *p2 = FPSR_REG;
7818 return true;
7821 /* If two condition code modes are compatible, return a condition code
7822 mode which is compatible with both. Otherwise, return
7823 VOIDmode. */
7825 static enum machine_mode
7826 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
7828 if (m1 == m2)
7829 return m1;
7831 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
7832 return VOIDmode;
7834 if ((m1 == CCGCmode && m2 == CCGOCmode)
7835 || (m1 == CCGOCmode && m2 == CCGCmode))
7836 return CCGCmode;
7838 switch (m1)
7840 default:
7841 abort ();
7843 case CCmode:
7844 case CCGCmode:
7845 case CCGOCmode:
7846 case CCNOmode:
7847 case CCZmode:
7848 switch (m2)
7850 default:
7851 return VOIDmode;
7853 case CCmode:
7854 case CCGCmode:
7855 case CCGOCmode:
7856 case CCNOmode:
7857 case CCZmode:
7858 return CCmode;
7861 case CCFPmode:
7862 case CCFPUmode:
7863 /* These are only compatible with themselves, which we already
7864 checked above. */
7865 return VOIDmode;
7869 /* Return true if we should use an FCOMI instruction for this fp comparison. */
7872 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
7874 enum rtx_code swapped_code = swap_condition (code);
7875 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7876 || (ix86_fp_comparison_cost (swapped_code)
7877 == ix86_fp_comparison_fcomi_cost (swapped_code)));
7880 /* Swap, force into registers, or otherwise massage the two operands
7881 to a fp comparison. The operands are updated in place; the new
7882 comparison code is returned. */
7884 static enum rtx_code
7885 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
7887 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7888 rtx op0 = *pop0, op1 = *pop1;
7889 enum machine_mode op_mode = GET_MODE (op0);
7890 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
7892 /* All of the unordered compare instructions only work on registers.
7893 The same is true of the XFmode compare instructions. The same is
7894 true of the fcomi compare instructions. */
7896 if (!is_sse
7897 && (fpcmp_mode == CCFPUmode
7898 || op_mode == XFmode
7899 || ix86_use_fcomi_compare (code)))
7901 op0 = force_reg (op_mode, op0);
7902 op1 = force_reg (op_mode, op1);
7904 else
7906 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7907 things around if they appear profitable, otherwise force op0
7908 into a register. */
7910 if (standard_80387_constant_p (op0) == 0
7911 || (GET_CODE (op0) == MEM
7912 && ! (standard_80387_constant_p (op1) == 0
7913 || GET_CODE (op1) == MEM)))
7915 rtx tmp;
7916 tmp = op0, op0 = op1, op1 = tmp;
7917 code = swap_condition (code);
7920 if (GET_CODE (op0) != REG)
7921 op0 = force_reg (op_mode, op0);
7923 if (CONSTANT_P (op1))
7925 if (standard_80387_constant_p (op1))
7926 op1 = force_reg (op_mode, op1);
7927 else
7928 op1 = validize_mem (force_const_mem (op_mode, op1));
7932 /* Try to rearrange the comparison to make it cheaper. */
7933 if (ix86_fp_comparison_cost (code)
7934 > ix86_fp_comparison_cost (swap_condition (code))
7935 && (GET_CODE (op1) == REG || !no_new_pseudos))
7937 rtx tmp;
7938 tmp = op0, op0 = op1, op1 = tmp;
7939 code = swap_condition (code);
7940 if (GET_CODE (op0) != REG)
7941 op0 = force_reg (op_mode, op0);
7944 *pop0 = op0;
7945 *pop1 = op1;
7946 return code;
7949 /* Convert the comparison codes we use to represent FP comparisons to the
7950 integer code that will result in a proper branch. Return UNKNOWN if no
7951 such code is available. */
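/* After fcomi or fnstsw/sahf, CF and ZF look like the result of an unsigned
   integer compare (with PF flagging unordered), which is why the mapping
   below uses the unsigned codes. */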
7953 enum rtx_code
7954 ix86_fp_compare_code_to_integer (enum rtx_code code)
7956 switch (code)
7958 case GT:
7959 return GTU;
7960 case GE:
7961 return GEU;
7962 case ORDERED:
7963 case UNORDERED:
7964 return code;
7965 break;
7966 case UNEQ:
7967 return EQ;
7968 break;
7969 case UNLT:
7970 return LTU;
7971 break;
7972 case UNLE:
7973 return LEU;
7974 break;
7975 case LTGT:
7976 return NE;
7977 break;
7978 default:
7979 return UNKNOWN;
7983 /* Split comparison code CODE into comparisons we can do using branch
7984 instructions. BYPASS_CODE is the comparison code for the branch that will
7985 branch around FIRST_CODE and SECOND_CODE. If one of the branches
7986 is not required, its code is set to UNKNOWN.
7987 We never require more than two branches. */
7989 void
7990 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
7991 enum rtx_code *first_code,
7992 enum rtx_code *second_code)
7994 *first_code = code;
7995 *bypass_code = UNKNOWN;
7996 *second_code = UNKNOWN;
7998 /* The fcomi comparison sets flags as follows:
8000 cmp ZF PF CF
8001 > 0 0 0
8002 < 0 0 1
8003 = 1 0 0
8004 un 1 1 1 */
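/* (C0, C2 and C3 of the FPU status word reach CF, PF and ZF through
   fnstsw/sahf, so the same table applies to the non-fcomi sequences.) */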
8006 switch (code)
8008 case GT: /* GTU - CF=0 & ZF=0 */
8009 case GE: /* GEU - CF=0 */
8010 case ORDERED: /* PF=0 */
8011 case UNORDERED: /* PF=1 */
8012 case UNEQ: /* EQ - ZF=1 */
8013 case UNLT: /* LTU - CF=1 */
8014 case UNLE: /* LEU - CF=1 | ZF=1 */
8015 case LTGT: /* EQ - ZF=0 */
8016 break;
8017 case LT: /* LTU - CF=1 - fails on unordered */
8018 *first_code = UNLT;
8019 *bypass_code = UNORDERED;
8020 break;
8021 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8022 *first_code = UNLE;
8023 *bypass_code = UNORDERED;
8024 break;
8025 case EQ: /* EQ - ZF=1 - fails on unordered */
8026 *first_code = UNEQ;
8027 *bypass_code = UNORDERED;
8028 break;
8029 case NE: /* NE - ZF=0 - fails on unordered */
8030 *first_code = LTGT;
8031 *second_code = UNORDERED;
8032 break;
8033 case UNGE: /* GEU - CF=0 - fails on unordered */
8034 *first_code = GE;
8035 *second_code = UNORDERED;
8036 break;
8037 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8038 *first_code = GT;
8039 *second_code = UNORDERED;
8040 break;
8041 default:
8042 abort ();
8044 if (!TARGET_IEEE_FP)
8046 *second_code = UNKNOWN;
8047 *bypass_code = UNKNOWN;
8051 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
8052 All of the following functions use the number of instructions as the cost metric.
8053 In the future this should be tweaked to compute bytes for optimize_size and
8054 take into account the performance of various instructions on various CPUs. */
8055 static int
8056 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8058 if (!TARGET_IEEE_FP)
8059 return 4;
8060 /* The cost of code output by ix86_expand_fp_compare. */
8061 switch (code)
8063 case UNLE:
8064 case UNLT:
8065 case LTGT:
8066 case GT:
8067 case GE:
8068 case UNORDERED:
8069 case ORDERED:
8070 case UNEQ:
8071 return 4;
8072 break;
8073 case LT:
8074 case NE:
8075 case EQ:
8076 case UNGE:
8077 return 5;
8078 break;
8079 case LE:
8080 case UNGT:
8081 return 6;
8082 break;
8083 default:
8084 abort ();
8088 /* Return cost of comparison done using fcomi operation.
8089 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8090 static int
8091 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
8093 enum rtx_code bypass_code, first_code, second_code;
8094 /* Return an arbitrarily high cost when the instruction is not supported - this
8095 prevents gcc from using it. */
8096 if (!TARGET_CMOVE)
8097 return 1024;
8098 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8099 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
8102 /* Return cost of comparison done using sahf operation.
8103 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8104 static int
8105 ix86_fp_comparison_sahf_cost (enum rtx_code code)
8107 enum rtx_code bypass_code, first_code, second_code;
8108 /* Return an arbitrarily high cost when the instruction is not preferred - this
8109 prevents gcc from using it. */
8110 if (!TARGET_USE_SAHF && !optimize_size)
8111 return 1024;
8112 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8113 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
8116 /* Compute cost of the comparison done using any method.
8117 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8118 static int
8119 ix86_fp_comparison_cost (enum rtx_code code)
8121 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8122 int min;
8124 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8125 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8127 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8128 if (min > sahf_cost)
8129 min = sahf_cost;
8130 if (min > fcomi_cost)
8131 min = fcomi_cost;
8132 return min;
8135 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8137 static rtx
8138 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
8139 rtx *second_test, rtx *bypass_test)
8141 enum machine_mode fpcmp_mode, intcmp_mode;
8142 rtx tmp, tmp2;
8143 int cost = ix86_fp_comparison_cost (code);
8144 enum rtx_code bypass_code, first_code, second_code;
8146 fpcmp_mode = ix86_fp_compare_mode (code);
8147 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8149 if (second_test)
8150 *second_test = NULL_RTX;
8151 if (bypass_test)
8152 *bypass_test = NULL_RTX;
8154 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8156 /* Do fcomi/sahf based test when profitable. */
8157 if ((bypass_code == UNKNOWN || bypass_test)
8158 && (second_code == UNKNOWN || second_test)
8159 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8161 if (TARGET_CMOVE)
8163 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8164 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8165 tmp);
8166 emit_insn (tmp);
8168 else
8170 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8171 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8172 if (!scratch)
8173 scratch = gen_reg_rtx (HImode);
8174 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8175 emit_insn (gen_x86_sahf_1 (scratch));
8178 /* The FP codes work out to act like unsigned. */
8179 intcmp_mode = fpcmp_mode;
8180 code = first_code;
8181 if (bypass_code != UNKNOWN)
8182 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8183 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8184 const0_rtx);
8185 if (second_code != UNKNOWN)
8186 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8187 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8188 const0_rtx);
8190 else
8192 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8193 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8194 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8195 if (!scratch)
8196 scratch = gen_reg_rtx (HImode);
8197 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8199 /* In the unordered case, we have to check C2 for NaN's, which
8200 doesn't happen to work out to anything nice combination-wise.
8201 So do some bit twiddling on the value we've got in AH to come
8202 up with an appropriate set of condition codes. */
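/* After fnstsw, AH holds bits 8-15 of the status word: C0 is 0x01, C2 is
   0x04 and C3 is 0x40, so the 0x45 masks below select all three condition
   bits at once. */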
8204 intcmp_mode = CCNOmode;
8205 switch (code)
8207 case GT:
8208 case UNGT:
8209 if (code == GT || !TARGET_IEEE_FP)
8211 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8212 code = EQ;
8214 else
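/* UNGT under IEEE: the 0x45 mask leaves 0x00 for greater and 0x45 for
   unordered; adding -1 turns those into 0xff and 0x44, both GEU 0x44,
   while less (0x01) and equal (0x40) fall below it. */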
8216 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8217 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8218 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8219 intcmp_mode = CCmode;
8220 code = GEU;
8222 break;
8223 case LT:
8224 case UNLT:
8225 if (code == LT && TARGET_IEEE_FP)
8227 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8228 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8229 intcmp_mode = CCmode;
8230 code = EQ;
8232 else
8234 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8235 code = NE;
8237 break;
8238 case GE:
8239 case UNGE:
8240 if (code == GE || !TARGET_IEEE_FP)
8242 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8243 code = EQ;
8245 else
8247 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8248 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8249 GEN_INT (0x01)));
8250 code = NE;
8252 break;
8253 case LE:
8254 case UNLE:
8255 if (code == LE && TARGET_IEEE_FP)
8257 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8258 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8259 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8260 intcmp_mode = CCmode;
8261 code = LTU;
8263 else
8265 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8266 code = NE;
8268 break;
8269 case EQ:
8270 case UNEQ:
8271 if (code == EQ && TARGET_IEEE_FP)
8273 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8274 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8275 intcmp_mode = CCmode;
8276 code = EQ;
8278 else
8280 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8281 code = NE;
8282 break;
8284 break;
8285 case NE:
8286 case LTGT:
8287 if (code == NE && TARGET_IEEE_FP)
8289 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8290 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8291 GEN_INT (0x40)));
8292 code = NE;
8294 else
8296 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8297 code = EQ;
8299 break;
8301 case UNORDERED:
8302 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8303 code = NE;
8304 break;
8305 case ORDERED:
8306 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8307 code = EQ;
8308 break;
8310 default:
8311 abort ();
8315 /* Return the test that should be put into the flags user, i.e.
8316 the bcc, scc, or cmov instruction. */
8317 return gen_rtx_fmt_ee (code, VOIDmode,
8318 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8319 const0_rtx);
8323 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
8325 rtx op0, op1, ret;
8326 op0 = ix86_compare_op0;
8327 op1 = ix86_compare_op1;
8329 if (second_test)
8330 *second_test = NULL_RTX;
8331 if (bypass_test)
8332 *bypass_test = NULL_RTX;
8334 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8335 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8336 second_test, bypass_test);
8337 else
8338 ret = ix86_expand_int_compare (code, op0, op1);
8340 return ret;
8343 /* Return true if the CODE will result in nontrivial jump sequence. */
8344 bool
8345 ix86_fp_jump_nontrivial_p (enum rtx_code code)
8347 enum rtx_code bypass_code, first_code, second_code;
8348 if (!TARGET_CMOVE)
8349 return true;
8350 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8351 return bypass_code != UNKNOWN || second_code != UNKNOWN;
8354 void
8355 ix86_expand_branch (enum rtx_code code, rtx label)
8357 rtx tmp;
8359 switch (GET_MODE (ix86_compare_op0))
8361 case QImode:
8362 case HImode:
8363 case SImode:
8364 simple:
8365 tmp = ix86_expand_compare (code, NULL, NULL);
8366 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8367 gen_rtx_LABEL_REF (VOIDmode, label),
8368 pc_rtx);
8369 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8370 return;
8372 case SFmode:
8373 case DFmode:
8374 case XFmode:
8376 rtvec vec;
8377 int use_fcomi;
8378 enum rtx_code bypass_code, first_code, second_code;
8380 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8381 &ix86_compare_op1);
8383 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8385 /* Check whether we will use the natural sequence with one jump. If
8386 so, we can expand the jump early. Otherwise delay expansion by
8387 creating a compound insn so as not to confuse the optimizers. */
8388 if (bypass_code == UNKNOWN && second_code == UNKNOWN
8389 && TARGET_CMOVE)
8391 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8392 gen_rtx_LABEL_REF (VOIDmode, label),
8393 pc_rtx, NULL_RTX);
8395 else
8397 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8398 ix86_compare_op0, ix86_compare_op1);
8399 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8400 gen_rtx_LABEL_REF (VOIDmode, label),
8401 pc_rtx);
8402 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8404 use_fcomi = ix86_use_fcomi_compare (code);
8405 vec = rtvec_alloc (3 + !use_fcomi);
8406 RTVEC_ELT (vec, 0) = tmp;
8407 RTVEC_ELT (vec, 1)
8408 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8409 RTVEC_ELT (vec, 2)
8410 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8411 if (! use_fcomi)
8412 RTVEC_ELT (vec, 3)
8413 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8415 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8417 return;
8420 case DImode:
8421 if (TARGET_64BIT)
8422 goto simple;
8423 /* Expand DImode branch into multiple compare+branch. */
8425 rtx lo[2], hi[2], label2;
8426 enum rtx_code code1, code2, code3;
8428 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8430 tmp = ix86_compare_op0;
8431 ix86_compare_op0 = ix86_compare_op1;
8432 ix86_compare_op1 = tmp;
8433 code = swap_condition (code);
8435 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8436 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8438 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8439 avoid two branches. This costs one extra insn, so disable when
8440 optimizing for size. */
8442 if ((code == EQ || code == NE)
8443 && (!optimize_size
8444 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8446 rtx xor0, xor1;
8448 xor1 = hi[0];
8449 if (hi[1] != const0_rtx)
8450 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8451 NULL_RTX, 0, OPTAB_WIDEN);
8453 xor0 = lo[0];
8454 if (lo[1] != const0_rtx)
8455 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8456 NULL_RTX, 0, OPTAB_WIDEN);
8458 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8459 NULL_RTX, 0, OPTAB_WIDEN);
8461 ix86_compare_op0 = tmp;
8462 ix86_compare_op1 = const0_rtx;
8463 ix86_expand_branch (code, label);
8464 return;
8467 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8468 op1 is a constant and the low word is zero, then we can just
8469 examine the high word. */
8471 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8472 switch (code)
8474 case LT: case LTU: case GE: case GEU:
8475 ix86_compare_op0 = hi[0];
8476 ix86_compare_op1 = hi[1];
8477 ix86_expand_branch (code, label);
8478 return;
8479 default:
8480 break;
8483 /* Otherwise, we need two or three jumps. */
8485 label2 = gen_label_rtx ();
8487 code1 = code;
8488 code2 = swap_condition (code);
8489 code3 = unsigned_condition (code);
8491 switch (code)
8493 case LT: case GT: case LTU: case GTU:
8494 break;
8496 case LE: code1 = LT; code2 = GT; break;
8497 case GE: code1 = GT; code2 = LT; break;
8498 case LEU: code1 = LTU; code2 = GTU; break;
8499 case GEU: code1 = GTU; code2 = LTU; break;
8501 case EQ: code1 = UNKNOWN; code2 = NE; break;
8502 case NE: code2 = UNKNOWN; break;
8504 default:
8505 abort ();
8509 * a < b =>
8510 * if (hi(a) < hi(b)) goto true;
8511 * if (hi(a) > hi(b)) goto false;
8512 * if (lo(a) < lo(b)) goto true;
8513 * false:
8516 ix86_compare_op0 = hi[0];
8517 ix86_compare_op1 = hi[1];
8519 if (code1 != UNKNOWN)
8520 ix86_expand_branch (code1, label);
8521 if (code2 != UNKNOWN)
8522 ix86_expand_branch (code2, label2);
8524 ix86_compare_op0 = lo[0];
8525 ix86_compare_op1 = lo[1];
8526 ix86_expand_branch (code3, label);
8528 if (code2 != UNKNOWN)
8529 emit_label (label2);
8530 return;
8533 default:
8534 abort ();
8538 /* Split branch based on floating point condition. */
8539 void
8540 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
8541 rtx target1, rtx target2, rtx tmp)
8543 rtx second, bypass;
8544 rtx label = NULL_RTX;
8545 rtx condition;
8546 int bypass_probability = -1, second_probability = -1, probability = -1;
8547 rtx i;
8549 if (target2 != pc_rtx)
8551 rtx tmp = target2;
8552 code = reverse_condition_maybe_unordered (code);
8553 target2 = target1;
8554 target1 = tmp;
8557 condition = ix86_expand_fp_compare (code, op1, op2,
8558 tmp, &second, &bypass);
8560 if (split_branch_probability >= 0)
8562 /* Distribute the probabilities across the jumps.
8563 Assume that the BYPASS and SECOND branches always test
8564 for UNORDERED. */
8565 probability = split_branch_probability;
8567 /* A value of 1 is low enough that the probability does not
8568 need to be updated. Later we may run some experiments and see
8569 whether unordered values are more frequent in practice. */
8570 if (bypass)
8571 bypass_probability = 1;
8572 if (second)
8573 second_probability = 1;
8575 if (bypass != NULL_RTX)
8577 label = gen_label_rtx ();
8578 i = emit_jump_insn (gen_rtx_SET
8579 (VOIDmode, pc_rtx,
8580 gen_rtx_IF_THEN_ELSE (VOIDmode,
8581 bypass,
8582 gen_rtx_LABEL_REF (VOIDmode,
8583 label),
8584 pc_rtx)));
8585 if (bypass_probability >= 0)
8586 REG_NOTES (i)
8587 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8588 GEN_INT (bypass_probability),
8589 REG_NOTES (i));
8591 i = emit_jump_insn (gen_rtx_SET
8592 (VOIDmode, pc_rtx,
8593 gen_rtx_IF_THEN_ELSE (VOIDmode,
8594 condition, target1, target2)));
8595 if (probability >= 0)
8596 REG_NOTES (i)
8597 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8598 GEN_INT (probability),
8599 REG_NOTES (i));
8600 if (second != NULL_RTX)
8602 i = emit_jump_insn (gen_rtx_SET
8603 (VOIDmode, pc_rtx,
8604 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8605 target2)));
8606 if (second_probability >= 0)
8607 REG_NOTES (i)
8608 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8609 GEN_INT (second_probability),
8610 REG_NOTES (i));
8612 if (label != NULL_RTX)
8613 emit_label (label);
8617 ix86_expand_setcc (enum rtx_code code, rtx dest)
8619 rtx ret, tmp, tmpreg, equiv;
8620 rtx second_test, bypass_test;
8622 if (GET_MODE (ix86_compare_op0) == DImode
8623 && !TARGET_64BIT)
8624 return 0; /* FAIL */
8626 if (GET_MODE (dest) != QImode)
8627 abort ();
8629 ret = ix86_expand_compare (code, &second_test, &bypass_test);
8630 PUT_MODE (ret, QImode);
8632 tmp = dest;
8633 tmpreg = dest;
8635 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
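/* A bypass test guards the UNORDERED case that the main test misses, so its
   reversed result is ANDed in below; a second test covers an additional
   case and is simply ORed in. */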
8636 if (bypass_test || second_test)
8638 rtx test = second_test;
8639 int bypass = 0;
8640 rtx tmp2 = gen_reg_rtx (QImode);
8641 if (bypass_test)
8643 if (second_test)
8644 abort ();
8645 test = bypass_test;
8646 bypass = 1;
8647 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8649 PUT_MODE (test, QImode);
8650 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8652 if (bypass)
8653 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8654 else
8655 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8658 /* Attach a REG_EQUAL note describing the comparison result. */
8659 equiv = simplify_gen_relational (code, QImode,
8660 GET_MODE (ix86_compare_op0),
8661 ix86_compare_op0, ix86_compare_op1);
8662 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
8664 return 1; /* DONE */
8667 /* Expand comparison setting or clearing carry flag. Return true when
8668 successful and set pop for the operation. */
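/* Only LTU/GEU style results are produced here, since the consumers
   (sbb/adc based sequences) read nothing but the carry flag. */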
8669 static bool
8670 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
8672 enum machine_mode mode =
8673 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
8675 /* Do not handle DImode compares that go through the special path. Also we can't
8676 deal with FP compares yet; it would be possible to add them. */
8677 if ((mode == DImode && !TARGET_64BIT))
8678 return false;
8679 if (FLOAT_MODE_P (mode))
8681 rtx second_test = NULL, bypass_test = NULL;
8682 rtx compare_op, compare_seq;
8684 /* Shortcut: the following common codes never translate into carry flag compares. */
8685 if (code == EQ || code == NE || code == UNEQ || code == LTGT
8686 || code == ORDERED || code == UNORDERED)
8687 return false;
8689 /* These comparisons require the zero flag; swap operands so they don't. */
8690 if ((code == GT || code == UNLE || code == LE || code == UNGT)
8691 && !TARGET_IEEE_FP)
8693 rtx tmp = op0;
8694 op0 = op1;
8695 op1 = tmp;
8696 code = swap_condition (code);
8699 /* Try to expand the comparison and verify that we end up with a carry flag
8700 based comparison. This fails to be true only when we decide to expand the
8701 comparison using arithmetic, which is not a common scenario. */
8702 start_sequence ();
8703 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8704 &second_test, &bypass_test);
8705 compare_seq = get_insns ();
8706 end_sequence ();
8708 if (second_test || bypass_test)
8709 return false;
8710 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
8711 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
8712 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
8713 else
8714 code = GET_CODE (compare_op);
8715 if (code != LTU && code != GEU)
8716 return false;
8717 emit_insn (compare_seq);
8718 *pop = compare_op;
8719 return true;
8721 if (!INTEGRAL_MODE_P (mode))
8722 return false;
8723 switch (code)
8725 case LTU:
8726 case GEU:
8727 break;
8729 /* Convert a==0 into (unsigned)a<1. */
8730 case EQ:
8731 case NE:
8732 if (op1 != const0_rtx)
8733 return false;
8734 op1 = const1_rtx;
8735 code = (code == EQ ? LTU : GEU);
8736 break;
8738 /* Convert a>b into b<a or a>=b+1. */
8739 case GTU:
8740 case LEU:
8741 if (GET_CODE (op1) == CONST_INT)
8743 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
8744 /* Bail out on overflow. We could still swap the operands, but that
8745 would force loading the constant into a register. */
8746 if (op1 == const0_rtx
8747 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
8748 return false;
8749 code = (code == GTU ? GEU : LTU);
8751 else
8753 rtx tmp = op1;
8754 op1 = op0;
8755 op0 = tmp;
8756 code = (code == GTU ? LTU : GEU);
8758 break;
8760 /* Convert a>=0 into (unsigned)a<0x80000000. */
8761 case LT:
8762 case GE:
8763 if (mode == DImode || op1 != const0_rtx)
8764 return false;
8765 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
8766 code = (code == LT ? GEU : LTU);
8767 break;
8768 case LE:
8769 case GT:
8770 if (mode == DImode || op1 != constm1_rtx)
8771 return false;
8772 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
8773 code = (code == LE ? GEU : LTU);
8774 break;
8776 default:
8777 return false;
8779 /* Swapping operands may cause the constant to appear as the first operand. */
8780 if (!nonimmediate_operand (op0, VOIDmode))
8782 if (no_new_pseudos)
8783 return false;
8784 op0 = force_reg (mode, op0);
8786 ix86_compare_op0 = op0;
8787 ix86_compare_op1 = op1;
8788 *pop = ix86_expand_compare (code, NULL, NULL);
8789 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
8790 abort ();
8791 return true;
8795 ix86_expand_int_movcc (rtx operands[])
8797 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8798 rtx compare_seq, compare_op;
8799 rtx second_test, bypass_test;
8800 enum machine_mode mode = GET_MODE (operands[0]);
8801 bool sign_bit_compare_p = false;
8803 start_sequence ();
8804 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8805 compare_seq = get_insns ();
8806 end_sequence ();
8808 compare_code = GET_CODE (compare_op);
8810 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
8811 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
8812 sign_bit_compare_p = true;
8814 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8815 HImode insns, we'd be swallowed in word prefix ops. */
8817 if ((mode != HImode || TARGET_FAST_PREFIX)
8818 && (mode != DImode || TARGET_64BIT)
8819 && GET_CODE (operands[2]) == CONST_INT
8820 && GET_CODE (operands[3]) == CONST_INT)
8822 rtx out = operands[0];
8823 HOST_WIDE_INT ct = INTVAL (operands[2]);
8824 HOST_WIDE_INT cf = INTVAL (operands[3]);
8825 HOST_WIDE_INT diff;
8827 diff = ct - cf;
8828 /* Sign bit compares are better done using shifts than by using
8829 sbb. */
8830 if (sign_bit_compare_p
8831 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
8832 ix86_compare_op1, &compare_op))
8834 /* Detect overlap between destination and compare sources. */
8835 rtx tmp = out;
8837 if (!sign_bit_compare_p)
8839 bool fpcmp = false;
8841 compare_code = GET_CODE (compare_op);
8843 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
8844 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
8846 fpcmp = true;
8847 compare_code = ix86_fp_compare_code_to_integer (compare_code);
8850 /* To simplify the rest of the code, restrict to the GEU case. */
8851 if (compare_code == LTU)
8853 HOST_WIDE_INT tmp = ct;
8854 ct = cf;
8855 cf = tmp;
8856 compare_code = reverse_condition (compare_code);
8857 code = reverse_condition (code);
8859 else
8861 if (fpcmp)
8862 PUT_CODE (compare_op,
8863 reverse_condition_maybe_unordered
8864 (GET_CODE (compare_op)));
8865 else
8866 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
8868 diff = ct - cf;
8870 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
8871 || reg_overlap_mentioned_p (out, ix86_compare_op1))
8872 tmp = gen_reg_rtx (mode);
8874 if (mode == DImode)
8875 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
8876 else
8877 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
8879 else
8881 if (code == GT || code == GE)
8882 code = reverse_condition (code);
8883 else
8885 HOST_WIDE_INT tmp = ct;
8886 ct = cf;
8887 cf = tmp;
8888 diff = ct - cf;
8890 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
8891 ix86_compare_op1, VOIDmode, 0, -1);
8894 if (diff == 1)
8897 * cmpl op0,op1
8898 * sbbl dest,dest
8899 * [addl dest, ct]
8901 * Size 5 - 8.
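* (sbb leaves -1 or 0 in dest; with ct == cf + 1, adding ct then
* yields cf or ct respectively.)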
8903 if (ct)
8904 tmp = expand_simple_binop (mode, PLUS,
8905 tmp, GEN_INT (ct),
8906 copy_rtx (tmp), 1, OPTAB_DIRECT);
8908 else if (cf == -1)
8911 * cmpl op0,op1
8912 * sbbl dest,dest
8913 * orl $ct, dest
8915 * Size 8.
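* (dest is -1 or 0; -1 | ct == -1 == cf and 0 | ct == ct, so no
* final add is needed.)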
8917 tmp = expand_simple_binop (mode, IOR,
8918 tmp, GEN_INT (ct),
8919 copy_rtx (tmp), 1, OPTAB_DIRECT);
8921 else if (diff == -1 && ct)
8924 * cmpl op0,op1
8925 * sbbl dest,dest
8926 * notl dest
8927 * [addl dest, cf]
8929 * Size 8 - 11.
8931 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
8932 if (cf)
8933 tmp = expand_simple_binop (mode, PLUS,
8934 copy_rtx (tmp), GEN_INT (cf),
8935 copy_rtx (tmp), 1, OPTAB_DIRECT);
8937 else
8940 * cmpl op0,op1
8941 * sbbl dest,dest
8942 * [notl dest]
8943 * andl cf - ct, dest
8944 * [addl dest, ct]
8946 * Size 8 - 11.
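* (dest is 0 or -1; AND with cf - ct leaves 0 or cf - ct, and adding
* ct then yields ct or cf.)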
8949 if (cf == 0)
8951 cf = ct;
8952 ct = 0;
8953 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
8956 tmp = expand_simple_binop (mode, AND,
8957 copy_rtx (tmp),
8958 gen_int_mode (cf - ct, mode),
8959 copy_rtx (tmp), 1, OPTAB_DIRECT);
8960 if (ct)
8961 tmp = expand_simple_binop (mode, PLUS,
8962 copy_rtx (tmp), GEN_INT (ct),
8963 copy_rtx (tmp), 1, OPTAB_DIRECT);
8966 if (!rtx_equal_p (tmp, out))
8967 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
8969 return 1; /* DONE */
8972 if (diff < 0)
8974 HOST_WIDE_INT tmp;
8975 tmp = ct, ct = cf, cf = tmp;
8976 diff = -diff;
8977 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8979 /* We may be reversing an unordered compare to a normal compare, which
8980 is not valid in general (we may convert a non-trapping condition
8981 to a trapping one); however, on i386 we currently emit all
8982 comparisons unordered. */
8983 compare_code = reverse_condition_maybe_unordered (compare_code);
8984 code = reverse_condition_maybe_unordered (code);
8986 else
8988 compare_code = reverse_condition (compare_code);
8989 code = reverse_condition (code);
8993 compare_code = UNKNOWN;
8994 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
8995 && GET_CODE (ix86_compare_op1) == CONST_INT)
8997 if (ix86_compare_op1 == const0_rtx
8998 && (code == LT || code == GE))
8999 compare_code = code;
9000 else if (ix86_compare_op1 == constm1_rtx)
9002 if (code == LE)
9003 compare_code = LT;
9004 else if (code == GT)
9005 compare_code = GE;
9009 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9010 if (compare_code != UNKNOWN
9011 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9012 && (cf == -1 || ct == -1))
9014 /* If lea code below could be used, only optimize
9015 if it results in a 2 insn sequence. */
9017 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9018 || diff == 3 || diff == 5 || diff == 9)
9019 || (compare_code == LT && ct == -1)
9020 || (compare_code == GE && cf == -1))
9023 * notl op1 (if necessary)
9024 * sarl $31, op1
9025 * orl cf, op1
9027 if (ct != -1)
9029 cf = ct;
9030 ct = -1;
9031 code = reverse_condition (code);
9034 out = emit_store_flag (out, code, ix86_compare_op0,
9035 ix86_compare_op1, VOIDmode, 0, -1);
9037 out = expand_simple_binop (mode, IOR,
9038 out, GEN_INT (cf),
9039 out, 1, OPTAB_DIRECT);
9040 if (out != operands[0])
9041 emit_move_insn (operands[0], out);
9043 return 1; /* DONE */
9048 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9049 || diff == 3 || diff == 5 || diff == 9)
9050 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9051 && (mode != DImode
9052 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
9055 * xorl dest,dest
9056 * cmpl op1,op2
9057 * setcc dest
9058 * lea cf(dest*(ct-cf)),dest
9060 * Size 14.
9062 * This also catches the degenerate setcc-only case.
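* (setcc leaves 0 or 1 in dest, so dest * (ct - cf) + cf evaluates to
* cf or ct; diff has already been made non-negative above.)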
9065 rtx tmp;
9066 int nops;
9068 out = emit_store_flag (out, code, ix86_compare_op0,
9069 ix86_compare_op1, VOIDmode, 0, 1);
9071 nops = 0;
9072 /* On x86_64 the lea instruction operates on Pmode, so we need
9073 to get the arithmetic done in the proper mode to match. */
9074 if (diff == 1)
9075 tmp = copy_rtx (out);
9076 else
9078 rtx out1;
9079 out1 = copy_rtx (out);
9080 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9081 nops++;
9082 if (diff & 1)
9084 tmp = gen_rtx_PLUS (mode, tmp, out1);
9085 nops++;
9088 if (cf != 0)
9090 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9091 nops++;
9093 if (!rtx_equal_p (tmp, out))
9095 if (nops == 1)
9096 out = force_operand (tmp, copy_rtx (out));
9097 else
9098 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9100 if (!rtx_equal_p (out, operands[0]))
9101 emit_move_insn (operands[0], copy_rtx (out));
9103 return 1; /* DONE */
9107 * General case: Jumpful:
9108 * xorl dest,dest cmpl op1, op2
9109 * cmpl op1, op2 movl ct, dest
9110 * setcc dest jcc 1f
9111 * decl dest movl cf, dest
9112 * andl (cf-ct),dest 1:
9113 * addl ct,dest
9115 * Size 20. Size 14.
9117 * This is reasonably steep, but branch mispredict costs are
9118 * high on modern cpus, so consider failing only if optimizing
9119 * for space.
9122 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9123 && BRANCH_COST >= 2)
9125 if (cf == 0)
9127 cf = ct;
9128 ct = 0;
9129 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9130 /* We may be reversing an unordered compare to a normal compare,
9131 which is not valid in general (we may convert a non-trapping
9132 condition to a trapping one); however, on i386 we currently
9133 emit all comparisons unordered. */
9134 code = reverse_condition_maybe_unordered (code);
9135 else
9137 code = reverse_condition (code);
9138 if (compare_code != UNKNOWN)
9139 compare_code = reverse_condition (compare_code);
9143 if (compare_code != UNKNOWN)
9145 /* notl op1 (if needed)
9146 sarl $31, op1
9147 andl (cf-ct), op1
9148 addl ct, op1
9150 For x < 0 (resp. x <= -1) there will be no notl,
9151 so if possible swap the constants to get rid of the
9152 complement.
9153 True/false will be -1/0 while code below (store flag
9154 followed by decrement) is 0/-1, so the constants need
9155 to be exchanged once more. */
9157 if (compare_code == GE || !cf)
9159 code = reverse_condition (code);
9160 compare_code = LT;
9162 else
9164 HOST_WIDE_INT tmp = cf;
9165 cf = ct;
9166 ct = tmp;
9169 out = emit_store_flag (out, code, ix86_compare_op0,
9170 ix86_compare_op1, VOIDmode, 0, -1);
9172 else
9174 out = emit_store_flag (out, code, ix86_compare_op0,
9175 ix86_compare_op1, VOIDmode, 0, 1);
9177 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9178 copy_rtx (out), 1, OPTAB_DIRECT);
9181 out = expand_simple_binop (mode, AND, copy_rtx (out),
9182 gen_int_mode (cf - ct, mode),
9183 copy_rtx (out), 1, OPTAB_DIRECT);
9184 if (ct)
9185 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9186 copy_rtx (out), 1, OPTAB_DIRECT);
9187 if (!rtx_equal_p (out, operands[0]))
9188 emit_move_insn (operands[0], copy_rtx (out));
9190 return 1; /* DONE */
9194 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9197 /* Try a few more things with specific constants and a variable. */
9198 optab op;
9199 rtx var, orig_out, out, tmp;
9201 if (BRANCH_COST <= 2)
9202 return 0; /* FAIL */
9204 /* If one of the two operands is an interesting constant, load a
9205 constant with the above and mask it in with a logical operation. */
9207 if (GET_CODE (operands[2]) == CONST_INT)
9209 var = operands[3];
9210 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
9211 operands[3] = constm1_rtx, op = and_optab;
9212 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
9213 operands[3] = const0_rtx, op = ior_optab;
9214 else
9215 return 0; /* FAIL */
9217 else if (GET_CODE (operands[3]) == CONST_INT)
9219 var = operands[2];
9220 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
9221 operands[2] = constm1_rtx, op = and_optab;
9222 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
9223 operands[2] = const0_rtx, op = ior_optab;
9224 else
9225 return 0; /* FAIL */
9227 else
9228 return 0; /* FAIL */
9230 orig_out = operands[0];
9231 tmp = gen_reg_rtx (mode);
9232 operands[0] = tmp;
9234 /* Recurse to get the constant loaded. */
9235 if (ix86_expand_int_movcc (operands) == 0)
9236 return 0; /* FAIL */
9238 /* Mask in the interesting variable. */
9239 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9240 OPTAB_WIDEN);
9241 if (!rtx_equal_p (out, orig_out))
9242 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
9244 return 1; /* DONE */
9248 * For comparison with above,
9250 * movl cf,dest
9251 * movl ct,tmp
9252 * cmpl op1,op2
9253 * cmovcc tmp,dest
9255 * Size 15.
9258 if (! nonimmediate_operand (operands[2], mode))
9259 operands[2] = force_reg (mode, operands[2]);
9260 if (! nonimmediate_operand (operands[3], mode))
9261 operands[3] = force_reg (mode, operands[3]);
9263 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9265 rtx tmp = gen_reg_rtx (mode);
9266 emit_move_insn (tmp, operands[3]);
9267 operands[3] = tmp;
9269 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9271 rtx tmp = gen_reg_rtx (mode);
9272 emit_move_insn (tmp, operands[2]);
9273 operands[2] = tmp;
9276 if (! register_operand (operands[2], VOIDmode)
9277 && (mode == QImode
9278 || ! register_operand (operands[3], VOIDmode)))
9279 operands[2] = force_reg (mode, operands[2]);
9281 if (mode == QImode
9282 && ! register_operand (operands[3], VOIDmode))
9283 operands[3] = force_reg (mode, operands[3]);
9285 emit_insn (compare_seq);
9286 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9287 gen_rtx_IF_THEN_ELSE (mode,
9288 compare_op, operands[2],
9289 operands[3])));
9290 if (bypass_test)
9291 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9292 gen_rtx_IF_THEN_ELSE (mode,
9293 bypass_test,
9294 copy_rtx (operands[3]),
9295 copy_rtx (operands[0]))));
9296 if (second_test)
9297 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9298 gen_rtx_IF_THEN_ELSE (mode,
9299 second_test,
9300 copy_rtx (operands[2]),
9301 copy_rtx (operands[0]))));
9303 return 1; /* DONE */
9307 ix86_expand_fp_movcc (rtx operands[])
9309 enum rtx_code code;
9310 rtx tmp;
9311 rtx compare_op, second_test, bypass_test;
9313 /* For SF/DFmode conditional moves based on comparisons
9314 in same mode, we may want to use SSE min/max instructions. */
9315 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9316 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9317 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9318 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
9319 && (!TARGET_IEEE_FP
9320 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9321 /* We may be called from the post-reload splitter. */
9322 && (!REG_P (operands[0])
9323 || SSE_REG_P (operands[0])
9324 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9326 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9327 code = GET_CODE (operands[1]);
9329 /* See if we have (cross) match between comparison operands and
9330 conditional move operands. */
9331 if (rtx_equal_p (operands[2], op1))
9333 rtx tmp = op0;
9334 op0 = op1;
9335 op1 = tmp;
9336 code = reverse_condition_maybe_unordered (code);
9338 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9340 /* Check for min operation. */
9341 if (code == LT || code == UNLE)
9343 if (code == UNLE)
9345 rtx tmp = op0;
9346 op0 = op1;
9347 op1 = tmp;
9349 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9350 if (memory_operand (op0, VOIDmode))
9351 op0 = force_reg (GET_MODE (operands[0]), op0);
9352 if (GET_MODE (operands[0]) == SFmode)
9353 emit_insn (gen_minsf3 (operands[0], op0, op1));
9354 else
9355 emit_insn (gen_mindf3 (operands[0], op0, op1));
9356 return 1;
9358 /* Check for max operation. */
9359 if (code == GT || code == UNGE)
9361 if (code == UNGE)
9363 rtx tmp = op0;
9364 op0 = op1;
9365 op1 = tmp;
9367 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9368 if (memory_operand (op0, VOIDmode))
9369 op0 = force_reg (GET_MODE (operands[0]), op0);
9370 if (GET_MODE (operands[0]) == SFmode)
9371 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9372 else
9373 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9374 return 1;
9377 /* Arrange for the condition to be an sse_comparison_operator. In case we are
9378 in non-IEEE mode, try to canonicalize the destination operand
9379 to be first in the comparison - this helps reload avoid extra
9380 moves. */
9381 if (!sse_comparison_operator (operands[1], VOIDmode)
9382 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9384 rtx tmp = ix86_compare_op0;
9385 ix86_compare_op0 = ix86_compare_op1;
9386 ix86_compare_op1 = tmp;
9387 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9388 VOIDmode, ix86_compare_op0,
9389 ix86_compare_op1);
9391 /* Similarly, try to arrange for the result to be the first operand of the
9392 conditional move. We also don't support the NE comparison on SSE, so try to
9393 avoid it. */
9394 if ((rtx_equal_p (operands[0], operands[3])
9395 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9396 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9398 rtx tmp = operands[2];
9399 operands[2] = operands[3];
9400 operands[3] = tmp;
9401 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9402 (GET_CODE (operands[1])),
9403 VOIDmode, ix86_compare_op0,
9404 ix86_compare_op1);
9406 if (GET_MODE (operands[0]) == SFmode)
9407 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9408 operands[2], operands[3],
9409 ix86_compare_op0, ix86_compare_op1));
9410 else
9411 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9412 operands[2], operands[3],
9413 ix86_compare_op0, ix86_compare_op1));
9414 return 1;
9417 /* The floating point conditional move instructions don't directly
9418 support conditions resulting from a signed integer comparison. */
9420 code = GET_CODE (operands[1]);
9421 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9423 /* The floating point conditional move instructions don't directly
9424 support signed integer comparisons. */
9426 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9428 if (second_test != NULL || bypass_test != NULL)
9429 abort ();
9430 tmp = gen_reg_rtx (QImode);
9431 ix86_expand_setcc (code, tmp);
9432 code = NE;
9433 ix86_compare_op0 = tmp;
9434 ix86_compare_op1 = const0_rtx;
9435 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9437 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9439 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9440 emit_move_insn (tmp, operands[3]);
9441 operands[3] = tmp;
9443 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9445 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9446 emit_move_insn (tmp, operands[2]);
9447 operands[2] = tmp;
9450 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9451 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9452 compare_op,
9453 operands[2],
9454 operands[3])));
9455 if (bypass_test)
9456 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9457 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9458 bypass_test,
9459 operands[3],
9460 operands[0])));
9461 if (second_test)
9462 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9463 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9464 second_test,
9465 operands[2],
9466 operands[0])));
9468 return 1;
9471 /* Expand conditional increment or decrement using adc/sbb instructions.
9472 The default case using setcc followed by the conditional move can be
9473 done by generic code. */
9475 ix86_expand_int_addcc (rtx operands[])
9477 enum rtx_code code = GET_CODE (operands[1]);
9478 rtx compare_op;
9479 rtx val = const0_rtx;
9480 bool fpcmp = false;
9481 enum machine_mode mode = GET_MODE (operands[0]);
9483 if (operands[3] != const1_rtx
9484 && operands[3] != constm1_rtx)
9485 return 0;
9486 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9487 ix86_compare_op1, &compare_op))
9488 return 0;
9489 code = GET_CODE (compare_op);
9491 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9492 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9494 fpcmp = true;
9495 code = ix86_fp_compare_code_to_integer (code);
9498 if (code != LTU)
9500 val = constm1_rtx;
9501 if (fpcmp)
9502 PUT_CODE (compare_op,
9503 reverse_condition_maybe_unordered
9504 (GET_CODE (compare_op)));
9505 else
9506 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9508 PUT_MODE (compare_op, mode);
9510 /* Construct either adc or sbb insn. */
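/* At this point compare_op is always a carry-style (LTU-like) test and val
   is 0 or -1; the sbb patterns subtract val and the carry from operands[2]
   while the adc patterns add them, covering all four inc/dec combinations. */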
9511 if ((code == LTU) == (operands[3] == constm1_rtx))
9513 switch (GET_MODE (operands[0]))
9515 case QImode:
9516 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
9517 break;
9518 case HImode:
9519 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
9520 break;
9521 case SImode:
9522 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
9523 break;
9524 case DImode:
9525 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
9526 break;
9527 default:
9528 abort ();
9531 else
9533 switch (GET_MODE (operands[0]))
9535 case QImode:
9536 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
9537 break;
9538 case HImode:
9539 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
9540 break;
9541 case SImode:
9542 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
9543 break;
9544 case DImode:
9545 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
9546 break;
9547 default:
9548 abort ();
9551 return 1; /* DONE */
9555 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9556 works for floating point parameters and non-offsettable memories.
9557 For pushes, it returns just stack offsets; the values will be saved
9558 in the right order. At most three parts are generated. */
9560 static int
9561 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
9563 int size;
9565 if (!TARGET_64BIT)
9566 size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
9567 else
9568 size = (GET_MODE_SIZE (mode) + 4) / 8;
9570 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9571 abort ();
9572 if (size < 2 || size > 3)
9573 abort ();
9575 /* Optimize constant pool references to immediates. This is used by fp
9576 moves, which force all constants to memory to allow combining. */
9577 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
9579 rtx tmp = maybe_get_pool_constant (operand);
9580 if (tmp)
9581 operand = tmp;
9584 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9586 /* The only non-offsettable memories we handle are pushes. */
9587 if (! push_operand (operand, VOIDmode))
9588 abort ();
9590 operand = copy_rtx (operand);
9591 PUT_MODE (operand, Pmode);
9592 parts[0] = parts[1] = parts[2] = operand;
9594 else if (!TARGET_64BIT)
9596 if (mode == DImode)
9597 split_di (&operand, 1, &parts[0], &parts[1]);
9598 else
9600 if (REG_P (operand))
9602 if (!reload_completed)
9603 abort ();
9604 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9605 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9606 if (size == 3)
9607 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9609 else if (offsettable_memref_p (operand))
9611 operand = adjust_address (operand, SImode, 0);
9612 parts[0] = operand;
9613 parts[1] = adjust_address (operand, SImode, 4);
9614 if (size == 3)
9615 parts[2] = adjust_address (operand, SImode, 8);
9617 else if (GET_CODE (operand) == CONST_DOUBLE)
9619 REAL_VALUE_TYPE r;
9620 long l[4];
9622 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9623 switch (mode)
9625 case XFmode:
9626 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9627 parts[2] = gen_int_mode (l[2], SImode);
9628 break;
9629 case DFmode:
9630 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9631 break;
9632 default:
9633 abort ();
9635 parts[1] = gen_int_mode (l[1], SImode);
9636 parts[0] = gen_int_mode (l[0], SImode);
9638 else
9639 abort ();
9642 else
9644 if (mode == TImode)
9645 split_ti (&operand, 1, &parts[0], &parts[1]);
9646 if (mode == XFmode || mode == TFmode)
9648 enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
9649 if (REG_P (operand))
9651 if (!reload_completed)
9652 abort ();
9653 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9654 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
9656 else if (offsettable_memref_p (operand))
9658 operand = adjust_address (operand, DImode, 0);
9659 parts[0] = operand;
9660 parts[1] = adjust_address (operand, upper_mode, 8);
9662 else if (GET_CODE (operand) == CONST_DOUBLE)
9664 REAL_VALUE_TYPE r;
9665 long l[3];
9667 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9668 real_to_target (l, &r, mode);
9669 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9670 if (HOST_BITS_PER_WIDE_INT >= 64)
9671 parts[0]
9672 = gen_int_mode
9673 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
9674 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
9675 DImode);
9676 else
9677 parts[0] = immed_double_const (l[0], l[1], DImode);
9678 if (upper_mode == SImode)
9679 parts[1] = gen_int_mode (l[2], SImode);
9680 else if (HOST_BITS_PER_WIDE_INT >= 64)
9681 parts[1]
9682 = gen_int_mode
9683 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
9684 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
9685 DImode);
9686 else
9687 parts[1] = immed_double_const (l[2], l[3], DImode);
9689 else
9690 abort ();
9694 return size;
9697 /* Emit insns to perform a move or push of DI, DF, and XF values.
9698 All required insns are emitted directly; nothing is returned. Operands 2-4
9699 contain the input values in the correct order; operands 5-7 contain the
9700 output values. */
9702 void
9703 ix86_split_long_move (rtx operands[])
9705 rtx part[2][3];
9706 int nparts;
9707 int push = 0;
9708 int collisions = 0;
9709 enum machine_mode mode = GET_MODE (operands[0]);
9711 /* The DFmode expanders may ask us to move a double.
9712 For a 64-bit target this is a single move. By hiding the fact
9713 here we simplify the i386.md splitters. */
9714 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9716 /* Optimize constant pool references to immediates. This is used by
9717 fp moves, which force all constants to memory to allow combining. */
9719 if (GET_CODE (operands[1]) == MEM
9720 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9721 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9722 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9723 if (push_operand (operands[0], VOIDmode))
9725 operands[0] = copy_rtx (operands[0]);
9726 PUT_MODE (operands[0], Pmode);
9728 else
9729 operands[0] = gen_lowpart (DImode, operands[0]);
9730 operands[1] = gen_lowpart (DImode, operands[1]);
9731 emit_move_insn (operands[0], operands[1]);
9732 return;
9735 /* The only non-offsettable memory we handle is push. */
9736 if (push_operand (operands[0], VOIDmode))
9737 push = 1;
9738 else if (GET_CODE (operands[0]) == MEM
9739 && ! offsettable_memref_p (operands[0]))
9740 abort ();
9742 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9743 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
9745 /* When emitting a push, take care of source operands on the stack. */
9746 if (push && GET_CODE (operands[1]) == MEM
9747 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9749 if (nparts == 3)
9750 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9751 XEXP (part[1][2], 0));
9752 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9753 XEXP (part[1][1], 0));
9756 /* We need to do the copy in the right order in case an address register
9757 of the source overlaps the destination. */
9758 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9760 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9761 collisions++;
9762 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9763 collisions++;
9764 if (nparts == 3
9765 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9766 collisions++;
9768 /* Collision in the middle part can be handled by reordering. */
9769 if (collisions == 1 && nparts == 3
9770 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9772 rtx tmp;
9773 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9774 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9777 /* If there are more collisions, we can't handle them by reordering.
9778 Do an lea to the last part and use only one colliding move. */
9779 else if (collisions > 1)
9781 rtx base;
9783 collisions = 1;
9785 base = part[0][nparts - 1];
9787 /* Handle the case when the last part isn't valid for lea.
9788 Happens in 64-bit mode storing the 12-byte XFmode. */
9789 if (GET_MODE (base) != Pmode)
9790 base = gen_rtx_REG (Pmode, REGNO (base));
9792 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
9793 part[1][0] = replace_equiv_address (part[1][0], base);
9794 part[1][1] = replace_equiv_address (part[1][1],
9795 plus_constant (base, UNITS_PER_WORD));
9796 if (nparts == 3)
9797 part[1][2] = replace_equiv_address (part[1][2],
9798 plus_constant (base, 8));
9802 if (push)
9804 if (!TARGET_64BIT)
9806 if (nparts == 3)
9808 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
9809 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
9810 emit_move_insn (part[0][2], part[1][2]);
9813 else
9815 /* In 64-bit mode we don't have a 32-bit push available. If this is a
9816 register, that is OK - we will just use the larger counterpart. We also
9817 retype memory - this comes from an attempt to avoid the REX prefix when
9818 moving the second half of a TFmode value. */
9819 if (GET_MODE (part[1][1]) == SImode)
9821 if (GET_CODE (part[1][1]) == MEM)
9822 part[1][1] = adjust_address (part[1][1], DImode, 0);
9823 else if (REG_P (part[1][1]))
9824 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9825 else
9826 abort ();
9827 if (GET_MODE (part[1][0]) == SImode)
9828 part[1][0] = part[1][1];
9831 emit_move_insn (part[0][1], part[1][1]);
9832 emit_move_insn (part[0][0], part[1][0]);
9833 return;
9836 /* Choose the correct order so as not to overwrite the source before it is copied. */
9837 if ((REG_P (part[0][0])
9838 && REG_P (part[1][1])
9839 && (REGNO (part[0][0]) == REGNO (part[1][1])
9840 || (nparts == 3
9841 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9842 || (collisions > 0
9843 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
9845 if (nparts == 3)
9847 operands[2] = part[0][2];
9848 operands[3] = part[0][1];
9849 operands[4] = part[0][0];
9850 operands[5] = part[1][2];
9851 operands[6] = part[1][1];
9852 operands[7] = part[1][0];
9854 else
9856 operands[2] = part[0][1];
9857 operands[3] = part[0][0];
9858 operands[5] = part[1][1];
9859 operands[6] = part[1][0];
9862 else
9864 if (nparts == 3)
9866 operands[2] = part[0][0];
9867 operands[3] = part[0][1];
9868 operands[4] = part[0][2];
9869 operands[5] = part[1][0];
9870 operands[6] = part[1][1];
9871 operands[7] = part[1][2];
9873 else
9875 operands[2] = part[0][0];
9876 operands[3] = part[0][1];
9877 operands[5] = part[1][0];
9878 operands[6] = part[1][1];
9881 emit_move_insn (operands[2], operands[5]);
9882 emit_move_insn (operands[3], operands[6]);
9883 if (nparts == 3)
9884 emit_move_insn (operands[4], operands[7]);
9886 return;
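/* Split a DImode left shift by operands[2] into SImode operations on the
   {low, high} word pair produced by split_di.  A rough sketch of the
   constant-count case emitted below:

     if (count >= 32) { high = low << (count - 32); low = 0; }
     else             { high = shld (high, low, count); low <<= count; }

   so, for example, a constant shift left by 40 becomes
   "high = low << 8; low = 0".  */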
9889 void
9890 ix86_split_ashldi (rtx *operands, rtx scratch)
9892 rtx low[2], high[2];
9893 int count;
9895 if (GET_CODE (operands[2]) == CONST_INT)
9897 split_di (operands, 2, low, high);
9898 count = INTVAL (operands[2]) & 63;
9900 if (count >= 32)
9902 emit_move_insn (high[0], low[1]);
9903 emit_move_insn (low[0], const0_rtx);
9905 if (count > 32)
9906 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
9908 else
9910 if (!rtx_equal_p (operands[0], operands[1]))
9911 emit_move_insn (operands[0], operands[1]);
9912 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
9913 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
9916 else
9918 if (!rtx_equal_p (operands[0], operands[1]))
9919 emit_move_insn (operands[0], operands[1]);
9921 split_di (operands, 1, low, high);
9923 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
9924 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
9926 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9928 if (! no_new_pseudos)
9929 scratch = force_reg (SImode, const0_rtx);
9930 else
9931 emit_move_insn (scratch, const0_rtx);
9933 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
9934 scratch));
9936 else
9937 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
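/* Split a DImode arithmetic right shift by operands[2] into SImode
   operations.  For a constant count >= 32 the low word becomes the old high
   word shifted right by (count - 32) and the new high word is filled with
   copies of the sign bit (high >> 31); a count of 63 reduces to broadcasting
   the sign bit into both words.  */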
9941 void
9942 ix86_split_ashrdi (rtx *operands, rtx scratch)
9944 rtx low[2], high[2];
9945 int count;
9947 if (GET_CODE (operands[2]) == CONST_INT)
9949 split_di (operands, 2, low, high);
9950 count = INTVAL (operands[2]) & 63;
9952 if (count == 63)
9954 emit_move_insn (high[0], high[1]);
9955 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
9956 emit_move_insn (low[0], high[0]);
9959 else if (count >= 32)
9961 emit_move_insn (low[0], high[1]);
9963 if (! reload_completed)
9964 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
9965 else
9967 emit_move_insn (high[0], low[0]);
9968 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
9971 if (count > 32)
9972 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
9974 else
9976 if (!rtx_equal_p (operands[0], operands[1]))
9977 emit_move_insn (operands[0], operands[1]);
9978 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9979 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
9982 else
9984 if (!rtx_equal_p (operands[0], operands[1]))
9985 emit_move_insn (operands[0], operands[1]);
9987 split_di (operands, 1, low, high);
9989 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9990 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
9992 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9994 if (! no_new_pseudos)
9995 scratch = gen_reg_rtx (SImode);
9996 emit_move_insn (scratch, high[0]);
9997 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
9998 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9999 scratch));
10001 else
10002 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
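/* Split a DImode logical right shift by operands[2] into SImode operations;
   the structure mirrors ix86_split_ashldi, e.g. a constant shift right by 40
   becomes "low = high >> 8; high = 0".  */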
10006 void
10007 ix86_split_lshrdi (rtx *operands, rtx scratch)
10009 rtx low[2], high[2];
10010 int count;
10012 if (GET_CODE (operands[2]) == CONST_INT)
10014 split_di (operands, 2, low, high);
10015 count = INTVAL (operands[2]) & 63;
10017 if (count >= 32)
10019 emit_move_insn (low[0], high[1]);
10020 emit_move_insn (high[0], const0_rtx);
10022 if (count > 32)
10023 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10025 else
10027 if (!rtx_equal_p (operands[0], operands[1]))
10028 emit_move_insn (operands[0], operands[1]);
10029 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10030 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10033 else
10035 if (!rtx_equal_p (operands[0], operands[1]))
10036 emit_move_insn (operands[0], operands[1]);
10038 split_di (operands, 1, low, high);
10040 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10041 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10043 /* Heh. By reversing the arguments, we can reuse this pattern. */
10044 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10046 if (! no_new_pseudos)
10047 scratch = force_reg (SImode, const0_rtx);
10048 else
10049 emit_move_insn (scratch, const0_rtx);
10051 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10052 scratch));
10054 else
10055 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10059 /* Helper function for the string operations below. Test whether the bits of
10060 VARIABLE selected by the mask VALUE are zero; if so, jump to the returned label. */
10061 static rtx
10062 ix86_expand_aligntest (rtx variable, int value)
10064 rtx label = gen_label_rtx ();
10065 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10066 if (GET_MODE (variable) == DImode)
10067 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10068 else
10069 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10070 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10071 1, label);
10072 return label;
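/* Conceptually, the helper above emits the equivalent of

     if ((variable & value) == 0)
       goto label;            -- VARIABLE already aligned, skip the fixup

   and callers place the unaligned fixup code before emit_label (label).  */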
10075 /* Decrease COUNTREG by VALUE. */
10076 static void
10077 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
10079 if (GET_MODE (countreg) == DImode)
10080 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10081 else
10082 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10085 /* Zero extend possibly SImode EXP to Pmode register. */
10087 ix86_zero_extend_to_Pmode (rtx exp)
10089 rtx r;
10090 if (GET_MODE (exp) == VOIDmode)
10091 return force_reg (Pmode, exp);
10092 if (GET_MODE (exp) == Pmode)
10093 return copy_to_mode_reg (Pmode, exp);
10094 r = gen_reg_rtx (Pmode);
10095 emit_insn (gen_zero_extendsidi2 (r, exp));
10096 return r;
10099 /* Expand a string move (memcpy) operation. Use i386 string operations when
10100 profitable. ix86_expand_clrmem contains similar code. */
10102 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
10104 rtx srcreg, destreg, countreg, srcexp, destexp;
10105 enum machine_mode counter_mode;
10106 HOST_WIDE_INT align = 0;
10107 unsigned HOST_WIDE_INT count = 0;
10109 if (GET_CODE (align_exp) == CONST_INT)
10110 align = INTVAL (align_exp);
10112 /* Can't use any of this if the user has appropriated esi or edi. */
10113 if (global_regs[4] || global_regs[5])
10114 return 0;
10116 /* This simple hack avoids all inlining code and simplifies code below. */
10117 if (!TARGET_ALIGN_STRINGOPS)
10118 align = 64;
10120 if (GET_CODE (count_exp) == CONST_INT)
10122 count = INTVAL (count_exp);
10123 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10124 return 0;
10127 /* Figure out the proper mode for the counter. For 32-bit targets it is always
10128 SImode; for 64-bit targets use SImode when possible, otherwise DImode.
10129 COUNT is set to the number of bytes copied when known at compile time. */
10130 if (!TARGET_64BIT
10131 || GET_MODE (count_exp) == SImode
10132 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
10133 counter_mode = SImode;
10134 else
10135 counter_mode = DImode;
10137 if (counter_mode != SImode && counter_mode != DImode)
10138 abort ();
10140 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10141 if (destreg != XEXP (dst, 0))
10142 dst = replace_equiv_address_nv (dst, destreg);
10143 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10144 if (srcreg != XEXP (src, 0))
10145 src = replace_equiv_address_nv (src, srcreg);
10147 /* When optimizing for size, emit a simple rep ; movsb instruction for
10148 counts not divisible by 4. */
10150 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10152 emit_insn (gen_cld ());
10153 countreg = ix86_zero_extend_to_Pmode (count_exp);
10154 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
10155 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
10156 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
10157 destexp, srcexp));
10160 /* For constant, aligned (or small unaligned) copies use rep movsl
10161 followed by code copying the rest. For PentiumPro ensure 8-byte
10162 alignment to allow rep movsl acceleration. */
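/* For instance (illustrative only), a 4-byte-aligned copy of 10 bytes on a
   32-bit target expands roughly to

     movl $2, %ecx
     rep ; movsl        -- copy 8 bytes, 4 at a time
     movsw              -- copy the remaining 2 bytes

   with the tail handled by the (count & 2) and (count & 1) cases below.  */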
10164 else if (count != 0
10165 && (align >= 8
10166 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10167 || optimize_size || count < (unsigned int) 64))
10169 unsigned HOST_WIDE_INT offset = 0;
10170 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10171 rtx srcmem, dstmem;
10173 emit_insn (gen_cld ());
10174 if (count & ~(size - 1))
10176 countreg = copy_to_mode_reg (counter_mode,
10177 GEN_INT ((count >> (size == 4 ? 2 : 3))
10178 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10179 countreg = ix86_zero_extend_to_Pmode (countreg);
10181 destexp = gen_rtx_ASHIFT (Pmode, countreg,
10182 GEN_INT (size == 4 ? 2 : 3));
10183 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
10184 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10186 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
10187 countreg, destexp, srcexp));
10188 offset = count & ~(size - 1);
10190 if (size == 8 && (count & 0x04))
10192 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
10193 offset);
10194 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
10195 offset);
10196 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10197 offset += 4;
10199 if (count & 0x02)
10201 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
10202 offset);
10203 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
10204 offset);
10205 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10206 offset += 2;
10208 if (count & 0x01)
10210 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
10211 offset);
10212 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
10213 offset);
10214 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10217 /* The generic code, based on the glibc implementation:
10218 - align the destination to 4 bytes (8-byte alignment is used for PentiumPro,
10219 allowing accelerated copying there)
10220 - copy the data using rep movsl
10221 - copy the rest. */
10222 else
10224 rtx countreg2;
10225 rtx label = NULL;
10226 rtx srcmem, dstmem;
10227 int desired_alignment = (TARGET_PENTIUMPRO
10228 && (count == 0 || count >= (unsigned int) 260)
10229 ? 8 : UNITS_PER_WORD);
10230 /* Get rid of MEM_OFFSETs, they won't be accurate. */
10231 dst = change_address (dst, BLKmode, destreg);
10232 src = change_address (src, BLKmode, srcreg);
10234 /* In case we don't know anything about the alignment, default to
10235 the library version, since it is usually equally fast and results in
10236 shorter code.
10238 Also emit a library call when we know that the count is large and the call
10239 overhead will not be important. */
10240 if (!TARGET_INLINE_ALL_STRINGOPS
10241 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10242 return 0;
10244 if (TARGET_SINGLE_STRINGOP)
10245 emit_insn (gen_cld ());
10247 countreg2 = gen_reg_rtx (Pmode);
10248 countreg = copy_to_mode_reg (counter_mode, count_exp);
10250 /* We don't use loops to align the destination or to copy parts smaller
10251 than 4 bytes, because gcc is able to optimize such code better (in
10252 the case the destination or the count really is aligned, gcc is often
10253 able to predict the branches) and because it is friendlier to the
10254 hardware branch prediction.
10256 Using loops would be beneficial for the generic case, because we could
10257 handle small counts with them. Many CPUs (such as Athlon)
10258 have large REP prefix setup costs.
10260 This is quite costly. Maybe we can revisit this decision later or
10261 add some customizability to this code. */
10263 if (count == 0 && align < desired_alignment)
10265 label = gen_label_rtx ();
10266 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10267 LEU, 0, counter_mode, 1, label);
10269 if (align <= 1)
10271 rtx label = ix86_expand_aligntest (destreg, 1);
10272 srcmem = change_address (src, QImode, srcreg);
10273 dstmem = change_address (dst, QImode, destreg);
10274 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10275 ix86_adjust_counter (countreg, 1);
10276 emit_label (label);
10277 LABEL_NUSES (label) = 1;
10279 if (align <= 2)
10281 rtx label = ix86_expand_aligntest (destreg, 2);
10282 srcmem = change_address (src, HImode, srcreg);
10283 dstmem = change_address (dst, HImode, destreg);
10284 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10285 ix86_adjust_counter (countreg, 2);
10286 emit_label (label);
10287 LABEL_NUSES (label) = 1;
10289 if (align <= 4 && desired_alignment > 4)
10291 rtx label = ix86_expand_aligntest (destreg, 4);
10292 srcmem = change_address (src, SImode, srcreg);
10293 dstmem = change_address (dst, SImode, destreg);
10294 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10295 ix86_adjust_counter (countreg, 4);
10296 emit_label (label);
10297 LABEL_NUSES (label) = 1;
10300 if (label && desired_alignment > 4 && !TARGET_64BIT)
10302 emit_label (label);
10303 LABEL_NUSES (label) = 1;
10304 label = NULL_RTX;
10306 if (!TARGET_SINGLE_STRINGOP)
10307 emit_insn (gen_cld ());
10308 if (TARGET_64BIT)
10310 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10311 GEN_INT (3)));
10312 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
10314 else
10316 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
10317 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
10319 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
10320 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10321 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
10322 countreg2, destexp, srcexp));
10324 if (label)
10326 emit_label (label);
10327 LABEL_NUSES (label) = 1;
10329 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10331 srcmem = change_address (src, SImode, srcreg);
10332 dstmem = change_address (dst, SImode, destreg);
10333 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10335 if ((align <= 4 || count == 0) && TARGET_64BIT)
10337 rtx label = ix86_expand_aligntest (countreg, 4);
10338 srcmem = change_address (src, SImode, srcreg);
10339 dstmem = change_address (dst, SImode, destreg);
10340 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10341 emit_label (label);
10342 LABEL_NUSES (label) = 1;
10344 if (align > 2 && count != 0 && (count & 2))
10346 srcmem = change_address (src, HImode, srcreg);
10347 dstmem = change_address (dst, HImode, destreg);
10348 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10350 if (align <= 2 || count == 0)
10352 rtx label = ix86_expand_aligntest (countreg, 2);
10353 srcmem = change_address (src, HImode, srcreg);
10354 dstmem = change_address (dst, HImode, destreg);
10355 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10356 emit_label (label);
10357 LABEL_NUSES (label) = 1;
10359 if (align > 1 && count != 0 && (count & 1))
10361 srcmem = change_address (src, QImode, srcreg);
10362 dstmem = change_address (dst, QImode, destreg);
10363 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10365 if (align <= 1 || count == 0)
10367 rtx label = ix86_expand_aligntest (countreg, 1);
10368 srcmem = change_address (src, QImode, srcreg);
10369 dstmem = change_address (dst, QImode, destreg);
10370 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10371 emit_label (label);
10372 LABEL_NUSES (label) = 1;
10376 return 1;
10379 /* Expand a string clear operation (bzero). Use i386 string operations when
10380 profitable. ix86_expand_movmem contains similar code. */
10382 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
10384 rtx destreg, zeroreg, countreg, destexp;
10385 enum machine_mode counter_mode;
10386 HOST_WIDE_INT align = 0;
10387 unsigned HOST_WIDE_INT count = 0;
10389 if (GET_CODE (align_exp) == CONST_INT)
10390 align = INTVAL (align_exp);
10392 /* Can't use any of this if the user has appropriated esi. */
10393 if (global_regs[4])
10394 return 0;
10396 /* This simple hack avoids all inlining code and simplifies code below. */
10397 if (!TARGET_ALIGN_STRINGOPS)
10398 align = 32;
10400 if (GET_CODE (count_exp) == CONST_INT)
10402 count = INTVAL (count_exp);
10403 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10404 return 0;
10406 /* Figure out proper mode for counter. For 32bits it is always SImode,
10407 for 64bits use SImode when possible, otherwise DImode.
10408 Set count to number of bytes copied when known at compile time. */
10409 if (!TARGET_64BIT
10410 || GET_MODE (count_exp) == SImode
10411 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
10412 counter_mode = SImode;
10413 else
10414 counter_mode = DImode;
10416 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10417 if (destreg != XEXP (dst, 0))
10418 dst = replace_equiv_address_nv (dst, destreg);
10421 /* When optimizing for size, emit a simple rep ; stosb instruction for
10422 counts not divisible by 4. The movl $N, %ecx; rep; stosb
10423 sequence is 7 bytes long, so if optimizing for size and the count is
10424 small enough that a few stosl, stosw and stosb instructions without
10425 rep are shorter, fall through into the next if. */
10427 if ((!optimize || optimize_size)
10428 && (count == 0
10429 || ((count & 0x03)
10430 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
10432 emit_insn (gen_cld ());
10434 countreg = ix86_zero_extend_to_Pmode (count_exp);
10435 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10436 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
10437 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
10439 else if (count != 0
10440 && (align >= 8
10441 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10442 || optimize_size || count < (unsigned int) 64))
10444 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10445 unsigned HOST_WIDE_INT offset = 0;
10447 emit_insn (gen_cld ());
10449 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10450 if (count & ~(size - 1))
10452 unsigned HOST_WIDE_INT repcount;
10453 unsigned int max_nonrep;
10455 repcount = count >> (size == 4 ? 2 : 3);
10456 if (!TARGET_64BIT)
10457 repcount &= 0x3fffffff;
10459 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
10460 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
10461 bytes. In both cases the latter seems to be faster for small
10462 values of N. */
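/* E.g. clearing 12 bytes with stosl: three inline stosl instructions take
   3 bytes of code, versus 7 bytes for "movl $3, %ecx; rep; stosl", so the
   unrolled form is preferred whenever repcount <= max_nonrep.  */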
10463 max_nonrep = size == 4 ? 7 : 4;
10464 if (!optimize_size)
10465 switch (ix86_tune)
10467 case PROCESSOR_PENTIUM4:
10468 case PROCESSOR_NOCONA:
10469 max_nonrep = 3;
10470 break;
10471 default:
10472 break;
10475 if (repcount <= max_nonrep)
10476 while (repcount-- > 0)
10478 rtx mem = adjust_automodify_address_nv (dst,
10479 GET_MODE (zeroreg),
10480 destreg, offset);
10481 emit_insn (gen_strset (destreg, mem, zeroreg));
10482 offset += size;
10484 else
10486 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
10487 countreg = ix86_zero_extend_to_Pmode (countreg);
10488 destexp = gen_rtx_ASHIFT (Pmode, countreg,
10489 GEN_INT (size == 4 ? 2 : 3));
10490 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10491 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
10492 destexp));
10493 offset = count & ~(size - 1);
10496 if (size == 8 && (count & 0x04))
10498 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
10499 offset);
10500 emit_insn (gen_strset (destreg, mem,
10501 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10502 offset += 4;
10504 if (count & 0x02)
10506 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
10507 offset);
10508 emit_insn (gen_strset (destreg, mem,
10509 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10510 offset += 2;
10512 if (count & 0x01)
10514 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
10515 offset);
10516 emit_insn (gen_strset (destreg, mem,
10517 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10520 else
10522 rtx countreg2;
10523 rtx label = NULL;
10524 /* Compute desired alignment of the string operation. */
10525 int desired_alignment = (TARGET_PENTIUMPRO
10526 && (count == 0 || count >= (unsigned int) 260)
10527 ? 8 : UNITS_PER_WORD);
10529 /* In case we don't know anything about the alignment, default to
10530 the library version, since it is usually equally fast and results in
10531 shorter code.
10533 Also emit a library call when we know that the count is large and the call
10534 overhead will not be important. */
10535 if (!TARGET_INLINE_ALL_STRINGOPS
10536 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10537 return 0;
10539 if (TARGET_SINGLE_STRINGOP)
10540 emit_insn (gen_cld ());
10542 countreg2 = gen_reg_rtx (Pmode);
10543 countreg = copy_to_mode_reg (counter_mode, count_exp);
10544 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10545 /* Get rid of MEM_OFFSET, it won't be accurate. */
10546 dst = change_address (dst, BLKmode, destreg);
10548 if (count == 0 && align < desired_alignment)
10550 label = gen_label_rtx ();
10551 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10552 LEU, 0, counter_mode, 1, label);
10554 if (align <= 1)
10556 rtx label = ix86_expand_aligntest (destreg, 1);
10557 emit_insn (gen_strset (destreg, dst,
10558 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10559 ix86_adjust_counter (countreg, 1);
10560 emit_label (label);
10561 LABEL_NUSES (label) = 1;
10563 if (align <= 2)
10565 rtx label = ix86_expand_aligntest (destreg, 2);
10566 emit_insn (gen_strset (destreg, dst,
10567 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10568 ix86_adjust_counter (countreg, 2);
10569 emit_label (label);
10570 LABEL_NUSES (label) = 1;
10572 if (align <= 4 && desired_alignment > 4)
10574 rtx label = ix86_expand_aligntest (destreg, 4);
10575 emit_insn (gen_strset (destreg, dst,
10576 (TARGET_64BIT
10577 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10578 : zeroreg)));
10579 ix86_adjust_counter (countreg, 4);
10580 emit_label (label);
10581 LABEL_NUSES (label) = 1;
10584 if (label && desired_alignment > 4 && !TARGET_64BIT)
10586 emit_label (label);
10587 LABEL_NUSES (label) = 1;
10588 label = NULL_RTX;
10591 if (!TARGET_SINGLE_STRINGOP)
10592 emit_insn (gen_cld ());
10593 if (TARGET_64BIT)
10595 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10596 GEN_INT (3)));
10597 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
10599 else
10601 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
10602 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
10604 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10605 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
10607 if (label)
10609 emit_label (label);
10610 LABEL_NUSES (label) = 1;
10613 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10614 emit_insn (gen_strset (destreg, dst,
10615 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10616 if (TARGET_64BIT && (align <= 4 || count == 0))
10618 rtx label = ix86_expand_aligntest (countreg, 4);
10619 emit_insn (gen_strset (destreg, dst,
10620 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10621 emit_label (label);
10622 LABEL_NUSES (label) = 1;
10624 if (align > 2 && count != 0 && (count & 2))
10625 emit_insn (gen_strset (destreg, dst,
10626 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10627 if (align <= 2 || count == 0)
10629 rtx label = ix86_expand_aligntest (countreg, 2);
10630 emit_insn (gen_strset (destreg, dst,
10631 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10632 emit_label (label);
10633 LABEL_NUSES (label) = 1;
10635 if (align > 1 && count != 0 && (count & 1))
10636 emit_insn (gen_strset (destreg, dst,
10637 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10638 if (align <= 1 || count == 0)
10640 rtx label = ix86_expand_aligntest (countreg, 1);
10641 emit_insn (gen_strset (destreg, dst,
10642 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10643 emit_label (label);
10644 LABEL_NUSES (label) = 1;
10647 return 1;
10650 /* Expand strlen. */
10652 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
10654 rtx addr, scratch1, scratch2, scratch3, scratch4;
10656 /* The generic case of the strlen expander is long. Avoid expanding it
10657 unless TARGET_INLINE_ALL_STRINGOPS. */
10659 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10660 && !TARGET_INLINE_ALL_STRINGOPS
10661 && !optimize_size
10662 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10663 return 0;
10665 addr = force_reg (Pmode, XEXP (src, 0));
10666 scratch1 = gen_reg_rtx (Pmode);
10668 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10669 && !optimize_size)
10671 /* Well it seems that some optimizer does not combine a call like
10672 foo(strlen(bar), strlen(bar));
10673 when the move and the subtraction are done here. It does calculate
10674 the length just once when these instructions are done inside
10675 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10676 often used and I use one fewer register for the lifetime of
10677 output_strlen_unroll() this is better. */
10679 emit_move_insn (out, addr);
10681 ix86_expand_strlensi_unroll_1 (out, src, align);
10683 /* strlensi_unroll_1 returns the address of the zero at the end of
10684 the string, like memchr(), so compute the length by subtracting
10685 the start address. */
10686 if (TARGET_64BIT)
10687 emit_insn (gen_subdi3 (out, out, addr));
10688 else
10689 emit_insn (gen_subsi3 (out, out, addr));
10691 else
10693 rtx unspec;
10694 scratch2 = gen_reg_rtx (Pmode);
10695 scratch3 = gen_reg_rtx (Pmode);
10696 scratch4 = force_reg (Pmode, constm1_rtx);
10698 emit_move_insn (scratch3, addr);
10699 eoschar = force_reg (QImode, eoschar);
10701 emit_insn (gen_cld ());
10702 src = replace_equiv_address_nv (src, scratch3);
10704 /* If .md starts supporting :P, this can be done in .md. */
10705 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
10706 scratch4), UNSPEC_SCAS);
10707 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
10708 if (TARGET_64BIT)
10710 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10711 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10713 else
10715 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10716 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10719 return 1;
10722 /* Expand the appropriate insns for doing strlen if not just doing
10723 repnz; scasb
10725 out = result, initialized with the start address
10726 align_rtx = alignment of the address.
10727 scratch = scratch register, initialized with the start address when
10728 not aligned, otherwise undefined
10730 This is just the body. It needs the initializations mentioned above and
10731 some address computation at the end. These things are done in i386.md. */
10733 static void
10734 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
10736 int align;
10737 rtx tmp;
10738 rtx align_2_label = NULL_RTX;
10739 rtx align_3_label = NULL_RTX;
10740 rtx align_4_label = gen_label_rtx ();
10741 rtx end_0_label = gen_label_rtx ();
10742 rtx mem;
10743 rtx tmpreg = gen_reg_rtx (SImode);
10744 rtx scratch = gen_reg_rtx (SImode);
10745 rtx cmp;
10747 align = 0;
10748 if (GET_CODE (align_rtx) == CONST_INT)
10749 align = INTVAL (align_rtx);
10751 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10753 /* Is there a known alignment and is it less than 4? */
10754 if (align < 4)
10756 rtx scratch1 = gen_reg_rtx (Pmode);
10757 emit_move_insn (scratch1, out);
10758 /* Is there a known alignment and is it not 2? */
10759 if (align != 2)
10761 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10762 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10764 /* Leave just the 3 lower bits. */
10765 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
10766 NULL_RTX, 0, OPTAB_WIDEN);
10768 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10769 Pmode, 1, align_4_label);
10770 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
10771 Pmode, 1, align_2_label);
10772 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
10773 Pmode, 1, align_3_label);
10775 else
10777 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10778 check whether it is aligned to a 4-byte boundary. */
10780 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
10781 NULL_RTX, 0, OPTAB_WIDEN);
10783 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10784 Pmode, 1, align_4_label);
10787 mem = change_address (src, QImode, out);
10789 /* Now compare the bytes. */
10791 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
10792 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
10793 QImode, 1, end_0_label);
10795 /* Increment the address. */
10796 if (TARGET_64BIT)
10797 emit_insn (gen_adddi3 (out, out, const1_rtx));
10798 else
10799 emit_insn (gen_addsi3 (out, out, const1_rtx));
10801 /* Not needed with an alignment of 2 */
10802 if (align != 2)
10804 emit_label (align_2_label);
10806 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10807 end_0_label);
10809 if (TARGET_64BIT)
10810 emit_insn (gen_adddi3 (out, out, const1_rtx));
10811 else
10812 emit_insn (gen_addsi3 (out, out, const1_rtx));
10814 emit_label (align_3_label);
10817 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10818 end_0_label);
10820 if (TARGET_64BIT)
10821 emit_insn (gen_adddi3 (out, out, const1_rtx));
10822 else
10823 emit_insn (gen_addsi3 (out, out, const1_rtx));
10826 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
10827 align this loop; it only makes programs bigger and does not help
10828 to speed them up. */
10829 emit_label (align_4_label);
10831 mem = change_address (src, SImode, out);
10832 emit_move_insn (scratch, mem);
10833 if (TARGET_64BIT)
10834 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
10835 else
10836 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
10838 /* This formula yields a nonzero result iff one of the bytes is zero.
10839 This saves three branches inside the loop and many cycles. */
10841 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
10842 emit_insn (gen_one_cmplsi2 (scratch, scratch));
10843 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
10844 emit_insn (gen_andsi3 (tmpreg, tmpreg,
10845 gen_int_mode (0x80808080, SImode)));
10846 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
10847 align_4_label);
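/* Worked example of the test above (illustrative): for the word 0x44004433,
   which contains a zero byte,

     (0x44004433 - 0x01010101) & ~0x44004433 & 0x80808080
       = 0x42ff4332 & 0xbbffbbcc & 0x80808080
       = 0x00800000                    -- nonzero, so a zero byte is present

   while a word with no zero byte, e.g. 0x61626364, yields 0 and the loop
   continues.  */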
10849 if (TARGET_CMOVE)
10851 rtx reg = gen_reg_rtx (SImode);
10852 rtx reg2 = gen_reg_rtx (Pmode);
10853 emit_move_insn (reg, tmpreg);
10854 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
10856 /* If zero is not in the first two bytes, move two bytes forward. */
10857 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10858 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10859 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10860 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
10861 gen_rtx_IF_THEN_ELSE (SImode, tmp,
10862 reg,
10863 tmpreg)));
10864 /* Emit lea manually to avoid clobbering of flags. */
10865 emit_insn (gen_rtx_SET (SImode, reg2,
10866 gen_rtx_PLUS (Pmode, out, const2_rtx)));
10868 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10869 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10870 emit_insn (gen_rtx_SET (VOIDmode, out,
10871 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
10872 reg2,
10873 out)));
10876 else
10878 rtx end_2_label = gen_label_rtx ();
10879 /* Is zero in the first two bytes? */
10881 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10882 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10883 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
10884 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10885 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
10886 pc_rtx);
10887 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10888 JUMP_LABEL (tmp) = end_2_label;
10890 /* Not in the first two. Move two bytes forward. */
10891 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
10892 if (TARGET_64BIT)
10893 emit_insn (gen_adddi3 (out, out, const2_rtx));
10894 else
10895 emit_insn (gen_addsi3 (out, out, const2_rtx));
10897 emit_label (end_2_label);
10901 /* Avoid branch in fixing the byte. */
10902 tmpreg = gen_lowpart (QImode, tmpreg);
10903 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
10904 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
10905 if (TARGET_64BIT)
10906 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
10907 else
10908 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
10910 emit_label (end_0_label);
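/* Emit a call to the function at FNADDR.  CALLARG1 becomes the second operand
   of the CALL rtx, RETVAL (if nonnull) receives the return value, POP (if
   nonnull) is the number of argument bytes the callee pops, and SIBCALL is
   nonzero for sibling calls.  On 64-bit targets a nonnegative CALLARG2 is
   loaded into %al, which per the x86-64 ABI tells a varargs callee how many
   SSE registers hold arguments.  */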
10913 void
10914 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
10915 rtx callarg2 ATTRIBUTE_UNUSED,
10916 rtx pop, int sibcall)
10918 rtx use = NULL, call;
10920 if (pop == const0_rtx)
10921 pop = NULL;
10922 if (TARGET_64BIT && pop)
10923 abort ();
10925 #if TARGET_MACHO
10926 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
10927 fnaddr = machopic_indirect_call_target (fnaddr);
10928 #else
10929 /* Static functions and indirect calls don't need the pic register. */
10930 if (! TARGET_64BIT && flag_pic
10931 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
10932 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
10933 use_reg (&use, pic_offset_table_rtx);
10935 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
10937 rtx al = gen_rtx_REG (QImode, 0);
10938 emit_move_insn (al, callarg2);
10939 use_reg (&use, al);
10941 #endif /* TARGET_MACHO */
10943 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
10945 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
10946 fnaddr = gen_rtx_MEM (QImode, fnaddr);
10948 if (sibcall && TARGET_64BIT
10949 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
10951 rtx addr;
10952 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
10953 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
10954 emit_move_insn (fnaddr, addr);
10955 fnaddr = gen_rtx_MEM (QImode, fnaddr);
10958 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
10959 if (retval)
10960 call = gen_rtx_SET (VOIDmode, retval, call);
10961 if (pop)
10963 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
10964 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
10965 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
10968 call = emit_call_insn (call);
10969 if (use)
10970 CALL_INSN_FUNCTION_USAGE (call) = use;
10974 /* Clear stack slot assignments remembered from previous functions.
10975 This is called from INIT_EXPANDERS once before RTL is emitted for each
10976 function. */
10978 static struct machine_function *
10979 ix86_init_machine_status (void)
10981 struct machine_function *f;
10983 f = ggc_alloc_cleared (sizeof (struct machine_function));
10984 f->use_fast_prologue_epilogue_nregs = -1;
10986 return f;
10989 /* Return a MEM corresponding to a stack slot with mode MODE.
10990 Allocate a new slot if necessary.
10992 The RTL for a function can have several slots available: N is
10993 which slot to use. */
10996 assign_386_stack_local (enum machine_mode mode, int n)
10998 struct stack_local_entry *s;
11000 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11001 abort ();
11003 for (s = ix86_stack_locals; s; s = s->next)
11004 if (s->mode == mode && s->n == n)
11005 return s->rtl;
11007 s = (struct stack_local_entry *)
11008 ggc_alloc (sizeof (struct stack_local_entry));
11009 s->n = n;
11010 s->mode = mode;
11011 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11013 s->next = ix86_stack_locals;
11014 ix86_stack_locals = s;
11015 return s->rtl;
11018 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11020 static GTY(()) rtx ix86_tls_symbol;
11022 ix86_tls_get_addr (void)
11025 if (!ix86_tls_symbol)
11027 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11028 (TARGET_GNU_TLS && !TARGET_64BIT)
11029 ? "___tls_get_addr"
11030 : "__tls_get_addr");
11033 return ix86_tls_symbol;
11036 /* Calculate the length of the memory address in the instruction
11037 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11040 memory_address_length (rtx addr)
11042 struct ix86_address parts;
11043 rtx base, index, disp;
11044 int len;
11046 if (GET_CODE (addr) == PRE_DEC
11047 || GET_CODE (addr) == POST_INC
11048 || GET_CODE (addr) == PRE_MODIFY
11049 || GET_CODE (addr) == POST_MODIFY)
11050 return 0;
11052 if (! ix86_decompose_address (addr, &parts))
11053 abort ();
11055 base = parts.base;
11056 index = parts.index;
11057 disp = parts.disp;
11058 len = 0;
11060 /* Rule of thumb:
11061 - esp as the base always wants an index,
11062 - ebp as the base always wants a displacement. */
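/* A few illustrative cases of the extra address bytes computed below
   (beyond the one-byte modrm):

     (%eax)         -> 0    plain register indirect
     (%esp)         -> 1    needs a SIB byte
     8(%ebp)        -> 1    one-byte displacement
     foo(,%eax,4)   -> 5    SIB byte plus 32-bit displacement  */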
11064 /* Register Indirect. */
11065 if (base && !index && !disp)
11067 /* esp (for its index) and ebp (for its displacement) need
11068 the two-byte modrm form. */
11069 if (addr == stack_pointer_rtx
11070 || addr == arg_pointer_rtx
11071 || addr == frame_pointer_rtx
11072 || addr == hard_frame_pointer_rtx)
11073 len = 1;
11076 /* Direct Addressing. */
11077 else if (disp && !base && !index)
11078 len = 4;
11080 else
11082 /* Find the length of the displacement constant. */
11083 if (disp)
11085 if (GET_CODE (disp) == CONST_INT
11086 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11087 && base)
11088 len = 1;
11089 else
11090 len = 4;
11092 /* ebp always wants a displacement. */
11093 else if (base == hard_frame_pointer_rtx)
11094 len = 1;
11096 /* An index requires the two-byte modrm form.... */
11097 if (index
11098 /* ...like esp, which always wants an index. */
11099 || base == stack_pointer_rtx
11100 || base == arg_pointer_rtx
11101 || base == frame_pointer_rtx)
11102 len += 1;
11105 return len;
11108 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
11109 is set, expect that the insn has an 8-bit immediate alternative. */
11111 ix86_attr_length_immediate_default (rtx insn, int shortform)
11113 int len = 0;
11114 int i;
11115 extract_insn_cached (insn);
11116 for (i = recog_data.n_operands - 1; i >= 0; --i)
11117 if (CONSTANT_P (recog_data.operand[i]))
11119 if (len)
11120 abort ();
11121 if (shortform
11122 && GET_CODE (recog_data.operand[i]) == CONST_INT
11123 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11124 len = 1;
11125 else
11127 switch (get_attr_mode (insn))
11129 case MODE_QI:
11130 len+=1;
11131 break;
11132 case MODE_HI:
11133 len+=2;
11134 break;
11135 case MODE_SI:
11136 len+=4;
11137 break;
11138 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11139 case MODE_DI:
11140 len+=4;
11141 break;
11142 default:
11143 fatal_insn ("unknown insn mode", insn);
11147 return len;
11149 /* Compute default value for "length_address" attribute. */
11151 ix86_attr_length_address_default (rtx insn)
11153 int i;
11155 if (get_attr_type (insn) == TYPE_LEA)
11157 rtx set = PATTERN (insn);
11158 if (GET_CODE (set) == SET)
11160 else if (GET_CODE (set) == PARALLEL
11161 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11162 set = XVECEXP (set, 0, 0);
11163 else
11165 #ifdef ENABLE_CHECKING
11166 abort ();
11167 #endif
11168 return 0;
11171 return memory_address_length (SET_SRC (set));
11174 extract_insn_cached (insn);
11175 for (i = recog_data.n_operands - 1; i >= 0; --i)
11176 if (GET_CODE (recog_data.operand[i]) == MEM)
11178 return memory_address_length (XEXP (recog_data.operand[i], 0));
11179 break;
11181 return 0;
11184 /* Return the maximum number of instructions a cpu can issue. */
11186 static int
11187 ix86_issue_rate (void)
11189 switch (ix86_tune)
11191 case PROCESSOR_PENTIUM:
11192 case PROCESSOR_K6:
11193 return 2;
11195 case PROCESSOR_PENTIUMPRO:
11196 case PROCESSOR_PENTIUM4:
11197 case PROCESSOR_ATHLON:
11198 case PROCESSOR_K8:
11199 case PROCESSOR_NOCONA:
11200 return 3;
11202 default:
11203 return 1;
11207 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11208 by DEP_INSN and nothing else set by DEP_INSN. */
11210 static int
11211 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11213 rtx set, set2;
11215 /* Simplify the test for uninteresting insns. */
11216 if (insn_type != TYPE_SETCC
11217 && insn_type != TYPE_ICMOV
11218 && insn_type != TYPE_FCMOV
11219 && insn_type != TYPE_IBR)
11220 return 0;
11222 if ((set = single_set (dep_insn)) != 0)
11224 set = SET_DEST (set);
11225 set2 = NULL_RTX;
11227 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11228 && XVECLEN (PATTERN (dep_insn), 0) == 2
11229 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11230 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11232 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11233 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
11235 else
11236 return 0;
11238 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11239 return 0;
11241 /* This test is true if the dependent insn reads the flags but
11242 not any other potentially set register. */
11243 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11244 return 0;
11246 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11247 return 0;
11249 return 1;
11252 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11253 address with operands set by DEP_INSN. */
11255 static int
11256 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11258 rtx addr;
11260 if (insn_type == TYPE_LEA
11261 && TARGET_PENTIUM)
11263 addr = PATTERN (insn);
11264 if (GET_CODE (addr) == SET)
11266 else if (GET_CODE (addr) == PARALLEL
11267 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11268 addr = XVECEXP (addr, 0, 0);
11269 else
11270 abort ();
11271 addr = SET_SRC (addr);
11273 else
11275 int i;
11276 extract_insn_cached (insn);
11277 for (i = recog_data.n_operands - 1; i >= 0; --i)
11278 if (GET_CODE (recog_data.operand[i]) == MEM)
11280 addr = XEXP (recog_data.operand[i], 0);
11281 goto found;
11283 return 0;
11284 found:;
11287 return modified_in_p (addr, dep_insn);
11290 static int
11291 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
11293 enum attr_type insn_type, dep_insn_type;
11294 enum attr_memory memory;
11295 rtx set, set2;
11296 int dep_insn_code_number;
11298 /* Anti and output dependencies have zero cost on all CPUs. */
11299 if (REG_NOTE_KIND (link) != 0)
11300 return 0;
11302 dep_insn_code_number = recog_memoized (dep_insn);
11304 /* If we can't recognize the insns, we can't really do anything. */
11305 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11306 return cost;
11308 insn_type = get_attr_type (insn);
11309 dep_insn_type = get_attr_type (dep_insn);
11311 switch (ix86_tune)
11313 case PROCESSOR_PENTIUM:
11314 /* Address Generation Interlock adds a cycle of latency. */
11315 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11316 cost += 1;
11318 /* ??? Compares pair with jump/setcc. */
11319 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11320 cost = 0;
11322 /* Floating point stores require value to be ready one cycle earlier. */
11323 if (insn_type == TYPE_FMOV
11324 && get_attr_memory (insn) == MEMORY_STORE
11325 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11326 cost += 1;
11327 break;
11329 case PROCESSOR_PENTIUMPRO:
11330 memory = get_attr_memory (insn);
11332 /* INT->FP conversion is expensive. */
11333 if (get_attr_fp_int_src (dep_insn))
11334 cost += 5;
11336 /* There is one cycle extra latency between an FP op and a store. */
11337 if (insn_type == TYPE_FMOV
11338 && (set = single_set (dep_insn)) != NULL_RTX
11339 && (set2 = single_set (insn)) != NULL_RTX
11340 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11341 && GET_CODE (SET_DEST (set2)) == MEM)
11342 cost += 1;
11344 /* Show the ability of the reorder buffer to hide the latency of a load by
11345 executing it in parallel with the previous instruction when the previous
11346 instruction is not needed to compute the address. */
11347 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11348 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11350 /* Claim moves take one cycle, as the core can issue one load
11351 at a time and the next load can start a cycle later. */
11352 if (dep_insn_type == TYPE_IMOV
11353 || dep_insn_type == TYPE_FMOV)
11354 cost = 1;
11355 else if (cost > 1)
11356 cost--;
11358 break;
11360 case PROCESSOR_K6:
11361 memory = get_attr_memory (insn);
11363 /* The esp dependency is resolved before the instruction is really
11364 finished. */
11365 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11366 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11367 return 1;
11369 /* INT->FP conversion is expensive. */
11370 if (get_attr_fp_int_src (dep_insn))
11371 cost += 5;
11373 /* Show the ability of the reorder buffer to hide the latency of a load by
11374 executing it in parallel with the previous instruction when the previous
11375 instruction is not needed to compute the address. */
11376 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11377 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11379 /* Claim moves take one cycle, as the core can issue one load
11380 at a time and the next load can start a cycle later. */
11381 if (dep_insn_type == TYPE_IMOV
11382 || dep_insn_type == TYPE_FMOV)
11383 cost = 1;
11384 else if (cost > 2)
11385 cost -= 2;
11386 else
11387 cost = 1;
11389 break;
11391 case PROCESSOR_ATHLON:
11392 case PROCESSOR_K8:
11393 memory = get_attr_memory (insn);
11395 /* Show the ability of the reorder buffer to hide the latency of a load by
11396 executing it in parallel with the previous instruction when the previous
11397 instruction is not needed to compute the address. */
11398 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11399 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11401 enum attr_unit unit = get_attr_unit (insn);
11402 int loadcost = 3;
11404 /* Because of the difference between the lengths of the integer and
11405 floating unit pipeline preparation stages, the memory operands
11406 for floating point are cheaper.
11408 ??? For Athlon the difference is most probably 2. */
11409 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
11410 loadcost = 3;
11411 else
11412 loadcost = TARGET_ATHLON ? 2 : 0;
11414 if (cost >= loadcost)
11415 cost -= loadcost;
11416 else
11417 cost = 0;
11420 default:
11421 break;
11424 return cost;
11427 /* How many alternative schedules to try. This should be as wide as the
11428 scheduling freedom in the DFA, but no wider. Making this value too
11429 large results in extra work for the scheduler. */
11431 static int
11432 ia32_multipass_dfa_lookahead (void)
11434 if (ix86_tune == PROCESSOR_PENTIUM)
11435 return 2;
11437 if (ix86_tune == PROCESSOR_PENTIUMPRO
11438 || ix86_tune == PROCESSOR_K6)
11439 return 1;
11441 else
11442 return 0;
11446 /* Compute the alignment given to a constant that is being placed in memory.
11447 EXP is the constant and ALIGN is the alignment that the object would
11448 ordinarily have.
11449 The value of this function is used instead of that alignment to align
11450 the object. */
11453 ix86_constant_alignment (tree exp, int align)
11455 if (TREE_CODE (exp) == REAL_CST)
11457 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11458 return 64;
11459 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11460 return 128;
11462 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
11463 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
11464 return BITS_PER_WORD;
11466 return align;
11469 /* Compute the alignment for a static variable.
11470 TYPE is the data type, and ALIGN is the alignment that
11471 the object would ordinarily have. The value of this function is used
11472 instead of that alignment to align the object. */
11475 ix86_data_alignment (tree type, int align)
11477 if (AGGREGATE_TYPE_P (type)
11478 && TYPE_SIZE (type)
11479 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11480 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11481 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11482 return 256;
11484 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
11485 to a 16-byte boundary. */
11486 if (TARGET_64BIT)
11488 if (AGGREGATE_TYPE_P (type)
11489 && TYPE_SIZE (type)
11490 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11491 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11492 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11493 return 128;
11496 if (TREE_CODE (type) == ARRAY_TYPE)
11498 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11499 return 64;
11500 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11501 return 128;
11503 else if (TREE_CODE (type) == COMPLEX_TYPE)
11506 if (TYPE_MODE (type) == DCmode && align < 64)
11507 return 64;
11508 if (TYPE_MODE (type) == XCmode && align < 128)
11509 return 128;
11511 else if ((TREE_CODE (type) == RECORD_TYPE
11512 || TREE_CODE (type) == UNION_TYPE
11513 || TREE_CODE (type) == QUAL_UNION_TYPE)
11514 && TYPE_FIELDS (type))
11516 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11517 return 64;
11518 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11519 return 128;
11521 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11522 || TREE_CODE (type) == INTEGER_TYPE)
11524 if (TYPE_MODE (type) == DFmode && align < 64)
11525 return 64;
11526 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11527 return 128;
11530 return align;
11533 /* Compute the alignment for a local variable.
11534 TYPE is the data type, and ALIGN is the alignment that
11535 the object would ordinarily have. The value of this macro is used
11536 instead of that alignment to align the object. */
11539 ix86_local_alignment (tree type, int align)
11541 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
11542 to a 16-byte boundary. */
11543 if (TARGET_64BIT)
11545 if (AGGREGATE_TYPE_P (type)
11546 && TYPE_SIZE (type)
11547 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11548 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11549 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11550 return 128;
11552 if (TREE_CODE (type) == ARRAY_TYPE)
11554 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11555 return 64;
11556 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11557 return 128;
11559 else if (TREE_CODE (type) == COMPLEX_TYPE)
11561 if (TYPE_MODE (type) == DCmode && align < 64)
11562 return 64;
11563 if (TYPE_MODE (type) == XCmode && align < 128)
11564 return 128;
11566 else if ((TREE_CODE (type) == RECORD_TYPE
11567 || TREE_CODE (type) == UNION_TYPE
11568 || TREE_CODE (type) == QUAL_UNION_TYPE)
11569 && TYPE_FIELDS (type))
11571 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11572 return 64;
11573 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11574 return 128;
11576 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11577 || TREE_CODE (type) == INTEGER_TYPE)
11580 if (TYPE_MODE (type) == DFmode && align < 64)
11581 return 64;
11582 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11583 return 128;
11585 return align;
11588 /* Emit RTL insns to initialize the variable parts of a trampoline.
11589 FNADDR is an RTX for the address of the function's pure code.
11590 CXT is an RTX for the static chain value for the function. */
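/* On 32-bit targets the trampoline written below is 10 bytes:

     b9 <cxt:4>     movl  $cxt, %ecx      -- load the static chain
     e9 <disp:4>    jmp   fnaddr          -- pc-relative jump to the function

   The 64-bit variant instead loads %r11 with the function address and %r10
   with the static chain (movl or movabs forms) and ends with jmp *%r11.  */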
11591 void
11592 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
11594 if (!TARGET_64BIT)
11596 /* Compute offset from the end of the jmp to the target function. */
11597 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11598 plus_constant (tramp, 10),
11599 NULL_RTX, 1, OPTAB_DIRECT);
11600 emit_move_insn (gen_rtx_MEM (QImode, tramp),
11601 gen_int_mode (0xb9, QImode));
11602 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
11603 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
11604 gen_int_mode (0xe9, QImode));
11605 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
11607 else
11609 int offset = 0;
11610 /* Try to load the address using the shorter movl instead of movabs.
11611 We may want to support movq for kernel mode, but the kernel does not use
11612 trampolines at the moment. */
11613 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
11615 fnaddr = copy_to_mode_reg (DImode, fnaddr);
11616 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11617 gen_int_mode (0xbb41, HImode));
11618 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11619 gen_lowpart (SImode, fnaddr));
11620 offset += 6;
11622 else
11624 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11625 gen_int_mode (0xbb49, HImode));
11626 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11627 fnaddr);
11628 offset += 10;
11630 /* Load static chain using movabs to r10. */
11631 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11632 gen_int_mode (0xba49, HImode));
11633 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11634 cxt);
11635 offset += 10;
11636 /* Jump through r11. */
11637 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11638 gen_int_mode (0xff49, HImode));
11639 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
11640 gen_int_mode (0xe3, QImode));
11641 offset += 3;
11642 if (offset > TRAMPOLINE_SIZE)
11643 abort ();
11646 #ifdef ENABLE_EXECUTE_STACK
11647 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
11648 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
11649 #endif
11652 #define def_builtin(MASK, NAME, TYPE, CODE) \
11653 do { \
11654 if ((MASK) & target_flags \
11655 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
11656 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
11657 NULL, NULL_TREE); \
11658 } while (0)
11660 struct builtin_description
11662 const unsigned int mask;
11663 const enum insn_code icode;
11664 const char *const name;
11665 const enum ix86_builtins code;
11666 const enum rtx_code comparison;
11667 const unsigned int flag;
11670 static const struct builtin_description bdesc_comi[] =
11672 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
11673 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
11674 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
11675 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
11676 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
11677 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
11678 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
11679 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
11680 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
11681 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
11682 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
11683 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
11684 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
11685 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
11686 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
11687 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
11688 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
11689 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
11690 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
11691 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
11692 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
11693 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
11694 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
11695 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
11698 static const struct builtin_description bdesc_2arg[] =
11700 /* SSE */
11701 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11702 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11703 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11704 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11705 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11706 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11707 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11708 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11710 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11711 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11712 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11713 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11714 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11715 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11716 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11717 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11718 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11719 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11720 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11721 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11722 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11723 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11724 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
11725 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11726 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11727 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11728 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
11729 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11731 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11732 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11733 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11734 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11736 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
11737 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
11738 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
11739 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
11741 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11742 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11743 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11744 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11745 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
11747 /* MMX */
11748 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11749 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11750 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11751 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
11752 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11753 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11754 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11755 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
11757 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11758 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11759 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11760 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11761 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11762 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11763 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11764 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11766 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11767 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
11768 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
11770 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11771 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11772 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11773 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11775 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11776 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
11778 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11779 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11780 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11781 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11782 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11783 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
11785 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
11786 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
11787 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
11788 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
11790 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
11791 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
11792 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
11793 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
11794 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
11795 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
11797 /* Special. */
11798 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
11799 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
11800 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
11802 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
11803 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
11804 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
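  /* For the shift builtins below, the plain names (PSLLW, PSLLD, ...) take
     the count in a 64-bit MMX operand, while the *I variants (PSLLWI, ...)
     take an immediate count; both map onto the same shift pattern in this
     table.  */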
11806 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
11807 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
11808 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
11809 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
11810 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
11811 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
11813 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
11814 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
11815 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
11816 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
11817 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
11818 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
11820 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
11821 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
11822 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
11823 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
11825 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
11826 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
11828 /* SSE2 */
11829 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
11830 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
11831 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
11832 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
11833 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
11834 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
11835 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
11836 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
11838 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
11839 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
11840 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
11841 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
11842 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
11843 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
11844 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
11845 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
11846 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
11847 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
11848 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
11849 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
11850 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
11851 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
11852 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
11853 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
11854 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
11855 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
11856 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
11857 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
11859 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
11860 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
11861 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
11862 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
11864 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
11865 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
11866 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
11867 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
11869 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
11870 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
11871 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
11873 /* SSE2 MMX */
11874 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
11875 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
11876 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
11877 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
11878 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
11879 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
11880 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
11881 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
11883 { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
11884 { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
11885 { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
11886 { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
11887 { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
11888 { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
11889 { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
11890 { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
11892 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
11893 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
11895 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
11896 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
11897 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
11898 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
11900 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
11901 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
11903 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
11904 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
11905 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
11906 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
11907 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
11908 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
11910 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
11911 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
11912 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
11913 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
11915 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
11916 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
11917 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
11918 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
11919 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
11920 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
11921 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
11922 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
11924 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
11925 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
11926 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
11928 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
11929 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
11931 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
11932 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
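  /* The 128-bit shifts follow the same naming scheme: the _ti-suffixed
     patterns take the count from an XMM operand (the psllw-style forms),
     while the plain patterns take an immediate count (the psllwi-style
     forms).  */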
11934 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
11935 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
11936 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
11937 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
11938 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
11939 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
11941 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
11942 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
11943 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
11944 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
11945 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
11946 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
11948 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
11949 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
11950 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
11951 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
11953 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
11955 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
11956 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
11957 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
11958 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
11960 /* SSE3 */
11961 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
11962 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
11963 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
11964 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
11965 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
11966 { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
11969 static const struct builtin_description bdesc_1arg[] =
11971 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
11972 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
11974 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
11975 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
11976 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
11978 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
11979 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
11980 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
11981 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
11982 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
11983 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
11985 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
11986 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
11987 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
11988 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
11990 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
11992 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
11993 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
11995 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
11996 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
11997 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
11998 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
11999 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12001 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12003 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12004 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12005 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
12006 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
12008 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12009 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12010 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12012 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
12014 /* SSE3 */
12015 { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
12016 { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
12017 { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
12020 void
12021 ix86_init_builtins (void)
12023 if (TARGET_MMX)
12024 ix86_init_mmx_sse_builtins ();
12027 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12028 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12029 builtins. */
12030 static void
12031 ix86_init_mmx_sse_builtins (void)
12033 const struct builtin_description * d;
12034 size_t i;
12036 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
12037 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
12038 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
12039 tree V2DI_type_node = build_vector_type_for_mode (intDI_type_node, V2DImode);
12040 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
12041 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
12042 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
12043 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
12044 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
12045 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
12047 tree pchar_type_node = build_pointer_type (char_type_node);
12048 tree pcchar_type_node = build_pointer_type (
12049 build_type_variant (char_type_node, 1, 0));
12050 tree pfloat_type_node = build_pointer_type (float_type_node);
12051 tree pcfloat_type_node = build_pointer_type (
12052 build_type_variant (float_type_node, 1, 0));
12053 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12054 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12055 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12057 /* Comparisons. */
12058 tree int_ftype_v4sf_v4sf
12059 = build_function_type_list (integer_type_node,
12060 V4SF_type_node, V4SF_type_node, NULL_TREE);
12061 tree v4si_ftype_v4sf_v4sf
12062 = build_function_type_list (V4SI_type_node,
12063 V4SF_type_node, V4SF_type_node, NULL_TREE);
12064 /* MMX/SSE/integer conversions. */
12065 tree int_ftype_v4sf
12066 = build_function_type_list (integer_type_node,
12067 V4SF_type_node, NULL_TREE);
12068 tree int64_ftype_v4sf
12069 = build_function_type_list (long_long_integer_type_node,
12070 V4SF_type_node, NULL_TREE);
12071 tree int_ftype_v8qi
12072 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12073 tree v4sf_ftype_v4sf_int
12074 = build_function_type_list (V4SF_type_node,
12075 V4SF_type_node, integer_type_node, NULL_TREE);
12076 tree v4sf_ftype_v4sf_int64
12077 = build_function_type_list (V4SF_type_node,
12078 V4SF_type_node, long_long_integer_type_node,
12079 NULL_TREE);
12080 tree v4sf_ftype_v4sf_v2si
12081 = build_function_type_list (V4SF_type_node,
12082 V4SF_type_node, V2SI_type_node, NULL_TREE);
12083 tree int_ftype_v4hi_int
12084 = build_function_type_list (integer_type_node,
12085 V4HI_type_node, integer_type_node, NULL_TREE);
12086 tree v4hi_ftype_v4hi_int_int
12087 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12088 integer_type_node, integer_type_node,
12089 NULL_TREE);
12090 /* Miscellaneous. */
12091 tree v8qi_ftype_v4hi_v4hi
12092 = build_function_type_list (V8QI_type_node,
12093 V4HI_type_node, V4HI_type_node, NULL_TREE);
12094 tree v4hi_ftype_v2si_v2si
12095 = build_function_type_list (V4HI_type_node,
12096 V2SI_type_node, V2SI_type_node, NULL_TREE);
12097 tree v4sf_ftype_v4sf_v4sf_int
12098 = build_function_type_list (V4SF_type_node,
12099 V4SF_type_node, V4SF_type_node,
12100 integer_type_node, NULL_TREE);
12101 tree v2si_ftype_v4hi_v4hi
12102 = build_function_type_list (V2SI_type_node,
12103 V4HI_type_node, V4HI_type_node, NULL_TREE);
12104 tree v4hi_ftype_v4hi_int
12105 = build_function_type_list (V4HI_type_node,
12106 V4HI_type_node, integer_type_node, NULL_TREE);
12107 tree v4hi_ftype_v4hi_di
12108 = build_function_type_list (V4HI_type_node,
12109 V4HI_type_node, long_long_unsigned_type_node,
12110 NULL_TREE);
12111 tree v2si_ftype_v2si_di
12112 = build_function_type_list (V2SI_type_node,
12113 V2SI_type_node, long_long_unsigned_type_node,
12114 NULL_TREE);
12115 tree void_ftype_void
12116 = build_function_type (void_type_node, void_list_node);
12117 tree void_ftype_unsigned
12118 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12119 tree void_ftype_unsigned_unsigned
12120 = build_function_type_list (void_type_node, unsigned_type_node,
12121 unsigned_type_node, NULL_TREE);
12122 tree void_ftype_pcvoid_unsigned_unsigned
12123 = build_function_type_list (void_type_node, const_ptr_type_node,
12124 unsigned_type_node, unsigned_type_node,
12125 NULL_TREE);
12126 tree unsigned_ftype_void
12127 = build_function_type (unsigned_type_node, void_list_node);
12128 tree di_ftype_void
12129 = build_function_type (long_long_unsigned_type_node, void_list_node);
12130 tree v4sf_ftype_void
12131 = build_function_type (V4SF_type_node, void_list_node);
12132 tree v2si_ftype_v4sf
12133 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12134 /* Loads/stores. */
12135 tree void_ftype_v8qi_v8qi_pchar
12136 = build_function_type_list (void_type_node,
12137 V8QI_type_node, V8QI_type_node,
12138 pchar_type_node, NULL_TREE);
12139 tree v4sf_ftype_pcfloat
12140 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
12141 /* @@@ the type is bogus */
12142 tree v4sf_ftype_v4sf_pv2si
12143 = build_function_type_list (V4SF_type_node,
12144 V4SF_type_node, pv2si_type_node, NULL_TREE);
12145 tree void_ftype_pv2si_v4sf
12146 = build_function_type_list (void_type_node,
12147 pv2si_type_node, V4SF_type_node, NULL_TREE);
12148 tree void_ftype_pfloat_v4sf
12149 = build_function_type_list (void_type_node,
12150 pfloat_type_node, V4SF_type_node, NULL_TREE);
12151 tree void_ftype_pdi_di
12152 = build_function_type_list (void_type_node,
12153 pdi_type_node, long_long_unsigned_type_node,
12154 NULL_TREE);
12155 tree void_ftype_pv2di_v2di
12156 = build_function_type_list (void_type_node,
12157 pv2di_type_node, V2DI_type_node, NULL_TREE);
12158 /* Normal vector unops. */
12159 tree v4sf_ftype_v4sf
12160 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12162 /* Normal vector binops. */
12163 tree v4sf_ftype_v4sf_v4sf
12164 = build_function_type_list (V4SF_type_node,
12165 V4SF_type_node, V4SF_type_node, NULL_TREE);
12166 tree v8qi_ftype_v8qi_v8qi
12167 = build_function_type_list (V8QI_type_node,
12168 V8QI_type_node, V8QI_type_node, NULL_TREE);
12169 tree v4hi_ftype_v4hi_v4hi
12170 = build_function_type_list (V4HI_type_node,
12171 V4HI_type_node, V4HI_type_node, NULL_TREE);
12172 tree v2si_ftype_v2si_v2si
12173 = build_function_type_list (V2SI_type_node,
12174 V2SI_type_node, V2SI_type_node, NULL_TREE);
12175 tree di_ftype_di_di
12176 = build_function_type_list (long_long_unsigned_type_node,
12177 long_long_unsigned_type_node,
12178 long_long_unsigned_type_node, NULL_TREE);
12180 tree v2si_ftype_v2sf
12181 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12182 tree v2sf_ftype_v2si
12183 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12184 tree v2si_ftype_v2si
12185 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12186 tree v2sf_ftype_v2sf
12187 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12188 tree v2sf_ftype_v2sf_v2sf
12189 = build_function_type_list (V2SF_type_node,
12190 V2SF_type_node, V2SF_type_node, NULL_TREE);
12191 tree v2si_ftype_v2sf_v2sf
12192 = build_function_type_list (V2SI_type_node,
12193 V2SF_type_node, V2SF_type_node, NULL_TREE);
12194 tree pint_type_node = build_pointer_type (integer_type_node);
12195 tree pcint_type_node = build_pointer_type (
12196 build_type_variant (integer_type_node, 1, 0));
12197 tree pdouble_type_node = build_pointer_type (double_type_node);
12198 tree pcdouble_type_node = build_pointer_type (
12199 build_type_variant (double_type_node, 1, 0));
12200 tree int_ftype_v2df_v2df
12201 = build_function_type_list (integer_type_node,
12202 V2DF_type_node, V2DF_type_node, NULL_TREE);
12204 tree ti_ftype_void
12205 = build_function_type (intTI_type_node, void_list_node);
12206 tree v2di_ftype_void
12207 = build_function_type (V2DI_type_node, void_list_node);
12208 tree ti_ftype_ti_ti
12209 = build_function_type_list (intTI_type_node,
12210 intTI_type_node, intTI_type_node, NULL_TREE);
12211 tree void_ftype_pcvoid
12212 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
12213 tree v2di_ftype_di
12214 = build_function_type_list (V2DI_type_node,
12215 long_long_unsigned_type_node, NULL_TREE);
12216 tree di_ftype_v2di
12217 = build_function_type_list (long_long_unsigned_type_node,
12218 V2DI_type_node, NULL_TREE);
12219 tree v4sf_ftype_v4si
12220 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12221 tree v4si_ftype_v4sf
12222 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12223 tree v2df_ftype_v4si
12224 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12225 tree v4si_ftype_v2df
12226 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12227 tree v2si_ftype_v2df
12228 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12229 tree v4sf_ftype_v2df
12230 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12231 tree v2df_ftype_v2si
12232 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12233 tree v2df_ftype_v4sf
12234 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12235 tree int_ftype_v2df
12236 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12237 tree int64_ftype_v2df
12238 = build_function_type_list (long_long_integer_type_node,
12239 V2DF_type_node, NULL_TREE);
12240 tree v2df_ftype_v2df_int
12241 = build_function_type_list (V2DF_type_node,
12242 V2DF_type_node, integer_type_node, NULL_TREE);
12243 tree v2df_ftype_v2df_int64
12244 = build_function_type_list (V2DF_type_node,
12245 V2DF_type_node, long_long_integer_type_node,
12246 NULL_TREE);
12247 tree v4sf_ftype_v4sf_v2df
12248 = build_function_type_list (V4SF_type_node,
12249 V4SF_type_node, V2DF_type_node, NULL_TREE);
12250 tree v2df_ftype_v2df_v4sf
12251 = build_function_type_list (V2DF_type_node,
12252 V2DF_type_node, V4SF_type_node, NULL_TREE);
12253 tree v2df_ftype_v2df_v2df_int
12254 = build_function_type_list (V2DF_type_node,
12255 V2DF_type_node, V2DF_type_node,
12256 integer_type_node,
12257 NULL_TREE);
12258 tree v2df_ftype_v2df_pv2si
12259 = build_function_type_list (V2DF_type_node,
12260 V2DF_type_node, pv2si_type_node, NULL_TREE);
12261 tree void_ftype_pv2si_v2df
12262 = build_function_type_list (void_type_node,
12263 pv2si_type_node, V2DF_type_node, NULL_TREE);
12264 tree void_ftype_pdouble_v2df
12265 = build_function_type_list (void_type_node,
12266 pdouble_type_node, V2DF_type_node, NULL_TREE);
12267 tree void_ftype_pint_int
12268 = build_function_type_list (void_type_node,
12269 pint_type_node, integer_type_node, NULL_TREE);
12270 tree void_ftype_v16qi_v16qi_pchar
12271 = build_function_type_list (void_type_node,
12272 V16QI_type_node, V16QI_type_node,
12273 pchar_type_node, NULL_TREE);
12274 tree v2df_ftype_pcdouble
12275 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
12276 tree v2df_ftype_v2df_v2df
12277 = build_function_type_list (V2DF_type_node,
12278 V2DF_type_node, V2DF_type_node, NULL_TREE);
12279 tree v16qi_ftype_v16qi_v16qi
12280 = build_function_type_list (V16QI_type_node,
12281 V16QI_type_node, V16QI_type_node, NULL_TREE);
12282 tree v8hi_ftype_v8hi_v8hi
12283 = build_function_type_list (V8HI_type_node,
12284 V8HI_type_node, V8HI_type_node, NULL_TREE);
12285 tree v4si_ftype_v4si_v4si
12286 = build_function_type_list (V4SI_type_node,
12287 V4SI_type_node, V4SI_type_node, NULL_TREE);
12288 tree v2di_ftype_v2di_v2di
12289 = build_function_type_list (V2DI_type_node,
12290 V2DI_type_node, V2DI_type_node, NULL_TREE);
12291 tree v2di_ftype_v2df_v2df
12292 = build_function_type_list (V2DI_type_node,
12293 V2DF_type_node, V2DF_type_node, NULL_TREE);
12294 tree v2df_ftype_v2df
12295 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12296 tree v2df_ftype_double
12297 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12298 tree v2df_ftype_double_double
12299 = build_function_type_list (V2DF_type_node,
12300 double_type_node, double_type_node, NULL_TREE);
12301 tree int_ftype_v8hi_int
12302 = build_function_type_list (integer_type_node,
12303 V8HI_type_node, integer_type_node, NULL_TREE);
12304 tree v8hi_ftype_v8hi_int_int
12305 = build_function_type_list (V8HI_type_node,
12306 V8HI_type_node, integer_type_node,
12307 integer_type_node, NULL_TREE);
12308 tree v2di_ftype_v2di_int
12309 = build_function_type_list (V2DI_type_node,
12310 V2DI_type_node, integer_type_node, NULL_TREE);
12311 tree v4si_ftype_v4si_int
12312 = build_function_type_list (V4SI_type_node,
12313 V4SI_type_node, integer_type_node, NULL_TREE);
12314 tree v8hi_ftype_v8hi_int
12315 = build_function_type_list (V8HI_type_node,
12316 V8HI_type_node, integer_type_node, NULL_TREE);
12317 tree v8hi_ftype_v8hi_v2di
12318 = build_function_type_list (V8HI_type_node,
12319 V8HI_type_node, V2DI_type_node, NULL_TREE);
12320 tree v4si_ftype_v4si_v2di
12321 = build_function_type_list (V4SI_type_node,
12322 V4SI_type_node, V2DI_type_node, NULL_TREE);
12323 tree v4si_ftype_v8hi_v8hi
12324 = build_function_type_list (V4SI_type_node,
12325 V8HI_type_node, V8HI_type_node, NULL_TREE);
12326 tree di_ftype_v8qi_v8qi
12327 = build_function_type_list (long_long_unsigned_type_node,
12328 V8QI_type_node, V8QI_type_node, NULL_TREE);
12329 tree di_ftype_v2si_v2si
12330 = build_function_type_list (long_long_unsigned_type_node,
12331 V2SI_type_node, V2SI_type_node, NULL_TREE);
12332 tree v2di_ftype_v16qi_v16qi
12333 = build_function_type_list (V2DI_type_node,
12334 V16QI_type_node, V16QI_type_node, NULL_TREE);
12335 tree v2di_ftype_v4si_v4si
12336 = build_function_type_list (V2DI_type_node,
12337 V4SI_type_node, V4SI_type_node, NULL_TREE);
12338 tree int_ftype_v16qi
12339 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12340 tree v16qi_ftype_pcchar
12341 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
12342 tree void_ftype_pchar_v16qi
12343 = build_function_type_list (void_type_node,
12344 pchar_type_node, V16QI_type_node, NULL_TREE);
12345 tree v4si_ftype_pcint
12346 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
12347 tree void_ftype_pcint_v4si
12348 = build_function_type_list (void_type_node,
12349 pcint_type_node, V4SI_type_node, NULL_TREE);
12350 tree v2di_ftype_v2di
12351 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
12353 tree float80_type;
12354 tree float128_type;
12356 /* The __float80 type. */
12357 if (TYPE_MODE (long_double_type_node) == XFmode)
12358 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
12359 "__float80");
12360 else
12362 /* The __float80 type. */
12363 float80_type = make_node (REAL_TYPE);
12364 TYPE_PRECISION (float80_type) = 80;
12365 layout_type (float80_type);
12366 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
12369 float128_type = make_node (REAL_TYPE);
12370 TYPE_PRECISION (float128_type) = 128;
12371 layout_type (float128_type);
12372 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
12374 /* Add all builtins that are more or less simple operations on two
12375 operands. */
12376 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12378 /* Use one of the operands; the target can have a different mode for
12379 mask-generating compares. */
12380 enum machine_mode mode;
12381 tree type;
12383 if (d->name == 0)
12384 continue;
12385 mode = insn_data[d->icode].operand[1].mode;
12387 switch (mode)
12389 case V16QImode:
12390 type = v16qi_ftype_v16qi_v16qi;
12391 break;
12392 case V8HImode:
12393 type = v8hi_ftype_v8hi_v8hi;
12394 break;
12395 case V4SImode:
12396 type = v4si_ftype_v4si_v4si;
12397 break;
12398 case V2DImode:
12399 type = v2di_ftype_v2di_v2di;
12400 break;
12401 case V2DFmode:
12402 type = v2df_ftype_v2df_v2df;
12403 break;
12404 case TImode:
12405 type = ti_ftype_ti_ti;
12406 break;
12407 case V4SFmode:
12408 type = v4sf_ftype_v4sf_v4sf;
12409 break;
12410 case V8QImode:
12411 type = v8qi_ftype_v8qi_v8qi;
12412 break;
12413 case V4HImode:
12414 type = v4hi_ftype_v4hi_v4hi;
12415 break;
12416 case V2SImode:
12417 type = v2si_ftype_v2si_v2si;
12418 break;
12419 case DImode:
12420 type = di_ftype_di_di;
12421 break;
12423 default:
12424 abort ();
12427 /* Override for comparisons. */
12428 if (d->icode == CODE_FOR_maskcmpv4sf3
12429 || d->icode == CODE_FOR_maskncmpv4sf3
12430 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12431 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12432 type = v4si_ftype_v4sf_v4sf;
12434 if (d->icode == CODE_FOR_maskcmpv2df3
12435 || d->icode == CODE_FOR_maskncmpv2df3
12436 || d->icode == CODE_FOR_vmmaskcmpv2df3
12437 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12438 type = v2di_ftype_v2df_v2df;
12440 def_builtin (d->mask, d->name, type, d->code);
12443 /* Add the remaining MMX insns with somewhat more complicated types. */
12444 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12445 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12446 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12447 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12448 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12450 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12451 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12452 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12454 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12455 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12457 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12458 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12460 /* comi/ucomi insns. */
12461 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12462 if (d->mask == MASK_SSE2)
12463 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12464 else
12465 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12467 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12468 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12469 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12471 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12472 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12473 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12474 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12475 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12476 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
12477 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12478 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
12479 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12480 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12481 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
12483 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12484 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12486 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12488 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
12489 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
12490 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
12491 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12492 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12493 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12495 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12496 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12497 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12498 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12500 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12501 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12502 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12503 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12505 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12507 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12509 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12510 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12511 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12512 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12513 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12514 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
12516 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
12518 /* Original 3DNow! */
12519 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12520 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12521 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12522 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12523 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12524 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12525 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12526 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12527 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12528 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12529 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12530 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12531 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12532 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12533 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12534 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12535 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12536 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12537 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12538 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
12540 /* 3DNow! extension as used in the Athlon CPU. */
12541 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12542 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12543 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12544 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12545 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12546 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12548 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12550 /* SSE2 */
12551 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12552 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12554 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12555 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12556 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
12558 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
12559 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
12560 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
12561 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12562 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12563 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12565 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12566 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12567 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12568 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12570 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
12571 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
12572 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12573 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
12574 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
12576 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12577 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12578 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
12579 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
12581 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12582 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12584 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12586 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
12587 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
12589 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12590 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12591 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12592 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12593 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12595 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12597 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12598 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12599 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
12600 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
12602 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12603 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12604 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12606 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12607 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
12608 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12609 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12611 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12612 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12613 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12614 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
12615 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
12616 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12617 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12619 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
12620 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12621 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
12623 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
12624 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
12625 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
12626 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
12627 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
12628 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
12629 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
12631 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
12633 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
12634 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
12636 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12637 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12638 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12640 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12641 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12642 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12644 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12645 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12647 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
12648 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12649 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12650 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12652 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
12653 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12654 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12655 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12657 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12658 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12660 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
12662 /* Prescott New Instructions. */
12663 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
12664 void_ftype_pcvoid_unsigned_unsigned,
12665 IX86_BUILTIN_MONITOR);
12666 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
12667 void_ftype_unsigned_unsigned,
12668 IX86_BUILTIN_MWAIT);
12669 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
12670 v4sf_ftype_v4sf,
12671 IX86_BUILTIN_MOVSHDUP);
12672 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
12673 v4sf_ftype_v4sf,
12674 IX86_BUILTIN_MOVSLDUP);
12675 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
12676 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
12677 def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
12678 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
12679 def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
12680 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
12683 /* Errors in the source file can cause expand_expr to return const0_rtx
12684 where we expect a vector. To avoid crashing, use one of the vector
12685 clear instructions. */
12686 static rtx
12687 safe_vector_operand (rtx x, enum machine_mode mode)
12689 if (x != const0_rtx)
12690 return x;
12691 x = gen_reg_rtx (mode);
12693 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
12694 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12695 : gen_rtx_SUBREG (DImode, x, 0)));
12696 else
12697 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12698 : gen_rtx_SUBREG (V4SFmode, x, 0),
12699 CONST0_RTX (V4SFmode)));
12700 return x;
12703 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
12705 static rtx
12706 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
12708 rtx pat;
12709 tree arg0 = TREE_VALUE (arglist);
12710 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12711 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12712 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12713 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12714 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12715 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12717 if (VECTOR_MODE_P (mode0))
12718 op0 = safe_vector_operand (op0, mode0);
12719 if (VECTOR_MODE_P (mode1))
12720 op1 = safe_vector_operand (op1, mode1);
12722 if (! target
12723 || GET_MODE (target) != tmode
12724 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12725 target = gen_reg_rtx (tmode);
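/* The insn wants its second input in TImode but the builtin supplied an
   SImode value; load it into the low element of a scratch V4SI register
   and view that register as TImode.  */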
12727 if (GET_MODE (op1) == SImode && mode1 == TImode)
12729 rtx x = gen_reg_rtx (V4SImode);
12730 emit_insn (gen_sse2_loadd (x, op1));
12731 op1 = gen_lowpart (TImode, x);
12734 /* In case the insn wants input operands in modes different from
12735 the result, abort. */
12736 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
12737 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
12738 abort ();
12740 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12741 op0 = copy_to_mode_reg (mode0, op0);
12742 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12743 op1 = copy_to_mode_reg (mode1, op1);
12745 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12746 yet one of the two must not be a memory. This is normally enforced
12747 by expanders, but we didn't bother to create one here. */
12748 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12749 op0 = copy_to_mode_reg (mode0, op0);
12751 pat = GEN_FCN (icode) (target, op0, op1);
12752 if (! pat)
12753 return 0;
12754 emit_insn (pat);
12755 return target;
12758 /* Subroutine of ix86_expand_builtin to take care of stores. */
12760 static rtx
12761 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
12763 rtx pat;
12764 tree arg0 = TREE_VALUE (arglist);
12765 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12766 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12767 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12768 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12769 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
12771 if (VECTOR_MODE_P (mode1))
12772 op1 = safe_vector_operand (op1, mode1);
12774 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12775 op1 = copy_to_mode_reg (mode1, op1);
12777 pat = GEN_FCN (icode) (op0, op1);
12778 if (pat)
12779 emit_insn (pat);
12780 return 0;
12783 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
12785 static rtx
12786 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
12787 rtx target, int do_load)
12789 rtx pat;
12790 tree arg0 = TREE_VALUE (arglist);
12791 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12792 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12793 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12795 if (! target
12796 || GET_MODE (target) != tmode
12797 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12798 target = gen_reg_rtx (tmode);
12799 if (do_load)
12800 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12801 else
12803 if (VECTOR_MODE_P (mode0))
12804 op0 = safe_vector_operand (op0, mode0);
12806 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12807 op0 = copy_to_mode_reg (mode0, op0);
12810 pat = GEN_FCN (icode) (target, op0);
12811 if (! pat)
12812 return 0;
12813 emit_insn (pat);
12814 return target;
12817 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12818 sqrtss, rsqrtss, rcpss. */
12820 static rtx
12821 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
12823 rtx pat;
12824 tree arg0 = TREE_VALUE (arglist);
12825 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12826 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12827 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12829 if (! target
12830 || GET_MODE (target) != tmode
12831 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12832 target = gen_reg_rtx (tmode);
12834 if (VECTOR_MODE_P (mode0))
12835 op0 = safe_vector_operand (op0, mode0);
12837 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12838 op0 = copy_to_mode_reg (mode0, op0);
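/* The scalar (vm*) patterns take the input twice: one operand feeds the
   scalar operation and the other supplies the untouched upper elements
   of the result, so hand the same register to both.  */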
12840 op1 = op0;
12841 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12842 op1 = copy_to_mode_reg (mode0, op1);
12844 pat = GEN_FCN (icode) (target, op0, op1);
12845 if (! pat)
12846 return 0;
12847 emit_insn (pat);
12848 return target;
12851 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
12853 static rtx
12854 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
12855 rtx target)
12857 rtx pat;
12858 tree arg0 = TREE_VALUE (arglist);
12859 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12860 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12861 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12862 rtx op2;
12863 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
12864 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
12865 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
12866 enum rtx_code comparison = d->comparison;
12868 if (VECTOR_MODE_P (mode0))
12869 op0 = safe_vector_operand (op0, mode0);
12870 if (VECTOR_MODE_P (mode1))
12871 op1 = safe_vector_operand (op1, mode1);
12873 /* Swap operands if we have a comparison that isn't available in
12874 hardware. */
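/* For such entries the table holds the reversed condition; a GT builtin,
   for instance, is presumably emitted as the corresponding LT comparison
   with the operands exchanged.  */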
12875 if (d->flag)
12877 rtx tmp = gen_reg_rtx (mode1);
12878 emit_move_insn (tmp, op1);
12879 op1 = op0;
12880 op0 = tmp;
12883 if (! target
12884 || GET_MODE (target) != tmode
12885 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
12886 target = gen_reg_rtx (tmode);
12888 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
12889 op0 = copy_to_mode_reg (mode0, op0);
12890 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
12891 op1 = copy_to_mode_reg (mode1, op1);
12893 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12894 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
12895 if (! pat)
12896 return 0;
12897 emit_insn (pat);
12898 return target;
12901 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
12903 static rtx
12904 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
12905 rtx target)
12907 rtx pat;
12908 tree arg0 = TREE_VALUE (arglist);
12909 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12910 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12911 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12912 rtx op2;
12913 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
12914 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
12915 enum rtx_code comparison = d->comparison;
12917 if (VECTOR_MODE_P (mode0))
12918 op0 = safe_vector_operand (op0, mode0);
12919 if (VECTOR_MODE_P (mode1))
12920 op1 = safe_vector_operand (op1, mode1);
12922 /* Swap operands if we have a comparison that isn't available in
12923 hardware. */
12924 if (d->flag)
12926 rtx tmp = op1;
12927 op1 = op0;
12928 op0 = tmp;
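/* The comi/ucomi patterns only set the flags.  Build the boolean result
   by clearing an SImode register and then setting its low byte from
   COMPARISON applied to the pattern's destination (the flags).  */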
12931 target = gen_reg_rtx (SImode);
12932 emit_move_insn (target, const0_rtx);
12933 target = gen_rtx_SUBREG (QImode, target, 0);
12935 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
12936 op0 = copy_to_mode_reg (mode0, op0);
12937 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
12938 op1 = copy_to_mode_reg (mode1, op1);
12940 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12941 pat = GEN_FCN (d->icode) (op0, op1);
12942 if (! pat)
12943 return 0;
12944 emit_insn (pat);
12945 emit_insn (gen_rtx_SET (VOIDmode,
12946 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
12947 gen_rtx_fmt_ee (comparison, QImode,
12948 SET_DEST (pat),
12949 const0_rtx)));
12951 return SUBREG_REG (target);
12954 /* Expand an expression EXP that calls a built-in function,
12955 with result going to TARGET if that's convenient
12956 (and in mode MODE if that's convenient).
12957 SUBTARGET may be used as the target for computing one of EXP's operands.
12958 IGNORE is nonzero if the value is to be ignored. */
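/* Builtins that need irregular expansion are handled one by one in the
   switch below; everything else falls through to the generic bdesc_2arg,
   bdesc_1arg and bdesc_comi table lookups at the end.  */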
12960 rtx
12961 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
12962 enum machine_mode mode ATTRIBUTE_UNUSED,
12963 int ignore ATTRIBUTE_UNUSED)
12965 const struct builtin_description *d;
12966 size_t i;
12967 enum insn_code icode;
12968 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
12969 tree arglist = TREE_OPERAND (exp, 1);
12970 tree arg0, arg1, arg2;
12971 rtx op0, op1, op2, pat;
12972 enum machine_mode tmode, mode0, mode1, mode2;
12973 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12975 switch (fcode)
12977 case IX86_BUILTIN_EMMS:
12978 emit_insn (gen_emms ());
12979 return 0;
12981 case IX86_BUILTIN_SFENCE:
12982 emit_insn (gen_sfence ());
12983 return 0;
12985 case IX86_BUILTIN_PEXTRW:
12986 case IX86_BUILTIN_PEXTRW128:
12987 icode = (fcode == IX86_BUILTIN_PEXTRW
12988 ? CODE_FOR_mmx_pextrw
12989 : CODE_FOR_sse2_pextrw);
12990 arg0 = TREE_VALUE (arglist);
12991 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12992 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12993 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12994 tmode = insn_data[icode].operand[0].mode;
12995 mode0 = insn_data[icode].operand[1].mode;
12996 mode1 = insn_data[icode].operand[2].mode;
12998 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12999 op0 = copy_to_mode_reg (mode0, op0);
13000 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13002 error ("selector must be an integer constant in the range 0..%i",
13003 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
13004 return gen_reg_rtx (tmode);
13006 if (target == 0
13007 || GET_MODE (target) != tmode
13008 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13009 target = gen_reg_rtx (tmode);
13010 pat = GEN_FCN (icode) (target, op0, op1);
13011 if (! pat)
13012 return 0;
13013 emit_insn (pat);
13014 return target;
13016 case IX86_BUILTIN_PINSRW:
13017 case IX86_BUILTIN_PINSRW128:
13018 icode = (fcode == IX86_BUILTIN_PINSRW
13019 ? CODE_FOR_mmx_pinsrw
13020 : CODE_FOR_sse2_pinsrw);
13021 arg0 = TREE_VALUE (arglist);
13022 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13023 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13024 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13025 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13026 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13027 tmode = insn_data[icode].operand[0].mode;
13028 mode0 = insn_data[icode].operand[1].mode;
13029 mode1 = insn_data[icode].operand[2].mode;
13030 mode2 = insn_data[icode].operand[3].mode;
13032 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13033 op0 = copy_to_mode_reg (mode0, op0);
13034 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13035 op1 = copy_to_mode_reg (mode1, op1);
13036 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13038 error ("selector must be an integer constant in the range 0..%i",
13039 fcode == IX86_BUILTIN_PINSRW ? 15:255);
13040 return const0_rtx;
13042 if (target == 0
13043 || GET_MODE (target) != tmode
13044 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13045 target = gen_reg_rtx (tmode);
13046 pat = GEN_FCN (icode) (target, op0, op1, op2);
13047 if (! pat)
13048 return 0;
13049 emit_insn (pat);
13050 return target;
13052 case IX86_BUILTIN_MASKMOVQ:
13053 case IX86_BUILTIN_MASKMOVDQU:
13054 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13055 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13056 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13057 : CODE_FOR_sse2_maskmovdqu));
13058 /* Note the arg order is different from the operand order. */
13059 arg1 = TREE_VALUE (arglist);
13060 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13061 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13062 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13063 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13064 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13065 mode0 = insn_data[icode].operand[0].mode;
13066 mode1 = insn_data[icode].operand[1].mode;
13067 mode2 = insn_data[icode].operand[2].mode;
13069 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13070 op0 = copy_to_mode_reg (mode0, op0);
13071 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13072 op1 = copy_to_mode_reg (mode1, op1);
13073 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13074 op2 = copy_to_mode_reg (mode2, op2);
13075 pat = GEN_FCN (icode) (op0, op1, op2);
13076 if (! pat)
13077 return 0;
13078 emit_insn (pat);
13079 return 0;
13081 case IX86_BUILTIN_SQRTSS:
13082 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13083 case IX86_BUILTIN_RSQRTSS:
13084 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13085 case IX86_BUILTIN_RCPSS:
13086 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13088 case IX86_BUILTIN_LOADAPS:
13089 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13091 case IX86_BUILTIN_LOADUPS:
13092 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13094 case IX86_BUILTIN_STOREAPS:
13095 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13097 case IX86_BUILTIN_STOREUPS:
13098 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13100 case IX86_BUILTIN_LOADSS:
13101 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13103 case IX86_BUILTIN_STORESS:
13104 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13106 case IX86_BUILTIN_LOADHPS:
13107 case IX86_BUILTIN_LOADLPS:
13108 case IX86_BUILTIN_LOADHPD:
13109 case IX86_BUILTIN_LOADLPD:
13110 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13111 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13112 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13113 : CODE_FOR_sse2_movsd);
13114 arg0 = TREE_VALUE (arglist);
13115 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13116 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13117 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13118 tmode = insn_data[icode].operand[0].mode;
13119 mode0 = insn_data[icode].operand[1].mode;
13120 mode1 = insn_data[icode].operand[2].mode;
13122 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13123 op0 = copy_to_mode_reg (mode0, op0);
13124 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13125 if (target == 0
13126 || GET_MODE (target) != tmode
13127 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13128 target = gen_reg_rtx (tmode);
13129 pat = GEN_FCN (icode) (target, op0, op1);
13130 if (! pat)
13131 return 0;
13132 emit_insn (pat);
13133 return target;
13135 case IX86_BUILTIN_STOREHPS:
13136 case IX86_BUILTIN_STORELPS:
13137 case IX86_BUILTIN_STOREHPD:
13138 case IX86_BUILTIN_STORELPD:
13139 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13140 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13141 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13142 : CODE_FOR_sse2_movsd);
13143 arg0 = TREE_VALUE (arglist);
13144 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13145 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13146 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13147 mode0 = insn_data[icode].operand[1].mode;
13148 mode1 = insn_data[icode].operand[2].mode;
13150 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13151 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13152 op1 = copy_to_mode_reg (mode1, op1);
13154 pat = GEN_FCN (icode) (op0, op0, op1);
13155 if (! pat)
13156 return 0;
13157 emit_insn (pat);
13158 return 0;
13160 case IX86_BUILTIN_MOVNTPS:
13161 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13162 case IX86_BUILTIN_MOVNTQ:
13163 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
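/* The ldmxcsr and stmxcsr instructions take only a memory operand, so the
   MXCSR value is bounced through a stack slot in both directions.  */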
13165 case IX86_BUILTIN_LDMXCSR:
13166 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13167 target = assign_386_stack_local (SImode, 0);
13168 emit_move_insn (target, op0);
13169 emit_insn (gen_ldmxcsr (target));
13170 return 0;
13172 case IX86_BUILTIN_STMXCSR:
13173 target = assign_386_stack_local (SImode, 0);
13174 emit_insn (gen_stmxcsr (target));
13175 return copy_to_mode_reg (SImode, target);
13177 case IX86_BUILTIN_SHUFPS:
13178 case IX86_BUILTIN_SHUFPD:
13179 icode = (fcode == IX86_BUILTIN_SHUFPS
13180 ? CODE_FOR_sse_shufps
13181 : CODE_FOR_sse2_shufpd);
13182 arg0 = TREE_VALUE (arglist);
13183 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13184 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13185 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13186 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13187 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13188 tmode = insn_data[icode].operand[0].mode;
13189 mode0 = insn_data[icode].operand[1].mode;
13190 mode1 = insn_data[icode].operand[2].mode;
13191 mode2 = insn_data[icode].operand[3].mode;
13193 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13194 op0 = copy_to_mode_reg (mode0, op0);
13195 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13196 op1 = copy_to_mode_reg (mode1, op1);
13197 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13199 /* @@@ better error message */
13200 error ("mask must be an immediate");
13201 return gen_reg_rtx (tmode);
13203 if (target == 0
13204 || GET_MODE (target) != tmode
13205 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13206 target = gen_reg_rtx (tmode);
13207 pat = GEN_FCN (icode) (target, op0, op1, op2);
13208 if (! pat)
13209 return 0;
13210 emit_insn (pat);
13211 return target;
13213 case IX86_BUILTIN_PSHUFW:
13214 case IX86_BUILTIN_PSHUFD:
13215 case IX86_BUILTIN_PSHUFHW:
13216 case IX86_BUILTIN_PSHUFLW:
13217 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13218 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13219 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13220 : CODE_FOR_mmx_pshufw);
13221 arg0 = TREE_VALUE (arglist);
13222 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13223 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13224 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13225 tmode = insn_data[icode].operand[0].mode;
13226 mode1 = insn_data[icode].operand[1].mode;
13227 mode2 = insn_data[icode].operand[2].mode;
13229 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13230 op0 = copy_to_mode_reg (mode1, op0);
13231 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13233 /* @@@ better error message */
13234 error ("mask must be an immediate");
13235 return const0_rtx;
13237 if (target == 0
13238 || GET_MODE (target) != tmode
13239 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13240 target = gen_reg_rtx (tmode);
13241 pat = GEN_FCN (icode) (target, op0, op1);
13242 if (! pat)
13243 return 0;
13244 emit_insn (pat);
13245 return target;
13247 case IX86_BUILTIN_PSLLDQI128:
13248 case IX86_BUILTIN_PSRLDQI128:
13249 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
13250 : CODE_FOR_sse2_lshrti3);
13251 arg0 = TREE_VALUE (arglist);
13252 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13253 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13254 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13255 tmode = insn_data[icode].operand[0].mode;
13256 mode1 = insn_data[icode].operand[1].mode;
13257 mode2 = insn_data[icode].operand[2].mode;
13259 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13261 op0 = copy_to_reg (op0);
13262 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
13264 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13266 error ("shift must be an immediate");
13267 return const0_rtx;
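/* The ashlti3/lshrti3 patterns work on TImode while the builtin's result
   is V2DImode, so allocate the result in V2DImode and pass the pattern a
   TImode view of it.  */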
13269 target = gen_reg_rtx (V2DImode);
13270 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
13271 if (! pat)
13272 return 0;
13273 emit_insn (pat);
13274 return target;
13276 case IX86_BUILTIN_FEMMS:
13277 emit_insn (gen_femms ());
13278 return NULL_RTX;
13280 case IX86_BUILTIN_PAVGUSB:
13281 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13283 case IX86_BUILTIN_PF2ID:
13284 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13286 case IX86_BUILTIN_PFACC:
13287 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13289 case IX86_BUILTIN_PFADD:
13290 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13292 case IX86_BUILTIN_PFCMPEQ:
13293 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13295 case IX86_BUILTIN_PFCMPGE:
13296 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13298 case IX86_BUILTIN_PFCMPGT:
13299 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13301 case IX86_BUILTIN_PFMAX:
13302 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13304 case IX86_BUILTIN_PFMIN:
13305 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13307 case IX86_BUILTIN_PFMUL:
13308 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13310 case IX86_BUILTIN_PFRCP:
13311 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13313 case IX86_BUILTIN_PFRCPIT1:
13314 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13316 case IX86_BUILTIN_PFRCPIT2:
13317 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13319 case IX86_BUILTIN_PFRSQIT1:
13320 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13322 case IX86_BUILTIN_PFRSQRT:
13323 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13325 case IX86_BUILTIN_PFSUB:
13326 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13328 case IX86_BUILTIN_PFSUBR:
13329 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13331 case IX86_BUILTIN_PI2FD:
13332 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13334 case IX86_BUILTIN_PMULHRW:
13335 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13337 case IX86_BUILTIN_PF2IW:
13338 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13340 case IX86_BUILTIN_PFNACC:
13341 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13343 case IX86_BUILTIN_PFPNACC:
13344 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13346 case IX86_BUILTIN_PI2FW:
13347 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13349 case IX86_BUILTIN_PSWAPDSI:
13350 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13352 case IX86_BUILTIN_PSWAPDSF:
13353 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13355 case IX86_BUILTIN_SSE_ZERO:
13356 target = gen_reg_rtx (V4SFmode);
13357 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
13358 return target;
13360 case IX86_BUILTIN_MMX_ZERO:
13361 target = gen_reg_rtx (DImode);
13362 emit_insn (gen_mmx_clrdi (target));
13363 return target;
13365 case IX86_BUILTIN_CLRTI:
13366 target = gen_reg_rtx (V2DImode);
13367 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
13368 return target;
13371 case IX86_BUILTIN_SQRTSD:
13372 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13373 case IX86_BUILTIN_LOADAPD:
13374 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13375 case IX86_BUILTIN_LOADUPD:
13376 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13378 case IX86_BUILTIN_STOREAPD:
13379 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13380 case IX86_BUILTIN_STOREUPD:
13381 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13383 case IX86_BUILTIN_LOADSD:
13384 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13386 case IX86_BUILTIN_STORESD:
13387 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
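/* Build a V2DF value from scalar arguments by spilling the doubles to a
   stack slot and loading the vector back; SETPD1 then duplicates the low
   element with shufpd.  */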
13389 case IX86_BUILTIN_SETPD1:
13390 target = assign_386_stack_local (DFmode, 0);
13391 arg0 = TREE_VALUE (arglist);
13392 emit_move_insn (adjust_address (target, DFmode, 0),
13393 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13394 op0 = gen_reg_rtx (V2DFmode);
13395 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13396 emit_insn (gen_sse2_shufpd (op0, op0, op0, const0_rtx));
13397 return op0;
13399 case IX86_BUILTIN_SETPD:
13400 target = assign_386_stack_local (V2DFmode, 0);
13401 arg0 = TREE_VALUE (arglist);
13402 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13403 emit_move_insn (adjust_address (target, DFmode, 0),
13404 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13405 emit_move_insn (adjust_address (target, DFmode, 8),
13406 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13407 op0 = gen_reg_rtx (V2DFmode);
13408 emit_insn (gen_sse2_movapd (op0, target));
13409 return op0;
13411 case IX86_BUILTIN_LOADRPD:
13412 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13413 gen_reg_rtx (V2DFmode), 1);
13414 emit_insn (gen_sse2_shufpd (target, target, target, const1_rtx));
13415 return target;
13417 case IX86_BUILTIN_LOADPD1:
13418 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13419 gen_reg_rtx (V2DFmode), 1);
13420 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13421 return target;
13423 case IX86_BUILTIN_STOREPD1:
13424 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13425 case IX86_BUILTIN_STORERPD:
13426 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13428 case IX86_BUILTIN_CLRPD:
13429 target = gen_reg_rtx (V2DFmode);
13430 emit_insn (gen_sse_clrv2df (target));
13431 return target;
13433 case IX86_BUILTIN_MFENCE:
13434 emit_insn (gen_sse2_mfence ());
13435 return 0;
13436 case IX86_BUILTIN_LFENCE:
13437 emit_insn (gen_sse2_lfence ());
13438 return 0;
13440 case IX86_BUILTIN_CLFLUSH:
13441 arg0 = TREE_VALUE (arglist);
13442 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13443 icode = CODE_FOR_sse2_clflush;
13444 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
13445 op0 = copy_to_mode_reg (Pmode, op0);
13447 emit_insn (gen_sse2_clflush (op0));
13448 return 0;
13450 case IX86_BUILTIN_MOVNTPD:
13451 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13452 case IX86_BUILTIN_MOVNTDQ:
13453 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13454 case IX86_BUILTIN_MOVNTI:
13455 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13457 case IX86_BUILTIN_LOADDQA:
13458 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
13459 case IX86_BUILTIN_LOADDQU:
13460 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
13461 case IX86_BUILTIN_LOADD:
13462 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
13464 case IX86_BUILTIN_STOREDQA:
13465 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
13466 case IX86_BUILTIN_STOREDQU:
13467 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
13468 case IX86_BUILTIN_STORED:
13469 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
13471 case IX86_BUILTIN_MONITOR:
13472 arg0 = TREE_VALUE (arglist);
13473 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13474 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13475 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13476 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13477 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13478 if (!REG_P (op0))
13479 op0 = copy_to_mode_reg (SImode, op0);
13480 if (!REG_P (op1))
13481 op1 = copy_to_mode_reg (SImode, op1);
13482 if (!REG_P (op2))
13483 op2 = copy_to_mode_reg (SImode, op2);
13484 emit_insn (gen_monitor (op0, op1, op2));
13485 return 0;
13487 case IX86_BUILTIN_MWAIT:
13488 arg0 = TREE_VALUE (arglist);
13489 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13490 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13491 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13492 if (!REG_P (op0))
13493 op0 = copy_to_mode_reg (SImode, op0);
13494 if (!REG_P (op1))
13495 op1 = copy_to_mode_reg (SImode, op1);
13496 emit_insn (gen_mwait (op0, op1));
13497 return 0;
13499 case IX86_BUILTIN_LOADDDUP:
13500 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
13502 case IX86_BUILTIN_LDDQU:
13503 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
13504 1);
13506 default:
13507 break;
13510 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13511 if (d->code == fcode)
13513 /* Compares are treated specially. */
13514 if (d->icode == CODE_FOR_maskcmpv4sf3
13515 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13516 || d->icode == CODE_FOR_maskncmpv4sf3
13517 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13518 || d->icode == CODE_FOR_maskcmpv2df3
13519 || d->icode == CODE_FOR_vmmaskcmpv2df3
13520 || d->icode == CODE_FOR_maskncmpv2df3
13521 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13522 return ix86_expand_sse_compare (d, arglist, target);
13524 return ix86_expand_binop_builtin (d->icode, arglist, target);
13527 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13528 if (d->code == fcode)
13529 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13531 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13532 if (d->code == fcode)
13533 return ix86_expand_sse_comi (d, arglist, target);
13535 /* @@@ Should really do something sensible here. */
13536 return 0;
13539 /* Store OPERAND to the memory after reload is completed. This means
13540 that we can't easily use assign_stack_local. */
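/* With a red zone the operand can simply be stored below the stack
   pointer; otherwise it is pushed, as one DImode push on 64-bit targets
   or as one or two SImode pushes on 32-bit targets.  */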
13541 rtx
13542 ix86_force_to_memory (enum machine_mode mode, rtx operand)
13544 rtx result;
13545 if (!reload_completed)
13546 abort ();
13547 if (TARGET_RED_ZONE)
13549 result = gen_rtx_MEM (mode,
13550 gen_rtx_PLUS (Pmode,
13551 stack_pointer_rtx,
13552 GEN_INT (-RED_ZONE_SIZE)));
13553 emit_move_insn (result, operand);
13555 else if (!TARGET_RED_ZONE && TARGET_64BIT)
13557 switch (mode)
13559 case HImode:
13560 case SImode:
13561 operand = gen_lowpart (DImode, operand);
13562 /* FALLTHRU */
13563 case DImode:
13564 emit_insn (
13565 gen_rtx_SET (VOIDmode,
13566 gen_rtx_MEM (DImode,
13567 gen_rtx_PRE_DEC (DImode,
13568 stack_pointer_rtx)),
13569 operand));
13570 break;
13571 default:
13572 abort ();
13574 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13576 else
13578 switch (mode)
13580 case DImode:
13582 rtx operands[2];
13583 split_di (&operand, 1, operands, operands + 1);
13584 emit_insn (
13585 gen_rtx_SET (VOIDmode,
13586 gen_rtx_MEM (SImode,
13587 gen_rtx_PRE_DEC (Pmode,
13588 stack_pointer_rtx)),
13589 operands[1]));
13590 emit_insn (
13591 gen_rtx_SET (VOIDmode,
13592 gen_rtx_MEM (SImode,
13593 gen_rtx_PRE_DEC (Pmode,
13594 stack_pointer_rtx)),
13595 operands[0]));
13597 break;
13598 case HImode:
13599 /* It is better to store HImodes as SImodes. */
13600 if (!TARGET_PARTIAL_REG_STALL)
13601 operand = gen_lowpart (SImode, operand);
13602 /* FALLTHRU */
13603 case SImode:
13604 emit_insn (
13605 gen_rtx_SET (VOIDmode,
13606 gen_rtx_MEM (GET_MODE (operand),
13607 gen_rtx_PRE_DEC (SImode,
13608 stack_pointer_rtx)),
13609 operand));
13610 break;
13611 default:
13612 abort ();
13614 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13616 return result;
13619 /* Free operand from the memory. */
13620 void
13621 ix86_free_from_memory (enum machine_mode mode)
13623 if (!TARGET_RED_ZONE)
13625 int size;
13627 if (mode == DImode || TARGET_64BIT)
13628 size = 8;
13629 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13630 size = 2;
13631 else
13632 size = 4;
13633 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13634 to a pop or add instruction if registers are available. */
13635 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13636 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13637 GEN_INT (size))));
13641 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13642 QImode must go into class Q_REGS.
13643 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
13644 movdf to do mem-to-mem moves through integer regs. */
13645 enum reg_class
13646 ix86_preferred_reload_class (rtx x, enum reg_class class)
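/* A nonzero vector constant can't be loaded into a register directly;
   preferring NO_REGS sends it to the constant pool instead.  */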
13648 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
13649 return NO_REGS;
13650 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13652 /* SSE can't load any constant directly yet. */
13653 if (SSE_CLASS_P (class))
13654 return NO_REGS;
13655 /* Floats can load 0 and 1. */
13656 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13658 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13659 if (MAYBE_SSE_CLASS_P (class))
13660 return (reg_class_subset_p (class, GENERAL_REGS)
13661 ? GENERAL_REGS : FLOAT_REGS);
13662 else
13663 return class;
13665 /* General regs can load everything. */
13666 if (reg_class_subset_p (class, GENERAL_REGS))
13667 return GENERAL_REGS;
13668 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13669 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
13670 return NO_REGS;
13672 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
13673 return NO_REGS;
13674 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13675 return Q_REGS;
13676 return class;
13679 /* If we are copying between general and FP registers, we need a memory
13680 location. The same is true for SSE and MMX registers.
13682 The macro can't work reliably when one of the CLASSES is a class containing
13683 registers from multiple units (SSE, MMX, integer). We avoid this by never
13684 combining those units in a single alternative in the machine description.
13685 Ensure that this constraint holds to avoid unexpected surprises.
13687 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13688 enforce these sanity checks. */
13689 int
13690 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
13691 enum machine_mode mode, int strict)
13693 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
13694 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
13695 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
13696 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
13697 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
13698 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
13700 if (strict)
13701 abort ();
13702 else
13703 return 1;
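/* Otherwise a secondary memory location is needed for x87 <-> non-x87
   moves, and for moves crossing the SSE or MMX boundary unless the mode
   fits a direct movd/movq (SImode, or DImode in 64-bit mode) and
   inter-unit moves are enabled.  */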
13705 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
13706 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
13707 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
13708 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
13709 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
13711 /* Return the cost of moving data from a register in class CLASS1 to
13712 one in class CLASS2.
13714 It is not required that the cost always equal 2 when FROM is the same as TO;
13715 on some machines it is expensive to move between registers if they are not
13716 general registers. */
13717 int
13718 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
13719 enum reg_class class2)
13721 /* In case we require secondary memory, compute cost of the store followed
13722 by load. In order to avoid bad register allocation choices, we need
13723 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
13725 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
13727 int cost = 1;
13729 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
13730 MEMORY_MOVE_COST (mode, class1, 1));
13731 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
13732 MEMORY_MOVE_COST (mode, class2, 1));
13734 /* When copying from a general purpose register we may emit multiple
13735 stores followed by a single load, causing a memory size mismatch stall.
13736 Count this as an arbitrarily high cost of 20. */
13737 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
13738 cost += 20;
13740 /* In the case of FP/MMX moves, the registers actually overlap, and we
13741 have to switch modes in order to treat them differently. */
13742 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
13743 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
13744 cost += 20;
13746 return cost;
13749 /* Moves between SSE/MMX and integer unit are expensive. */
13750 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13751 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
13752 return ix86_cost->mmxsse_to_integer;
13753 if (MAYBE_FLOAT_CLASS_P (class1))
13754 return ix86_cost->fp_move;
13755 if (MAYBE_SSE_CLASS_P (class1))
13756 return ix86_cost->sse_move;
13757 if (MAYBE_MMX_CLASS_P (class1))
13758 return ix86_cost->mmx_move;
13759 return 2;
13762 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
13763 int
13764 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
13766 /* Flags and only flags can only hold CCmode values. */
13767 if (CC_REGNO_P (regno))
13768 return GET_MODE_CLASS (mode) == MODE_CC;
13769 if (GET_MODE_CLASS (mode) == MODE_CC
13770 || GET_MODE_CLASS (mode) == MODE_RANDOM
13771 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
13772 return 0;
13773 if (FP_REGNO_P (regno))
13774 return VALID_FP_MODE_P (mode);
13775 if (SSE_REGNO_P (regno))
13776 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
13777 if (MMX_REGNO_P (regno))
13778 return (TARGET_MMX
13779 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
13780 /* We handle both integers and floats in the general purpose registers.
13781 In the future we should be able to handle vector modes as well. */
13782 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
13783 return 0;
13784 /* Take care with QImode values - they can be in non-QI regs, but then
13785 they do cause partial register stalls. */
13786 if (regno < 4 || mode != QImode || TARGET_64BIT)
13787 return 1;
13788 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
13791 /* Return the cost of moving data of mode M between a
13792 register and memory. A value of 2 is the default; this cost is
13793 relative to those in `REGISTER_MOVE_COST'.
13795 If moving between registers and memory is more expensive than
13796 between two registers, you should define this macro to express the
13797 relative cost.
13799 Also model the increased cost of moving QImode registers in
13800 non-Q_REGS classes. */
13802 int
13803 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
13805 if (FLOAT_CLASS_P (class))
13807 int index;
13808 switch (mode)
13810 case SFmode:
13811 index = 0;
13812 break;
13813 case DFmode:
13814 index = 1;
13815 break;
13816 case XFmode:
13817 index = 2;
13818 break;
13819 default:
13820 return 100;
13822 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
13824 if (SSE_CLASS_P (class))
13826 int index;
13827 switch (GET_MODE_SIZE (mode))
13829 case 4:
13830 index = 0;
13831 break;
13832 case 8:
13833 index = 1;
13834 break;
13835 case 16:
13836 index = 2;
13837 break;
13838 default:
13839 return 100;
13841 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
13843 if (MMX_CLASS_P (class))
13845 int index;
13846 switch (GET_MODE_SIZE (mode))
13848 case 4:
13849 index = 0;
13850 break;
13851 case 8:
13852 index = 1;
13853 break;
13854 default:
13855 return 100;
13857 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
13859 switch (GET_MODE_SIZE (mode))
13861 case 1:
13862 if (in)
13863 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
13864 : ix86_cost->movzbl_load);
13865 else
13866 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
13867 : ix86_cost->int_store[0] + 4);
13868 break;
13869 case 2:
13870 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
13871 default:
13872 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
13873 if (mode == TFmode)
13874 mode = XFmode;
13875 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
13876 * (((int) GET_MODE_SIZE (mode)
13877 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
13881 /* Compute a (partial) cost for rtx X. Return true if the complete
13882 cost has been computed, and false if subexpressions should be
13883 scanned. In either case, *TOTAL contains the cost result. */
13885 static bool
13886 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
13888 enum machine_mode mode = GET_MODE (x);
13890 switch (code)
13892 case CONST_INT:
13893 case CONST:
13894 case LABEL_REF:
13895 case SYMBOL_REF:
13896 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
13897 *total = 3;
13898 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
13899 *total = 2;
13900 else if (flag_pic && SYMBOLIC_CONST (x)
13901 && (!TARGET_64BIT
13902 || (GET_CODE (x) != LABEL_REF
13903 && (GET_CODE (x) != SYMBOL_REF
13904 || !SYMBOL_REF_LOCAL_P (x)))))
13905 *total = 1;
13906 else
13907 *total = 0;
13908 return true;
13910 case CONST_DOUBLE:
13911 if (mode == VOIDmode)
13912 *total = 0;
13913 else
13914 switch (standard_80387_constant_p (x))
13916 case 1: /* 0.0 */
13917 *total = 1;
13918 break;
13919 default: /* Other constants */
13920 *total = 2;
13921 break;
13922 case 0:
13923 case -1:
13924 /* Start with (MEM (SYMBOL_REF)), since that's where
13925 it'll probably end up. Add a penalty for size. */
13926 *total = (COSTS_N_INSNS (1)
13927 + (flag_pic != 0 && !TARGET_64BIT)
13928 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
13929 break;
13931 return true;
13933 case ZERO_EXTEND:
13934 /* The zero extension is often completely free on x86_64, so make
13935 it as cheap as possible. */
13936 if (TARGET_64BIT && mode == DImode
13937 && GET_MODE (XEXP (x, 0)) == SImode)
13938 *total = 1;
13939 else if (TARGET_ZERO_EXTEND_WITH_AND)
13940 *total = COSTS_N_INSNS (ix86_cost->add);
13941 else
13942 *total = COSTS_N_INSNS (ix86_cost->movzx);
13943 return false;
13945 case SIGN_EXTEND:
13946 *total = COSTS_N_INSNS (ix86_cost->movsx);
13947 return false;
13949 case ASHIFT:
13950 if (GET_CODE (XEXP (x, 1)) == CONST_INT
13951 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
13953 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
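/* A shift by 1 is as cheap as an add (reg+reg); shifts by 2 or 3 can be
   done with lea's scale-by-4/8 addressing when lea is no more expensive
   than a constant shift.  */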
13954 if (value == 1)
13956 *total = COSTS_N_INSNS (ix86_cost->add);
13957 return false;
13959 if ((value == 2 || value == 3)
13960 && ix86_cost->lea <= ix86_cost->shift_const)
13962 *total = COSTS_N_INSNS (ix86_cost->lea);
13963 return false;
13966 /* FALLTHRU */
13968 case ROTATE:
13969 case ASHIFTRT:
13970 case LSHIFTRT:
13971 case ROTATERT:
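/* DImode shifts on 32-bit targets expand to multi-insn sequences:
   roughly two word-sized shifts for a constant count, and a longer
   sequence for a variable count, presumably cheaper when the count is
   already masked by an AND.  */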
13972 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
13974 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
13976 if (INTVAL (XEXP (x, 1)) > 32)
13977 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
13978 else
13979 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
13981 else
13983 if (GET_CODE (XEXP (x, 1)) == AND)
13984 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
13985 else
13986 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
13989 else
13991 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
13992 *total = COSTS_N_INSNS (ix86_cost->shift_const);
13993 else
13994 *total = COSTS_N_INSNS (ix86_cost->shift_var);
13996 return false;
13998 case MULT:
13999 if (FLOAT_MODE_P (mode))
14001 *total = COSTS_N_INSNS (ix86_cost->fmul);
14002 return false;
14004 else
14006 rtx op0 = XEXP (x, 0);
14007 rtx op1 = XEXP (x, 1);
14008 int nbits;
14009 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14011 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14012 for (nbits = 0; value != 0; value &= value - 1)
14013 nbits++;
14015 else
14016 /* This is arbitrary. */
14017 nbits = 7;
14019 /* Compute costs correctly for widening multiplication. */
14020 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
14021 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
14022 == GET_MODE_SIZE (mode))
14024 int is_mulwiden = 0;
14025 enum machine_mode inner_mode = GET_MODE (op0);
14027 if (GET_CODE (op0) == GET_CODE (op1))
14028 is_mulwiden = 1, op1 = XEXP (op1, 0);
14029 else if (GET_CODE (op1) == CONST_INT)
14031 if (GET_CODE (op0) == SIGN_EXTEND)
14032 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
14033 == INTVAL (op1);
14034 else
14035 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
14038 if (is_mulwiden)
14039 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
14042 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14043 + nbits * ix86_cost->mult_bit)
14044 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
14046 return true;
14049 case DIV:
14050 case UDIV:
14051 case MOD:
14052 case UMOD:
14053 if (FLOAT_MODE_P (mode))
14054 *total = COSTS_N_INSNS (ix86_cost->fdiv);
14055 else
14056 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
14057 return false;
14059 case PLUS:
14060 if (FLOAT_MODE_P (mode))
14061 *total = COSTS_N_INSNS (ix86_cost->fadd);
14062 else if (GET_MODE_CLASS (mode) == MODE_INT
14063 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
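/* The forms recognized below are the address shapes a single lea can
   compute (base + index*2/4/8 + displacement), so cost them as one lea
   plus the cost of the operands.  */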
14065 if (GET_CODE (XEXP (x, 0)) == PLUS
14066 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14067 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
14068 && CONSTANT_P (XEXP (x, 1)))
14070 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
14071 if (val == 2 || val == 4 || val == 8)
14073 *total = COSTS_N_INSNS (ix86_cost->lea);
14074 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14075 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
14076 outer_code);
14077 *total += rtx_cost (XEXP (x, 1), outer_code);
14078 return true;
14081 else if (GET_CODE (XEXP (x, 0)) == MULT
14082 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
14084 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
14085 if (val == 2 || val == 4 || val == 8)
14087 *total = COSTS_N_INSNS (ix86_cost->lea);
14088 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14089 *total += rtx_cost (XEXP (x, 1), outer_code);
14090 return true;
14093 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14095 *total = COSTS_N_INSNS (ix86_cost->lea);
14096 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14097 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14098 *total += rtx_cost (XEXP (x, 1), outer_code);
14099 return true;
14102 /* FALLTHRU */
14104 case MINUS:
14105 if (FLOAT_MODE_P (mode))
14107 *total = COSTS_N_INSNS (ix86_cost->fadd);
14108 return false;
14110 /* FALLTHRU */
14112 case AND:
14113 case IOR:
14114 case XOR:
14115 if (!TARGET_64BIT && mode == DImode)
14117 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
14118 + (rtx_cost (XEXP (x, 0), outer_code)
14119 << (GET_MODE (XEXP (x, 0)) != DImode))
14120 + (rtx_cost (XEXP (x, 1), outer_code)
14121 << (GET_MODE (XEXP (x, 1)) != DImode)));
14122 return true;
14124 /* FALLTHRU */
14126 case NEG:
14127 if (FLOAT_MODE_P (mode))
14129 *total = COSTS_N_INSNS (ix86_cost->fchs);
14130 return false;
14132 /* FALLTHRU */
14134 case NOT:
14135 if (!TARGET_64BIT && mode == DImode)
14136 *total = COSTS_N_INSNS (ix86_cost->add * 2);
14137 else
14138 *total = COSTS_N_INSNS (ix86_cost->add);
14139 return false;
14141 case FLOAT_EXTEND:
14142 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
14143 *total = 0;
14144 return false;
14146 case ABS:
14147 if (FLOAT_MODE_P (mode))
14148 *total = COSTS_N_INSNS (ix86_cost->fabs);
14149 return false;
14151 case SQRT:
14152 if (FLOAT_MODE_P (mode))
14153 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
14154 return false;
14156 case UNSPEC:
14157 if (XINT (x, 1) == UNSPEC_TP)
14158 *total = 0;
14159 return false;
14161 default:
14162 return false;
14166 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
14167 static void
14168 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
14170 init_section ();
14171 fputs ("\tpushl $", asm_out_file);
14172 assemble_name (asm_out_file, XSTR (symbol, 0));
14173 fputc ('\n', asm_out_file);
14175 #endif
14177 #if TARGET_MACHO
14179 static int current_machopic_label_num;
14181 /* Given a symbol name and its associated stub, write out the
14182 definition of the stub. */
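/* The stub jumps through its lazy pointer, which initially points at the
   binder code below; the binder pushes the lazy pointer's address and
   jumps to dyld_stub_binding_helper, which resolves the symbol and
   rewrites the lazy pointer on first use.  */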
14184 void
14185 machopic_output_stub (FILE *file, const char *symb, const char *stub)
14187 unsigned int length;
14188 char *binder_name, *symbol_name, lazy_ptr_name[32];
14189 int label = ++current_machopic_label_num;
14191 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
14192 symb = (*targetm.strip_name_encoding) (symb);
14194 length = strlen (stub);
14195 binder_name = alloca (length + 32);
14196 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
14198 length = strlen (symb);
14199 symbol_name = alloca (length + 32);
14200 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
14202 sprintf (lazy_ptr_name, "L%d$lz", label);
14204 if (MACHOPIC_PURE)
14205 machopic_picsymbol_stub_section ();
14206 else
14207 machopic_symbol_stub_section ();
14209 fprintf (file, "%s:\n", stub);
14210 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14212 if (MACHOPIC_PURE)
14214 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
14215 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
14216 fprintf (file, "\tjmp %%edx\n");
14218 else
14219 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
14221 fprintf (file, "%s:\n", binder_name);
14223 if (MACHOPIC_PURE)
14225 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
14226 fprintf (file, "\tpushl %%eax\n");
14228 else
14229 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
14231 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
14233 machopic_lazy_symbol_ptr_section ();
14234 fprintf (file, "%s:\n", lazy_ptr_name);
14235 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14236 fprintf (file, "\t.long %s\n", binder_name);
14238 #endif /* TARGET_MACHO */
14240 /* Order the registers for register allocator. */
14242 void
14243 x86_order_regs_for_local_alloc (void)
14245 int pos = 0;
14246 int i;
14248 /* First allocate the local general purpose registers. */
14249 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14250 if (GENERAL_REGNO_P (i) && call_used_regs[i])
14251 reg_alloc_order [pos++] = i;
14253 /* Global general purpose registers. */
14254 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14255 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14256 reg_alloc_order [pos++] = i;
14258 /* x87 registers come first in case we are doing FP math
14259 using them. */
14260 if (!TARGET_SSE_MATH)
14261 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14262 reg_alloc_order [pos++] = i;
14264 /* SSE registers. */
14265 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14266 reg_alloc_order [pos++] = i;
14267 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14268 reg_alloc_order [pos++] = i;
14270 /* x87 registers. */
14271 if (TARGET_SSE_MATH)
14272 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14273 reg_alloc_order [pos++] = i;
14275 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14276 reg_alloc_order [pos++] = i;
14278 /* Initialize the rest of the array, as some registers are never
14279 allocated at all. */
14280 while (pos < FIRST_PSEUDO_REGISTER)
14281 reg_alloc_order [pos++] = 0;
14284 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
14285 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
14286 #endif
14288 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
14289 struct attribute_spec.handler. */
14290 static tree
14291 ix86_handle_struct_attribute (tree *node, tree name,
14292 tree args ATTRIBUTE_UNUSED,
14293 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
14295 tree *type = NULL;
14296 if (DECL_P (*node))
14298 if (TREE_CODE (*node) == TYPE_DECL)
14299 type = &TREE_TYPE (*node);
14301 else
14302 type = node;
14304 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
14305 || TREE_CODE (*type) == UNION_TYPE)))
14307 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
14308 *no_add_attrs = true;
14311 else if ((is_attribute_p ("ms_struct", name)
14312 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
14313 || ((is_attribute_p ("gcc_struct", name)
14314 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
14316 warning ("`%s' incompatible attribute ignored",
14317 IDENTIFIER_POINTER (name));
14318 *no_add_attrs = true;
14321 return NULL_TREE;
14324 static bool
14325 ix86_ms_bitfield_layout_p (tree record_type)
14327 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
14328 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
14329 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
14332 /* Returns an expression indicating where the this parameter is
14333 located on entry to the FUNCTION. */
14335 static rtx
14336 x86_this_parameter (tree function)
14338 tree type = TREE_TYPE (function);
14340 if (TARGET_64BIT)
14342 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
14343 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
14346 if (ix86_function_regparm (type, function) > 0)
14348 tree parm;
14350 parm = TYPE_ARG_TYPES (type);
14351 /* Figure out whether or not the function has a variable number of
14352 arguments. */
14353 for (; parm; parm = TREE_CHAIN (parm))
14354 if (TREE_VALUE (parm) == void_type_node)
14355 break;
14356 /* If not variadic, the this parameter is passed in the first argument register. */
14357 if (parm)
14359 int regno = 0;
14360 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
14361 regno = 2;
14362 return gen_rtx_REG (SImode, regno);
14366 if (aggregate_value_p (TREE_TYPE (type), type))
14367 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
14368 else
14369 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
14372 /* Determine whether x86_output_mi_thunk can succeed. */
14374 static bool
14375 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
14376 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
14377 HOST_WIDE_INT vcall_offset, tree function)
14379 /* 64-bit can handle anything. */
14380 if (TARGET_64BIT)
14381 return true;
14383 /* For 32-bit, everything's fine if we have one free register. */
14384 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
14385 return true;
14387 /* Need a free register for vcall_offset. */
14388 if (vcall_offset)
14389 return false;
14391 /* Need a free register for GOT references. */
14392 if (flag_pic && !(*targetm.binds_local_p) (function))
14393 return false;
14395 /* Otherwise ok. */
14396 return true;
14399 /* Output the assembler code for a thunk function. THUNK_DECL is the
14400 declaration for the thunk function itself, FUNCTION is the decl for
14401 the target function. DELTA is an immediate constant offset to be
14402 added to THIS. If VCALL_OFFSET is nonzero, the word at
14403 *(*this + vcall_offset) should be added to THIS. */
14405 static void
14406 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
14407 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
14408 HOST_WIDE_INT vcall_offset, tree function)
14410 rtx xops[3];
14411 rtx this = x86_this_parameter (function);
14412 rtx this_reg, tmp;
14414 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
14415 pull it in now and let DELTA benefit. */
14416 if (REG_P (this))
14417 this_reg = this;
14418 else if (vcall_offset)
14420 /* Put the this parameter into %eax. */
14421 xops[0] = this;
14422 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
14423 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14425 else
14426 this_reg = NULL_RTX;
14428 /* Adjust the this parameter by a fixed constant. */
14429 if (delta)
14431 xops[0] = GEN_INT (delta);
14432 xops[1] = this_reg ? this_reg : this;
14433 if (TARGET_64BIT)
14435 if (!x86_64_general_operand (xops[0], DImode))
14437 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14438 xops[1] = tmp;
14439 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
14440 xops[0] = tmp;
14441 xops[1] = this;
14443 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14445 else
14446 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14449 /* Adjust the this parameter by a value stored in the vtable. */
14450 if (vcall_offset)
14452 if (TARGET_64BIT)
14453 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14454 else
14456 int tmp_regno = 2 /* ECX */;
14457 if (lookup_attribute ("fastcall",
14458 TYPE_ATTRIBUTES (TREE_TYPE (function))))
14459 tmp_regno = 0 /* EAX */;
14460 tmp = gen_rtx_REG (SImode, tmp_regno);
14463 xops[0] = gen_rtx_MEM (Pmode, this_reg);
14464 xops[1] = tmp;
14465 if (TARGET_64BIT)
14466 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14467 else
14468 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14470 /* Adjust the this parameter. */
14471 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
14472 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
14474 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
14475 xops[0] = GEN_INT (vcall_offset);
14476 xops[1] = tmp2;
14477 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14478 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
14480 xops[1] = this_reg;
14481 if (TARGET_64BIT)
14482 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14483 else
14484 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14487 /* If necessary, drop THIS back to its stack slot. */
14488 if (this_reg && this_reg != this)
14490 xops[0] = this_reg;
14491 xops[1] = this;
14492 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14495 xops[0] = XEXP (DECL_RTL (function), 0);
14496 if (TARGET_64BIT)
14498 if (!flag_pic || (*targetm.binds_local_p) (function))
14499 output_asm_insn ("jmp\t%P0", xops);
14500 else
14502 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
14503 tmp = gen_rtx_CONST (Pmode, tmp);
14504 tmp = gen_rtx_MEM (QImode, tmp);
14505 xops[0] = tmp;
14506 output_asm_insn ("jmp\t%A0", xops);
14509 else
14511 if (!flag_pic || (*targetm.binds_local_p) (function))
14512 output_asm_insn ("jmp\t%P0", xops);
14513 else
14514 #if TARGET_MACHO
14515 if (TARGET_MACHO)
14517 rtx sym_ref = XEXP (DECL_RTL (function), 0);
14518 tmp = (gen_rtx_SYMBOL_REF
14519 (Pmode,
14520 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
14521 tmp = gen_rtx_MEM (QImode, tmp);
14522 xops[0] = tmp;
14523 output_asm_insn ("jmp\t%0", xops);
14525 else
14526 #endif /* TARGET_MACHO */
14528 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14529 output_set_got (tmp);
14531 xops[1] = tmp;
14532 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
14533 output_asm_insn ("jmp\t{*}%1", xops);
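/* Illustrative sketch (not part of i386.c): what the thunk code emitted
   above computes, expressed in C.  THIS is first adjusted by the constant
   DELTA; if VCALL_OFFSET is nonzero, a further displacement is loaded
   through the object's vtable; FUNCTION is then tail-called with the
   adjusted pointer.  The helper name is hypothetical.  */

static void *
thunk_adjust_this (void *this_ptr, long delta, long vcall_offset)
{
  char *p = (char *) this_ptr + delta;   /* the DELTA adjustment */

  if (vcall_offset)
    {
      /* The (already adjusted) object starts with its vtable pointer;
         the word at vtable + VCALL_OFFSET holds a further displacement.  */
      char *vtable = *(char **) p;
      p += *(long *) (vtable + vcall_offset);
    }
  return p;   /* control then passes to FUNCTION with this pointer */
}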
14538 static void
14539 x86_file_start (void)
14541 default_file_start ();
14542 if (X86_FILE_START_VERSION_DIRECTIVE)
14543 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
14544 if (X86_FILE_START_FLTUSED)
14545 fputs ("\t.global\t__fltused\n", asm_out_file);
14546 if (ix86_asm_dialect == ASM_INTEL)
14547 fputs ("\t.intel_syntax\n", asm_out_file);
14551 x86_field_alignment (tree field, int computed)
14553 enum machine_mode mode;
14554 tree type = TREE_TYPE (field);
14556 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
14557 return computed;
14558 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
14559 ? get_inner_array_type (type) : type);
14560 if (mode == DFmode || mode == DCmode
14561 || GET_MODE_CLASS (mode) == MODE_INT
14562 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
14563 return MIN (32, computed);
14564 return computed;
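/* Illustrative sketch (not part of i386.c): the effect of the 32-bit cap
   above.  On ia32 without -malign-double, a double field inside a struct
   is aligned to 4 bytes rather than its natural 8, so the member below
   lands at offset 4; with -malign-double, or on 64-bit targets, it lands
   at offset 8.  */

struct sample
{
  int i;      /* offset 0 */
  double d;   /* offset 4 on ia32 by default, offset 8 with -malign-double */
};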
14567 /* Output assembler code to FILE to increment profiler label # LABELNO
14568 for profiling a function entry. */
14569 void
14570 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
14572 if (TARGET_64BIT)
14573 if (flag_pic)
14575 #ifndef NO_PROFILE_COUNTERS
14576 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
14577 #endif
14578 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
14580 else
14582 #ifndef NO_PROFILE_COUNTERS
14583 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
14584 #endif
14585 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14587 else if (flag_pic)
14589 #ifndef NO_PROFILE_COUNTERS
14590 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
14591 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
14592 #endif
14593 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
14595 else
14597 #ifndef NO_PROFILE_COUNTERS
14598 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
14599 PROFILE_COUNT_REGISTER);
14600 #endif
14601 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14605 /* We don't have exact information about the insn sizes, but we may assume
14606 quite safely that we are informed about all 1 byte insns and memory
14607 address sizes. This is enough to eliminate unnecessary padding in
14608 99% of cases. */
14610 static int
14611 min_insn_size (rtx insn)
14613 int l = 0;
14615 if (!INSN_P (insn) || !active_insn_p (insn))
14616 return 0;
14618 /* Discard alignments we've emitted, and jump tables. */
14619 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
14620 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
14621 return 0;
14622 if (GET_CODE (insn) == JUMP_INSN
14623 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
14624 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
14625 return 0;
14627 /* Important case - calls are always 5 bytes.
14628 It is common to have many calls in a row. */
14629 if (GET_CODE (insn) == CALL_INSN
14630 && symbolic_reference_mentioned_p (PATTERN (insn))
14631 && !SIBLING_CALL_P (insn))
14632 return 5;
14633 if (get_attr_length (insn) <= 1)
14634 return 1;
14636 /* For normal instructions we may rely on the sizes of addresses
14637 and on the presence of a symbol to require 4 bytes of encoding.
14638 This is not the case for jumps, where references are PC relative. */
14639 if (GET_CODE (insn) != JUMP_INSN)
14641 l = get_attr_length_address (insn);
14642 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
14643 l = 4;
14645 if (l)
14646 return 1+l;
14647 else
14648 return 2;
14651 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
14652 16 byte window. */
14654 static void
14655 ix86_avoid_jump_misspredicts (void)
14657 rtx insn, start = get_insns ();
14658 int nbytes = 0, njumps = 0;
14659 int isjump = 0;
14661 /* Look for all minimal intervals of instructions containing 4 jumps.
14662 The intervals are bounded by START and INSN. NBYTES is the total
14663 size of the instructions in the interval, including INSN and not
14664 including START. When NBYTES is smaller than 16 bytes, it is possible
14665 that the ends of START and INSN fall in the same 16 byte window.
14667 The smallest offset in the window at which INSN can start is the case where
14668 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
14669 We add a p2align to the 16 byte window with maxskip 17 - NBYTES + sizeof (INSN). */
14671 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14674 nbytes += min_insn_size (insn);
14675 if (dump_file)
14676 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
14677 INSN_UID (insn), min_insn_size (insn));
14678 if ((GET_CODE (insn) == JUMP_INSN
14679 && GET_CODE (PATTERN (insn)) != ADDR_VEC
14680 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
14681 || GET_CODE (insn) == CALL_INSN)
14682 njumps++;
14683 else
14684 continue;
14686 while (njumps > 3)
14688 start = NEXT_INSN (start);
14689 if ((GET_CODE (start) == JUMP_INSN
14690 && GET_CODE (PATTERN (start)) != ADDR_VEC
14691 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
14692 || GET_CODE (start) == CALL_INSN)
14693 njumps--, isjump = 1;
14694 else
14695 isjump = 0;
14696 nbytes -= min_insn_size (start);
14698 if (njumps < 0)
14699 abort ();
14700 if (dump_file)
14701 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
14702 INSN_UID (start), INSN_UID (insn), nbytes);
14704 if (njumps == 3 && isjump && nbytes < 16)
14706 int padsize = 15 - nbytes + min_insn_size (insn);
14708 if (dump_file)
14709 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
14710 INSN_UID (insn), padsize);
14711 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
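/* Illustrative sketch (not part of i386.c): the sliding-window scan above,
   restated over plain arrays.  SIZE[i] is the estimated size of instruction
   i and IS_JUMP[i] says whether it is a (non-table) jump or a call.  When a
   group of four such instructions can share a 16 byte window, padding is
   requested before the last one, using the same 15 - NBYTES + sizeof (INSN)
   amount as the code above.  All names here are hypothetical.  */

static void
pad_four_jump_windows (const int *size, const int *is_jump, int n,
                       void (*request_pad) (int insn, int bytes))
{
  /* NBYTES covers instructions (START, I]; NJUMPS counts jumps in that
     range; START_IS_JUMP records whether instruction START is a jump.  */
  int start = -1, nbytes = 0, njumps = 0, start_is_jump = 0;
  int i;

  for (i = 0; i < n; i++)
    {
      nbytes += size[i];
      if (!is_jump[i])
        continue;
      njumps++;

      /* Shrink the interval until at most three jumps follow START.  */
      while (njumps > 3)
        {
          start++;
          start_is_jump = is_jump[start];
          if (start_is_jump)
            njumps--;
          nbytes -= size[start];
        }

      /* START plus the three jumps ending at I make four jumps that can
         share a 16 byte window whenever NBYTES is below 16.  */
      if (njumps == 3 && start_is_jump && nbytes < 16)
        request_pad (i, 15 - nbytes + size[i]);
    }
}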
14716 /* The AMD Athlon works faster
14717 when RET is not the destination of a conditional jump or directly preceded
14718 by another jump instruction. We avoid the penalty by inserting a NOP just
14719 before such RET instructions. */
14720 static void
14721 ix86_pad_returns (void)
14723 edge e;
14725 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
14727 basic_block bb = e->src;
14728 rtx ret = BB_END (bb);
14729 rtx prev;
14730 bool replace = false;
14732 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
14733 || !maybe_hot_bb_p (bb))
14734 continue;
14735 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
14736 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
14737 break;
14738 if (prev && GET_CODE (prev) == CODE_LABEL)
14740 edge e;
14741 for (e = bb->pred; e; e = e->pred_next)
14742 if (EDGE_FREQUENCY (e) && e->src->index >= 0
14743 && !(e->flags & EDGE_FALLTHRU))
14744 replace = true;
14746 if (!replace)
14748 prev = prev_active_insn (ret);
14749 if (prev
14750 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
14751 || GET_CODE (prev) == CALL_INSN))
14752 replace = true;
14753 /* Empty functions get a branch mispredict even when the jump destination
14754 is not visible to us. */
14755 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
14756 replace = true;
14758 if (replace)
14760 emit_insn_before (gen_return_internal_long (), ret);
14761 delete_insn (ret);
14766 /* Implement machine specific optimizations. We implement padding of returns
14767 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
14768 static void
14769 ix86_reorg (void)
14771 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
14772 ix86_pad_returns ();
14773 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
14774 ix86_avoid_jump_misspredicts ();
14777 /* Return nonzero when a QImode register that must be represented via a REX
14778 prefix is used. */
14779 bool
14780 x86_extended_QIreg_mentioned_p (rtx insn)
14782 int i;
14783 extract_insn_cached (insn);
14784 for (i = 0; i < recog_data.n_operands; i++)
14785 if (REG_P (recog_data.operand[i])
14786 && REGNO (recog_data.operand[i]) >= 4)
14787 return true;
14788 return false;
14791 /* Return nonzero when P points to a register encoded via a REX prefix.
14792 Called via for_each_rtx. */
14793 static int
14794 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
14796 unsigned int regno;
14797 if (!REG_P (*p))
14798 return 0;
14799 regno = REGNO (*p);
14800 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
14803 /* Return true when INSN mentions a register that must be encoded using a
14804 REX prefix. */
14805 bool
14806 x86_extended_reg_mentioned_p (rtx insn)
14808 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
14811 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
14812 optabs would emit if we didn't have TFmode patterns. */
14814 void
14815 x86_emit_floatuns (rtx operands[2])
14817 rtx neglab, donelab, i0, i1, f0, in, out;
14818 enum machine_mode mode, inmode;
14820 inmode = GET_MODE (operands[1]);
14821 if (inmode != SImode
14822 && inmode != DImode)
14823 abort ();
14825 out = operands[0];
14826 in = force_reg (inmode, operands[1]);
14827 mode = GET_MODE (out);
14828 neglab = gen_label_rtx ();
14829 donelab = gen_label_rtx ();
14830 i1 = gen_reg_rtx (Pmode);
14831 f0 = gen_reg_rtx (mode);
14833 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
14835 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
14836 emit_jump_insn (gen_jump (donelab));
14837 emit_barrier ();
14839 emit_label (neglab);
14841 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
14842 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
14843 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
14844 expand_float (f0, i0, 0);
14845 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
14847 emit_label (donelab);
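/* Illustrative sketch (not part of i386.c): the conversion strategy the
   expander above emits, written as plain C for the 64-bit input case.
   When the high bit is clear, a signed conversion is exact; otherwise the
   value is halved with the low bit folded back in (round to odd, so the
   final doubling rounds correctly) and the result is doubled.  The
   function name is hypothetical.  */

static double
uint64_to_double (unsigned long long u)
{
  unsigned long long halved;
  double d;

  if ((long long) u >= 0)
    return (double) (long long) u;   /* high bit clear: signed convert is exact */

  halved = (u >> 1) | (u & 1);       /* halve, folding the low bit back in */
  d = (double) (long long) halved;
  return d + d;                      /* doubling restores the magnitude */
}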
14850 /* Initialize vector TARGET via VALS. */
14851 void
14852 ix86_expand_vector_init (rtx target, rtx vals)
14854 enum machine_mode mode = GET_MODE (target);
14855 int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
14856 int n_elts = (GET_MODE_SIZE (mode) / elt_size);
14857 int i;
14859 for (i = n_elts - 1; i >= 0; i--)
14860 if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
14861 && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
14862 break;
14864 /* A few special cases first...
14865 ... constants are best loaded from the constant pool. */
14866 if (i < 0)
14868 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
14869 return;
14872 /* ... values where only the first field is non-constant are best loaded
14873 from the pool and then overwritten via a move later. */
14874 if (!i)
14876 rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
14877 GET_MODE_INNER (mode), 0);
14879 op = force_reg (mode, op);
14880 XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
14881 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
14882 switch (GET_MODE (target))
14884 case V2DFmode:
14885 emit_insn (gen_sse2_movsd (target, target, op));
14886 break;
14887 case V4SFmode:
14888 emit_insn (gen_sse_movss (target, target, op));
14889 break;
14890 default:
14891 break;
14893 return;
14896 /* And the busy general case, built up with interleave (unpack) operations. */
14897 switch (GET_MODE (target))
14899 case V2DFmode:
14901 rtx vecop0 =
14902 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
14903 rtx vecop1 =
14904 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
14906 vecop0 = force_reg (V2DFmode, vecop0);
14907 vecop1 = force_reg (V2DFmode, vecop1);
14908 emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
14910 break;
14911 case V4SFmode:
14913 rtx vecop0 =
14914 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
14915 rtx vecop1 =
14916 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
14917 rtx vecop2 =
14918 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
14919 rtx vecop3 =
14920 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
14921 rtx tmp1 = gen_reg_rtx (V4SFmode);
14922 rtx tmp2 = gen_reg_rtx (V4SFmode);
14924 vecop0 = force_reg (V4SFmode, vecop0);
14925 vecop1 = force_reg (V4SFmode, vecop1);
14926 vecop2 = force_reg (V4SFmode, vecop2);
14927 vecop3 = force_reg (V4SFmode, vecop3);
14928 emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
14929 emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
14930 emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
14932 break;
14933 default:
14934 abort ();
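/* Illustrative sketch (not part of i386.c): the V4SFmode interleave
   sequence above, expressed with SSE intrinsics.  Two unpcklps steps pair
   up (b,d) and (a,c), and a third interleaves them into {a, b, c, d}.
   The function name is hypothetical.  */

#include <xmmintrin.h>

static __m128
build_v4sf (float a, float b, float c, float d)
{
  __m128 va = _mm_set_ss (a), vb = _mm_set_ss (b);
  __m128 vc = _mm_set_ss (c), vd = _mm_set_ss (d);
  __m128 bd = _mm_unpacklo_ps (vb, vd);   /* {b, d, 0, 0} */
  __m128 ac = _mm_unpacklo_ps (va, vc);   /* {a, c, 0, 0} */

  return _mm_unpacklo_ps (ac, bd);        /* {a, b, c, d} */
}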
14938 /* Implements target hook vector_mode_supported_p. */
14939 static bool
14940 ix86_vector_mode_supported_p (enum machine_mode mode)
14942 if (TARGET_SSE
14943 && VALID_SSE_REG_MODE (mode))
14944 return true;
14946 else if (TARGET_MMX
14947 && VALID_MMX_REG_MODE (mode))
14948 return true;
14950 else if (TARGET_3DNOW
14951 && VALID_MMX_REG_MODE_3DNOW (mode))
14952 return true;
14954 else
14955 return false;
14958 /* Worker function for TARGET_MD_ASM_CLOBBERS.
14960 We do this in the new i386 backend to maintain source compatibility
14961 with the old cc0-based compiler. */
14963 static tree
14964 ix86_md_asm_clobbers (tree clobbers)
14966 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
14967 clobbers);
14968 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
14969 clobbers);
14970 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
14971 clobbers);
14972 return clobbers;
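/* Illustrative sketch (not part of i386.c): because of the implicit
   clobbers added above, an i386 asm statement may modify the flags, the
   FPU status word and the direction flag without listing them.  The
   example below changes EFLAGS yet needs no explicit clobber; names are
   hypothetical.  */

static int
is_negative (int x)
{
  unsigned char neg;

  __asm__ ("testl %1, %1\n\t"
           "sets %0"
           : "=q" (neg)
           : "r" (x));
  return neg;
}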
14975 /* Worker function for REVERSE_CONDITION. */
14977 enum rtx_code
14978 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
14980 return (mode != CCFPmode && mode != CCFPUmode
14981 ? reverse_condition (code)
14982 : reverse_condition_maybe_unordered (code));
14985 /* Output code to perform an x87 FP register move, from OPERANDS[1]
14986 to OPERANDS[0]. */
14988 const char *
14989 output_387_reg_move (rtx insn, rtx *operands)
14991 if (REG_P (operands[1])
14992 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
14994 if (REGNO (operands[0]) == FIRST_STACK_REG
14995 && TARGET_USE_FFREEP)
14996 return "ffreep\t%y0";
14997 return "fstp\t%y0";
14999 if (STACK_TOP_P (operands[0]))
15000 return "fld%z1\t%y1";
15001 return "fst\t%y0";
15004 /* Output code to perform a conditional jump to LABEL if the C2 flag in
15005 the FP status register is set. */
15007 void
15008 ix86_emit_fp_unordered_jump (rtx label)
15010 rtx reg = gen_reg_rtx (HImode);
15011 rtx temp;
15013 emit_insn (gen_x86_fnstsw_1 (reg));
15015 if (TARGET_USE_SAHF)
15017 emit_insn (gen_x86_sahf_1 (reg));
15019 temp = gen_rtx_REG (CCmode, FLAGS_REG);
15020 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
15022 else
15024 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
15026 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15027 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
15030 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
15031 gen_rtx_LABEL_REF (VOIDmode, label),
15032 pc_rtx);
15033 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
15034 emit_jump_insn (temp);
15037 /* Output code to perform a log1p XFmode calculation. */
15039 void ix86_emit_i387_log1p (rtx op0, rtx op1)
15041 rtx label1 = gen_label_rtx ();
15042 rtx label2 = gen_label_rtx ();
15044 rtx tmp = gen_reg_rtx (XFmode);
15045 rtx tmp2 = gen_reg_rtx (XFmode);
15047 emit_insn (gen_absxf2 (tmp, op1));
15048 emit_insn (gen_cmpxf (tmp,
15049 CONST_DOUBLE_FROM_REAL_VALUE (
15050 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
15051 XFmode)));
15052 emit_jump_insn (gen_bge (label1));
15054 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
15055 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
15056 emit_jump (label2);
15058 emit_label (label1);
15059 emit_move_insn (tmp, CONST1_RTX (XFmode));
15060 emit_insn (gen_addxf3 (tmp, op1, tmp));
15061 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
15062 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
15064 emit_label (label2);
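/* Illustrative sketch (not part of i386.c): where the constants above come
   from.  The comparison value is 1 - sqrt(2)/2, the bound on |x| within
   which the x87 FYL2XP1 instruction may be used, and fldln2 supplies ln(2)
   so that both paths compute log1p(x) = ln(2) * log2(1 + x).  */

#include <math.h>
#include <stdio.h>

int
main (void)
{
  double limit = 1.0 - sqrt (2.0) / 2.0;   /* 0.29289321881345247...  */
  double x = 0.1;

  printf ("fyl2xp1 bound: %.17g\n", limit);
  /* The identity both code paths rely on.  */
  printf ("log1p(%g) = %.17g, ln(2) * log2(1 + %g) = %.17g\n",
          x, log1p (x), x, M_LN2 * log2 (1.0 + x));
  return 0;
}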
15067 #include "gt-i386.h"