gcc/config/i386/i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
52 #ifndef CHECK_STACK_LIMIT
53 #define CHECK_STACK_LIMIT (-1)
54 #endif
56 /* Return index of given mode in mult and division cost tables. */
57 #define MODE_INDEX(mode) \
58 ((mode) == QImode ? 0 \
59 : (mode) == HImode ? 1 \
60 : (mode) == SImode ? 2 \
61 : (mode) == DImode ? 3 \
62 : 4)
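/* Illustration of the mapping above (assuming the mult_init/divide cost
   arrays declared in i386.h): ix86_cost->mult_init[MODE_INDEX (SImode)]
   selects column 2 of the "cost of starting a multiply" table, and any
   mode wider than DImode falls through to the last column (index 4).  */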
64 /* Processor costs (relative to an add) */
65 static const
 66 struct processor_costs size_cost = { /* costs for tuning for size */
67 2, /* cost of an add instruction */
68 3, /* cost of a lea instruction */
69 2, /* variable shift costs */
70 3, /* constant shift costs */
71 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
72 0, /* cost of multiply per each bit set */
73 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
74 3, /* cost of movsx */
75 3, /* cost of movzx */
76 0, /* "large" insn */
77 2, /* MOVE_RATIO */
78 2, /* cost for loading QImode using movzbl */
79 {2, 2, 2}, /* cost of loading integer registers
80 in QImode, HImode and SImode.
81 Relative to reg-reg move (2). */
82 {2, 2, 2}, /* cost of storing integer registers */
83 2, /* cost of reg,reg fld/fst */
84 {2, 2, 2}, /* cost of loading fp registers
85 in SFmode, DFmode and XFmode */
 86 {2, 2, 2}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
87 3, /* cost of moving MMX register */
88 {3, 3}, /* cost of loading MMX registers
89 in SImode and DImode */
90 {3, 3}, /* cost of storing MMX registers
91 in SImode and DImode */
92 3, /* cost of moving SSE register */
93 {3, 3, 3}, /* cost of loading SSE registers
94 in SImode, DImode and TImode */
95 {3, 3, 3}, /* cost of storing SSE registers
96 in SImode, DImode and TImode */
97 3, /* MMX or SSE register to integer */
98 0, /* size of prefetch block */
99 0, /* number of parallel prefetches */
100 1, /* Branch cost */
101 2, /* cost of FADD and FSUB insns. */
102 2, /* cost of FMUL instruction. */
103 2, /* cost of FDIV instruction. */
104 2, /* cost of FABS instruction. */
105 2, /* cost of FCHS instruction. */
106 2, /* cost of FSQRT instruction. */
109 /* Processor costs (relative to an add) */
110 static const
111 struct processor_costs i386_cost = { /* 386 specific costs */
112 1, /* cost of an add instruction */
113 1, /* cost of a lea instruction */
114 3, /* variable shift costs */
115 2, /* constant shift costs */
116 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
117 1, /* cost of multiply per each bit set */
118 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
119 3, /* cost of movsx */
120 2, /* cost of movzx */
121 15, /* "large" insn */
122 3, /* MOVE_RATIO */
123 4, /* cost for loading QImode using movzbl */
124 {2, 4, 2}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {8, 8, 8}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
 131 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
132 2, /* cost of moving MMX register */
133 {4, 8}, /* cost of loading MMX registers
134 in SImode and DImode */
135 {4, 8}, /* cost of storing MMX registers
136 in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
139 in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
141 in SImode, DImode and TImode */
142 3, /* MMX or SSE register to integer */
143 0, /* size of prefetch block */
144 0, /* number of parallel prefetches */
145 1, /* Branch cost */
146 23, /* cost of FADD and FSUB insns. */
147 27, /* cost of FMUL instruction. */
148 88, /* cost of FDIV instruction. */
149 22, /* cost of FABS instruction. */
150 24, /* cost of FCHS instruction. */
151 122, /* cost of FSQRT instruction. */
154 static const
155 struct processor_costs i486_cost = { /* 486 specific costs */
156 1, /* cost of an add instruction */
157 1, /* cost of a lea instruction */
158 3, /* variable shift costs */
159 2, /* constant shift costs */
160 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
161 1, /* cost of multiply per each bit set */
162 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
163 3, /* cost of movsx */
164 2, /* cost of movzx */
165 15, /* "large" insn */
166 3, /* MOVE_RATIO */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
 175 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
176 2, /* cost of moving MMX register */
177 {4, 8}, /* cost of loading MMX registers
178 in SImode and DImode */
179 {4, 8}, /* cost of storing MMX registers
180 in SImode and DImode */
181 2, /* cost of moving SSE register */
182 {4, 8, 16}, /* cost of loading SSE registers
183 in SImode, DImode and TImode */
184 {4, 8, 16}, /* cost of storing SSE registers
185 in SImode, DImode and TImode */
186 3, /* MMX or SSE register to integer */
187 0, /* size of prefetch block */
188 0, /* number of parallel prefetches */
189 1, /* Branch cost */
190 8, /* cost of FADD and FSUB insns. */
191 16, /* cost of FMUL instruction. */
192 73, /* cost of FDIV instruction. */
193 3, /* cost of FABS instruction. */
194 3, /* cost of FCHS instruction. */
195 83, /* cost of FSQRT instruction. */
198 static const
199 struct processor_costs pentium_cost = {
200 1, /* cost of an add instruction */
201 1, /* cost of a lea instruction */
202 4, /* variable shift costs */
203 1, /* constant shift costs */
204 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
205 0, /* cost of multiply per each bit set */
206 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
207 3, /* cost of movsx */
208 2, /* cost of movzx */
209 8, /* "large" insn */
210 6, /* MOVE_RATIO */
211 6, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {2, 2, 6}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
 219 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
220 8, /* cost of moving MMX register */
221 {8, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {8, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
233 2, /* Branch cost */
234 3, /* cost of FADD and FSUB insns. */
235 3, /* cost of FMUL instruction. */
236 39, /* cost of FDIV instruction. */
237 1, /* cost of FABS instruction. */
238 1, /* cost of FCHS instruction. */
239 70, /* cost of FSQRT instruction. */
242 static const
243 struct processor_costs pentiumpro_cost = {
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
246 1, /* variable shift costs */
247 1, /* constant shift costs */
248 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
249 0, /* cost of multiply per each bit set */
250 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
251 1, /* cost of movsx */
252 1, /* cost of movzx */
253 8, /* "large" insn */
254 6, /* MOVE_RATIO */
255 2, /* cost for loading QImode using movzbl */
256 {4, 4, 4}, /* cost of loading integer registers
257 in QImode, HImode and SImode.
258 Relative to reg-reg move (2). */
259 {2, 2, 2}, /* cost of storing integer registers */
260 2, /* cost of reg,reg fld/fst */
261 {2, 2, 6}, /* cost of loading fp registers
262 in SFmode, DFmode and XFmode */
 263 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
264 2, /* cost of moving MMX register */
265 {2, 2}, /* cost of loading MMX registers
266 in SImode and DImode */
267 {2, 2}, /* cost of storing MMX registers
268 in SImode and DImode */
269 2, /* cost of moving SSE register */
270 {2, 2, 8}, /* cost of loading SSE registers
271 in SImode, DImode and TImode */
272 {2, 2, 8}, /* cost of storing SSE registers
273 in SImode, DImode and TImode */
274 3, /* MMX or SSE register to integer */
275 32, /* size of prefetch block */
276 6, /* number of parallel prefetches */
277 2, /* Branch cost */
278 3, /* cost of FADD and FSUB insns. */
279 5, /* cost of FMUL instruction. */
280 56, /* cost of FDIV instruction. */
281 2, /* cost of FABS instruction. */
282 2, /* cost of FCHS instruction. */
283 56, /* cost of FSQRT instruction. */
286 static const
287 struct processor_costs k6_cost = {
288 1, /* cost of an add instruction */
289 2, /* cost of a lea instruction */
290 1, /* variable shift costs */
291 1, /* constant shift costs */
292 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
293 0, /* cost of multiply per each bit set */
294 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
295 2, /* cost of movsx */
296 2, /* cost of movzx */
297 8, /* "large" insn */
298 4, /* MOVE_RATIO */
299 3, /* cost for loading QImode using movzbl */
300 {4, 5, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 3, 2}, /* cost of storing integer registers */
304 4, /* cost of reg,reg fld/fst */
305 {6, 6, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
 307 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 6, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 1, /* number of parallel prefetches */
321 1, /* Branch cost */
322 2, /* cost of FADD and FSUB insns. */
323 2, /* cost of FMUL instruction. */
324 56, /* cost of FDIV instruction. */
325 2, /* cost of FABS instruction. */
326 2, /* cost of FCHS instruction. */
327 56, /* cost of FSQRT instruction. */
330 static const
331 struct processor_costs athlon_cost = {
332 1, /* cost of an add instruction */
333 2, /* cost of a lea instruction */
334 1, /* variable shift costs */
335 1, /* constant shift costs */
336 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
337 0, /* cost of multiply per each bit set */
338 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
339 1, /* cost of movsx */
340 1, /* cost of movzx */
341 8, /* "large" insn */
342 9, /* MOVE_RATIO */
343 4, /* cost for loading QImode using movzbl */
344 {3, 4, 3}, /* cost of loading integer registers
345 in QImode, HImode and SImode.
346 Relative to reg-reg move (2). */
347 {3, 4, 3}, /* cost of storing integer registers */
348 4, /* cost of reg,reg fld/fst */
349 {4, 4, 12}, /* cost of loading fp registers
350 in SFmode, DFmode and XFmode */
 351 {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
352 2, /* cost of moving MMX register */
353 {4, 4}, /* cost of loading MMX registers
354 in SImode and DImode */
355 {4, 4}, /* cost of storing MMX registers
356 in SImode and DImode */
357 2, /* cost of moving SSE register */
358 {4, 4, 6}, /* cost of loading SSE registers
359 in SImode, DImode and TImode */
360 {4, 4, 5}, /* cost of storing SSE registers
361 in SImode, DImode and TImode */
362 5, /* MMX or SSE register to integer */
363 64, /* size of prefetch block */
364 6, /* number of parallel prefetches */
365 2, /* Branch cost */
366 4, /* cost of FADD and FSUB insns. */
367 4, /* cost of FMUL instruction. */
368 24, /* cost of FDIV instruction. */
369 2, /* cost of FABS instruction. */
370 2, /* cost of FCHS instruction. */
371 35, /* cost of FSQRT instruction. */
374 static const
375 struct processor_costs k8_cost = {
376 1, /* cost of an add instruction */
377 2, /* cost of a lea instruction */
378 1, /* variable shift costs */
379 1, /* constant shift costs */
380 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
381 0, /* cost of multiply per each bit set */
382 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
383 1, /* cost of movsx */
384 1, /* cost of movzx */
385 8, /* "large" insn */
386 9, /* MOVE_RATIO */
387 4, /* cost for loading QImode using movzbl */
388 {3, 4, 3}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {3, 4, 3}, /* cost of storing integer registers */
392 4, /* cost of reg,reg fld/fst */
393 {4, 4, 12}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
 395 {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
396 2, /* cost of moving MMX register */
397 {3, 3}, /* cost of loading MMX registers
398 in SImode and DImode */
399 {4, 4}, /* cost of storing MMX registers
400 in SImode and DImode */
401 2, /* cost of moving SSE register */
402 {4, 3, 6}, /* cost of loading SSE registers
403 in SImode, DImode and TImode */
404 {4, 4, 5}, /* cost of storing SSE registers
405 in SImode, DImode and TImode */
406 5, /* MMX or SSE register to integer */
407 64, /* size of prefetch block */
408 6, /* number of parallel prefetches */
409 2, /* Branch cost */
410 4, /* cost of FADD and FSUB insns. */
411 4, /* cost of FMUL instruction. */
412 19, /* cost of FDIV instruction. */
413 2, /* cost of FABS instruction. */
414 2, /* cost of FCHS instruction. */
415 35, /* cost of FSQRT instruction. */
418 static const
419 struct processor_costs pentium4_cost = {
420 1, /* cost of an add instruction */
421 3, /* cost of a lea instruction */
422 4, /* variable shift costs */
423 4, /* constant shift costs */
424 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
425 0, /* cost of multiply per each bit set */
426 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
427 1, /* cost of movsx */
428 1, /* cost of movzx */
429 16, /* "large" insn */
430 6, /* MOVE_RATIO */
431 2, /* cost for loading QImode using movzbl */
432 {4, 5, 4}, /* cost of loading integer registers
433 in QImode, HImode and SImode.
434 Relative to reg-reg move (2). */
435 {2, 3, 2}, /* cost of storing integer registers */
436 2, /* cost of reg,reg fld/fst */
437 {2, 2, 6}, /* cost of loading fp registers
438 in SFmode, DFmode and XFmode */
 439 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
440 2, /* cost of moving MMX register */
441 {2, 2}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {2, 2}, /* cost of storing MMX registers
444 in SImode and DImode */
445 12, /* cost of moving SSE register */
446 {12, 12, 12}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {2, 2, 8}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 10, /* MMX or SSE register to integer */
451 64, /* size of prefetch block */
452 6, /* number of parallel prefetches */
453 2, /* Branch cost */
454 5, /* cost of FADD and FSUB insns. */
455 7, /* cost of FMUL instruction. */
456 43, /* cost of FDIV instruction. */
457 2, /* cost of FABS instruction. */
458 2, /* cost of FCHS instruction. */
459 43, /* cost of FSQRT instruction. */
462 static const
463 struct processor_costs nocona_cost = {
464 1, /* cost of an add instruction */
465 1, /* cost of a lea instruction */
466 1, /* variable shift costs */
467 1, /* constant shift costs */
468 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
469 0, /* cost of multiply per each bit set */
470 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
471 1, /* cost of movsx */
472 1, /* cost of movzx */
473 16, /* "large" insn */
474 9, /* MOVE_RATIO */
475 4, /* cost for loading QImode using movzbl */
476 {4, 4, 4}, /* cost of loading integer registers
477 in QImode, HImode and SImode.
478 Relative to reg-reg move (2). */
479 {4, 4, 4}, /* cost of storing integer registers */
480 3, /* cost of reg,reg fld/fst */
481 {12, 12, 12}, /* cost of loading fp registers
482 in SFmode, DFmode and XFmode */
 483 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
484 6, /* cost of moving MMX register */
485 {12, 12}, /* cost of loading MMX registers
486 in SImode and DImode */
487 {12, 12}, /* cost of storing MMX registers
488 in SImode and DImode */
489 6, /* cost of moving SSE register */
490 {12, 12, 12}, /* cost of loading SSE registers
491 in SImode, DImode and TImode */
492 {12, 12, 12}, /* cost of storing SSE registers
493 in SImode, DImode and TImode */
494 8, /* MMX or SSE register to integer */
495 128, /* size of prefetch block */
496 8, /* number of parallel prefetches */
497 1, /* Branch cost */
498 6, /* cost of FADD and FSUB insns. */
499 8, /* cost of FMUL instruction. */
500 40, /* cost of FDIV instruction. */
501 3, /* cost of FABS instruction. */
502 3, /* cost of FCHS instruction. */
503 44, /* cost of FSQRT instruction. */
506 const struct processor_costs *ix86_cost = &pentium_cost;
508 /* Processor feature/optimization bitmasks. */
509 #define m_386 (1<<PROCESSOR_I386)
510 #define m_486 (1<<PROCESSOR_I486)
511 #define m_PENT (1<<PROCESSOR_PENTIUM)
512 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
513 #define m_K6 (1<<PROCESSOR_K6)
514 #define m_ATHLON (1<<PROCESSOR_ATHLON)
515 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
516 #define m_K8 (1<<PROCESSOR_K8)
517 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
518 #define m_NOCONA (1<<PROCESSOR_NOCONA)
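/* A minimal sketch of how these masks are consumed (assuming the TUNEMASK
   convention from i386.h, i.e. TUNEMASK == (1 << ix86_tune)): a tuning
   heuristic applies when the bit for the selected processor is set, e.g.

     if (x86_use_leave & (1 << ix86_tune))
       use_leave_in_epilogue ();      ...hypothetical helper, for illustration

   The TARGET_* convenience macros in i386.h are assumed to wrap this
   same test.  */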
520 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
521 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
522 const int x86_zero_extend_with_and = m_486 | m_PENT;
523 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
524 const int x86_double_with_add = ~m_386;
525 const int x86_use_bit_test = m_386;
526 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
527 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
528 const int x86_3dnow_a = m_ATHLON_K8;
529 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
530 const int x86_branch_hints = m_PENT4 | m_NOCONA;
531 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
532 const int x86_partial_reg_stall = m_PPRO;
533 const int x86_use_loop = m_K6;
534 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
535 const int x86_use_mov0 = m_K6;
536 const int x86_use_cltd = ~(m_PENT | m_K6);
537 const int x86_read_modify_write = ~m_PENT;
538 const int x86_read_modify = ~(m_PENT | m_PPRO);
539 const int x86_split_long_moves = m_PPRO;
540 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
541 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
542 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
543 const int x86_qimode_math = ~(0);
544 const int x86_promote_qi_regs = 0;
545 const int x86_himode_math = ~(m_PPRO);
546 const int x86_promote_hi_regs = m_PPRO;
547 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
548 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
549 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
550 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
551 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
552 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
553 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
554 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
555 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
556 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
557 const int x86_decompose_lea = m_PENT4 | m_NOCONA;
558 const int x86_shift1 = ~m_486;
559 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
560 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
 561 /* Set for machines where types and dependencies are resolved on SSE register
 562 parts instead of whole registers, so we may keep just the lower part of
 563 scalar values in the proper format, leaving the upper part undefined. */
564 const int x86_sse_partial_regs = m_ATHLON_K8;
 565 /* Athlon optimizes the partial-register FP special case, thus avoiding the
 566 need for extra instructions beforehand. */
567 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
568 const int x86_sse_typeless_stores = m_ATHLON_K8;
569 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
570 const int x86_use_ffreep = m_ATHLON_K8;
571 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
572 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
573 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
574 /* Some CPU cores are not able to predict more than 4 branch instructions in
575 the 16 byte window. */
576 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
578 /* In case the average insn count for single function invocation is
579 lower than this constant, emit fast (but longer) prologue and
580 epilogue code. */
581 #define FAST_PROLOGUE_INSN_COUNT 20
 583 /* Names for 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
584 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
585 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
586 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
588 /* Array of the smallest class containing reg number REGNO, indexed by
589 REGNO. Used by REGNO_REG_CLASS in i386.h. */
591 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
593 /* ax, dx, cx, bx */
594 AREG, DREG, CREG, BREG,
595 /* si, di, bp, sp */
596 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
597 /* FP registers */
598 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
599 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
600 /* arg pointer */
601 NON_Q_REGS,
602 /* flags, fpsr, dirflag, frame */
603 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
604 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
605 SSE_REGS, SSE_REGS,
606 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
607 MMX_REGS, MMX_REGS,
608 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
609 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
610 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
611 SSE_REGS, SSE_REGS,
614 /* The "default" register map used in 32bit mode. */
616 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
618 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
619 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
620 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
621 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
622 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
623 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
624 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
627 static int const x86_64_int_parameter_registers[6] =
629 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
630 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
633 static int const x86_64_int_return_registers[4] =
 635 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
638 /* The "default" register map used in 64bit mode. */
639 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
641 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
642 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
643 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
644 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
645 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
646 8,9,10,11,12,13,14,15, /* extended integer registers */
647 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
650 /* Define the register numbers to be used in Dwarf debugging information.
651 The SVR4 reference port C compiler uses the following register numbers
652 in its Dwarf output code:
653 0 for %eax (gcc regno = 0)
654 1 for %ecx (gcc regno = 2)
655 2 for %edx (gcc regno = 1)
656 3 for %ebx (gcc regno = 3)
657 4 for %esp (gcc regno = 7)
658 5 for %ebp (gcc regno = 6)
659 6 for %esi (gcc regno = 4)
660 7 for %edi (gcc regno = 5)
661 The following three DWARF register numbers are never generated by
662 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
663 believes these numbers have these meanings.
664 8 for %eip (no gcc equivalent)
665 9 for %eflags (gcc regno = 17)
666 10 for %trapno (no gcc equivalent)
667 It is not at all clear how we should number the FP stack registers
668 for the x86 architecture. If the version of SDB on x86/svr4 were
669 a bit less brain dead with respect to floating-point then we would
670 have a precedent to follow with respect to DWARF register numbers
671 for x86 FP registers, but the SDB on x86/svr4 is so completely
672 broken with respect to FP registers that it is hardly worth thinking
673 of it as something to strive for compatibility with.
674 The version of x86/svr4 SDB I have at the moment does (partially)
675 seem to believe that DWARF register number 11 is associated with
676 the x86 register %st(0), but that's about all. Higher DWARF
677 register numbers don't seem to be associated with anything in
678 particular, and even for DWARF regno 11, SDB only seems to under-
679 stand that it should say that a variable lives in %st(0) (when
680 asked via an `=' command) if we said it was in DWARF regno 11,
681 but SDB still prints garbage when asked for the value of the
682 variable in question (via a `/' command).
683 (Also note that the labels SDB prints for various FP stack regs
684 when doing an `x' command are all wrong.)
685 Note that these problems generally don't affect the native SVR4
686 C compiler because it doesn't allow the use of -O with -g and
687 because when it is *not* optimizing, it allocates a memory
688 location for each floating-point variable, and the memory
689 location is what gets described in the DWARF AT_location
690 attribute for the variable in question.
691 Regardless of the severe mental illness of the x86/svr4 SDB, we
692 do something sensible here and we use the following DWARF
693 register numbers. Note that these are all stack-top-relative
694 numbers.
695 11 for %st(0) (gcc regno = 8)
696 12 for %st(1) (gcc regno = 9)
697 13 for %st(2) (gcc regno = 10)
698 14 for %st(3) (gcc regno = 11)
699 15 for %st(4) (gcc regno = 12)
700 16 for %st(5) (gcc regno = 13)
701 17 for %st(6) (gcc regno = 14)
702 18 for %st(7) (gcc regno = 15)
704 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
706 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
707 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
708 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
709 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
710 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
711 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
712 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
715 /* Test and compare insns in i386.md store the information needed to
716 generate branch and scc insns here. */
718 rtx ix86_compare_op0 = NULL_RTX;
719 rtx ix86_compare_op1 = NULL_RTX;
721 #define MAX_386_STACK_LOCALS 3
722 /* Size of the register save area. */
723 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
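/* Worked example, assuming the x86-64 values REGPARM_MAX == 6 and
   SSE_REGPARM_MAX == 8: the register save area filled for va_start is
   6*8 + 8*16 = 176 bytes.  */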
725 /* Define the structure for the machine field in struct function. */
727 struct stack_local_entry GTY(())
729 unsigned short mode;
730 unsigned short n;
731 rtx rtl;
732 struct stack_local_entry *next;
735 /* Structure describing stack frame layout.
736 Stack grows downward:
738 [arguments]
739 <- ARG_POINTER
740 saved pc
742 saved frame pointer if frame_pointer_needed
743 <- HARD_FRAME_POINTER
744 [saved regs]
746 [padding1] \
748 [va_arg registers] (
749 > to_allocate <- FRAME_POINTER
750 [frame] (
752 [padding2] /
754 struct ix86_frame
756 int nregs;
757 int padding1;
758 int va_arg_size;
759 HOST_WIDE_INT frame;
760 int padding2;
761 int outgoing_arguments_size;
762 int red_zone_size;
764 HOST_WIDE_INT to_allocate;
765 /* The offsets relative to ARG_POINTER. */
766 HOST_WIDE_INT frame_pointer_offset;
767 HOST_WIDE_INT hard_frame_pointer_offset;
768 HOST_WIDE_INT stack_pointer_offset;
770 /* When save_regs_using_mov is set, emit prologue using
771 move instead of push instructions. */
772 bool save_regs_using_mov;
775 /* Used to enable/disable debugging features. */
776 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
777 /* Code model option as passed by user. */
778 const char *ix86_cmodel_string;
779 /* Parsed value. */
780 enum cmodel ix86_cmodel;
781 /* Asm dialect. */
782 const char *ix86_asm_string;
783 enum asm_dialect ix86_asm_dialect = ASM_ATT;
 784 /* TLS dialect. */
785 const char *ix86_tls_dialect_string;
786 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
788 /* Which unit we are generating floating point math for. */
789 enum fpmath_unit ix86_fpmath;
 792 /* Which CPU we are scheduling for. */
792 enum processor_type ix86_tune;
793 /* Which instruction set architecture to use. */
794 enum processor_type ix86_arch;
796 /* Strings to hold which cpu and instruction set architecture to use. */
797 const char *ix86_tune_string; /* for -mtune=<xxx> */
798 const char *ix86_arch_string; /* for -march=<xxx> */
799 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
801 /* # of registers to use to pass arguments. */
802 const char *ix86_regparm_string;
 804 /* True if the SSE prefetch instruction is not a NOP. */
805 int x86_prefetch_sse;
807 /* ix86_regparm_string as a number */
808 int ix86_regparm;
810 /* Alignment to use for loops and jumps: */
812 /* Power of two alignment for loops. */
813 const char *ix86_align_loops_string;
815 /* Power of two alignment for non-loop jumps. */
816 const char *ix86_align_jumps_string;
818 /* Power of two alignment for stack boundary in bytes. */
819 const char *ix86_preferred_stack_boundary_string;
821 /* Preferred alignment for stack boundary in bits. */
822 unsigned int ix86_preferred_stack_boundary;
824 /* Values 1-5: see jump.c */
825 int ix86_branch_cost;
826 const char *ix86_branch_cost_string;
828 /* Power of two alignment for functions. */
829 const char *ix86_align_funcs_string;
831 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
832 char internal_label_prefix[16];
833 int internal_label_prefix_len;
835 static void output_pic_addr_const (FILE *, rtx, int);
836 static void put_condition_code (enum rtx_code, enum machine_mode,
837 int, int, FILE *);
838 static const char *get_some_local_dynamic_name (void);
839 static int get_some_local_dynamic_name_1 (rtx *, void *);
840 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
841 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
842 rtx *);
843 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
844 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
845 enum machine_mode);
846 static rtx get_thread_pointer (int);
847 static rtx legitimize_tls_address (rtx, enum tls_model, int);
848 static void get_pc_thunk_name (char [32], unsigned int);
849 static rtx gen_push (rtx);
850 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
851 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
852 static struct machine_function * ix86_init_machine_status (void);
853 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
854 static int ix86_nsaved_regs (void);
855 static void ix86_emit_save_regs (void);
856 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
857 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
858 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
859 static HOST_WIDE_INT ix86_GOT_alias_set (void);
860 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
861 static rtx ix86_expand_aligntest (rtx, int);
862 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
863 static int ix86_issue_rate (void);
864 static int ix86_adjust_cost (rtx, rtx, rtx, int);
865 static int ia32_multipass_dfa_lookahead (void);
866 static void ix86_init_mmx_sse_builtins (void);
867 static rtx x86_this_parameter (tree);
868 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
869 HOST_WIDE_INT, tree);
870 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
871 static void x86_file_start (void);
872 static void ix86_reorg (void);
873 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
874 static tree ix86_build_builtin_va_list (void);
875 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
876 tree, int *, int);
877 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
879 static int ix86_address_cost (rtx);
880 static bool ix86_cannot_force_const_mem (rtx);
881 static rtx ix86_delegitimize_address (rtx);
883 struct builtin_description;
884 static rtx ix86_expand_sse_comi (const struct builtin_description *,
885 tree, rtx);
886 static rtx ix86_expand_sse_compare (const struct builtin_description *,
887 tree, rtx);
888 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
889 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
890 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
891 static rtx ix86_expand_store_builtin (enum insn_code, tree);
892 static rtx safe_vector_operand (rtx, enum machine_mode);
893 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
894 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
895 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
896 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
897 static int ix86_fp_comparison_cost (enum rtx_code code);
898 static unsigned int ix86_select_alt_pic_regnum (void);
899 static int ix86_save_reg (unsigned int, int);
900 static void ix86_compute_frame_layout (struct ix86_frame *);
901 static int ix86_comp_type_attributes (tree, tree);
902 static int ix86_function_regparm (tree, tree);
903 const struct attribute_spec ix86_attribute_table[];
904 static bool ix86_function_ok_for_sibcall (tree, tree);
905 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
906 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
907 static int ix86_value_regno (enum machine_mode);
908 static bool contains_128bit_aligned_vector_p (tree);
909 static rtx ix86_struct_value_rtx (tree, int);
910 static bool ix86_ms_bitfield_layout_p (tree);
911 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
912 static int extended_reg_mentioned_1 (rtx *, void *);
913 static bool ix86_rtx_costs (rtx, int, int, int *);
914 static int min_insn_size (rtx);
915 static tree ix86_md_asm_clobbers (tree clobbers);
916 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
917 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
918 tree, bool);
920 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
921 static void ix86_svr3_asm_out_constructor (rtx, int);
922 #endif
 924 /* Register class used for passing a given 64-bit part of the argument.
 925 These represent classes as documented by the psABI, with the exception
 926 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
 927 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
 929 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
 930 whenever possible (the upper half then contains only padding).
932 enum x86_64_reg_class
934 X86_64_NO_CLASS,
935 X86_64_INTEGER_CLASS,
936 X86_64_INTEGERSI_CLASS,
937 X86_64_SSE_CLASS,
938 X86_64_SSESF_CLASS,
939 X86_64_SSEDF_CLASS,
940 X86_64_SSEUP_CLASS,
941 X86_64_X87_CLASS,
942 X86_64_X87UP_CLASS,
943 X86_64_MEMORY_CLASS
945 static const char * const x86_64_reg_class_name[] =
946 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
948 #define MAX_CLASSES 4
949 static int classify_argument (enum machine_mode, tree,
950 enum x86_64_reg_class [MAX_CLASSES], int);
951 static int examine_argument (enum machine_mode, tree, int, int *, int *);
952 static rtx construct_container (enum machine_mode, tree, int, int, int,
953 const int *, int);
954 static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
955 enum x86_64_reg_class);
957 /* Table of constants used by fldpi, fldln2, etc.... */
958 static REAL_VALUE_TYPE ext_80387_constants_table [5];
959 static bool ext_80387_constants_init = 0;
960 static void init_ext_80387_constants (void);
962 /* Initialize the GCC target structure. */
963 #undef TARGET_ATTRIBUTE_TABLE
964 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
965 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
966 # undef TARGET_MERGE_DECL_ATTRIBUTES
967 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
968 #endif
970 #undef TARGET_COMP_TYPE_ATTRIBUTES
971 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
973 #undef TARGET_INIT_BUILTINS
974 #define TARGET_INIT_BUILTINS ix86_init_builtins
976 #undef TARGET_EXPAND_BUILTIN
977 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
979 #undef TARGET_ASM_FUNCTION_EPILOGUE
980 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
982 #undef TARGET_ASM_OPEN_PAREN
983 #define TARGET_ASM_OPEN_PAREN ""
984 #undef TARGET_ASM_CLOSE_PAREN
985 #define TARGET_ASM_CLOSE_PAREN ""
987 #undef TARGET_ASM_ALIGNED_HI_OP
988 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
989 #undef TARGET_ASM_ALIGNED_SI_OP
990 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
991 #ifdef ASM_QUAD
992 #undef TARGET_ASM_ALIGNED_DI_OP
993 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
994 #endif
996 #undef TARGET_ASM_UNALIGNED_HI_OP
997 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
998 #undef TARGET_ASM_UNALIGNED_SI_OP
999 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1000 #undef TARGET_ASM_UNALIGNED_DI_OP
1001 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1003 #undef TARGET_SCHED_ADJUST_COST
1004 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1005 #undef TARGET_SCHED_ISSUE_RATE
1006 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1007 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1008 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1009 ia32_multipass_dfa_lookahead
1011 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1012 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1014 #ifdef HAVE_AS_TLS
1015 #undef TARGET_HAVE_TLS
1016 #define TARGET_HAVE_TLS true
1017 #endif
1018 #undef TARGET_CANNOT_FORCE_CONST_MEM
1019 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1021 #undef TARGET_DELEGITIMIZE_ADDRESS
1022 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1024 #undef TARGET_MS_BITFIELD_LAYOUT_P
1025 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1027 #undef TARGET_ASM_OUTPUT_MI_THUNK
1028 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1029 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1030 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1032 #undef TARGET_ASM_FILE_START
1033 #define TARGET_ASM_FILE_START x86_file_start
1035 #undef TARGET_RTX_COSTS
1036 #define TARGET_RTX_COSTS ix86_rtx_costs
1037 #undef TARGET_ADDRESS_COST
1038 #define TARGET_ADDRESS_COST ix86_address_cost
1040 #undef TARGET_FIXED_CONDITION_CODE_REGS
1041 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1042 #undef TARGET_CC_MODES_COMPATIBLE
1043 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1045 #undef TARGET_MACHINE_DEPENDENT_REORG
1046 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1048 #undef TARGET_BUILD_BUILTIN_VA_LIST
1049 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1051 #undef TARGET_MD_ASM_CLOBBERS
1052 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1054 #undef TARGET_PROMOTE_PROTOTYPES
1055 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1056 #undef TARGET_STRUCT_VALUE_RTX
1057 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1058 #undef TARGET_SETUP_INCOMING_VARARGS
1059 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1060 #undef TARGET_MUST_PASS_IN_STACK
1061 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1062 #undef TARGET_PASS_BY_REFERENCE
1063 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1065 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1066 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1068 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1069 #undef TARGET_INSERT_ATTRIBUTES
1070 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1071 #endif
1073 struct gcc_target targetm = TARGET_INITIALIZER;
1076 /* The svr4 ABI for the i386 says that records and unions are returned
1077 in memory. */
1078 #ifndef DEFAULT_PCC_STRUCT_RETURN
1079 #define DEFAULT_PCC_STRUCT_RETURN 1
1080 #endif
1082 /* Sometimes certain combinations of command options do not make
1083 sense on a particular target machine. You can define a macro
1084 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1085 defined, is executed once just after all the command options have
1086 been parsed.
1088 Don't use this macro to turn on various extra optimizations for
1089 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1091 void
1092 override_options (void)
1094 int i;
1095 int ix86_tune_defaulted = 0;
1097 /* Comes from final.c -- no real reason to change it. */
1098 #define MAX_CODE_ALIGN 16
1100 static struct ptt
1102 const struct processor_costs *cost; /* Processor costs */
1103 const int target_enable; /* Target flags to enable. */
1104 const int target_disable; /* Target flags to disable. */
1105 const int align_loop; /* Default alignments. */
1106 const int align_loop_max_skip;
1107 const int align_jump;
1108 const int align_jump_max_skip;
1109 const int align_func;
1111 const processor_target_table[PROCESSOR_max] =
1113 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1114 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1115 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1116 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1117 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1118 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1119 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1120 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1121 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
1124 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1125 static struct pta
1127 const char *const name; /* processor name or nickname. */
1128 const enum processor_type processor;
1129 const enum pta_flags
1131 PTA_SSE = 1,
1132 PTA_SSE2 = 2,
1133 PTA_SSE3 = 4,
1134 PTA_MMX = 8,
1135 PTA_PREFETCH_SSE = 16,
1136 PTA_3DNOW = 32,
1137 PTA_3DNOW_A = 64,
1138 PTA_64BIT = 128
1139 } flags;
1141 const processor_alias_table[] =
1143 {"i386", PROCESSOR_I386, 0},
1144 {"i486", PROCESSOR_I486, 0},
1145 {"i586", PROCESSOR_PENTIUM, 0},
1146 {"pentium", PROCESSOR_PENTIUM, 0},
1147 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1148 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1149 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1150 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1151 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1152 {"i686", PROCESSOR_PENTIUMPRO, 0},
1153 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1154 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1155 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1156 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1157 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1158 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1159 | PTA_MMX | PTA_PREFETCH_SSE},
1160 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1161 | PTA_MMX | PTA_PREFETCH_SSE},
1162 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1163 | PTA_MMX | PTA_PREFETCH_SSE},
1164 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1165 | PTA_MMX | PTA_PREFETCH_SSE},
1166 {"k6", PROCESSOR_K6, PTA_MMX},
1167 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1168 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1169 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1170 | PTA_3DNOW_A},
1171 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1172 | PTA_3DNOW | PTA_3DNOW_A},
1173 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1174 | PTA_3DNOW_A | PTA_SSE},
1175 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1176 | PTA_3DNOW_A | PTA_SSE},
1177 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1178 | PTA_3DNOW_A | PTA_SSE},
1179 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1180 | PTA_SSE | PTA_SSE2 },
1181 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1182 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1183 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1184 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1185 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1186 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1187 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1188 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1191 int const pta_size = ARRAY_SIZE (processor_alias_table);
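/* Example of how the table above is applied (see the loop over
   processor_alias_table further down): -march=athlon-xp selects
   PROCESSOR_ATHLON and, unless the user said otherwise on the command
   line, enables MASK_MMX, MASK_3DNOW, MASK_3DNOW_A and MASK_SSE, and
   sets x86_prefetch_sse.  */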
 1193 /* Set the default values for switches whose default depends on TARGET_64BIT
 1194 in case they weren't overridden by command-line options. */
1195 if (TARGET_64BIT)
1197 if (flag_omit_frame_pointer == 2)
1198 flag_omit_frame_pointer = 1;
1199 if (flag_asynchronous_unwind_tables == 2)
1200 flag_asynchronous_unwind_tables = 1;
1201 if (flag_pcc_struct_return == 2)
1202 flag_pcc_struct_return = 0;
1204 else
1206 if (flag_omit_frame_pointer == 2)
1207 flag_omit_frame_pointer = 0;
1208 if (flag_asynchronous_unwind_tables == 2)
1209 flag_asynchronous_unwind_tables = 0;
1210 if (flag_pcc_struct_return == 2)
1211 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1214 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1215 SUBTARGET_OVERRIDE_OPTIONS;
1216 #endif
1218 if (!ix86_tune_string && ix86_arch_string)
1219 ix86_tune_string = ix86_arch_string;
1220 if (!ix86_tune_string)
1222 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1223 ix86_tune_defaulted = 1;
1225 if (!ix86_arch_string)
1226 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1228 if (ix86_cmodel_string != 0)
1230 if (!strcmp (ix86_cmodel_string, "small"))
1231 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1232 else if (flag_pic)
1233 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1234 else if (!strcmp (ix86_cmodel_string, "32"))
1235 ix86_cmodel = CM_32;
1236 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1237 ix86_cmodel = CM_KERNEL;
1238 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1239 ix86_cmodel = CM_MEDIUM;
1240 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1241 ix86_cmodel = CM_LARGE;
1242 else
1243 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1245 else
1247 ix86_cmodel = CM_32;
1248 if (TARGET_64BIT)
1249 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1251 if (ix86_asm_string != 0)
1253 if (!strcmp (ix86_asm_string, "intel"))
1254 ix86_asm_dialect = ASM_INTEL;
1255 else if (!strcmp (ix86_asm_string, "att"))
1256 ix86_asm_dialect = ASM_ATT;
1257 else
1258 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1260 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1261 error ("code model `%s' not supported in the %s bit mode",
1262 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1263 if (ix86_cmodel == CM_LARGE)
1264 sorry ("code model `large' not supported yet");
1265 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1266 sorry ("%i-bit mode not compiled in",
1267 (target_flags & MASK_64BIT) ? 64 : 32);
1269 for (i = 0; i < pta_size; i++)
1270 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1272 ix86_arch = processor_alias_table[i].processor;
1273 /* Default cpu tuning to the architecture. */
1274 ix86_tune = ix86_arch;
1275 if (processor_alias_table[i].flags & PTA_MMX
1276 && !(target_flags_explicit & MASK_MMX))
1277 target_flags |= MASK_MMX;
1278 if (processor_alias_table[i].flags & PTA_3DNOW
1279 && !(target_flags_explicit & MASK_3DNOW))
1280 target_flags |= MASK_3DNOW;
1281 if (processor_alias_table[i].flags & PTA_3DNOW_A
1282 && !(target_flags_explicit & MASK_3DNOW_A))
1283 target_flags |= MASK_3DNOW_A;
1284 if (processor_alias_table[i].flags & PTA_SSE
1285 && !(target_flags_explicit & MASK_SSE))
1286 target_flags |= MASK_SSE;
1287 if (processor_alias_table[i].flags & PTA_SSE2
1288 && !(target_flags_explicit & MASK_SSE2))
1289 target_flags |= MASK_SSE2;
1290 if (processor_alias_table[i].flags & PTA_SSE3
1291 && !(target_flags_explicit & MASK_SSE3))
1292 target_flags |= MASK_SSE3;
1293 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1294 x86_prefetch_sse = true;
1295 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1297 if (ix86_tune_defaulted)
1299 ix86_tune_string = "x86-64";
1300 for (i = 0; i < pta_size; i++)
1301 if (! strcmp (ix86_tune_string,
1302 processor_alias_table[i].name))
1303 break;
1304 ix86_tune = processor_alias_table[i].processor;
1306 else
1307 error ("CPU you selected does not support x86-64 "
1308 "instruction set");
1310 break;
1313 if (i == pta_size)
1314 error ("bad value (%s) for -march= switch", ix86_arch_string);
1316 for (i = 0; i < pta_size; i++)
1317 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1319 ix86_tune = processor_alias_table[i].processor;
1320 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1321 error ("CPU you selected does not support x86-64 instruction set");
1323 /* Intel CPUs have always interpreted SSE prefetch instructions as
1324 NOPs; so, we can enable SSE prefetch instructions even when
1325 -mtune (rather than -march) points us to a processor that has them.
1326 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1327 higher processors. */
1328 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1329 x86_prefetch_sse = true;
1330 break;
1332 if (i == pta_size)
1333 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1335 if (optimize_size)
1336 ix86_cost = &size_cost;
1337 else
1338 ix86_cost = processor_target_table[ix86_tune].cost;
1339 target_flags |= processor_target_table[ix86_tune].target_enable;
1340 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1342 /* Arrange to set up i386_stack_locals for all functions. */
1343 init_machine_status = ix86_init_machine_status;
1345 /* Validate -mregparm= value. */
1346 if (ix86_regparm_string)
1348 i = atoi (ix86_regparm_string);
1349 if (i < 0 || i > REGPARM_MAX)
1350 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1351 else
1352 ix86_regparm = i;
1354 else
1355 if (TARGET_64BIT)
1356 ix86_regparm = REGPARM_MAX;
1358 /* If the user has provided any of the -malign-* options,
1359 warn and use that value only if -falign-* is not set.
1360 Remove this code in GCC 3.2 or later. */
1361 if (ix86_align_loops_string)
1363 warning ("-malign-loops is obsolete, use -falign-loops");
1364 if (align_loops == 0)
1366 i = atoi (ix86_align_loops_string);
1367 if (i < 0 || i > MAX_CODE_ALIGN)
1368 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1369 else
1370 align_loops = 1 << i;
1374 if (ix86_align_jumps_string)
1376 warning ("-malign-jumps is obsolete, use -falign-jumps");
1377 if (align_jumps == 0)
1379 i = atoi (ix86_align_jumps_string);
1380 if (i < 0 || i > MAX_CODE_ALIGN)
 1381 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1382 else
1383 align_jumps = 1 << i;
1387 if (ix86_align_funcs_string)
1389 warning ("-malign-functions is obsolete, use -falign-functions");
1390 if (align_functions == 0)
1392 i = atoi (ix86_align_funcs_string);
1393 if (i < 0 || i > MAX_CODE_ALIGN)
 1394 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1395 else
1396 align_functions = 1 << i;
1400 /* Default align_* from the processor table. */
1401 if (align_loops == 0)
1403 align_loops = processor_target_table[ix86_tune].align_loop;
1404 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1406 if (align_jumps == 0)
1408 align_jumps = processor_target_table[ix86_tune].align_jump;
1409 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1411 if (align_functions == 0)
1413 align_functions = processor_target_table[ix86_tune].align_func;
1416 /* Validate -mpreferred-stack-boundary= value, or provide default.
1417 The default of 128 bits is for Pentium III's SSE __m128, but we
1418 don't want additional code to keep the stack aligned when
1419 optimizing for code size. */
1420 ix86_preferred_stack_boundary = (optimize_size
1421 ? TARGET_64BIT ? 128 : 32
1422 : 128);
1423 if (ix86_preferred_stack_boundary_string)
1425 i = atoi (ix86_preferred_stack_boundary_string);
1426 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1427 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1428 TARGET_64BIT ? 4 : 2);
1429 else
1430 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
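 /* Worked example: -mpreferred-stack-boundary=4 yields (1 << 4) * 8 = 128
    bits, i.e. a 16-byte aligned stack; the 64-bit lower bound of 4 above
    therefore keeps the 16-byte alignment the x86-64 ABI expects.  */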
1433 /* Validate -mbranch-cost= value, or provide default. */
1434 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1435 if (ix86_branch_cost_string)
1437 i = atoi (ix86_branch_cost_string);
1438 if (i < 0 || i > 5)
1439 error ("-mbranch-cost=%d is not between 0 and 5", i);
1440 else
1441 ix86_branch_cost = i;
1444 if (ix86_tls_dialect_string)
1446 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1447 ix86_tls_dialect = TLS_DIALECT_GNU;
1448 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1449 ix86_tls_dialect = TLS_DIALECT_SUN;
1450 else
1451 error ("bad value (%s) for -mtls-dialect= switch",
1452 ix86_tls_dialect_string);
1455 /* Keep nonleaf frame pointers. */
1456 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1457 flag_omit_frame_pointer = 1;
1459 /* If we're doing fast math, we don't care about comparison order
1460 wrt NaNs. This lets us use a shorter comparison sequence. */
1461 if (flag_unsafe_math_optimizations)
1462 target_flags &= ~MASK_IEEE_FP;
1464 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1465 since the insns won't need emulation. */
1466 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1467 target_flags &= ~MASK_NO_FANCY_MATH_387;
1469 /* Turn on SSE2 builtins for -msse3. */
1470 if (TARGET_SSE3)
1471 target_flags |= MASK_SSE2;
1473 /* Turn on SSE builtins for -msse2. */
1474 if (TARGET_SSE2)
1475 target_flags |= MASK_SSE;
1477 if (TARGET_64BIT)
1479 if (TARGET_ALIGN_DOUBLE)
1480 error ("-malign-double makes no sense in the 64bit mode");
1481 if (TARGET_RTD)
1482 error ("-mrtd calling convention not supported in the 64bit mode");
1483 /* Enable by default the SSE and MMX builtins. */
1484 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1485 ix86_fpmath = FPMATH_SSE;
1487 else
1489 ix86_fpmath = FPMATH_387;
 1490 /* The i386 ABI does not specify a red zone. It still makes sense to use it
 1491 when the programmer takes care to keep the stack from being destroyed. */
1492 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1493 target_flags |= MASK_NO_RED_ZONE;
1496 if (ix86_fpmath_string != 0)
1498 if (! strcmp (ix86_fpmath_string, "387"))
1499 ix86_fpmath = FPMATH_387;
1500 else if (! strcmp (ix86_fpmath_string, "sse"))
1502 if (!TARGET_SSE)
1504 warning ("SSE instruction set disabled, using 387 arithmetics");
1505 ix86_fpmath = FPMATH_387;
1507 else
1508 ix86_fpmath = FPMATH_SSE;
1510 else if (! strcmp (ix86_fpmath_string, "387,sse")
1511 || ! strcmp (ix86_fpmath_string, "sse,387"))
1513 if (!TARGET_SSE)
1515 warning ("SSE instruction set disabled, using 387 arithmetics");
1516 ix86_fpmath = FPMATH_387;
1518 else if (!TARGET_80387)
1520 warning ("387 instruction set disabled, using SSE arithmetics");
1521 ix86_fpmath = FPMATH_SSE;
1523 else
1524 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1526 else
1527 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1530 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1531 on by -msse. */
1532 if (TARGET_SSE)
1534 target_flags |= MASK_MMX;
1535 x86_prefetch_sse = true;
1538 /* If it has 3DNow!, it also has MMX, so MMX is also turned on by -m3dnow. */
1539 if (TARGET_3DNOW)
1541 target_flags |= MASK_MMX;
1542 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1543 extensions it adds. */
1544 if (x86_3dnow_a & (1 << ix86_arch))
1545 target_flags |= MASK_3DNOW_A;
1547 if ((x86_accumulate_outgoing_args & TUNEMASK)
1548 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1549 && !optimize_size)
1550 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1552 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1554 char *p;
1555 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1556 p = strchr (internal_label_prefix, 'X');
1557 internal_label_prefix_len = p - internal_label_prefix;
1558 *p = '\0';
1562 void
1563 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1565 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1566 make the problem with not enough registers even worse. */
1567 #ifdef INSN_SCHEDULING
1568 if (level > 1)
1569 flag_schedule_insns = 0;
1570 #endif
1572 /* The default values of these switches depend on the TARGET_64BIT
1573 that is not known at this moment. Mark these values with 2 and
1574 let the user override these. In case there is no command line option
1575 specifying them, we will set the defaults in override_options. */
1576 if (optimize >= 1)
1577 flag_omit_frame_pointer = 2;
1578 flag_pcc_struct_return = 2;
1579 flag_asynchronous_unwind_tables = 2;
1582 /* Table of valid machine attributes. */
1583 const struct attribute_spec ix86_attribute_table[] =
1585 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1586 /* Stdcall attribute says callee is responsible for popping arguments
1587 if they are not variable. */
1588 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1589 /* Fastcall attribute says callee is responsible for popping arguments
1590 if they are not variable. */
1591 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1592 /* Cdecl attribute says the callee is a normal C declaration */
1593 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1594 /* Regparm attribute specifies how many integer arguments are to be
1595 passed in registers. */
1596 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1597 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1598 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
1599 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
1600 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1601 #endif
1602 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1603 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1604 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1605 SUBTARGET_ATTRIBUTE_TABLE,
1606 #endif
1607 { NULL, 0, 0, false, false, false, NULL }
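/* Illustrative uses of these attributes (hypothetical declarations, not part
   of this file):
     int __attribute__((fastcall)) f (int a, int b);      a in %ecx, b in %edx
     int __attribute__((regparm (3))) g (int a, int b, int c);   %eax, %edx, %ecx
     int __attribute__((stdcall)) h (int a);              callee pops with ret $4  */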
1610 /* Decide whether we can make a sibling call to a function. DECL is the
1611 declaration of the function being targeted by the call and EXP is the
1612 CALL_EXPR representing the call. */
1614 static bool
1615 ix86_function_ok_for_sibcall (tree decl, tree exp)
1617 /* If we are generating position-independent code, we cannot sibcall
1618 optimize any indirect call, or a direct call to a global function,
1619 as the PLT requires %ebx be live. */
1620 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1621 return false;
1623 /* If we are returning floats on the 80387 register stack, we cannot
1624 make a sibcall from a function that doesn't return a float to a
1625 function that does or, conversely, from a function that does return
1626 a float to a function that doesn't; the necessary stack adjustment
1627 would not be executed. */
1628 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1629 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1630 return false;
1632 /* If this call is indirect, we'll need to be able to use a call-clobbered
1633 register for the address of the target function. Make sure that all
1634 such registers are not used for passing parameters. */
1635 if (!decl && !TARGET_64BIT)
1637 tree type;
1639 /* We're looking at the CALL_EXPR, we need the type of the function. */
1640 type = TREE_OPERAND (exp, 0); /* pointer expression */
1641 type = TREE_TYPE (type); /* pointer type */
1642 type = TREE_TYPE (type); /* function type */
1644 if (ix86_function_regparm (type, NULL) >= 3)
1646 /* ??? Need to count the actual number of registers to be used,
1647 not the possible number of registers. Fix later. */
1648 return false;
1652 /* Otherwise okay. That also includes certain types of indirect calls. */
1653 return true;
1656 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1657 arguments as in struct attribute_spec.handler. */
1658 static tree
1659 ix86_handle_cdecl_attribute (tree *node, tree name,
1660 tree args ATTRIBUTE_UNUSED,
1661 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1663 if (TREE_CODE (*node) != FUNCTION_TYPE
1664 && TREE_CODE (*node) != METHOD_TYPE
1665 && TREE_CODE (*node) != FIELD_DECL
1666 && TREE_CODE (*node) != TYPE_DECL)
1668 warning ("`%s' attribute only applies to functions",
1669 IDENTIFIER_POINTER (name));
1670 *no_add_attrs = true;
1672 else
1674 if (is_attribute_p ("fastcall", name))
1676 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1678 error ("fastcall and stdcall attributes are not compatible");
1680 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1682 error ("fastcall and regparm attributes are not compatible");
1685 else if (is_attribute_p ("stdcall", name))
1687 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1689 error ("fastcall and stdcall attributes are not compatible");
1694 if (TARGET_64BIT)
1696 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1697 *no_add_attrs = true;
1700 return NULL_TREE;
1703 /* Handle a "regparm" attribute;
1704 arguments as in struct attribute_spec.handler. */
1705 static tree
1706 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1707 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1709 if (TREE_CODE (*node) != FUNCTION_TYPE
1710 && TREE_CODE (*node) != METHOD_TYPE
1711 && TREE_CODE (*node) != FIELD_DECL
1712 && TREE_CODE (*node) != TYPE_DECL)
1714 warning ("`%s' attribute only applies to functions",
1715 IDENTIFIER_POINTER (name));
1716 *no_add_attrs = true;
1718 else
1720 tree cst;
1722 cst = TREE_VALUE (args);
1723 if (TREE_CODE (cst) != INTEGER_CST)
1725 warning ("`%s' attribute requires an integer constant argument",
1726 IDENTIFIER_POINTER (name));
1727 *no_add_attrs = true;
1729 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1731 warning ("argument to `%s' attribute larger than %d",
1732 IDENTIFIER_POINTER (name), REGPARM_MAX);
1733 *no_add_attrs = true;
1736 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1738 error ("fastcall and regparm attributes are not compatible");
1742 return NULL_TREE;
1745 /* Return 0 if the attributes for two types are incompatible, 1 if they
1746 are compatible, and 2 if they are nearly compatible (which causes a
1747 warning to be generated). */
1749 static int
1750 ix86_comp_type_attributes (tree type1, tree type2)
1752 /* Check for mismatch of non-default calling convention. */
1753 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1755 if (TREE_CODE (type1) != FUNCTION_TYPE)
1756 return 1;
1758 /* Check for mismatched fastcall types */
1759 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1760 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1761 return 0;
1763 /* Check for mismatched return types (cdecl vs stdcall). */
1764 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1765 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1766 return 0;
1767 if (ix86_function_regparm (type1, NULL)
1768 != ix86_function_regparm (type2, NULL))
1769 return 0;
1770 return 1;
1773 /* Return the regparm value for a function with the indicated TYPE and DECL.
1774 DECL may be NULL when calling function indirectly
1775 or considering a libcall. */
1777 static int
1778 ix86_function_regparm (tree type, tree decl)
1780 tree attr;
1781 int regparm = ix86_regparm;
1782 bool user_convention = false;
1784 if (!TARGET_64BIT)
1786 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1787 if (attr)
1789 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1790 user_convention = true;
1793 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1795 regparm = 2;
1796 user_convention = true;
1799 /* Use register calling convention for local functions when possible. */
1800 if (!TARGET_64BIT && !user_convention && decl
1801 && flag_unit_at_a_time && !profile_flag)
1803 struct cgraph_local_info *i = cgraph_local_info (decl);
1804 if (i && i->local)
1806 /* We can't use regparm(3) for nested functions as these use
1807 static chain pointer in third argument. */
1808 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1809 regparm = 2;
1810 else
1811 regparm = 3;
1815 return regparm;
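/* A sketch of the local-function case above (hypothetical example): on 32-bit,
   with -funit-at-a-time and no profiling, a file-local function such as
     static int sum (int a, int b, int c) { return a + b + c; }
   whose address never escapes is given regparm (3) automatically, so its
   arguments arrive in %eax, %edx and %ecx instead of on the stack.  */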
1818 /* Return true if EAX is live at the start of the function. Used by
1819 ix86_expand_prologue to determine if we need special help before
1820 calling allocate_stack_worker. */
1822 static bool
1823 ix86_eax_live_at_start_p (void)
1825 /* Cheat. Don't bother working forward from ix86_function_regparm
1826 to the function type to whether an actual argument is located in
1827 eax. Instead just look at cfg info, which is still close enough
1828 to correct at this point. This gives false positives for broken
1829 functions that might use uninitialized data that happens to be
1830 allocated in eax, but who cares? */
1831 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1834 /* Value is the number of bytes of arguments automatically
1835 popped when returning from a subroutine call.
1836 FUNDECL is the declaration node of the function (as a tree),
1837 FUNTYPE is the data type of the function (as a tree),
1838 or for a library call it is an identifier node for the subroutine name.
1839 SIZE is the number of bytes of arguments passed on the stack.
1841 On the 80386, the RTD insn may be used to pop them if the number
1842 of args is fixed, but if the number is variable then the caller
1843 must pop them all. RTD can't be used for library calls now
1844 because the library is compiled with the Unix compiler.
1845 Use of RTD is a selectable option, since it is incompatible with
1846 standard Unix calling sequences. If the option is not selected,
1847 the caller must always pop the args.
1849 The attribute stdcall is equivalent to RTD on a per module basis. */
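/* For example, a 32-bit function declared
     void __attribute__((stdcall)) f (int a, int b);
   has 8 bytes of stack arguments and a fixed argument list, so this hook
   returns 8 and the epilogue uses "ret $8"; a varargs or cdecl function
   yields 0 and the caller pops the arguments instead.  */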
1852 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1854 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1856 /* Cdecl functions override -mrtd, and never pop the stack. */
1857 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1859 /* Stdcall and fastcall functions will pop the stack if not
1860 variable args. */
1861 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1862 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1863 rtd = 1;
1865 if (rtd
1866 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1867 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1868 == void_type_node)))
1869 return size;
1872 /* Lose any fake structure return argument if it is passed on the stack. */
1873 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1874 && !TARGET_64BIT)
1876 int nregs = ix86_function_regparm (funtype, fundecl);
1878 if (!nregs)
1879 return GET_MODE_SIZE (Pmode);
1882 return 0;
1885 /* Argument support functions. */
1887 /* Return true when register may be used to pass function parameters. */
1888 bool
1889 ix86_function_arg_regno_p (int regno)
1891 int i;
1892 if (!TARGET_64BIT)
1893 return (regno < REGPARM_MAX
1894 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1895 if (SSE_REGNO_P (regno) && TARGET_SSE)
1896 return true;
1897 /* RAX is used as hidden argument to va_arg functions. */
1898 if (!regno)
1899 return true;
1900 for (i = 0; i < REGPARM_MAX; i++)
1901 if (regno == x86_64_int_parameter_registers[i])
1902 return true;
1903 return false;
1906 /* Return true if we do not know how to pass TYPE solely in registers. */
1908 static bool
1909 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
1911 if (must_pass_in_stack_var_size_or_pad (mode, type))
1912 return true;
1913 return (!TARGET_64BIT && type && mode == TImode);
1916 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1917 for a call to a function whose data type is FNTYPE.
1918 For a library call, FNTYPE is 0. */
1920 void
1921 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1922 tree fntype, /* tree ptr for function decl */
1923 rtx libname, /* SYMBOL_REF of library name or 0 */
1924 tree fndecl)
1926 static CUMULATIVE_ARGS zero_cum;
1927 tree param, next_param;
1929 if (TARGET_DEBUG_ARG)
1931 fprintf (stderr, "\ninit_cumulative_args (");
1932 if (fntype)
1933 fprintf (stderr, "fntype code = %s, ret code = %s",
1934 tree_code_name[(int) TREE_CODE (fntype)],
1935 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1936 else
1937 fprintf (stderr, "no fntype");
1939 if (libname)
1940 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1943 *cum = zero_cum;
1945 /* Set up the number of registers to use for passing arguments. */
1946 if (fntype)
1947 cum->nregs = ix86_function_regparm (fntype, fndecl);
1948 else
1949 cum->nregs = ix86_regparm;
1950 if (TARGET_SSE)
1951 cum->sse_nregs = SSE_REGPARM_MAX;
1952 if (TARGET_MMX)
1953 cum->mmx_nregs = MMX_REGPARM_MAX;
1954 cum->warn_sse = true;
1955 cum->warn_mmx = true;
1956 cum->maybe_vaarg = false;
1958 /* Use ecx and edx registers if the function has the fastcall attribute. */
1959 if (fntype && !TARGET_64BIT)
1961 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1963 cum->nregs = 2;
1964 cum->fastcall = 1;
1968 /* Determine if this function has variable arguments. This is
1969 indicated by the last argument being 'void_type_node' if there
1970 are no variable arguments. If there are variable arguments, then
1971 we won't pass anything in registers in 32-bit mode. */
1973 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
1975 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1976 param != 0; param = next_param)
1978 next_param = TREE_CHAIN (param);
1979 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1981 if (!TARGET_64BIT)
1983 cum->nregs = 0;
1984 cum->sse_nregs = 0;
1985 cum->mmx_nregs = 0;
1986 cum->warn_sse = 0;
1987 cum->warn_mmx = 0;
1988 cum->fastcall = 0;
1990 cum->maybe_vaarg = true;
1994 if ((!fntype && !libname)
1995 || (fntype && !TYPE_ARG_TYPES (fntype)))
1996 cum->maybe_vaarg = 1;
1998 if (TARGET_DEBUG_ARG)
1999 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2001 return;
2004 /* x86-64 register passing implementation. See the x86-64 ABI for details. The
2005 goal of this code is to classify each eightbyte of an incoming argument by
2006 register class and assign registers accordingly. */
2008 /* Return the union class of CLASS1 and CLASS2.
2009 See the x86-64 PS ABI for details. */
2011 static enum x86_64_reg_class
2012 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2014 /* Rule #1: If both classes are equal, this is the resulting class. */
2015 if (class1 == class2)
2016 return class1;
2018 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2019 the other class. */
2020 if (class1 == X86_64_NO_CLASS)
2021 return class2;
2022 if (class2 == X86_64_NO_CLASS)
2023 return class1;
2025 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2026 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2027 return X86_64_MEMORY_CLASS;
2029 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2030 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2031 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2032 return X86_64_INTEGERSI_CLASS;
2033 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2034 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2035 return X86_64_INTEGER_CLASS;
2037 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
2038 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
2039 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
2040 return X86_64_MEMORY_CLASS;
2042 /* Rule #6: Otherwise class SSE is used. */
2043 return X86_64_SSE_CLASS;
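/* For instance: NO_CLASS merged with anything gives the other class (rule #2),
   INTEGERSI merged with SSESF gives INTEGERSI (rule #4), X87 merged with SSE
   gives MEMORY (rule #5), and SSE merged with SSEDF falls through to SSE
   (rule #6).  */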
2046 /* Classify the argument of type TYPE and mode MODE.
2047 CLASSES will be filled by the register class used to pass each word
2048 of the operand. The number of words is returned. In case the parameter
2049 should be passed in memory, 0 is returned. As a special case for zero
2050 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2052 BIT_OFFSET is used internally for handling records and specifies the
2053 offset of the piece being classified, in bits modulo 256, to avoid overflow cases.
2055 See the x86-64 PS ABI for details.
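/* An illustrative example (hypothetical type, not from the ABI document): for
     struct s { double d; long l; };
   the first eightbyte classifies as SSEDF and the second as INTEGER, so
   CLASSES becomes { SSEDF, INTEGER }, 2 is returned, and the struct is passed
   in one SSE register and one general-purpose register.  */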
2058 static int
2059 classify_argument (enum machine_mode mode, tree type,
2060 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2062 HOST_WIDE_INT bytes =
2063 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2064 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2066 /* Variable sized entities are always passed/returned in memory. */
2067 if (bytes < 0)
2068 return 0;
2070 if (mode != VOIDmode
2071 && targetm.calls.must_pass_in_stack (mode, type))
2072 return 0;
2074 if (type && AGGREGATE_TYPE_P (type))
2076 int i;
2077 tree field;
2078 enum x86_64_reg_class subclasses[MAX_CLASSES];
2080 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2081 if (bytes > 16)
2082 return 0;
2084 for (i = 0; i < words; i++)
2085 classes[i] = X86_64_NO_CLASS;
2087 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2088 signal the memory class, so handle it as a special case. */
2089 if (!words)
2091 classes[0] = X86_64_NO_CLASS;
2092 return 1;
2095 /* Classify each field of record and merge classes. */
2096 if (TREE_CODE (type) == RECORD_TYPE)
2098 /* For classes first merge in the field of the subclasses. */
2099 if (TYPE_BINFO (type))
2101 tree binfo, base_binfo;
2102 int i;
2104 for (binfo = TYPE_BINFO (type), i = 0;
2105 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2107 int num;
2108 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2109 tree type = BINFO_TYPE (base_binfo);
2111 num = classify_argument (TYPE_MODE (type),
2112 type, subclasses,
2113 (offset + bit_offset) % 256);
2114 if (!num)
2115 return 0;
2116 for (i = 0; i < num; i++)
2118 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2119 classes[i + pos] =
2120 merge_classes (subclasses[i], classes[i + pos]);
2124 /* And now merge the fields of the structure. */
2125 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2127 if (TREE_CODE (field) == FIELD_DECL)
2129 int num;
2131 /* Bitfields are always classified as integer. Handle them
2132 early, since later code would consider them to be
2133 misaligned integers. */
2134 if (DECL_BIT_FIELD (field))
2136 for (i = int_bit_position (field) / 8 / 8;
2137 i < (int_bit_position (field)
2138 + tree_low_cst (DECL_SIZE (field), 0)
2139 + 63) / 8 / 8; i++)
2140 classes[i] =
2141 merge_classes (X86_64_INTEGER_CLASS,
2142 classes[i]);
2144 else
2146 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2147 TREE_TYPE (field), subclasses,
2148 (int_bit_position (field)
2149 + bit_offset) % 256);
2150 if (!num)
2151 return 0;
2152 for (i = 0; i < num; i++)
2154 int pos =
2155 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2156 classes[i + pos] =
2157 merge_classes (subclasses[i], classes[i + pos]);
2163 /* Arrays are handled as small records. */
2164 else if (TREE_CODE (type) == ARRAY_TYPE)
2166 int num;
2167 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2168 TREE_TYPE (type), subclasses, bit_offset);
2169 if (!num)
2170 return 0;
2172 /* The partial classes are now full classes. */
2173 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2174 subclasses[0] = X86_64_SSE_CLASS;
2175 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2176 subclasses[0] = X86_64_INTEGER_CLASS;
2178 for (i = 0; i < words; i++)
2179 classes[i] = subclasses[i % num];
2181 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2182 else if (TREE_CODE (type) == UNION_TYPE
2183 || TREE_CODE (type) == QUAL_UNION_TYPE)
2185 /* For classes first merge in the field of the subclasses. */
2186 if (TYPE_BINFO (type))
2188 tree binfo, base_binfo;
2189 int i;
2191 for (binfo = TYPE_BINFO (type), i = 0;
2192 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2194 int num;
2195 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2196 tree type = BINFO_TYPE (base_binfo);
2198 num = classify_argument (TYPE_MODE (type),
2199 type, subclasses,
2200 (offset + (bit_offset % 64)) % 256);
2201 if (!num)
2202 return 0;
2203 for (i = 0; i < num; i++)
2205 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2206 classes[i + pos] =
2207 merge_classes (subclasses[i], classes[i + pos]);
2211 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2213 if (TREE_CODE (field) == FIELD_DECL)
2215 int num;
2216 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2217 TREE_TYPE (field), subclasses,
2218 bit_offset);
2219 if (!num)
2220 return 0;
2221 for (i = 0; i < num; i++)
2222 classes[i] = merge_classes (subclasses[i], classes[i]);
2226 else if (TREE_CODE (type) == SET_TYPE)
2228 if (bytes <= 4)
2230 classes[0] = X86_64_INTEGERSI_CLASS;
2231 return 1;
2233 else if (bytes <= 8)
2235 classes[0] = X86_64_INTEGER_CLASS;
2236 return 1;
2238 else if (bytes <= 12)
2240 classes[0] = X86_64_INTEGER_CLASS;
2241 classes[1] = X86_64_INTEGERSI_CLASS;
2242 return 2;
2244 else
2246 classes[0] = X86_64_INTEGER_CLASS;
2247 classes[1] = X86_64_INTEGER_CLASS;
2248 return 2;
2251 else
2252 abort ();
2254 /* Final merger cleanup. */
2255 for (i = 0; i < words; i++)
2257 /* If one class is MEMORY, everything should be passed in
2258 memory. */
2259 if (classes[i] == X86_64_MEMORY_CLASS)
2260 return 0;
2262 /* The X86_64_SSEUP_CLASS should be always preceded by
2263 X86_64_SSE_CLASS. */
2264 if (classes[i] == X86_64_SSEUP_CLASS
2265 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2266 classes[i] = X86_64_SSE_CLASS;
2268 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2269 if (classes[i] == X86_64_X87UP_CLASS
2270 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2271 classes[i] = X86_64_SSE_CLASS;
2273 return words;
2276 /* Compute alignment needed. We align all types to natural boundaries with
2277 the exception of XFmode, which is aligned to 128 bits. */
2278 if (mode != VOIDmode && mode != BLKmode)
2280 int mode_alignment = GET_MODE_BITSIZE (mode);
2282 if (mode == XFmode)
2283 mode_alignment = 128;
2284 else if (mode == XCmode)
2285 mode_alignment = 256;
2286 if (COMPLEX_MODE_P (mode))
2287 mode_alignment /= 2;
2288 /* Misaligned fields are always returned in memory. */
2289 if (bit_offset % mode_alignment)
2290 return 0;
2293 /* for V1xx modes, just use the base mode */
2294 if (VECTOR_MODE_P (mode)
2295 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
2296 mode = GET_MODE_INNER (mode);
2298 /* Classification of atomic types. */
2299 switch (mode)
2301 case DImode:
2302 case SImode:
2303 case HImode:
2304 case QImode:
2305 case CSImode:
2306 case CHImode:
2307 case CQImode:
2308 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2309 classes[0] = X86_64_INTEGERSI_CLASS;
2310 else
2311 classes[0] = X86_64_INTEGER_CLASS;
2312 return 1;
2313 case CDImode:
2314 case TImode:
2315 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2316 return 2;
2317 case CTImode:
2318 return 0;
2319 case SFmode:
2320 if (!(bit_offset % 64))
2321 classes[0] = X86_64_SSESF_CLASS;
2322 else
2323 classes[0] = X86_64_SSE_CLASS;
2324 return 1;
2325 case DFmode:
2326 classes[0] = X86_64_SSEDF_CLASS;
2327 return 1;
2328 case XFmode:
2329 classes[0] = X86_64_X87_CLASS;
2330 classes[1] = X86_64_X87UP_CLASS;
2331 return 2;
2332 case TFmode:
2333 classes[0] = X86_64_SSE_CLASS;
2334 classes[1] = X86_64_SSEUP_CLASS;
2335 return 2;
2336 case SCmode:
2337 classes[0] = X86_64_SSE_CLASS;
2338 return 1;
2339 case DCmode:
2340 classes[0] = X86_64_SSEDF_CLASS;
2341 classes[1] = X86_64_SSEDF_CLASS;
2342 return 2;
2343 case XCmode:
2344 case TCmode:
2345 /* These modes are larger than 16 bytes. */
2346 return 0;
2347 case V4SFmode:
2348 case V4SImode:
2349 case V16QImode:
2350 case V8HImode:
2351 case V2DFmode:
2352 case V2DImode:
2353 classes[0] = X86_64_SSE_CLASS;
2354 classes[1] = X86_64_SSEUP_CLASS;
2355 return 2;
2356 case V2SFmode:
2357 case V2SImode:
2358 case V4HImode:
2359 case V8QImode:
2360 classes[0] = X86_64_SSE_CLASS;
2361 return 1;
2362 case BLKmode:
2363 case VOIDmode:
2364 return 0;
2365 default:
2366 if (VECTOR_MODE_P (mode))
2368 if (bytes > 16)
2369 return 0;
2370 if (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT)
2372 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2373 classes[0] = X86_64_INTEGERSI_CLASS;
2374 else
2375 classes[0] = X86_64_INTEGER_CLASS;
2376 classes[1] = X86_64_INTEGER_CLASS;
2377 return 1 + (bytes > 8);
2380 abort ();
2384 /* Examine the argument and set the number of registers required in each
2385 class. Return 0 iff the parameter should be passed in memory. */
2386 static int
2387 examine_argument (enum machine_mode mode, tree type, int in_return,
2388 int *int_nregs, int *sse_nregs)
2390 enum x86_64_reg_class class[MAX_CLASSES];
2391 int n = classify_argument (mode, type, class, 0);
2393 *int_nregs = 0;
2394 *sse_nregs = 0;
2395 if (!n)
2396 return 0;
2397 for (n--; n >= 0; n--)
2398 switch (class[n])
2400 case X86_64_INTEGER_CLASS:
2401 case X86_64_INTEGERSI_CLASS:
2402 (*int_nregs)++;
2403 break;
2404 case X86_64_SSE_CLASS:
2405 case X86_64_SSESF_CLASS:
2406 case X86_64_SSEDF_CLASS:
2407 (*sse_nregs)++;
2408 break;
2409 case X86_64_NO_CLASS:
2410 case X86_64_SSEUP_CLASS:
2411 break;
2412 case X86_64_X87_CLASS:
2413 case X86_64_X87UP_CLASS:
2414 if (!in_return)
2415 return 0;
2416 break;
2417 case X86_64_MEMORY_CLASS:
2418 abort ();
2420 return 1;
2422 /* Construct container for the argument used by GCC interface. See
2423 FUNCTION_ARG for the detailed description. */
2424 static rtx
2425 construct_container (enum machine_mode mode, tree type, int in_return,
2426 int nintregs, int nsseregs, const int * intreg,
2427 int sse_regno)
2429 enum machine_mode tmpmode;
2430 int bytes =
2431 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2432 enum x86_64_reg_class class[MAX_CLASSES];
2433 int n;
2434 int i;
2435 int nexps = 0;
2436 int needed_sseregs, needed_intregs;
2437 rtx exp[MAX_CLASSES];
2438 rtx ret;
2440 n = classify_argument (mode, type, class, 0);
2441 if (TARGET_DEBUG_ARG)
2443 if (!n)
2444 fprintf (stderr, "Memory class\n");
2445 else
2447 fprintf (stderr, "Classes:");
2448 for (i = 0; i < n; i++)
2450 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2452 fprintf (stderr, "\n");
2455 if (!n)
2456 return NULL;
2457 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2458 return NULL;
2459 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2460 return NULL;
2462 /* First construct simple cases. Avoid SCmode, since we want to use
2463 single register to pass this type. */
2464 if (n == 1 && mode != SCmode)
2465 switch (class[0])
2467 case X86_64_INTEGER_CLASS:
2468 case X86_64_INTEGERSI_CLASS:
2469 return gen_rtx_REG (mode, intreg[0]);
2470 case X86_64_SSE_CLASS:
2471 case X86_64_SSESF_CLASS:
2472 case X86_64_SSEDF_CLASS:
2473 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2474 case X86_64_X87_CLASS:
2475 return gen_rtx_REG (mode, FIRST_STACK_REG);
2476 case X86_64_NO_CLASS:
2477 /* Zero sized array, struct or class. */
2478 return NULL;
2479 default:
2480 abort ();
2482 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2483 && mode != BLKmode)
2484 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2485 if (n == 2
2486 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2487 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2488 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2489 && class[1] == X86_64_INTEGER_CLASS
2490 && (mode == CDImode || mode == TImode || mode == TFmode)
2491 && intreg[0] + 1 == intreg[1])
2492 return gen_rtx_REG (mode, intreg[0]);
2493 if (n == 4
2494 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2495 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS
2496 && mode != BLKmode)
2497 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2499 /* Otherwise figure out the entries of the PARALLEL. */
2500 for (i = 0; i < n; i++)
2502 switch (class[i])
2504 case X86_64_NO_CLASS:
2505 break;
2506 case X86_64_INTEGER_CLASS:
2507 case X86_64_INTEGERSI_CLASS:
2508 /* Merge TImodes on aligned occasions here too. */
2509 if (i * 8 + 8 > bytes)
2510 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2511 else if (class[i] == X86_64_INTEGERSI_CLASS)
2512 tmpmode = SImode;
2513 else
2514 tmpmode = DImode;
2515 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2516 if (tmpmode == BLKmode)
2517 tmpmode = DImode;
2518 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2519 gen_rtx_REG (tmpmode, *intreg),
2520 GEN_INT (i*8));
2521 intreg++;
2522 break;
2523 case X86_64_SSESF_CLASS:
2524 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2525 gen_rtx_REG (SFmode,
2526 SSE_REGNO (sse_regno)),
2527 GEN_INT (i*8));
2528 sse_regno++;
2529 break;
2530 case X86_64_SSEDF_CLASS:
2531 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2532 gen_rtx_REG (DFmode,
2533 SSE_REGNO (sse_regno)),
2534 GEN_INT (i*8));
2535 sse_regno++;
2536 break;
2537 case X86_64_SSE_CLASS:
2538 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2539 tmpmode = TImode;
2540 else
2541 tmpmode = DImode;
2542 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2543 gen_rtx_REG (tmpmode,
2544 SSE_REGNO (sse_regno)),
2545 GEN_INT (i*8));
2546 if (tmpmode == TImode)
2547 i++;
2548 sse_regno++;
2549 break;
2550 default:
2551 abort ();
2554 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2555 for (i = 0; i < nexps; i++)
2556 XVECEXP (ret, 0, i) = exp [i];
2557 return ret;
2560 /* Update the data in CUM to advance over an argument
2561 of mode MODE and data type TYPE.
2562 (TYPE is null for libcalls where that information may not be available.) */
2564 void
2565 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2566 enum machine_mode mode, /* current arg mode */
2567 tree type, /* type of the argument or 0 if lib support */
2568 int named) /* whether or not the argument was named */
2570 int bytes =
2571 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2572 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2574 if (TARGET_DEBUG_ARG)
2575 fprintf (stderr,
2576 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2577 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
2578 if (TARGET_64BIT)
2580 int int_nregs, sse_nregs;
2581 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2582 cum->words += words;
2583 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2585 cum->nregs -= int_nregs;
2586 cum->sse_nregs -= sse_nregs;
2587 cum->regno += int_nregs;
2588 cum->sse_regno += sse_nregs;
2590 else
2591 cum->words += words;
2593 else
2595 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2596 && (!type || !AGGREGATE_TYPE_P (type)))
2598 cum->sse_words += words;
2599 cum->sse_nregs -= 1;
2600 cum->sse_regno += 1;
2601 if (cum->sse_nregs <= 0)
2603 cum->sse_nregs = 0;
2604 cum->sse_regno = 0;
2607 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2608 && (!type || !AGGREGATE_TYPE_P (type)))
2610 cum->mmx_words += words;
2611 cum->mmx_nregs -= 1;
2612 cum->mmx_regno += 1;
2613 if (cum->mmx_nregs <= 0)
2615 cum->mmx_nregs = 0;
2616 cum->mmx_regno = 0;
2619 else
2621 cum->words += words;
2622 cum->nregs -= words;
2623 cum->regno += words;
2625 if (cum->nregs <= 0)
2627 cum->nregs = 0;
2628 cum->regno = 0;
2632 return;
2635 /* Define where to put the arguments to a function.
2636 Value is zero to push the argument on the stack,
2637 or a hard register in which to store the argument.
2639 MODE is the argument's machine mode.
2640 TYPE is the data type of the argument (as a tree).
2641 This is null for libcalls where that information may
2642 not be available.
2643 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2644 the preceding args and about the function being called.
2645 NAMED is nonzero if this argument is a named parameter
2646 (otherwise it is an extra parameter matching an ellipsis). */
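/* For example, with the 32-bit fastcall convention the first two SImode or
   smaller named arguments come back from here as ECX and EDX; on x86-64 the
   result is the REG or PARALLEL built by construct_container according to the
   classification above.  */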
2649 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2650 enum machine_mode mode, /* current arg mode */
2651 tree type, /* type of the argument or 0 if lib support */
2652 int named) /* != 0 for normal args, == 0 for ... args */
2654 rtx ret = NULL_RTX;
2655 int bytes =
2656 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2657 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2658 static bool warnedsse, warnedmmx;
2660 /* To simplify the code below, represent vector types with a vector mode
2661 even if MMX/SSE are not active. */
2662 if (type
2663 && TREE_CODE (type) == VECTOR_TYPE
2664 && (bytes == 8 || bytes == 16)
2665 && GET_MODE_CLASS (TYPE_MODE (type)) != MODE_VECTOR_INT
2666 && GET_MODE_CLASS (TYPE_MODE (type)) != MODE_VECTOR_FLOAT)
2668 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2669 enum machine_mode newmode
2670 = TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
2671 ? MIN_MODE_VECTOR_FLOAT : MIN_MODE_VECTOR_INT;
2673 /* Get the mode which has this inner mode and number of units. */
2674 for (; newmode != VOIDmode; newmode = GET_MODE_WIDER_MODE (newmode))
2675 if (GET_MODE_NUNITS (newmode) == TYPE_VECTOR_SUBPARTS (type)
2676 && GET_MODE_INNER (newmode) == innermode)
2678 mode = newmode;
2679 break;
2683 /* Handle a hidden AL argument containing number of registers for varargs
2684 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2685 any AL settings. */
2686 if (mode == VOIDmode)
2688 if (TARGET_64BIT)
2689 return GEN_INT (cum->maybe_vaarg
2690 ? (cum->sse_nregs < 0
2691 ? SSE_REGPARM_MAX
2692 : cum->sse_regno)
2693 : -1);
2694 else
2695 return constm1_rtx;
2697 if (TARGET_64BIT)
2698 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2699 &x86_64_int_parameter_registers [cum->regno],
2700 cum->sse_regno);
2701 else
2702 switch (mode)
2704 /* For now, pass fp/complex values on the stack. */
2705 default:
2706 break;
2708 case BLKmode:
2709 if (bytes < 0)
2710 break;
2711 /* FALLTHRU */
2712 case DImode:
2713 case SImode:
2714 case HImode:
2715 case QImode:
2716 if (words <= cum->nregs)
2718 int regno = cum->regno;
2720 /* Fastcall allocates the first two DWORD (SImode) or
2721 smaller arguments to ECX and EDX. */
2722 if (cum->fastcall)
2724 if (mode == BLKmode || mode == DImode)
2725 break;
2727 /* ECX not EAX is the first allocated register. */
2728 if (regno == 0)
2729 regno = 2;
2731 ret = gen_rtx_REG (mode, regno);
2733 break;
2734 case TImode:
2735 case V16QImode:
2736 case V8HImode:
2737 case V4SImode:
2738 case V2DImode:
2739 case V4SFmode:
2740 case V2DFmode:
2741 if (!type || !AGGREGATE_TYPE_P (type))
2743 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2745 warnedsse = true;
2746 warning ("SSE vector argument without SSE enabled "
2747 "changes the ABI");
2749 if (cum->sse_nregs)
2750 ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
2752 break;
2753 case V8QImode:
2754 case V4HImode:
2755 case V2SImode:
2756 case V2SFmode:
2757 if (!type || !AGGREGATE_TYPE_P (type))
2759 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2761 warnedmmx = true;
2762 warning ("MMX vector argument without MMX enabled "
2763 "changes the ABI");
2765 if (cum->mmx_nregs)
2766 ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
2768 break;
2771 if (TARGET_DEBUG_ARG)
2773 fprintf (stderr,
2774 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2775 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2777 if (ret)
2778 print_simple_rtl (stderr, ret);
2779 else
2780 fprintf (stderr, ", stack");
2782 fprintf (stderr, " )\n");
2785 return ret;
2788 /* A C expression that indicates when an argument must be passed by
2789 reference. If nonzero for an argument, a copy of that argument is
2790 made in memory and a pointer to the argument is passed instead of
2791 the argument itself. The pointer is passed in whatever way is
2792 appropriate for passing a pointer to that type. */
2794 static bool
2795 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2796 enum machine_mode mode ATTRIBUTE_UNUSED,
2797 tree type, bool named ATTRIBUTE_UNUSED)
2799 if (!TARGET_64BIT)
2800 return 0;
2802 if (type && int_size_in_bytes (type) == -1)
2804 if (TARGET_DEBUG_ARG)
2805 fprintf (stderr, "function_arg_pass_by_reference\n");
2806 return 1;
2809 return 0;
2812 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2813 ABI. Only called if TARGET_SSE. */
2814 static bool
2815 contains_128bit_aligned_vector_p (tree type)
2817 enum machine_mode mode = TYPE_MODE (type);
2818 if (SSE_REG_MODE_P (mode)
2819 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2820 return true;
2821 if (TYPE_ALIGN (type) < 128)
2822 return false;
2824 if (AGGREGATE_TYPE_P (type))
2826 /* Walk the aggregates recursively. */
2827 if (TREE_CODE (type) == RECORD_TYPE
2828 || TREE_CODE (type) == UNION_TYPE
2829 || TREE_CODE (type) == QUAL_UNION_TYPE)
2831 tree field;
2833 if (TYPE_BINFO (type))
2835 tree binfo, base_binfo;
2836 int i;
2838 for (binfo = TYPE_BINFO (type), i = 0;
2839 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2840 if (contains_128bit_aligned_vector_p (BINFO_TYPE (base_binfo)))
2841 return true;
2843 /* And now merge the fields of the structure. */
2844 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2846 if (TREE_CODE (field) == FIELD_DECL
2847 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2848 return true;
2851 /* Just for use if some languages pass arrays by value. */
2852 else if (TREE_CODE (type) == ARRAY_TYPE)
2854 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2855 return true;
2857 else
2858 abort ();
2860 return false;
2863 /* Gives the alignment boundary, in bits, of an argument with the
2864 specified mode and type. */
2867 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2869 int align;
2870 if (type)
2871 align = TYPE_ALIGN (type);
2872 else
2873 align = GET_MODE_ALIGNMENT (mode);
2874 if (align < PARM_BOUNDARY)
2875 align = PARM_BOUNDARY;
2876 if (!TARGET_64BIT)
2878 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2879 make an exception for SSE modes since these require 128bit
2880 alignment.
2882 The handling here differs from field_alignment. ICC aligns MMX
2883 arguments to 4 byte boundaries, while structure fields are aligned
2884 to 8 byte boundaries. */
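/* So a plain double argument stays at PARM_BOUNDARY (32 bits) here, while an
   __m128 argument, or an aggregate containing one, gets 128-bit alignment
   when SSE is enabled.  */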
2885 if (!TARGET_SSE)
2886 align = PARM_BOUNDARY;
2887 else if (!type)
2889 if (!SSE_REG_MODE_P (mode))
2890 align = PARM_BOUNDARY;
2892 else
2894 if (!contains_128bit_aligned_vector_p (type))
2895 align = PARM_BOUNDARY;
2898 if (align > 128)
2899 align = 128;
2900 return align;
2903 /* Return true if N is a possible register number of function value. */
2904 bool
2905 ix86_function_value_regno_p (int regno)
2907 if (!TARGET_64BIT)
2909 return ((regno) == 0
2910 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2911 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2913 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2914 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2915 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2918 /* Define how to find the value returned by a function.
2919 VALTYPE is the data type of the value (as a tree).
2920 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2921 otherwise, FUNC is 0. */
2923 ix86_function_value (tree valtype)
2925 if (TARGET_64BIT)
2927 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2928 REGPARM_MAX, SSE_REGPARM_MAX,
2929 x86_64_int_return_registers, 0);
2930 /* For zero sized structures, construct_container returns NULL, but we need
2931 to keep the rest of the compiler happy by returning a meaningful value. */
2932 if (!ret)
2933 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2934 return ret;
2936 else
2937 return gen_rtx_REG (TYPE_MODE (valtype),
2938 ix86_value_regno (TYPE_MODE (valtype)));
2941 /* Return nonzero iff TYPE is returned in memory. */
2943 ix86_return_in_memory (tree type)
2945 int needed_intregs, needed_sseregs, size;
2946 enum machine_mode mode = TYPE_MODE (type);
2948 if (TARGET_64BIT)
2949 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2951 if (mode == BLKmode)
2952 return 1;
2954 size = int_size_in_bytes (type);
2956 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2957 return 0;
2959 if (VECTOR_MODE_P (mode) || mode == TImode)
2961 /* User-created vectors small enough to fit in EAX. */
2962 if (size < 8)
2963 return 0;
2965 /* MMX/3dNow values are returned on the stack, since we've
2966 got to EMMS/FEMMS before returning. */
2967 if (size == 8)
2968 return 1;
2970 /* SSE values are returned in XMM0, except when it doesn't exist. */
2971 if (size == 16)
2972 return (TARGET_SSE ? 0 : 1);
2975 if (mode == XFmode)
2976 return 0;
2978 if (size > 12)
2979 return 1;
2980 return 0;
2983 /* When returning SSE vector types, we have a choice of either
2984 (1) being abi incompatible with a -march switch, or
2985 (2) generating an error.
2986 Given no good solution, I think the safest thing is one warning.
2987 The user won't be able to use -Werror, but....
2989 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
2990 called in response to actually generating a caller or callee that
2991 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
2992 via aggregate_value_p for general type probing from tree-ssa. */
2994 static rtx
2995 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
2997 static bool warned;
2999 if (!TARGET_SSE && type && !warned)
3001 /* Look at the return type of the function, not the function type. */
3002 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3004 if (mode == TImode
3005 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3007 warned = true;
3008 warning ("SSE vector return without SSE enabled changes the ABI");
3012 return NULL;
3015 /* Define how to find the value returned by a library function
3016 assuming the value has mode MODE. */
3018 ix86_libcall_value (enum machine_mode mode)
3020 if (TARGET_64BIT)
3022 switch (mode)
3024 case SFmode:
3025 case SCmode:
3026 case DFmode:
3027 case DCmode:
3028 case TFmode:
3029 return gen_rtx_REG (mode, FIRST_SSE_REG);
3030 case XFmode:
3031 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3032 case XCmode:
3033 case TCmode:
3034 return NULL;
3035 default:
3036 return gen_rtx_REG (mode, 0);
3039 else
3040 return gen_rtx_REG (mode, ix86_value_regno (mode));
3043 /* Given a mode, return the register to use for a return value. */
3045 static int
3046 ix86_value_regno (enum machine_mode mode)
3048 /* Floating point return values in %st(0). */
3049 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
3050 return FIRST_FLOAT_REG;
3051 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3052 we prevent this case when sse is not available. */
3053 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3054 return FIRST_SSE_REG;
3055 /* Everything else in %eax. */
3056 return 0;
3059 /* Create the va_list data type. */
3061 static tree
3062 ix86_build_builtin_va_list (void)
3064 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3066 /* For i386 we use plain pointer to argument area. */
3067 if (!TARGET_64BIT)
3068 return build_pointer_type (char_type_node);
3070 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3071 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3073 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3074 unsigned_type_node);
3075 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3076 unsigned_type_node);
3077 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3078 ptr_type_node);
3079 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3080 ptr_type_node);
3082 DECL_FIELD_CONTEXT (f_gpr) = record;
3083 DECL_FIELD_CONTEXT (f_fpr) = record;
3084 DECL_FIELD_CONTEXT (f_ovf) = record;
3085 DECL_FIELD_CONTEXT (f_sav) = record;
3087 TREE_CHAIN (record) = type_decl;
3088 TYPE_NAME (record) = type_decl;
3089 TYPE_FIELDS (record) = f_gpr;
3090 TREE_CHAIN (f_gpr) = f_fpr;
3091 TREE_CHAIN (f_fpr) = f_ovf;
3092 TREE_CHAIN (f_ovf) = f_sav;
3094 layout_type (record);
3096 /* The correct type is an array type of one element. */
3097 return build_array_type (record, build_index_type (size_zero_node));
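/* On x86-64 the record built above is roughly equivalent to:
     struct __va_list_tag {
       unsigned int gp_offset;       -- bytes consumed so far in the GPR save area
       unsigned int fp_offset;       -- bytes consumed so far in the SSE save area
       void *overflow_arg_area;      -- next argument passed on the stack
       void *reg_save_area;          -- register save area set up by the prologue
     };
   exposed to the user as a one-element array of this record.  */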
3100 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3102 static void
3103 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3104 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3105 int no_rtl)
3107 CUMULATIVE_ARGS next_cum;
3108 rtx save_area = NULL_RTX, mem;
3109 rtx label;
3110 rtx label_ref;
3111 rtx tmp_reg;
3112 rtx nsse_reg;
3113 int set;
3114 tree fntype;
3115 int stdarg_p;
3116 int i;
3118 if (!TARGET_64BIT)
3119 return;
3121 /* Indicate to allocate space on the stack for varargs save area. */
3122 ix86_save_varrargs_registers = 1;
3124 cfun->stack_alignment_needed = 128;
3126 fntype = TREE_TYPE (current_function_decl);
3127 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3128 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3129 != void_type_node));
3131 /* For varargs, we do not want to skip the dummy va_dcl argument.
3132 For stdargs, we do want to skip the last named argument. */
3133 next_cum = *cum;
3134 if (stdarg_p)
3135 function_arg_advance (&next_cum, mode, type, 1);
3137 if (!no_rtl)
3138 save_area = frame_pointer_rtx;
3140 set = get_varargs_alias_set ();
3142 for (i = next_cum.regno; i < ix86_regparm; i++)
3144 mem = gen_rtx_MEM (Pmode,
3145 plus_constant (save_area, i * UNITS_PER_WORD));
3146 set_mem_alias_set (mem, set);
3147 emit_move_insn (mem, gen_rtx_REG (Pmode,
3148 x86_64_int_parameter_registers[i]));
3151 if (next_cum.sse_nregs)
3153 /* Now emit code to save SSE registers. The AX parameter contains the number
3154 of SSE parameter registers used to call this function. We use the
3155 sse_prologue_save insn template, which produces a computed jump across
3156 the SSE saves. We need some preparation work to get this working. */
3158 label = gen_label_rtx ();
3159 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3161 /* Compute the address to jump to:
3162 label - 4*eax + named_sse_arguments*4 */
3163 tmp_reg = gen_reg_rtx (Pmode);
3164 nsse_reg = gen_reg_rtx (Pmode);
3165 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3166 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3167 gen_rtx_MULT (Pmode, nsse_reg,
3168 GEN_INT (4))));
3169 if (next_cum.sse_regno)
3170 emit_move_insn
3171 (nsse_reg,
3172 gen_rtx_CONST (DImode,
3173 gen_rtx_PLUS (DImode,
3174 label_ref,
3175 GEN_INT (next_cum.sse_regno * 4))));
3176 else
3177 emit_move_insn (nsse_reg, label_ref);
3178 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3180 /* Compute the address of the memory block we save into. We always use a
3181 pointer pointing 127 bytes after the first byte to store - this is needed
3182 to keep the instruction size limited to 4 bytes (8-bit displacements). */
3183 tmp_reg = gen_reg_rtx (Pmode);
3184 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3185 plus_constant (save_area,
3186 8 * REGPARM_MAX + 127)));
3187 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3188 set_mem_alias_set (mem, set);
3189 set_mem_align (mem, BITS_PER_WORD);
3191 /* And finally do the dirty job! */
3192 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3193 GEN_INT (next_cum.sse_regno), label));
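/* %al holds the number of SSE registers the caller actually used, so the
   target computed above, label + 4*<named sse args> - 4*%al, selects how many
   of the 4-byte save instructions emitted by the template are executed; only
   registers that may hold unnamed arguments end up being stored.  */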
3198 /* Implement va_start. */
3200 void
3201 ix86_va_start (tree valist, rtx nextarg)
3203 HOST_WIDE_INT words, n_gpr, n_fpr;
3204 tree f_gpr, f_fpr, f_ovf, f_sav;
3205 tree gpr, fpr, ovf, sav, t;
3207 /* Only 64bit target needs something special. */
3208 if (!TARGET_64BIT)
3210 std_expand_builtin_va_start (valist, nextarg);
3211 return;
3214 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3215 f_fpr = TREE_CHAIN (f_gpr);
3216 f_ovf = TREE_CHAIN (f_fpr);
3217 f_sav = TREE_CHAIN (f_ovf);
3219 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3220 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3221 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3222 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3223 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3225 /* Count number of gp and fp argument registers used. */
3226 words = current_function_args_info.words;
3227 n_gpr = current_function_args_info.regno;
3228 n_fpr = current_function_args_info.sse_regno;
3230 if (TARGET_DEBUG_ARG)
3231 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3232 (int) words, (int) n_gpr, (int) n_fpr);
3234 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3235 build_int_cst (NULL_TREE, n_gpr * 8, 0));
3236 TREE_SIDE_EFFECTS (t) = 1;
3237 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3239 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3240 build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX, 0));
3241 TREE_SIDE_EFFECTS (t) = 1;
3242 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3244 /* Find the overflow area. */
3245 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3246 if (words != 0)
3247 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3248 build_int_cst (NULL_TREE, words * UNITS_PER_WORD, 0));
3249 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3250 TREE_SIDE_EFFECTS (t) = 1;
3251 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3253 /* Find the register save area.
3254 The function prologue saves it right above the stack frame. */
3255 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3256 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3257 TREE_SIDE_EFFECTS (t) = 1;
3258 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3261 /* Implement va_arg. */
3263 tree
3264 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3266 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3267 tree f_gpr, f_fpr, f_ovf, f_sav;
3268 tree gpr, fpr, ovf, sav, t;
3269 int size, rsize;
3270 tree lab_false, lab_over = NULL_TREE;
3271 tree addr, t2;
3272 rtx container;
3273 int indirect_p = 0;
3274 tree ptrtype;
3276 /* Only 64bit target needs something special. */
3277 if (!TARGET_64BIT)
3278 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3280 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3281 f_fpr = TREE_CHAIN (f_gpr);
3282 f_ovf = TREE_CHAIN (f_fpr);
3283 f_sav = TREE_CHAIN (f_ovf);
3285 valist = build_fold_indirect_ref (valist);
3286 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3287 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3288 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3289 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3291 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
3292 if (indirect_p)
3293 type = build_pointer_type (type);
3294 size = int_size_in_bytes (type);
3295 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3297 container = construct_container (TYPE_MODE (type), type, 0,
3298 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3300 * Pull the value out of the saved registers ...
3303 addr = create_tmp_var (ptr_type_node, "addr");
3304 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3306 if (container)
3308 int needed_intregs, needed_sseregs;
3309 bool need_temp;
3310 tree int_addr, sse_addr;
3312 lab_false = create_artificial_label ();
3313 lab_over = create_artificial_label ();
3315 examine_argument (TYPE_MODE (type), type, 0,
3316 &needed_intregs, &needed_sseregs);
3318 need_temp = (!REG_P (container)
3319 && ((needed_intregs && TYPE_ALIGN (type) > 64)
3320 || TYPE_ALIGN (type) > 128));
3322 /* In case we are passing a structure, verify that it is a consecutive block
3323 in the register save area. If not, we need to do moves. */
3324 if (!need_temp && !REG_P (container))
3326 /* Verify that all registers are strictly consecutive */
3327 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3329 int i;
3331 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3333 rtx slot = XVECEXP (container, 0, i);
3334 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3335 || INTVAL (XEXP (slot, 1)) != i * 16)
3336 need_temp = 1;
3339 else
3341 int i;
3343 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3345 rtx slot = XVECEXP (container, 0, i);
3346 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3347 || INTVAL (XEXP (slot, 1)) != i * 8)
3348 need_temp = 1;
3352 if (!need_temp)
3354 int_addr = addr;
3355 sse_addr = addr;
3357 else
3359 int_addr = create_tmp_var (ptr_type_node, "int_addr");
3360 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3361 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3362 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3364 /* First ensure that we fit completely in registers. */
3365 if (needed_intregs)
3367 t = build_int_cst (TREE_TYPE (gpr),
3368 (REGPARM_MAX - needed_intregs + 1) * 8, 0);
3369 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
3370 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3371 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3372 gimplify_and_add (t, pre_p);
3374 if (needed_sseregs)
3376 t = build_int_cst (TREE_TYPE (fpr),
3377 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
3378 + REGPARM_MAX * 8, 0);
3379 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
3380 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3381 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3382 gimplify_and_add (t, pre_p);
3385 /* Compute index to start of area used for integer regs. */
3386 if (needed_intregs)
3388 /* int_addr = gpr + sav; */
3389 t = build2 (PLUS_EXPR, ptr_type_node, sav, gpr);
3390 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
3391 gimplify_and_add (t, pre_p);
3393 if (needed_sseregs)
3395 /* sse_addr = fpr + sav; */
3396 t = build2 (PLUS_EXPR, ptr_type_node, sav, fpr);
3397 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
3398 gimplify_and_add (t, pre_p);
3400 if (need_temp)
3402 int i;
3403 tree temp = create_tmp_var (type, "va_arg_tmp");
3405 /* addr = &temp; */
3406 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
3407 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
3408 gimplify_and_add (t, pre_p);
3410 for (i = 0; i < XVECLEN (container, 0); i++)
3412 rtx slot = XVECEXP (container, 0, i);
3413 rtx reg = XEXP (slot, 0);
3414 enum machine_mode mode = GET_MODE (reg);
3415 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
3416 tree addr_type = build_pointer_type (piece_type);
3417 tree src_addr, src;
3418 int src_offset;
3419 tree dest_addr, dest;
3421 if (SSE_REGNO_P (REGNO (reg)))
3423 src_addr = sse_addr;
3424 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3426 else
3428 src_addr = int_addr;
3429 src_offset = REGNO (reg) * 8;
3431 src_addr = fold_convert (addr_type, src_addr);
3432 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
3433 size_int (src_offset)));
3434 src = build_fold_indirect_ref (src_addr);
3436 dest_addr = fold_convert (addr_type, addr);
3437 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
3438 size_int (INTVAL (XEXP (slot, 1)))));
3439 dest = build_fold_indirect_ref (dest_addr);
3441 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
3442 gimplify_and_add (t, pre_p);
3446 if (needed_intregs)
3448 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3449 build_int_cst (NULL_TREE, needed_intregs * 8, 0));
3450 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3451 gimplify_and_add (t, pre_p);
3453 if (needed_sseregs)
3455 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3456 build_int_cst (NULL_TREE, needed_sseregs * 16, 0));
3457 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3458 gimplify_and_add (t, pre_p);
3461 t = build1 (GOTO_EXPR, void_type_node, lab_over);
3462 gimplify_and_add (t, pre_p);
3464 t = build1 (LABEL_EXPR, void_type_node, lab_false);
3465 append_to_statement_list (t, pre_p);
3468 /* ... otherwise out of the overflow area. */
3470 /* Care for on-stack alignment if needed. */
3471 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3472 t = ovf;
3473 else
3475 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3476 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf,
3477 build_int_cst (NULL_TREE, align - 1, 0));
3478 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3479 build_int_cst (NULL_TREE, -align, -1));
3481 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
3483 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
3484 gimplify_and_add (t2, pre_p);
3486 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
3487 build_int_cst (NULL_TREE, rsize * UNITS_PER_WORD, 0));
3488 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3489 gimplify_and_add (t, pre_p);
3491 if (container)
3493 t = build1 (LABEL_EXPR, void_type_node, lab_over);
3494 append_to_statement_list (t, pre_p);
3497 ptrtype = build_pointer_type (type);
3498 addr = fold_convert (ptrtype, addr);
3500 if (indirect_p)
3501 addr = build_fold_indirect_ref (addr);
3502 return build_fold_indirect_ref (addr);
3505 /* Return nonzero if OPNUM's MEM should be matched
3506 in movabs* patterns. */
3509 ix86_check_movabs (rtx insn, int opnum)
3511 rtx set, mem;
3513 set = PATTERN (insn);
3514 if (GET_CODE (set) == PARALLEL)
3515 set = XVECEXP (set, 0, 0);
3516 if (GET_CODE (set) != SET)
3517 abort ();
3518 mem = XEXP (set, opnum);
3519 while (GET_CODE (mem) == SUBREG)
3520 mem = SUBREG_REG (mem);
3521 if (GET_CODE (mem) != MEM)
3522 abort ();
3523 return (volatile_ok || !MEM_VOLATILE_P (mem));
3526 /* Initialize the table of extra 80387 mathematical constants. */
3528 static void
3529 init_ext_80387_constants (void)
3531 static const char * cst[5] =
3533 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
3534 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
3535 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
3536 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
3537 "3.1415926535897932385128089594061862044", /* 4: fldpi */
3539 int i;
3541 for (i = 0; i < 5; i++)
3543 real_from_string (&ext_80387_constants_table[i], cst[i]);
3544 /* Ensure each constant is rounded to XFmode precision. */
3545 real_convert (&ext_80387_constants_table[i],
3546 XFmode, &ext_80387_constants_table[i]);
3549 ext_80387_constants_init = 1;
3552 /* Return true if the constant is something that can be loaded with
3553 a special instruction. */
3556 standard_80387_constant_p (rtx x)
3558 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3559 return -1;
3561 if (x == CONST0_RTX (GET_MODE (x)))
3562 return 1;
3563 if (x == CONST1_RTX (GET_MODE (x)))
3564 return 2;
3566 /* For XFmode constants, try to find a special 80387 instruction when
3567 optimizing for size or on those CPUs that benefit from them. */
3568 if (GET_MODE (x) == XFmode
3569 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
3571 REAL_VALUE_TYPE r;
3572 int i;
3574 if (! ext_80387_constants_init)
3575 init_ext_80387_constants ();
3577 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3578 for (i = 0; i < 5; i++)
3579 if (real_identical (&r, &ext_80387_constants_table[i]))
3580 return i + 3;
3583 return 0;
3586 /* Return the opcode of the special instruction to be used to load
3587 the constant X. */
3589 const char *
3590 standard_80387_constant_opcode (rtx x)
3592 switch (standard_80387_constant_p (x))
3594 case 1:
3595 return "fldz";
3596 case 2:
3597 return "fld1";
3598 case 3:
3599 return "fldlg2";
3600 case 4:
3601 return "fldln2";
3602 case 5:
3603 return "fldl2e";
3604 case 6:
3605 return "fldl2t";
3606 case 7:
3607 return "fldpi";
3609 abort ();
3612 /* Return the CONST_DOUBLE representing the 80387 constant that is
3613 loaded by the specified special instruction. The argument IDX
3614 matches the return value from standard_80387_constant_p. */
3617 standard_80387_constant_rtx (int idx)
3619 int i;
3621 if (! ext_80387_constants_init)
3622 init_ext_80387_constants ();
3624 switch (idx)
3626 case 3:
3627 case 4:
3628 case 5:
3629 case 6:
3630 case 7:
3631 i = idx - 3;
3632 break;
3634 default:
3635 abort ();
3638 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
3639 XFmode);
3642 /* Return 1 if X is an FP constant we can load into an SSE register without using memory.
3645 standard_sse_constant_p (rtx x)
3647 if (x == const0_rtx)
3648 return 1;
3649 return (x == CONST0_RTX (GET_MODE (x)));
3652 /* Returns 1 if OP contains a symbol reference */
3655 symbolic_reference_mentioned_p (rtx op)
3657 const char *fmt;
3658 int i;
3660 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3661 return 1;
3663 fmt = GET_RTX_FORMAT (GET_CODE (op));
3664 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3666 if (fmt[i] == 'E')
3668 int j;
3670 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3671 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3672 return 1;
3675 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3676 return 1;
3679 return 0;
3682 /* Return 1 if it is appropriate to emit `ret' instructions in the
3683 body of a function. Do this only if the epilogue is simple, needing a
3684 couple of insns. Prior to reloading, we can't tell how many registers
3685 must be saved, so return 0 then. Return 0 if there is no frame
3686 marker to de-allocate.
3688 If NON_SAVING_SETJMP is defined and true, then it is not possible
3689 for the epilogue to be simple, so return 0. This is a special case
3690 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3691 until final, but jump_optimize may need to know sooner if a
3692 `return' is OK. */
3695 ix86_can_use_return_insn_p (void)
3697 struct ix86_frame frame;
3699 #ifdef NON_SAVING_SETJMP
3700 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3701 return 0;
3702 #endif
3704 if (! reload_completed || frame_pointer_needed)
3705 return 0;
3707 	  /* Don't allow more than 32K bytes of arguments to be popped, since
3708 	     that's all we can do with one instruction.  */
3709 if (current_function_pops_args
3710 && current_function_args_size >= 32768)
3711 return 0;
3713 ix86_compute_frame_layout (&frame);
3714 return frame.to_allocate == 0 && frame.nregs == 0;
3717 /* Value should be nonzero if functions must have frame pointers.
3718 Zero means the frame pointer need not be set up (and parms may
3719 be accessed via the stack pointer) in functions that seem suitable. */
3722 ix86_frame_pointer_required (void)
3724 /* If we accessed previous frames, then the generated code expects
3725 to be able to access the saved ebp value in our frame. */
3726 if (cfun->machine->accesses_prev_frame)
3727 return 1;
3729 	  /* Several x86 OSes need a frame pointer for other reasons,
3730 usually pertaining to setjmp. */
3731 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3732 return 1;
3734 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3735 the frame pointer by default. Turn it back on now if we've not
3736 got a leaf function. */
3737 if (TARGET_OMIT_LEAF_FRAME_POINTER
3738 && (!current_function_is_leaf))
3739 return 1;
3741 if (current_function_profile)
3742 return 1;
3744 return 0;
3747 /* Record that the current function accesses previous call frames. */
3749 void
3750 ix86_setup_frame_addresses (void)
3752 cfun->machine->accesses_prev_frame = 1;
3755 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3756 # define USE_HIDDEN_LINKONCE 1
3757 #else
3758 # define USE_HIDDEN_LINKONCE 0
3759 #endif
3761 static int pic_labels_used;
3763 /* Fills in the label name that should be used for a pc thunk for
3764 the given register. */
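/* For example, with USE_HIDDEN_LINKONCE the name produced for %ebx (regno 3)
   is "__i686.get_pc_thunk.bx"; otherwise an internal label built from the
   "LPR" prefix and the register number is used.  (Illustrative sketch.)  */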
3766 static void
3767 get_pc_thunk_name (char name[32], unsigned int regno)
3769 if (USE_HIDDEN_LINKONCE)
3770 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3771 else
3772 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
3776 /* Emit, at the end of the assembly file, the pc thunks used for -fpic.  Each
3777 	thunk loads its register with the return address of the caller and returns.  */
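/* For a used %ebx the emitted thunk is, roughly:

	__i686.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret

   (sketch only; the exact section, .hidden and label directives depend on
   USE_HIDDEN_LINKONCE and the assembler in use).  */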
3779 void
3780 ix86_file_end (void)
3782 rtx xops[2];
3783 int regno;
3785 for (regno = 0; regno < 8; ++regno)
3787 char name[32];
3789 if (! ((pic_labels_used >> regno) & 1))
3790 continue;
3792 get_pc_thunk_name (name, regno);
3794 if (USE_HIDDEN_LINKONCE)
3796 tree decl;
3798 decl = build_decl (FUNCTION_DECL, get_identifier (name),
3799 error_mark_node);
3800 TREE_PUBLIC (decl) = 1;
3801 TREE_STATIC (decl) = 1;
3802 DECL_ONE_ONLY (decl) = 1;
3804 (*targetm.asm_out.unique_section) (decl, 0);
3805 named_section (decl, NULL, 0);
3807 (*targetm.asm_out.globalize_label) (asm_out_file, name);
3808 fputs ("\t.hidden\t", asm_out_file);
3809 assemble_name (asm_out_file, name);
3810 fputc ('\n', asm_out_file);
3811 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
3813 else
3815 text_section ();
3816 ASM_OUTPUT_LABEL (asm_out_file, name);
3819 xops[0] = gen_rtx_REG (SImode, regno);
3820 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3821 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3822 output_asm_insn ("ret", xops);
3825 if (NEED_INDICATE_EXEC_STACK)
3826 file_end_indicate_exec_stack ();
3829 /* Emit code for the SET_GOT patterns. */
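/* Roughly, with deep branch prediction and -fpic the sequence emitted is

	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   and without deep branch prediction

	call	.L2
   .L2:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   (illustrative sketches; the actual symbol comes from GOT_SYMBOL_NAME and
   the label numbering from the insn stream).  */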
3831 const char *
3832 output_set_got (rtx dest)
3834 rtx xops[3];
3836 xops[0] = dest;
3837 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
3839 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
3841 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
3843 if (!flag_pic)
3844 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3845 else
3846 output_asm_insn ("call\t%a2", xops);
3848 #if TARGET_MACHO
3849 /* Output the "canonical" label name ("Lxx$pb") here too. This
3850 is what will be referred to by the Mach-O PIC subsystem. */
3851 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
3852 #endif
3853 (*targetm.asm_out.internal_label) (asm_out_file, "L",
3854 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
3856 if (flag_pic)
3857 output_asm_insn ("pop{l}\t%0", xops);
3859 else
3861 char name[32];
3862 get_pc_thunk_name (name, REGNO (dest));
3863 pic_labels_used |= 1 << REGNO (dest);
3865 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
3866 xops[2] = gen_rtx_MEM (QImode, xops[2]);
3867 output_asm_insn ("call\t%X2", xops);
3870 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
3871 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
3872 else if (!TARGET_MACHO)
3873 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
3875 return "";
3878 /* Generate a "push" pattern for input ARG.  */
3880 static rtx
3881 gen_push (rtx arg)
3883 return gen_rtx_SET (VOIDmode,
3884 gen_rtx_MEM (Pmode,
3885 gen_rtx_PRE_DEC (Pmode,
3886 stack_pointer_rtx)),
3887 arg);
3890 /* Return >= 0 if there is an unused call-clobbered register available
3891 for the entire function. */
3893 static unsigned int
3894 ix86_select_alt_pic_regnum (void)
3896 if (current_function_is_leaf && !current_function_profile)
3898 int i;
3899 for (i = 2; i >= 0; --i)
3900 if (!regs_ever_live[i])
3901 return i;
3904 return INVALID_REGNUM;
3907 /* Return 1 if we need to save REGNO. */
3908 static int
3909 ix86_save_reg (unsigned int regno, int maybe_eh_return)
3911 if (pic_offset_table_rtx
3912 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
3913 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
3914 || current_function_profile
3915 || current_function_calls_eh_return
3916 || current_function_uses_const_pool))
3918 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
3919 return 0;
3920 return 1;
3923 if (current_function_calls_eh_return && maybe_eh_return)
3925 unsigned i;
3926 for (i = 0; ; i++)
3928 unsigned test = EH_RETURN_DATA_REGNO (i);
3929 if (test == INVALID_REGNUM)
3930 break;
3931 if (test == regno)
3932 return 1;
3936 return (regs_ever_live[regno]
3937 && !call_used_regs[regno]
3938 && !fixed_regs[regno]
3939 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
3942 /* Return number of registers to be saved on the stack. */
3944 static int
3945 ix86_nsaved_regs (void)
3947 int nregs = 0;
3948 int regno;
3950 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
3951 if (ix86_save_reg (regno, true))
3952 nregs++;
3953 return nregs;
3956 /* Return the offset between two registers, one to be eliminated, and the other
3957 its replacement, at the start of a routine. */
3959 HOST_WIDE_INT
3960 ix86_initial_elimination_offset (int from, int to)
3962 struct ix86_frame frame;
3963 ix86_compute_frame_layout (&frame);
3965 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3966 return frame.hard_frame_pointer_offset;
3967 else if (from == FRAME_POINTER_REGNUM
3968 && to == HARD_FRAME_POINTER_REGNUM)
3969 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
3970 else
3972 if (to != STACK_POINTER_REGNUM)
3973 abort ();
3974 else if (from == ARG_POINTER_REGNUM)
3975 return frame.stack_pointer_offset;
3976 else if (from != FRAME_POINTER_REGNUM)
3977 abort ();
3978 else
3979 return frame.stack_pointer_offset - frame.frame_pointer_offset;
3983 /* Fill the ix86_frame structure describing the frame of the function being compiled.  */
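/* The layout computed below is, from higher to lower addresses, roughly:

	return address
	saved frame pointer		<- hard_frame_pointer_offset
	saved registers
	va-arg register save area
	padding1			<- frame_pointer_offset
	local variables
	outgoing argument area
	padding2			<- stack_pointer_offset

   (sketch only; the red zone handling at the end may further reduce
   to_allocate and stack_pointer_offset).  */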
3985 static void
3986 ix86_compute_frame_layout (struct ix86_frame *frame)
3988 HOST_WIDE_INT total_size;
3989 unsigned int stack_alignment_needed;
3990 HOST_WIDE_INT offset;
3991 unsigned int preferred_alignment;
3992 HOST_WIDE_INT size = get_frame_size ();
3994 frame->nregs = ix86_nsaved_regs ();
3995 total_size = size;
3997 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
3998 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4000 	  /* During reload iterations the number of saved registers can change.
4001 	     Recompute the value as needed.  Do not recompute when the number of
4002 	     registers didn't change, as reload makes multiple calls to this function
4003 	     and does not expect the decision to change within a single iteration.  */
4004 if (!optimize_size
4005 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4007 int count = frame->nregs;
4009 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4010 	  /* The fast prologue uses moves instead of pushes to save registers.  This
4011 	     is significantly longer, but also executes faster, as modern hardware
4012 	     can execute the moves in parallel but can't do so for push/pop.
4014 	     Be careful about choosing which prologue to emit: when the function takes
4015 	     many instructions to execute, we may as well use the slow version, and
4016 	     likewise when the function is known to be outside a hot spot (known with
4017 	     profile feedback only).  Weight the size of the function by the number of
4018 	     registers to save, as it is cheap to use one or two push instructions but
4019 	     very slow to use many of them.  */
4020 if (count)
4021 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4022 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4023 || (flag_branch_probabilities
4024 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4025 cfun->machine->use_fast_prologue_epilogue = false;
4026 else
4027 cfun->machine->use_fast_prologue_epilogue
4028 = !expensive_function_p (count);
4030 if (TARGET_PROLOGUE_USING_MOVE
4031 && cfun->machine->use_fast_prologue_epilogue)
4032 frame->save_regs_using_mov = true;
4033 else
4034 frame->save_regs_using_mov = false;
4037 /* Skip return address and saved base pointer. */
4038 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4040 frame->hard_frame_pointer_offset = offset;
4042 	  /* Do some sanity checking of stack_alignment_needed and
4043 	     preferred_alignment, since the i386 port is the only one using these
4044 	     features, and they may break easily.  */
4046 if (size && !stack_alignment_needed)
4047 abort ();
4048 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4049 abort ();
4050 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4051 abort ();
4052 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4053 abort ();
4055 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4056 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4058 /* Register save area */
4059 offset += frame->nregs * UNITS_PER_WORD;
4061 /* Va-arg area */
4062 if (ix86_save_varrargs_registers)
4064 offset += X86_64_VARARGS_SIZE;
4065 frame->va_arg_size = X86_64_VARARGS_SIZE;
4067 else
4068 frame->va_arg_size = 0;
4070 /* Align start of frame for local function. */
4071 frame->padding1 = ((offset + stack_alignment_needed - 1)
4072 & -stack_alignment_needed) - offset;
4074 offset += frame->padding1;
4076 /* Frame pointer points here. */
4077 frame->frame_pointer_offset = offset;
4079 offset += size;
4081 	  /* Add the outgoing arguments area.  It can be skipped if we eliminated
4082 	     all the function calls as dead code.
4083 	     Skipping is, however, impossible when the function calls alloca, as the
4084 	     alloca expander assumes that the last current_function_outgoing_args_size
4085 	     bytes of the stack frame are unused.  */
4086 if (ACCUMULATE_OUTGOING_ARGS
4087 && (!current_function_is_leaf || current_function_calls_alloca))
4089 offset += current_function_outgoing_args_size;
4090 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4092 else
4093 frame->outgoing_arguments_size = 0;
4095 /* Align stack boundary. Only needed if we're calling another function
4096 or using alloca. */
4097 if (!current_function_is_leaf || current_function_calls_alloca)
4098 frame->padding2 = ((offset + preferred_alignment - 1)
4099 & -preferred_alignment) - offset;
4100 else
4101 frame->padding2 = 0;
4103 offset += frame->padding2;
4105 	  /* We've reached the end of the stack frame.  */
4106 frame->stack_pointer_offset = offset;
4108 /* Size prologue needs to allocate. */
4109 frame->to_allocate =
4110 (size + frame->padding1 + frame->padding2
4111 + frame->outgoing_arguments_size + frame->va_arg_size);
4113 if ((!frame->to_allocate && frame->nregs <= 1)
4114 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
4115 frame->save_regs_using_mov = false;
4117 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
4118 && current_function_is_leaf)
4120 frame->red_zone_size = frame->to_allocate;
4121 if (frame->save_regs_using_mov)
4122 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
4123 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4124 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4126 else
4127 frame->red_zone_size = 0;
4128 frame->to_allocate -= frame->red_zone_size;
4129 frame->stack_pointer_offset -= frame->red_zone_size;
4130 #if 0
4131 fprintf (stderr, "nregs: %i\n", frame->nregs);
4132 fprintf (stderr, "size: %i\n", size);
4133 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4134 fprintf (stderr, "padding1: %i\n", frame->padding1);
4135 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4136 fprintf (stderr, "padding2: %i\n", frame->padding2);
4137 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4138 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4139 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4140 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4141 frame->hard_frame_pointer_offset);
4142 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4143 #endif
4146 /* Emit code to save registers in the prologue. */
4148 static void
4149 ix86_emit_save_regs (void)
4151 int regno;
4152 rtx insn;
4154 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4155 if (ix86_save_reg (regno, true))
4157 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4158 RTX_FRAME_RELATED_P (insn) = 1;
4162 /* Emit code to save registers using MOV insns.  The first register
4163 	is saved at POINTER + OFFSET.  */
4164 static void
4165 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
4167 int regno;
4168 rtx insn;
4170 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4171 if (ix86_save_reg (regno, true))
4173 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4174 Pmode, offset),
4175 gen_rtx_REG (Pmode, regno));
4176 RTX_FRAME_RELATED_P (insn) = 1;
4177 offset += UNITS_PER_WORD;
4181 /* Expand a prologue or epilogue stack adjustment.
4182 	The pattern exists to put a dependency on all ebp-based memory accesses.
4183 	STYLE should be negative if the instructions should be marked as frame
4184 	related, zero if the %r11 register is live and cannot be freely used,
4185 	and positive otherwise.  */
4187 static void
4188 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
4190 rtx insn;
4192 if (! TARGET_64BIT)
4193 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
4194 else if (x86_64_immediate_operand (offset, DImode))
4195 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
4196 else
4198 rtx r11;
4199 /* r11 is used by indirect sibcall return as well, set before the
4200 epilogue and used after the epilogue. ATM indirect sibcall
4201 shouldn't be used together with huge frame sizes in one
4202 function because of the frame_size check in sibcall.c. */
4203 if (style == 0)
4204 abort ();
4205 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4206 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
4207 if (style < 0)
4208 RTX_FRAME_RELATED_P (insn) = 1;
4209 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
4210 offset));
4212 if (style < 0)
4213 RTX_FRAME_RELATED_P (insn) = 1;
4216 /* Expand the prologue into a bunch of separate insns. */
4218 void
4219 ix86_expand_prologue (void)
4221 rtx insn;
4222 bool pic_reg_used;
4223 struct ix86_frame frame;
4224 HOST_WIDE_INT allocate;
4226 ix86_compute_frame_layout (&frame);
4228 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4229 slower on all targets. Also sdb doesn't like it. */
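/* A typical frame-pointer prologue emitted below is, roughly:

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx		(or movl stores when save_regs_using_mov)
	subl	$frame, %esp

   rather than a single "enter".  (Illustrative sketch; the exact ordering of
   saves and the stack adjustment depends on the heuristics above.)  */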
4231 if (frame_pointer_needed)
4233 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4234 RTX_FRAME_RELATED_P (insn) = 1;
4236 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4237 RTX_FRAME_RELATED_P (insn) = 1;
4240 allocate = frame.to_allocate;
4242 if (!frame.save_regs_using_mov)
4243 ix86_emit_save_regs ();
4244 else
4245 allocate += frame.nregs * UNITS_PER_WORD;
4247 	  /* When using the red zone we may start saving registers before allocating
4248 	     the stack frame, saving one cycle of the prologue.  */
4249 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
4250 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
4251 : stack_pointer_rtx,
4252 -frame.nregs * UNITS_PER_WORD);
4254 if (allocate == 0)
4256 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4257 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4258 GEN_INT (-allocate), -1);
4259 else
4261 /* Only valid for Win32. */
4262 rtx eax = gen_rtx_REG (SImode, 0);
4263 bool eax_live = ix86_eax_live_at_start_p ();
4265 if (TARGET_64BIT)
4266 abort ();
4268 if (eax_live)
4270 emit_insn (gen_push (eax));
4271 allocate -= 4;
4274 insn = emit_move_insn (eax, GEN_INT (allocate));
4275 RTX_FRAME_RELATED_P (insn) = 1;
4277 insn = emit_insn (gen_allocate_stack_worker (eax));
4278 RTX_FRAME_RELATED_P (insn) = 1;
4280 if (eax_live)
4282 rtx t;
4283 if (frame_pointer_needed)
4284 t = plus_constant (hard_frame_pointer_rtx,
4285 allocate
4286 - frame.to_allocate
4287 - frame.nregs * UNITS_PER_WORD);
4288 else
4289 t = plus_constant (stack_pointer_rtx, allocate);
4290 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
4294 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
4296 if (!frame_pointer_needed || !frame.to_allocate)
4297 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4298 else
4299 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4300 -frame.nregs * UNITS_PER_WORD);
4303 pic_reg_used = false;
4304 if (pic_offset_table_rtx
4305 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4306 || current_function_profile))
4308 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4310 if (alt_pic_reg_used != INVALID_REGNUM)
4311 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4313 pic_reg_used = true;
4316 if (pic_reg_used)
4318 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4320 	      /* Even with accurate pre-reload life analysis, we can wind up
4321 		 deleting all references to the pic register after reload.
4322 		 Consider the case where cross-jumping unifies two sides of a branch
4323 		 controlled by a comparison against the only read from a global.
4324 		 In that case, allow the set_got to be deleted, though we're
4325 		 too late to do anything about the ebx save in the prologue.  */
4326 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4329 	  /* Prevent function calls from being scheduled before the call to mcount.
4330 	     In the pic_reg_used case, make sure that the got load isn't deleted.  */
4331 if (current_function_profile)
4332 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4335 /* Emit code to restore saved registers using MOV insns. First register
4336 is restored from POINTER + OFFSET. */
4337 static void
4338 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
4339 int maybe_eh_return)
4341 int regno;
4342 rtx base_address = gen_rtx_MEM (Pmode, pointer);
4344 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4345 if (ix86_save_reg (regno, maybe_eh_return))
4347 	/* Ensure that adjust_address won't be forced to produce a pointer
4348 	   outside the range allowed by the x86-64 instruction set.  */
4349 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
4351 rtx r11;
4353 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4354 emit_move_insn (r11, GEN_INT (offset));
4355 emit_insn (gen_adddi3 (r11, r11, pointer));
4356 base_address = gen_rtx_MEM (Pmode, r11);
4357 offset = 0;
4359 emit_move_insn (gen_rtx_REG (Pmode, regno),
4360 adjust_address (base_address, Pmode, offset));
4361 offset += UNITS_PER_WORD;
4365 /* Restore function stack, frame, and registers. */
4367 void
4368 ix86_expand_epilogue (int style)
4370 int regno;
4371 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4372 struct ix86_frame frame;
4373 HOST_WIDE_INT offset;
4375 ix86_compute_frame_layout (&frame);
4377 /* Calculate start of saved registers relative to ebp. Special care
4378 must be taken for the normal return case of a function using
4379 eh_return: the eax and edx registers are marked as saved, but not
4380 restored along this path. */
4381 offset = frame.nregs;
4382 if (current_function_calls_eh_return && style != 2)
4383 offset -= 2;
4384 offset *= -UNITS_PER_WORD;
4386 	  /* If we're only restoring one register and sp is not valid, then
4387 	     restore the register with a move instruction, since it's
4388 	     less work than reloading sp and popping the register.
4390 	     The default code results in a stack adjustment using an add/lea
4391 	     instruction, while this code results in a LEAVE instruction (or a discrete
4392 	     equivalent), so it is profitable in some other cases as well, especially
4393 	     when there are no registers to restore.  We also use this code when
4394 	     TARGET_USE_LEAVE is set and there is exactly one register to pop.  This
4395 	     heuristic may need some tuning in the future.  */
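/* So on this path the epilogue typically reduces to either

	leave			(or movl %ebp, %esp; popl %ebp)
	ret

   or a single add/lea stack adjustment followed by ret.  (Sketch only;
   the exact form depends on the register restores emitted below.)  */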
4396 if ((!sp_valid && frame.nregs <= 1)
4397 || (TARGET_EPILOGUE_USING_MOVE
4398 && cfun->machine->use_fast_prologue_epilogue
4399 && (frame.nregs > 1 || frame.to_allocate))
4400 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4401 || (frame_pointer_needed && TARGET_USE_LEAVE
4402 && cfun->machine->use_fast_prologue_epilogue
4403 && frame.nregs == 1)
4404 || current_function_calls_eh_return)
4406 	      /* Restore registers.  We can use ebp or esp to address the memory
4407 		 locations.  If both are available, default to ebp, since offsets
4408 		 are known to be small.  The only exception is esp pointing directly
4409 		 to the end of the block of saved registers, where we may simplify
4410 		 the addressing mode.  */
4412 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4413 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4414 frame.to_allocate, style == 2);
4415 else
4416 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4417 offset, style == 2);
4419 /* eh_return epilogues need %ecx added to the stack pointer. */
4420 if (style == 2)
4422 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4424 if (frame_pointer_needed)
4426 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4427 tmp = plus_constant (tmp, UNITS_PER_WORD);
4428 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4430 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4431 emit_move_insn (hard_frame_pointer_rtx, tmp);
4433 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
4434 const0_rtx, style);
4436 else
4438 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4439 tmp = plus_constant (tmp, (frame.to_allocate
4440 + frame.nregs * UNITS_PER_WORD));
4441 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4444 else if (!frame_pointer_needed)
4445 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4446 GEN_INT (frame.to_allocate
4447 + frame.nregs * UNITS_PER_WORD),
4448 style);
4449 /* If not an i386, mov & pop is faster than "leave". */
4450 else if (TARGET_USE_LEAVE || optimize_size
4451 || !cfun->machine->use_fast_prologue_epilogue)
4452 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4453 else
4455 pro_epilogue_adjust_stack (stack_pointer_rtx,
4456 hard_frame_pointer_rtx,
4457 const0_rtx, style);
4458 if (TARGET_64BIT)
4459 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4460 else
4461 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4464 else
4466 /* First step is to deallocate the stack frame so that we can
4467 pop the registers. */
4468 if (!sp_valid)
4470 if (!frame_pointer_needed)
4471 abort ();
4472 pro_epilogue_adjust_stack (stack_pointer_rtx,
4473 hard_frame_pointer_rtx,
4474 GEN_INT (offset), style);
4476 else if (frame.to_allocate)
4477 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4478 GEN_INT (frame.to_allocate), style);
4480 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4481 if (ix86_save_reg (regno, false))
4483 if (TARGET_64BIT)
4484 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4485 else
4486 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4488 if (frame_pointer_needed)
4490 /* Leave results in shorter dependency chains on CPUs that are
4491 able to grok it fast. */
4492 if (TARGET_USE_LEAVE)
4493 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4494 else if (TARGET_64BIT)
4495 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4496 else
4497 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4501 /* Sibcall epilogues don't want a return instruction. */
4502 if (style == 0)
4503 return;
4505 if (current_function_pops_args && current_function_args_size)
4507 rtx popc = GEN_INT (current_function_pops_args);
4509 /* i386 can only pop 64K bytes. If asked to pop more, pop
4510 return address, do explicit add, and jump indirectly to the
4511 caller. */
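/* That is, instead of "ret $N" we emit, roughly:

	popl	%ecx
	addl	$N, %esp
	jmp	*%ecx

   (illustrative sketch for the N >= 64K case).  */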
4513 if (current_function_pops_args >= 65536)
4515 rtx ecx = gen_rtx_REG (SImode, 2);
4517 /* There is no "pascal" calling convention in 64bit ABI. */
4518 if (TARGET_64BIT)
4519 abort ();
4521 emit_insn (gen_popsi1 (ecx));
4522 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4523 emit_jump_insn (gen_return_indirect_internal (ecx));
4525 else
4526 emit_jump_insn (gen_return_pop_internal (popc));
4528 else
4529 emit_jump_insn (gen_return_internal ());
4532 /* Undo any modifications the function may have made to the PIC register.  */
4534 static void
4535 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4536 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4538 if (pic_offset_table_rtx)
4539 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4542 /* Extract the parts of an RTL expression that is a valid memory address
4543 	for an instruction.  Return 0 if the structure of the address is
4544 	grossly off.  Return -1 if the address contains ASHIFT, so it is not
4545 	strictly valid but is still used for computing the length of an lea insn.  */
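/* For example, the address

	(plus (plus (mult (reg %eax) (const_int 4)) (reg %ebx))
	      (const_int 12))

   decomposes into base = %ebx, index = %eax, scale = 4, disp = 12,
   i.e. the operand 12(%ebx,%eax,4).  (Illustrative sketch.)  */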
4548 ix86_decompose_address (rtx addr, struct ix86_address *out)
4550 rtx base = NULL_RTX;
4551 rtx index = NULL_RTX;
4552 rtx disp = NULL_RTX;
4553 HOST_WIDE_INT scale = 1;
4554 rtx scale_rtx = NULL_RTX;
4555 int retval = 1;
4556 enum ix86_address_seg seg = SEG_DEFAULT;
4558 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
4559 base = addr;
4560 else if (GET_CODE (addr) == PLUS)
4562 rtx addends[4], op;
4563 int n = 0, i;
4565 op = addr;
4568 if (n >= 4)
4569 return 0;
4570 addends[n++] = XEXP (op, 1);
4571 op = XEXP (op, 0);
4573 while (GET_CODE (op) == PLUS);
4574 if (n >= 4)
4575 return 0;
4576 addends[n] = op;
4578 for (i = n; i >= 0; --i)
4580 op = addends[i];
4581 switch (GET_CODE (op))
4583 case MULT:
4584 if (index)
4585 return 0;
4586 index = XEXP (op, 0);
4587 scale_rtx = XEXP (op, 1);
4588 break;
4590 case UNSPEC:
4591 if (XINT (op, 1) == UNSPEC_TP
4592 && TARGET_TLS_DIRECT_SEG_REFS
4593 && seg == SEG_DEFAULT)
4594 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
4595 else
4596 return 0;
4597 break;
4599 case REG:
4600 case SUBREG:
4601 if (!base)
4602 base = op;
4603 else if (!index)
4604 index = op;
4605 else
4606 return 0;
4607 break;
4609 case CONST:
4610 case CONST_INT:
4611 case SYMBOL_REF:
4612 case LABEL_REF:
4613 if (disp)
4614 return 0;
4615 disp = op;
4616 break;
4618 default:
4619 return 0;
4623 else if (GET_CODE (addr) == MULT)
4625 index = XEXP (addr, 0); /* index*scale */
4626 scale_rtx = XEXP (addr, 1);
4628 else if (GET_CODE (addr) == ASHIFT)
4630 rtx tmp;
4632 /* We're called for lea too, which implements ashift on occasion. */
4633 index = XEXP (addr, 0);
4634 tmp = XEXP (addr, 1);
4635 if (GET_CODE (tmp) != CONST_INT)
4636 return 0;
4637 scale = INTVAL (tmp);
4638 if ((unsigned HOST_WIDE_INT) scale > 3)
4639 return 0;
4640 scale = 1 << scale;
4641 retval = -1;
4643 else
4644 disp = addr; /* displacement */
4646 /* Extract the integral value of scale. */
4647 if (scale_rtx)
4649 if (GET_CODE (scale_rtx) != CONST_INT)
4650 return 0;
4651 scale = INTVAL (scale_rtx);
4654 /* Allow arg pointer and stack pointer as index if there is no scaling.  */
4655 if (base && index && scale == 1
4656 && (index == arg_pointer_rtx
4657 || index == frame_pointer_rtx
4658 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
4660 rtx tmp = base;
4661 base = index;
4662 index = tmp;
4665 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4666 if ((base == hard_frame_pointer_rtx
4667 || base == frame_pointer_rtx
4668 || base == arg_pointer_rtx) && !disp)
4669 disp = const0_rtx;
4671 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4672 Avoid this by transforming to [%esi+0]. */
4673 if (ix86_tune == PROCESSOR_K6 && !optimize_size
4674 && base && !index && !disp
4675 && REG_P (base)
4676 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4677 disp = const0_rtx;
4679 /* Special case: encode reg+reg instead of reg*2. */
4680 if (!base && index && scale && scale == 2)
4681 base = index, scale = 1;
4683 /* Special case: scaling cannot be encoded without base or displacement. */
4684 if (!base && !disp && index && scale != 1)
4685 disp = const0_rtx;
4687 out->base = base;
4688 out->index = index;
4689 out->disp = disp;
4690 out->scale = scale;
4691 out->seg = seg;
4693 return retval;
4696 /* Return the cost of the memory address X.
4697 	For i386, it is better to use a complex address than to let gcc copy
4698 	the address into a reg and make a new pseudo.  But not if the address
4699 	requires two regs - that would mean more pseudos with longer
4700 	lifetimes.  */
4701 static int
4702 ix86_address_cost (rtx x)
4704 struct ix86_address parts;
4705 int cost = 1;
4707 if (!ix86_decompose_address (x, &parts))
4708 abort ();
4710 /* More complex memory references are better. */
4711 if (parts.disp && parts.disp != const0_rtx)
4712 cost--;
4713 if (parts.seg != SEG_DEFAULT)
4714 cost--;
4716 /* Attempt to minimize number of registers in the address. */
4717 if ((parts.base
4718 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4719 || (parts.index
4720 && (!REG_P (parts.index)
4721 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4722 cost++;
4724 if (parts.base
4725 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4726 && parts.index
4727 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4728 && parts.base != parts.index)
4729 cost++;
4731 	  /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
4732 	     since its predecode logic can't detect the length of such instructions
4733 	     and decoding degenerates to the vector decoder.  Increase the cost of such
4734 	     addresses here.  The penalty is at least 2 cycles.  It may be worthwhile
4735 	     to split such addresses or even to refuse them entirely.
4737 	     The following addressing modes are affected:
4738 	      [base+scale*index]
4739 	      [scale*index+disp]
4740 	      [base+index]
4742 	     The first and last cases may be avoidable by explicitly coding the zero
4743 	     into the memory address, but I don't have an AMD-K6 machine handy to check
4744 	     this theory.  */
4746 if (TARGET_K6
4747 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4748 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4749 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4750 cost += 10;
4752 return cost;
4755 /* If X is a machine specific address (i.e. a symbol or label being
4756 referenced as a displacement from the GOT implemented using an
4757 UNSPEC), then return the base term. Otherwise return X. */
4760 ix86_find_base_term (rtx x)
4762 rtx term;
4764 if (TARGET_64BIT)
4766 if (GET_CODE (x) != CONST)
4767 return x;
4768 term = XEXP (x, 0);
4769 if (GET_CODE (term) == PLUS
4770 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4771 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4772 term = XEXP (term, 0);
4773 if (GET_CODE (term) != UNSPEC
4774 || XINT (term, 1) != UNSPEC_GOTPCREL)
4775 return x;
4777 term = XVECEXP (term, 0, 0);
4779 if (GET_CODE (term) != SYMBOL_REF
4780 && GET_CODE (term) != LABEL_REF)
4781 return x;
4783 return term;
4786 term = ix86_delegitimize_address (x);
4788 if (GET_CODE (term) != SYMBOL_REF
4789 && GET_CODE (term) != LABEL_REF)
4790 return x;
4792 return term;
4795 /* Determine if a given RTX is a valid constant. We already know this
4796 satisfies CONSTANT_P. */
4798 bool
4799 legitimate_constant_p (rtx x)
4801 rtx inner;
4803 switch (GET_CODE (x))
4805 case SYMBOL_REF:
4806 /* TLS symbols are not constant. */
4807 if (tls_symbolic_operand (x, Pmode))
4808 return false;
4809 break;
4811 case CONST:
4812 inner = XEXP (x, 0);
4814 /* Offsets of TLS symbols are never valid.
4815 Discourage CSE from creating them. */
4816 if (GET_CODE (inner) == PLUS
4817 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
4818 return false;
4820 if (GET_CODE (inner) == PLUS
4821 || GET_CODE (inner) == MINUS)
4823 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
4824 return false;
4825 inner = XEXP (inner, 0);
4828 /* Only some unspecs are valid as "constants". */
4829 if (GET_CODE (inner) == UNSPEC)
4830 switch (XINT (inner, 1))
4832 case UNSPEC_TPOFF:
4833 case UNSPEC_NTPOFF:
4834 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4835 case UNSPEC_DTPOFF:
4836 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4837 default:
4838 return false;
4840 break;
4842 default:
4843 break;
4846 /* Otherwise we handle everything else in the move patterns. */
4847 return true;
4850 /* Determine if it's legal to put X into the constant pool. This
4851 is not possible for the address of thread-local symbols, which
4852 is checked above. */
4854 static bool
4855 ix86_cannot_force_const_mem (rtx x)
4857 return !legitimate_constant_p (x);
4860 /* Determine if a given RTX is a valid constant address. */
4862 bool
4863 constant_address_p (rtx x)
4865 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
4868 /* Nonzero if the constant value X is a legitimate general operand
4869 when generating PIC code. It is given that flag_pic is on and
4870 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
4872 bool
4873 legitimate_pic_operand_p (rtx x)
4875 rtx inner;
4877 switch (GET_CODE (x))
4879 case CONST:
4880 inner = XEXP (x, 0);
4882 /* Only some unspecs are valid as "constants". */
4883 if (GET_CODE (inner) == UNSPEC)
4884 switch (XINT (inner, 1))
4886 case UNSPEC_TPOFF:
4887 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4888 default:
4889 return false;
4891 /* FALLTHRU */
4893 case SYMBOL_REF:
4894 case LABEL_REF:
4895 return legitimate_pic_address_disp_p (x);
4897 default:
4898 return true;
4902 /* Determine if a given CONST RTX is a valid memory displacement
4903 in PIC mode. */
4906 legitimate_pic_address_disp_p (rtx disp)
4908 bool saw_plus;
4910 /* In 64bit mode we can allow direct addresses of symbols and labels
4911 when they are not dynamic symbols. */
4912 if (TARGET_64BIT)
4914 /* TLS references should always be enclosed in UNSPEC. */
4915 if (tls_symbolic_operand (disp, GET_MODE (disp)))
4916 return 0;
4917 if (GET_CODE (disp) == SYMBOL_REF
4918 && ix86_cmodel == CM_SMALL_PIC
4919 && SYMBOL_REF_LOCAL_P (disp))
4920 return 1;
4921 if (GET_CODE (disp) == LABEL_REF)
4922 return 1;
4923 if (GET_CODE (disp) == CONST
4924 && GET_CODE (XEXP (disp, 0)) == PLUS)
4926 rtx op0 = XEXP (XEXP (disp, 0), 0);
4927 rtx op1 = XEXP (XEXP (disp, 0), 1);
4929 /* TLS references should always be enclosed in UNSPEC. */
4930 if (tls_symbolic_operand (op0, GET_MODE (op0)))
4931 return 0;
4932 if (((GET_CODE (op0) == SYMBOL_REF
4933 && ix86_cmodel == CM_SMALL_PIC
4934 && SYMBOL_REF_LOCAL_P (op0))
4935 || GET_CODE (op0) == LABEL_REF)
4936 && GET_CODE (op1) == CONST_INT
4937 && INTVAL (op1) < 16*1024*1024
4938 && INTVAL (op1) >= -16*1024*1024)
4939 return 1;
4942 if (GET_CODE (disp) != CONST)
4943 return 0;
4944 disp = XEXP (disp, 0);
4946 if (TARGET_64BIT)
4948 	  /* It is unsafe to allow PLUS expressions; this would limit the allowed
4949 	     distance of GOT entries.  We should not need these anyway.  */
4950 if (GET_CODE (disp) != UNSPEC
4951 || XINT (disp, 1) != UNSPEC_GOTPCREL)
4952 return 0;
4954 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4955 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
4956 return 0;
4957 return 1;
4960 saw_plus = false;
4961 if (GET_CODE (disp) == PLUS)
4963 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
4964 return 0;
4965 disp = XEXP (disp, 0);
4966 saw_plus = true;
4969 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
4970 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
4972 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
4973 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
4974 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
4976 const char *sym_name = XSTR (XEXP (disp, 1), 0);
4977 if (! strcmp (sym_name, "<pic base>"))
4978 return 1;
4982 if (GET_CODE (disp) != UNSPEC)
4983 return 0;
4985 switch (XINT (disp, 1))
4987 case UNSPEC_GOT:
4988 if (saw_plus)
4989 return false;
4990 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
4991 case UNSPEC_GOTOFF:
4992 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
4993 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
4994 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4995 return false;
4996 case UNSPEC_GOTTPOFF:
4997 case UNSPEC_GOTNTPOFF:
4998 case UNSPEC_INDNTPOFF:
4999 if (saw_plus)
5000 return false;
5001 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5002 case UNSPEC_NTPOFF:
5003 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5004 case UNSPEC_DTPOFF:
5005 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5008 return 0;
5011 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5012 memory address for an instruction. The MODE argument is the machine mode
5013 for the MEM expression that wants to use this address.
5015 	It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
5016 convert common non-canonical forms to canonical form so that they will
5017 be recognized. */
5020 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5022 struct ix86_address parts;
5023 rtx base, index, disp;
5024 HOST_WIDE_INT scale;
5025 const char *reason = NULL;
5026 rtx reason_rtx = NULL_RTX;
5028 if (TARGET_DEBUG_ADDR)
5030 fprintf (stderr,
5031 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5032 GET_MODE_NAME (mode), strict);
5033 debug_rtx (addr);
5036 if (ix86_decompose_address (addr, &parts) <= 0)
5038 reason = "decomposition failed";
5039 goto report_error;
5042 base = parts.base;
5043 index = parts.index;
5044 disp = parts.disp;
5045 scale = parts.scale;
5047 /* Validate base register.
5049 	Don't allow SUBREGs here; they can lead to spill failures when the base
5050 is one word out of a two word structure, which is represented internally
5051 as a DImode int. */
5053 if (base)
5055 reason_rtx = base;
5057 if (GET_CODE (base) != REG)
5059 reason = "base is not a register";
5060 goto report_error;
5063 if (GET_MODE (base) != Pmode)
5065 reason = "base is not in Pmode";
5066 goto report_error;
5069 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
5070 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
5072 reason = "base is not valid";
5073 goto report_error;
5077 /* Validate index register.
5079 	Don't allow SUBREGs here; they can lead to spill failures when the index
5080 is one word out of a two word structure, which is represented internally
5081 as a DImode int. */
5083 if (index)
5085 reason_rtx = index;
5087 if (GET_CODE (index) != REG)
5089 reason = "index is not a register";
5090 goto report_error;
5093 if (GET_MODE (index) != Pmode)
5095 reason = "index is not in Pmode";
5096 goto report_error;
5099 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
5100 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
5102 reason = "index is not valid";
5103 goto report_error;
5107 /* Validate scale factor. */
5108 if (scale != 1)
5110 reason_rtx = GEN_INT (scale);
5111 if (!index)
5113 reason = "scale without index";
5114 goto report_error;
5117 if (scale != 2 && scale != 4 && scale != 8)
5119 reason = "scale is not a valid multiplier";
5120 goto report_error;
5124 /* Validate displacement. */
5125 if (disp)
5127 reason_rtx = disp;
5129 if (GET_CODE (disp) == CONST
5130 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5131 switch (XINT (XEXP (disp, 0), 1))
5133 case UNSPEC_GOT:
5134 case UNSPEC_GOTOFF:
5135 case UNSPEC_GOTPCREL:
5136 if (!flag_pic)
5137 abort ();
5138 goto is_legitimate_pic;
5140 case UNSPEC_GOTTPOFF:
5141 case UNSPEC_GOTNTPOFF:
5142 case UNSPEC_INDNTPOFF:
5143 case UNSPEC_NTPOFF:
5144 case UNSPEC_DTPOFF:
5145 break;
5147 default:
5148 reason = "invalid address unspec";
5149 goto report_error;
5152 else if (flag_pic && (SYMBOLIC_CONST (disp)
5153 #if TARGET_MACHO
5154 && !machopic_operand_p (disp)
5155 #endif
5158 is_legitimate_pic:
5159 if (TARGET_64BIT && (index || base))
5161 /* foo@dtpoff(%rX) is ok. */
5162 if (GET_CODE (disp) != CONST
5163 || GET_CODE (XEXP (disp, 0)) != PLUS
5164 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5165 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5166 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5167 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5169 reason = "non-constant pic memory reference";
5170 goto report_error;
5173 else if (! legitimate_pic_address_disp_p (disp))
5175 reason = "displacement is an invalid pic construct";
5176 goto report_error;
5179 	      /* This code used to verify that a symbolic pic displacement
5180 		 includes the pic_offset_table_rtx register.
5182 		 While this is a good idea, unfortunately these constructs may
5183 		 be created by the "adds using lea" optimization for incorrect
5184 		 code like:
5186 		 int a;
5187 		 int foo (int i)
5188 		   {
5189 		     return *(&a + i);
5190 		   }
5192 		 This code is nonsensical, but results in addressing the
5193 		 GOT table with a pic_offset_table_rtx base.  We can't
5194 		 just refuse it easily, since it gets matched by the
5195 		 "addsi3" pattern, which later gets split to lea when the
5196 		 output register differs from the input.  While this
5197 		 could be handled by a separate addsi pattern for this case
5198 		 that never results in lea, disabling this test seems to be
5199 		 the easier and correct fix for the crash.  */
5201 else if (GET_CODE (disp) != LABEL_REF
5202 && GET_CODE (disp) != CONST_INT
5203 && (GET_CODE (disp) != CONST
5204 || !legitimate_constant_p (disp))
5205 && (GET_CODE (disp) != SYMBOL_REF
5206 || !legitimate_constant_p (disp)))
5208 reason = "displacement is not constant";
5209 goto report_error;
5211 else if (TARGET_64BIT
5212 && !x86_64_immediate_operand (disp, VOIDmode))
5214 reason = "displacement is out of range";
5215 goto report_error;
5219 /* Everything looks valid. */
5220 if (TARGET_DEBUG_ADDR)
5221 fprintf (stderr, "Success.\n");
5222 return TRUE;
5224 report_error:
5225 if (TARGET_DEBUG_ADDR)
5227 fprintf (stderr, "Error: %s\n", reason);
5228 debug_rtx (reason_rtx);
5230 return FALSE;
5233 /* Return a unique alias set for the GOT.  */
5235 static HOST_WIDE_INT
5236 ix86_GOT_alias_set (void)
5238 static HOST_WIDE_INT set = -1;
5239 if (set == -1)
5240 set = new_alias_set ();
5241 return set;
5244 /* Return a legitimate reference for ORIG (an address) using the
5245 register REG. If REG is 0, a new pseudo is generated.
5247 There are two types of references that must be handled:
5249 1. Global data references must load the address from the GOT, via
5250 the PIC reg. An insn is emitted to do this load, and the reg is
5251 returned.
5253 2. Static data references, constant pool addresses, and code labels
5254 compute the address as an offset from the GOT, whose base is in
5255 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
5256 differentiate them from global data objects. The returned
5257 address is the PIC reg + an unspec constant.
5259 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5260 reg also appears in the address. */
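/* For 32-bit PIC the two cases come out roughly as

	movl	foo@GOT(%ebx), %reg		global data
	leal	foo@GOTOFF(%ebx), %reg		local/static data

   while 64-bit code uses a RIP-relative foo@GOTPCREL load instead.
   (Illustrative sketch.)  */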
5262 static rtx
5263 legitimize_pic_address (rtx orig, rtx reg)
5265 rtx addr = orig;
5266 rtx new = orig;
5267 rtx base;
5269 #if TARGET_MACHO
5270 if (reg == 0)
5271 reg = gen_reg_rtx (Pmode);
5272 /* Use the generic Mach-O PIC machinery. */
5273 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5274 #endif
5276 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5277 new = addr;
5278 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5280 /* This symbol may be referenced via a displacement from the PIC
5281 base address (@GOTOFF). */
5283 if (reload_in_progress)
5284 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5285 if (GET_CODE (addr) == CONST)
5286 addr = XEXP (addr, 0);
5287 if (GET_CODE (addr) == PLUS)
5289 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
5290 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
5292 else
5293 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5294 new = gen_rtx_CONST (Pmode, new);
5295 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5297 if (reg != 0)
5299 emit_move_insn (reg, new);
5300 new = reg;
5303 else if (GET_CODE (addr) == SYMBOL_REF)
5305 if (TARGET_64BIT)
5307 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5308 new = gen_rtx_CONST (Pmode, new);
5309 new = gen_const_mem (Pmode, new);
5310 set_mem_alias_set (new, ix86_GOT_alias_set ());
5312 if (reg == 0)
5313 reg = gen_reg_rtx (Pmode);
5314 	      /* Use gen_movsi directly; otherwise the address is loaded
5315 		 into a register for CSE.  We don't want to CSE these addresses;
5316 		 instead we CSE the addresses loaded from the GOT table, so skip this.  */
5317 emit_insn (gen_movsi (reg, new));
5318 new = reg;
5320 else
5322 /* This symbol must be referenced via a load from the
5323 Global Offset Table (@GOT). */
5325 if (reload_in_progress)
5326 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5327 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5328 new = gen_rtx_CONST (Pmode, new);
5329 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5330 new = gen_const_mem (Pmode, new);
5331 set_mem_alias_set (new, ix86_GOT_alias_set ());
5333 if (reg == 0)
5334 reg = gen_reg_rtx (Pmode);
5335 emit_move_insn (reg, new);
5336 new = reg;
5339 else
5341 if (GET_CODE (addr) == CONST)
5343 addr = XEXP (addr, 0);
5345 /* We must match stuff we generate before. Assume the only
5346 unspecs that can get here are ours. Not that we could do
5347 anything with them anyway.... */
5348 if (GET_CODE (addr) == UNSPEC
5349 || (GET_CODE (addr) == PLUS
5350 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5351 return orig;
5352 if (GET_CODE (addr) != PLUS)
5353 abort ();
5355 if (GET_CODE (addr) == PLUS)
5357 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5359 /* Check first to see if this is a constant offset from a @GOTOFF
5360 symbol reference. */
5361 if (local_symbolic_operand (op0, Pmode)
5362 && GET_CODE (op1) == CONST_INT)
5364 if (!TARGET_64BIT)
5366 if (reload_in_progress)
5367 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5368 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5369 UNSPEC_GOTOFF);
5370 new = gen_rtx_PLUS (Pmode, new, op1);
5371 new = gen_rtx_CONST (Pmode, new);
5372 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5374 if (reg != 0)
5376 emit_move_insn (reg, new);
5377 new = reg;
5380 else
5382 if (INTVAL (op1) < -16*1024*1024
5383 || INTVAL (op1) >= 16*1024*1024)
5384 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
5387 else
5389 base = legitimize_pic_address (XEXP (addr, 0), reg);
5390 new = legitimize_pic_address (XEXP (addr, 1),
5391 base == reg ? NULL_RTX : reg);
5393 if (GET_CODE (new) == CONST_INT)
5394 new = plus_constant (base, INTVAL (new));
5395 else
5397 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5399 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5400 new = XEXP (new, 1);
5402 new = gen_rtx_PLUS (Pmode, base, new);
5407 return new;
5410 /* Load the thread pointer. If TO_REG is true, force it into a register. */
5412 static rtx
5413 get_thread_pointer (int to_reg)
5415 rtx tp, reg, insn;
5417 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5418 if (!to_reg)
5419 return tp;
5421 reg = gen_reg_rtx (Pmode);
5422 insn = gen_rtx_SET (VOIDmode, reg, tp);
5423 insn = emit_insn (insn);
5425 return reg;
5428 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
5429 false if we expect this to be used for a memory address and true if
5430 we expect to load the address into a register. */
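/* As a rough sketch, the 32-bit GNU TLS forms produced below are

	local exec:	thread pointer (%gs:0) + x@ntpoff
	initial exec:	thread pointer + GOT entry loaded via x@gotntpoff

   while the global- and local-dynamic models emit calls to the
   tls_get_addr helper.  (Sketch only; the exact unspecs and segment usage
   depend on TARGET_GNU_TLS and TARGET_TLS_DIRECT_SEG_REFS.)  */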
5432 static rtx
5433 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
5435 rtx dest, base, off, pic;
5436 int type;
5438 switch (model)
5440 case TLS_MODEL_GLOBAL_DYNAMIC:
5441 dest = gen_reg_rtx (Pmode);
5442 if (TARGET_64BIT)
5444 rtx rax = gen_rtx_REG (Pmode, 0), insns;
5446 start_sequence ();
5447 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
5448 insns = get_insns ();
5449 end_sequence ();
5451 emit_libcall_block (insns, dest, rax, x);
5453 else
5454 emit_insn (gen_tls_global_dynamic_32 (dest, x));
5455 break;
5457 case TLS_MODEL_LOCAL_DYNAMIC:
5458 base = gen_reg_rtx (Pmode);
5459 if (TARGET_64BIT)
5461 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
5463 start_sequence ();
5464 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
5465 insns = get_insns ();
5466 end_sequence ();
5468 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
5469 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
5470 emit_libcall_block (insns, base, rax, note);
5472 else
5473 emit_insn (gen_tls_local_dynamic_base_32 (base));
5475 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5476 off = gen_rtx_CONST (Pmode, off);
5478 return gen_rtx_PLUS (Pmode, base, off);
5480 case TLS_MODEL_INITIAL_EXEC:
5481 if (TARGET_64BIT)
5483 pic = NULL;
5484 type = UNSPEC_GOTNTPOFF;
5486 else if (flag_pic)
5488 if (reload_in_progress)
5489 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5490 pic = pic_offset_table_rtx;
5491 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
5493 else if (!TARGET_GNU_TLS)
5495 pic = gen_reg_rtx (Pmode);
5496 emit_insn (gen_set_got (pic));
5497 type = UNSPEC_GOTTPOFF;
5499 else
5501 pic = NULL;
5502 type = UNSPEC_INDNTPOFF;
5505 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
5506 off = gen_rtx_CONST (Pmode, off);
5507 if (pic)
5508 off = gen_rtx_PLUS (Pmode, pic, off);
5509 off = gen_const_mem (Pmode, off);
5510 set_mem_alias_set (off, ix86_GOT_alias_set ());
5512 if (TARGET_64BIT || TARGET_GNU_TLS)
5514 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5515 off = force_reg (Pmode, off);
5516 return gen_rtx_PLUS (Pmode, base, off);
5518 else
5520 base = get_thread_pointer (true);
5521 dest = gen_reg_rtx (Pmode);
5522 emit_insn (gen_subsi3 (dest, base, off));
5524 break;
5526 case TLS_MODEL_LOCAL_EXEC:
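      /* Local exec model: the offset from the thread pointer is known at
	 link time and is used as an immediate.  */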
5527 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5528 (TARGET_64BIT || TARGET_GNU_TLS)
5529 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5530 off = gen_rtx_CONST (Pmode, off);
5532 if (TARGET_64BIT || TARGET_GNU_TLS)
5534 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5535 return gen_rtx_PLUS (Pmode, base, off);
5537 else
5539 base = get_thread_pointer (true);
5540 dest = gen_reg_rtx (Pmode);
5541 emit_insn (gen_subsi3 (dest, base, off));
5543 break;
5545 default:
5546 abort ();
5549 return dest;
5552 /* Try machine-dependent ways of modifying an illegitimate address
5553 to be legitimate. If we find one, return the new, valid address.
5554 This macro is used in only one place: `memory_address' in explow.c.
5556 OLDX is the address as it was before break_out_memory_refs was called.
5557 In some cases it is useful to look at this to decide what needs to be done.
5559 MODE and WIN are passed so that this macro can use
5560 GO_IF_LEGITIMATE_ADDRESS.
5562 It is always safe for this macro to do nothing. It exists to recognize
5563 opportunities to optimize the output.
5565 For the 80386, we handle X+REG by loading X into a register R and
5566 using R+REG. R will go in a general reg and indexing will be used.
5567 However, if REG is a broken-out memory address or multiplication,
5568 nothing needs to be done because REG can certainly go in a general reg.
5570 When -fpic is used, special handling is needed for symbolic references.
5571 See comments by legitimize_pic_address in i386.c for details. */
rtx
5574 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
5576 int changed = 0;
5577 unsigned log;
5579 if (TARGET_DEBUG_ADDR)
5581 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5582 GET_MODE_NAME (mode));
5583 debug_rtx (x);
5586 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
5587 if (log)
5588 return legitimize_tls_address (x, log, false);
5589 if (GET_CODE (x) == CONST
5590 && GET_CODE (XEXP (x, 0)) == PLUS
5591 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
5592 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
5594 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
5595 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
5598 if (flag_pic && SYMBOLIC_CONST (x))
5599 return legitimize_pic_address (x, 0);
5601 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5602 if (GET_CODE (x) == ASHIFT
5603 && GET_CODE (XEXP (x, 1)) == CONST_INT
5604 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5606 changed = 1;
5607 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5608 GEN_INT (1 << log));
5611 if (GET_CODE (x) == PLUS)
5613 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5615 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5616 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5617 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5619 changed = 1;
5620 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5621 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5622 GEN_INT (1 << log));
5625 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5626 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5627 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5629 changed = 1;
5630 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5631 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5632 GEN_INT (1 << log));
5635 /* Put multiply first if it isn't already. */
5636 if (GET_CODE (XEXP (x, 1)) == MULT)
5638 rtx tmp = XEXP (x, 0);
5639 XEXP (x, 0) = XEXP (x, 1);
5640 XEXP (x, 1) = tmp;
5641 changed = 1;
5644 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5645 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5646 created by virtual register instantiation, register elimination, and
5647 similar optimizations. */
5648 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5650 changed = 1;
5651 x = gen_rtx_PLUS (Pmode,
5652 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5653 XEXP (XEXP (x, 1), 0)),
5654 XEXP (XEXP (x, 1), 1));
5657 /* Canonicalize
5658 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5659 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5660 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5661 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5662 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5663 && CONSTANT_P (XEXP (x, 1)))
5665 rtx constant;
5666 rtx other = NULL_RTX;
5668 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5670 constant = XEXP (x, 1);
5671 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5673 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5675 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5676 other = XEXP (x, 1);
5678 else
5679 constant = 0;
5681 if (constant)
5683 changed = 1;
5684 x = gen_rtx_PLUS (Pmode,
5685 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5686 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5687 plus_constant (other, INTVAL (constant)));
5691 if (changed && legitimate_address_p (mode, x, FALSE))
5692 return x;
5694 if (GET_CODE (XEXP (x, 0)) == MULT)
5696 changed = 1;
5697 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5700 if (GET_CODE (XEXP (x, 1)) == MULT)
5702 changed = 1;
5703 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5706 if (changed
5707 && GET_CODE (XEXP (x, 1)) == REG
5708 && GET_CODE (XEXP (x, 0)) == REG)
5709 return x;
5711 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5713 changed = 1;
5714 x = legitimize_pic_address (x, 0);
5717 if (changed && legitimate_address_p (mode, x, FALSE))
5718 return x;
5720 if (GET_CODE (XEXP (x, 0)) == REG)
5722 rtx temp = gen_reg_rtx (Pmode);
5723 rtx val = force_operand (XEXP (x, 1), temp);
5724 if (val != temp)
5725 emit_move_insn (temp, val);
5727 XEXP (x, 1) = temp;
5728 return x;
5731 else if (GET_CODE (XEXP (x, 1)) == REG)
5733 rtx temp = gen_reg_rtx (Pmode);
5734 rtx val = force_operand (XEXP (x, 0), temp);
5735 if (val != temp)
5736 emit_move_insn (temp, val);
5738 XEXP (x, 0) = temp;
5739 return x;
5743 return x;
5746 /* Print an integer constant expression in assembler syntax. Addition
5747 and subtraction are the only arithmetic that may appear in these
5748 expressions. FILE is the stdio stream to write to, X is the rtx, and
5749 CODE is the operand print code from the output string. */
5751 static void
5752 output_pic_addr_const (FILE *file, rtx x, int code)
5754 char buf[256];
5756 switch (GET_CODE (x))
5758 case PC:
5759 if (flag_pic)
5760 putc ('.', file);
5761 else
5762 abort ();
5763 break;
5765 case SYMBOL_REF:
5766 /* Mark the decl as referenced so that cgraph will output the function. */
5767 if (SYMBOL_REF_DECL (x))
5768 mark_decl_referenced (SYMBOL_REF_DECL (x));
5770 assemble_name (file, XSTR (x, 0));
5771 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
5772 fputs ("@PLT", file);
5773 break;
5775 case LABEL_REF:
5776 x = XEXP (x, 0);
5777 /* FALLTHRU */
5778 case CODE_LABEL:
5779 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5780 assemble_name (asm_out_file, buf);
5781 break;
5783 case CONST_INT:
5784 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5785 break;
5787 case CONST:
5788 /* This used to output parentheses around the expression,
5789 but that does not work on the 386 (either ATT or BSD assembler). */
5790 output_pic_addr_const (file, XEXP (x, 0), code);
5791 break;
5793 case CONST_DOUBLE:
5794 if (GET_MODE (x) == VOIDmode)
5796 /* We can use %d if the number is <32 bits and positive. */
5797 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5798 fprintf (file, "0x%lx%08lx",
5799 (unsigned long) CONST_DOUBLE_HIGH (x),
5800 (unsigned long) CONST_DOUBLE_LOW (x));
5801 else
5802 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5804 else
5805 /* We can't handle floating point constants;
5806 PRINT_OPERAND must handle them. */
5807 output_operand_lossage ("floating constant misused");
5808 break;
5810 case PLUS:
5811 /* Some assemblers need integer constants to appear first. */
5812 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5814 output_pic_addr_const (file, XEXP (x, 0), code);
5815 putc ('+', file);
5816 output_pic_addr_const (file, XEXP (x, 1), code);
5818 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5820 output_pic_addr_const (file, XEXP (x, 1), code);
5821 putc ('+', file);
5822 output_pic_addr_const (file, XEXP (x, 0), code);
5824 else
5825 abort ();
5826 break;
5828 case MINUS:
5829 if (!TARGET_MACHO)
5830 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5831 output_pic_addr_const (file, XEXP (x, 0), code);
5832 putc ('-', file);
5833 output_pic_addr_const (file, XEXP (x, 1), code);
5834 if (!TARGET_MACHO)
5835 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5836 break;
5838 case UNSPEC:
5839 if (XVECLEN (x, 0) != 1)
5840 abort ();
5841 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5842 switch (XINT (x, 1))
5844 case UNSPEC_GOT:
5845 fputs ("@GOT", file);
5846 break;
5847 case UNSPEC_GOTOFF:
5848 fputs ("@GOTOFF", file);
5849 break;
5850 case UNSPEC_GOTPCREL:
5851 fputs ("@GOTPCREL(%rip)", file);
5852 break;
5853 case UNSPEC_GOTTPOFF:
5854 /* FIXME: This might be @TPOFF in Sun ld too. */
5855 fputs ("@GOTTPOFF", file);
5856 break;
5857 case UNSPEC_TPOFF:
5858 fputs ("@TPOFF", file);
5859 break;
5860 case UNSPEC_NTPOFF:
5861 if (TARGET_64BIT)
5862 fputs ("@TPOFF", file);
5863 else
5864 fputs ("@NTPOFF", file);
5865 break;
5866 case UNSPEC_DTPOFF:
5867 fputs ("@DTPOFF", file);
5868 break;
5869 case UNSPEC_GOTNTPOFF:
5870 if (TARGET_64BIT)
5871 fputs ("@GOTTPOFF(%rip)", file);
5872 else
5873 fputs ("@GOTNTPOFF", file);
5874 break;
5875 case UNSPEC_INDNTPOFF:
5876 fputs ("@INDNTPOFF", file);
5877 break;
5878 default:
5879 output_operand_lossage ("invalid UNSPEC as operand");
5880 break;
5882 break;
5884 default:
5885 output_operand_lossage ("invalid expression as operand");
5889 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5890 We need to handle our special PIC relocations. */
5892 void
5893 i386_dwarf_output_addr_const (FILE *file, rtx x)
5895 #ifdef ASM_QUAD
5896 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
5897 #else
5898 if (TARGET_64BIT)
5899 abort ();
5900 fprintf (file, "%s", ASM_LONG);
5901 #endif
5902 if (flag_pic)
5903 output_pic_addr_const (file, x, '\0');
5904 else
5905 output_addr_const (file, x);
5906 fputc ('\n', file);
5909 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
5910 We need to emit DTP-relative relocations. */
5912 void
5913 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
5915 fputs (ASM_LONG, file);
5916 output_addr_const (file, x);
5917 fputs ("@DTPOFF", file);
5918 switch (size)
5920 case 4:
5921 break;
5922 case 8:
5923 fputs (", 0", file);
5924 break;
5925 default:
5926 abort ();
5930 /* In the name of slightly smaller debug output, and to cater to
5931 general assembler lossage, recognize PIC+GOTOFF and turn it back
5932 into a direct symbol reference. */
5934 static rtx
5935 ix86_delegitimize_address (rtx orig_x)
5937 rtx x = orig_x, y;
5939 if (GET_CODE (x) == MEM)
5940 x = XEXP (x, 0);
5942 if (TARGET_64BIT)
5944 if (GET_CODE (x) != CONST
5945 || GET_CODE (XEXP (x, 0)) != UNSPEC
5946 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
5947 || GET_CODE (orig_x) != MEM)
5948 return orig_x;
5949 return XVECEXP (XEXP (x, 0), 0, 0);
5952 if (GET_CODE (x) != PLUS
5953 || GET_CODE (XEXP (x, 1)) != CONST)
5954 return orig_x;
5956 if (GET_CODE (XEXP (x, 0)) == REG
5957 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5958 /* %ebx + GOT/GOTOFF */
5959 y = NULL;
5960 else if (GET_CODE (XEXP (x, 0)) == PLUS)
5962 /* %ebx + %reg * scale + GOT/GOTOFF */
5963 y = XEXP (x, 0);
5964 if (GET_CODE (XEXP (y, 0)) == REG
5965 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
5966 y = XEXP (y, 1);
5967 else if (GET_CODE (XEXP (y, 1)) == REG
5968 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
5969 y = XEXP (y, 0);
5970 else
5971 return orig_x;
5972 if (GET_CODE (y) != REG
5973 && GET_CODE (y) != MULT
5974 && GET_CODE (y) != ASHIFT)
5975 return orig_x;
5977 else
5978 return orig_x;
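  /* Dig the UNSPEC out of the CONST term.  */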
5980 x = XEXP (XEXP (x, 1), 0);
5981 if (GET_CODE (x) == UNSPEC
5982 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
5983 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
5985 if (y)
5986 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
5987 return XVECEXP (x, 0, 0);
5990 if (GET_CODE (x) == PLUS
5991 && GET_CODE (XEXP (x, 0)) == UNSPEC
5992 && GET_CODE (XEXP (x, 1)) == CONST_INT
5993 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
5994 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
5995 && GET_CODE (orig_x) != MEM)))
5997 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
5998 if (y)
5999 return gen_rtx_PLUS (Pmode, y, x);
6000 return x;
6003 return orig_x;
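/* Print to FILE the suffix (e, ne, g, a, ...) for comparison CODE performed
   in MODE.  REVERSE means print the suffix for the reversed condition;
   FP selects the spelling used for fcmov.  */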
6006 static void
6007 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6008 int fp, FILE *file)
6010 const char *suffix;
6012 if (mode == CCFPmode || mode == CCFPUmode)
6014 enum rtx_code second_code, bypass_code;
6015 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6016 if (bypass_code != UNKNOWN || second_code != UNKNOWN)
6017 abort ();
6018 code = ix86_fp_compare_code_to_integer (code);
6019 mode = CCmode;
6021 if (reverse)
6022 code = reverse_condition (code);
6024 switch (code)
6026 case EQ:
6027 suffix = "e";
6028 break;
6029 case NE:
6030 suffix = "ne";
6031 break;
6032 case GT:
6033 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6034 abort ();
6035 suffix = "g";
6036 break;
6037 case GTU:
6038 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6039 Those same assemblers have the same but opposite lossage on cmov.  */
6040 if (mode != CCmode)
6041 abort ();
6042 suffix = fp ? "nbe" : "a";
6043 break;
6044 case LT:
6045 if (mode == CCNOmode || mode == CCGOCmode)
6046 suffix = "s";
6047 else if (mode == CCmode || mode == CCGCmode)
6048 suffix = "l";
6049 else
6050 abort ();
6051 break;
6052 case LTU:
6053 if (mode != CCmode)
6054 abort ();
6055 suffix = "b";
6056 break;
6057 case GE:
6058 if (mode == CCNOmode || mode == CCGOCmode)
6059 suffix = "ns";
6060 else if (mode == CCmode || mode == CCGCmode)
6061 suffix = "ge";
6062 else
6063 abort ();
6064 break;
6065 case GEU:
6066 /* ??? As above. */
6067 if (mode != CCmode)
6068 abort ();
6069 suffix = fp ? "nb" : "ae";
6070 break;
6071 case LE:
6072 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6073 abort ();
6074 suffix = "le";
6075 break;
6076 case LEU:
6077 if (mode != CCmode)
6078 abort ();
6079 suffix = "be";
6080 break;
6081 case UNORDERED:
6082 suffix = fp ? "u" : "p";
6083 break;
6084 case ORDERED:
6085 suffix = fp ? "nu" : "np";
6086 break;
6087 default:
6088 abort ();
6090 fputs (suffix, file);
6093 /* Print the name of register X to FILE based on its machine mode and number.
6094 If CODE is 'w', pretend the mode is HImode.
6095 If CODE is 'b', pretend the mode is QImode.
6096 If CODE is 'k', pretend the mode is SImode.
6097 If CODE is 'q', pretend the mode is DImode.
6098 If CODE is 'h', pretend the reg is the `high' byte register.
6099 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
6101 void
6102 print_reg (rtx x, int code, FILE *file)
6104 if (REGNO (x) == ARG_POINTER_REGNUM
6105 || REGNO (x) == FRAME_POINTER_REGNUM
6106 || REGNO (x) == FLAGS_REG
6107 || REGNO (x) == FPSR_REG)
6108 abort ();
6110 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6111 putc ('%', file);
6113 if (code == 'w' || MMX_REG_P (x))
6114 code = 2;
6115 else if (code == 'b')
6116 code = 1;
6117 else if (code == 'k')
6118 code = 4;
6119 else if (code == 'q')
6120 code = 8;
6121 else if (code == 'y')
6122 code = 3;
6123 else if (code == 'h')
6124 code = 0;
6125 else
6126 code = GET_MODE_SIZE (GET_MODE (x));
6128 /* Irritatingly, AMD extended registers use a different naming convention
6129 from the normal registers.  */
6130 if (REX_INT_REG_P (x))
6132 if (!TARGET_64BIT)
6133 abort ();
6134 switch (code)
6136 case 0:
6137 error ("extended registers have no high halves");
6138 break;
6139 case 1:
6140 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6141 break;
6142 case 2:
6143 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6144 break;
6145 case 4:
6146 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6147 break;
6148 case 8:
6149 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6150 break;
6151 default:
6152 error ("unsupported operand size for extended register");
6153 break;
6155 return;
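  /* From here on CODE holds the operand size in bytes, with two special
     values: 0 selects the high QImode register name ('h') and 3 selects
     the x87 st(0) spelling ('y').  */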
6157 switch (code)
6159 case 3:
6160 if (STACK_TOP_P (x))
6162 fputs ("st(0)", file);
6163 break;
6165 /* FALLTHRU */
6166 case 8:
6167 case 4:
6168 case 12:
6169 if (! ANY_FP_REG_P (x))
6170 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6171 /* FALLTHRU */
6172 case 16:
6173 case 2:
6174 normal:
6175 fputs (hi_reg_name[REGNO (x)], file);
6176 break;
6177 case 1:
6178 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
6179 goto normal;
6180 fputs (qi_reg_name[REGNO (x)], file);
6181 break;
6182 case 0:
6183 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
6184 goto normal;
6185 fputs (qi_high_reg_name[REGNO (x)], file);
6186 break;
6187 default:
6188 abort ();
6192 /* Locate some local-dynamic symbol still in use by this function
6193 so that we can print its name in some tls_local_dynamic_base
6194 pattern. */
6196 static const char *
6197 get_some_local_dynamic_name (void)
6199 rtx insn;
6201 if (cfun->machine->some_ld_name)
6202 return cfun->machine->some_ld_name;
6204 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6205 if (INSN_P (insn)
6206 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6207 return cfun->machine->some_ld_name;
6209 abort ();
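/* Callback for for_each_rtx: record the name of the first local dynamic
   TLS symbol found and stop the traversal.  */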
6212 static int
6213 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
6215 rtx x = *px;
6217 if (GET_CODE (x) == SYMBOL_REF
6218 && local_dynamic_symbolic_operand (x, Pmode))
6220 cfun->machine->some_ld_name = XSTR (x, 0);
6221 return 1;
6224 return 0;
6227 /* Meaning of CODE:
6228 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6229 C -- print opcode suffix for set/cmov insn.
6230 c -- like C, but print reversed condition
6231 F,f -- likewise, but for floating-point.
6232 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
6233 otherwise nothing
6234 R -- print the prefix for register names.
6235 z -- print the opcode suffix for the size of the current operand.
6236 * -- print a star (in certain assembler syntax)
6237 A -- print an absolute memory reference.
6238 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6239 s -- print a shift double count, followed by the assembler's argument
6240 delimiter.
6241 b -- print the QImode name of the register for the indicated operand.
6242 %b0 would print %al if operands[0] is reg 0.
6243 w -- likewise, print the HImode name of the register.
6244 k -- likewise, print the SImode name of the register.
6245 q -- likewise, print the DImode name of the register.
6246 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6247 y -- print "st(0)" instead of "st" as a register.
6248 D -- print condition for SSE cmp instruction.
6249 P -- if PIC, print an @PLT suffix.
6250 X -- don't print any sort of PIC '@' suffix for a symbol.
6251 & -- print some in-use local-dynamic symbol name.
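 + -- emit a branch hint prefix (ds for predicted taken, cs for not taken)
 when a REG_BR_PROB note disagrees with the static prediction.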
6254 void
6255 print_operand (FILE *file, rtx x, int code)
6257 if (code)
6259 switch (code)
6261 case '*':
6262 if (ASSEMBLER_DIALECT == ASM_ATT)
6263 putc ('*', file);
6264 return;
6266 case '&':
6267 assemble_name (file, get_some_local_dynamic_name ());
6268 return;
6270 case 'A':
6271 if (ASSEMBLER_DIALECT == ASM_ATT)
6272 putc ('*', file);
6273 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6275 /* Intel syntax. For absolute addresses, registers should not
6276 be surrounded by brackets.  */
6277 if (GET_CODE (x) != REG)
6279 putc ('[', file);
6280 PRINT_OPERAND (file, x, 0);
6281 putc (']', file);
6282 return;
6285 else
6286 abort ();
6288 PRINT_OPERAND (file, x, 0);
6289 return;
6292 case 'L':
6293 if (ASSEMBLER_DIALECT == ASM_ATT)
6294 putc ('l', file);
6295 return;
6297 case 'W':
6298 if (ASSEMBLER_DIALECT == ASM_ATT)
6299 putc ('w', file);
6300 return;
6302 case 'B':
6303 if (ASSEMBLER_DIALECT == ASM_ATT)
6304 putc ('b', file);
6305 return;
6307 case 'Q':
6308 if (ASSEMBLER_DIALECT == ASM_ATT)
6309 putc ('l', file);
6310 return;
6312 case 'S':
6313 if (ASSEMBLER_DIALECT == ASM_ATT)
6314 putc ('s', file);
6315 return;
6317 case 'T':
6318 if (ASSEMBLER_DIALECT == ASM_ATT)
6319 putc ('t', file);
6320 return;
6322 case 'z':
6323 /* 387 opcodes don't get size suffixes if the operands are
6324 registers. */
6325 if (STACK_REG_P (x))
6326 return;
6328 /* Likewise if using Intel opcodes. */
6329 if (ASSEMBLER_DIALECT == ASM_INTEL)
6330 return;
6332 /* Derive the opcode suffix from the size of the operand.  */
6333 switch (GET_MODE_SIZE (GET_MODE (x)))
6335 case 2:
6336 #ifdef HAVE_GAS_FILDS_FISTS
6337 putc ('s', file);
6338 #endif
6339 return;
6341 case 4:
6342 if (GET_MODE (x) == SFmode)
6344 putc ('s', file);
6345 return;
6347 else
6348 putc ('l', file);
6349 return;
6351 case 12:
6352 case 16:
6353 putc ('t', file);
6354 return;
6356 case 8:
6357 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6359 #ifdef GAS_MNEMONICS
6360 putc ('q', file);
6361 #else
6362 putc ('l', file);
6363 putc ('l', file);
6364 #endif
6366 else
6367 putc ('l', file);
6368 return;
6370 default:
6371 abort ();
6374 case 'b':
6375 case 'w':
6376 case 'k':
6377 case 'q':
6378 case 'h':
6379 case 'y':
6380 case 'X':
6381 case 'P':
6382 break;
6384 case 's':
6385 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6387 PRINT_OPERAND (file, x, 0);
6388 putc (',', file);
6390 return;
6392 case 'D':
6393 /* Little bit of braindamage here.  The SSE compare instructions
6394 use completely different names for the comparisons than the
6395 fp conditional moves do.  */
6396 switch (GET_CODE (x))
6398 case EQ:
6399 case UNEQ:
6400 fputs ("eq", file);
6401 break;
6402 case LT:
6403 case UNLT:
6404 fputs ("lt", file);
6405 break;
6406 case LE:
6407 case UNLE:
6408 fputs ("le", file);
6409 break;
6410 case UNORDERED:
6411 fputs ("unord", file);
6412 break;
6413 case NE:
6414 case LTGT:
6415 fputs ("neq", file);
6416 break;
6417 case UNGE:
6418 case GE:
6419 fputs ("nlt", file);
6420 break;
6421 case UNGT:
6422 case GT:
6423 fputs ("nle", file);
6424 break;
6425 case ORDERED:
6426 fputs ("ord", file);
6427 break;
6428 default:
6429 abort ();
6430 break;
6432 return;
6433 case 'O':
6434 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6435 if (ASSEMBLER_DIALECT == ASM_ATT)
6437 switch (GET_MODE (x))
6439 case HImode: putc ('w', file); break;
6440 case SImode:
6441 case SFmode: putc ('l', file); break;
6442 case DImode:
6443 case DFmode: putc ('q', file); break;
6444 default: abort ();
6446 putc ('.', file);
6448 #endif
6449 return;
6450 case 'C':
6451 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6452 return;
6453 case 'F':
6454 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6455 if (ASSEMBLER_DIALECT == ASM_ATT)
6456 putc ('.', file);
6457 #endif
6458 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6459 return;
6461 /* Like above, but reverse condition */
6462 case 'c':
6463 /* Check to see if argument to %c is really a constant
6464 and not a condition code which needs to be reversed. */
6465 if (!COMPARISON_P (x))
6467 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6468 return;
6470 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6471 return;
6472 case 'f':
6473 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6474 if (ASSEMBLER_DIALECT == ASM_ATT)
6475 putc ('.', file);
6476 #endif
6477 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6478 return;
6479 case '+':
6481 rtx x;
6483 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6484 return;
6486 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6487 if (x)
6489 int pred_val = INTVAL (XEXP (x, 0));
6491 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6492 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6494 int taken = pred_val > REG_BR_PROB_BASE / 2;
6495 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6497 /* Emit hints only when the default branch prediction
6498 heuristics would fail.  */
6499 if (taken != cputaken)
6501 /* We use 3e (DS) prefix for taken branches and
6502 2e (CS) prefix for not taken branches. */
6503 if (taken)
6504 fputs ("ds ; ", file);
6505 else
6506 fputs ("cs ; ", file);
6510 return;
6512 default:
6513 output_operand_lossage ("invalid operand code `%c'", code);
6517 if (GET_CODE (x) == REG)
6518 print_reg (x, code, file);
6520 else if (GET_CODE (x) == MEM)
6522 /* No `byte ptr' prefix for call instructions. */
6523 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6525 const char * size;
6526 switch (GET_MODE_SIZE (GET_MODE (x)))
6528 case 1: size = "BYTE"; break;
6529 case 2: size = "WORD"; break;
6530 case 4: size = "DWORD"; break;
6531 case 8: size = "QWORD"; break;
6532 case 12: size = "XWORD"; break;
6533 case 16: size = "XMMWORD"; break;
6534 default:
6535 abort ();
6538 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6539 if (code == 'b')
6540 size = "BYTE";
6541 else if (code == 'w')
6542 size = "WORD";
6543 else if (code == 'k')
6544 size = "DWORD";
6546 fputs (size, file);
6547 fputs (" PTR ", file);
6550 x = XEXP (x, 0);
6551 /* Avoid (%rip) for call operands. */
6552 if (CONSTANT_ADDRESS_P (x) && code == 'P'
6553 && GET_CODE (x) != CONST_INT)
6554 output_addr_const (file, x);
6555 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6556 output_operand_lossage ("invalid constraints for operand");
6557 else
6558 output_address (x);
6561 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6563 REAL_VALUE_TYPE r;
6564 long l;
6566 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6567 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6569 if (ASSEMBLER_DIALECT == ASM_ATT)
6570 putc ('$', file);
6571 fprintf (file, "0x%08lx", l);
6574 /* These float cases don't actually occur as immediate operands. */
6575 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6577 char dstr[30];
6579 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6580 fprintf (file, "%s", dstr);
6583 else if (GET_CODE (x) == CONST_DOUBLE
6584 && GET_MODE (x) == XFmode)
6586 char dstr[30];
6588 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6589 fprintf (file, "%s", dstr);
6592 else
6594 if (code != 'P')
6596 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6598 if (ASSEMBLER_DIALECT == ASM_ATT)
6599 putc ('$', file);
6601 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6602 || GET_CODE (x) == LABEL_REF)
6604 if (ASSEMBLER_DIALECT == ASM_ATT)
6605 putc ('$', file);
6606 else
6607 fputs ("OFFSET FLAT:", file);
6610 if (GET_CODE (x) == CONST_INT)
6611 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6612 else if (flag_pic)
6613 output_pic_addr_const (file, x, code);
6614 else
6615 output_addr_const (file, x);
6619 /* Print a memory operand whose address is ADDR. */
6621 void
6622 print_operand_address (FILE *file, rtx addr)
6624 struct ix86_address parts;
6625 rtx base, index, disp;
6626 int scale;
6628 if (! ix86_decompose_address (addr, &parts))
6629 abort ();
6631 base = parts.base;
6632 index = parts.index;
6633 disp = parts.disp;
6634 scale = parts.scale;
6636 switch (parts.seg)
6638 case SEG_DEFAULT:
6639 break;
6640 case SEG_FS:
6641 case SEG_GS:
6642 if (USER_LABEL_PREFIX[0] == 0)
6643 putc ('%', file);
6644 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
6645 break;
6646 default:
6647 abort ();
6650 if (!base && !index)
6652 /* Displacement only requires special attention. */
6654 if (GET_CODE (disp) == CONST_INT)
6656 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
6658 if (USER_LABEL_PREFIX[0] == 0)
6659 putc ('%', file);
6660 fputs ("ds:", file);
6662 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
6664 else if (flag_pic)
6665 output_pic_addr_const (file, disp, 0);
6666 else
6667 output_addr_const (file, disp);
6669 /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode.  */
6670 if (TARGET_64BIT
6671 && ((GET_CODE (disp) == SYMBOL_REF
6672 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
6673 || GET_CODE (disp) == LABEL_REF
6674 || (GET_CODE (disp) == CONST
6675 && GET_CODE (XEXP (disp, 0)) == PLUS
6676 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
6677 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
6678 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
6679 fputs ("(%rip)", file);
6681 else
6683 if (ASSEMBLER_DIALECT == ASM_ATT)
6685 if (disp)
6687 if (flag_pic)
6688 output_pic_addr_const (file, disp, 0);
6689 else if (GET_CODE (disp) == LABEL_REF)
6690 output_asm_label (disp);
6691 else
6692 output_addr_const (file, disp);
6695 putc ('(', file);
6696 if (base)
6697 print_reg (base, 0, file);
6698 if (index)
6700 putc (',', file);
6701 print_reg (index, 0, file);
6702 if (scale != 1)
6703 fprintf (file, ",%d", scale);
6705 putc (')', file);
6707 else
6709 rtx offset = NULL_RTX;
6711 if (disp)
6713 /* Pull out the offset of a symbol; print any symbol itself. */
6714 if (GET_CODE (disp) == CONST
6715 && GET_CODE (XEXP (disp, 0)) == PLUS
6716 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6718 offset = XEXP (XEXP (disp, 0), 1);
6719 disp = gen_rtx_CONST (VOIDmode,
6720 XEXP (XEXP (disp, 0), 0));
6723 if (flag_pic)
6724 output_pic_addr_const (file, disp, 0);
6725 else if (GET_CODE (disp) == LABEL_REF)
6726 output_asm_label (disp);
6727 else if (GET_CODE (disp) == CONST_INT)
6728 offset = disp;
6729 else
6730 output_addr_const (file, disp);
6733 putc ('[', file);
6734 if (base)
6736 print_reg (base, 0, file);
6737 if (offset)
6739 if (INTVAL (offset) >= 0)
6740 putc ('+', file);
6741 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6744 else if (offset)
6745 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6746 else
6747 putc ('0', file);
6749 if (index)
6751 putc ('+', file);
6752 print_reg (index, 0, file);
6753 if (scale != 1)
6754 fprintf (file, "*%d", scale);
6756 putc (']', file);
6761 bool
6762 output_addr_const_extra (FILE *file, rtx x)
6764 rtx op;
6766 if (GET_CODE (x) != UNSPEC)
6767 return false;
6769 op = XVECEXP (x, 0, 0);
6770 switch (XINT (x, 1))
6772 case UNSPEC_GOTTPOFF:
6773 output_addr_const (file, op);
6774 /* FIXME: This might be @TPOFF in Sun ld. */
6775 fputs ("@GOTTPOFF", file);
6776 break;
6777 case UNSPEC_TPOFF:
6778 output_addr_const (file, op);
6779 fputs ("@TPOFF", file);
6780 break;
6781 case UNSPEC_NTPOFF:
6782 output_addr_const (file, op);
6783 if (TARGET_64BIT)
6784 fputs ("@TPOFF", file);
6785 else
6786 fputs ("@NTPOFF", file);
6787 break;
6788 case UNSPEC_DTPOFF:
6789 output_addr_const (file, op);
6790 fputs ("@DTPOFF", file);
6791 break;
6792 case UNSPEC_GOTNTPOFF:
6793 output_addr_const (file, op);
6794 if (TARGET_64BIT)
6795 fputs ("@GOTTPOFF(%rip)", file);
6796 else
6797 fputs ("@GOTNTPOFF", file);
6798 break;
6799 case UNSPEC_INDNTPOFF:
6800 output_addr_const (file, op);
6801 fputs ("@INDNTPOFF", file);
6802 break;
6804 default:
6805 return false;
6808 return true;
6811 /* Split one or more DImode RTL references into pairs of SImode
6812 references. The RTL can be REG, offsettable MEM, integer constant, or
6813 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6814 split and "num" is its length. lo_half and hi_half are output arrays
6815 that parallel "operands". */
6817 void
6818 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
6820 while (num--)
6822 rtx op = operands[num];
6824 /* simplify_subreg refuses to split volatile memory addresses,
6825 but we still have to handle them.  */
6826 if (GET_CODE (op) == MEM)
6828 lo_half[num] = adjust_address (op, SImode, 0);
6829 hi_half[num] = adjust_address (op, SImode, 4);
6831 else
6833 lo_half[num] = simplify_gen_subreg (SImode, op,
6834 GET_MODE (op) == VOIDmode
6835 ? DImode : GET_MODE (op), 0);
6836 hi_half[num] = simplify_gen_subreg (SImode, op,
6837 GET_MODE (op) == VOIDmode
6838 ? DImode : GET_MODE (op), 4);
6842 /* Split one or more TImode RTL references into pairs of DImode
6843 references.  The RTL can be REG, offsettable MEM, integer constant, or
6844 CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
6845 split and "num" is its length. lo_half and hi_half are output arrays
6846 that parallel "operands". */
6848 void
6849 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
6851 while (num--)
6853 rtx op = operands[num];
6855 /* simplify_subreg refuses to split volatile memory addresses, but we
6856 still have to handle them.  */
6857 if (GET_CODE (op) == MEM)
6859 lo_half[num] = adjust_address (op, DImode, 0);
6860 hi_half[num] = adjust_address (op, DImode, 8);
6862 else
6864 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6865 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6870 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6871 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6872 is the expression of the binary operation. The output may either be
6873 emitted here, or returned to the caller, like all output_* functions.
6875 There is no guarantee that the operands are the same mode, as they
6876 might be within FLOAT or FLOAT_EXTEND expressions. */
6878 #ifndef SYSV386_COMPAT
6879 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6880 wants to fix the assemblers because that causes incompatibility
6881 with gcc. No-one wants to fix gcc because that causes
6882 incompatibility with assemblers... You can use the option of
6883 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6884 #define SYSV386_COMPAT 1
6885 #endif
6887 const char *
6888 output_387_binary_op (rtx insn, rtx *operands)
6890 static char buf[30];
6891 const char *p;
6892 const char *ssep;
6893 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
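  /* Any operand in an SSE register means we emit the scalar SSE form
     (addss, mulsd, ...) instead of an x87 instruction.  */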
6895 #ifdef ENABLE_CHECKING
6896 /* Even if we do not want to check the inputs, this documents input
6897 constraints, which helps in understanding the following code.  */
6898 if (STACK_REG_P (operands[0])
6899 && ((REG_P (operands[1])
6900 && REGNO (operands[0]) == REGNO (operands[1])
6901 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6902 || (REG_P (operands[2])
6903 && REGNO (operands[0]) == REGNO (operands[2])
6904 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6905 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6906 ; /* ok */
6907 else if (!is_sse)
6908 abort ();
6909 #endif
6911 switch (GET_CODE (operands[3]))
6913 case PLUS:
6914 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6915 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6916 p = "fiadd";
6917 else
6918 p = "fadd";
6919 ssep = "add";
6920 break;
6922 case MINUS:
6923 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6924 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6925 p = "fisub";
6926 else
6927 p = "fsub";
6928 ssep = "sub";
6929 break;
6931 case MULT:
6932 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6933 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6934 p = "fimul";
6935 else
6936 p = "fmul";
6937 ssep = "mul";
6938 break;
6940 case DIV:
6941 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6942 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6943 p = "fidiv";
6944 else
6945 p = "fdiv";
6946 ssep = "div";
6947 break;
6949 default:
6950 abort ();
6953 if (is_sse)
6955 strcpy (buf, ssep);
6956 if (GET_MODE (operands[0]) == SFmode)
6957 strcat (buf, "ss\t{%2, %0|%0, %2}");
6958 else
6959 strcat (buf, "sd\t{%2, %0|%0, %2}");
6960 return buf;
6962 strcpy (buf, p);
6964 switch (GET_CODE (operands[3]))
6966 case MULT:
6967 case PLUS:
6968 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6970 rtx temp = operands[2];
6971 operands[2] = operands[1];
6972 operands[1] = temp;
6975 /* know operands[0] == operands[1]. */
6977 if (GET_CODE (operands[2]) == MEM)
6979 p = "%z2\t%2";
6980 break;
6983 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6985 if (STACK_TOP_P (operands[0]))
6986 /* How is it that we are storing to a dead operand[2]?
6987 Well, presumably operands[1] is dead too. We can't
6988 store the result to st(0) as st(0) gets popped on this
6989 instruction. Instead store to operands[2] (which I
6990 think has to be st(1)). st(1) will be popped later.
6991 gcc <= 2.8.1 didn't have this check and generated
6992 assembly code that the Unixware assembler rejected. */
6993 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6994 else
6995 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6996 break;
6999 if (STACK_TOP_P (operands[0]))
7000 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7001 else
7002 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7003 break;
7005 case MINUS:
7006 case DIV:
7007 if (GET_CODE (operands[1]) == MEM)
7009 p = "r%z1\t%1";
7010 break;
7013 if (GET_CODE (operands[2]) == MEM)
7015 p = "%z2\t%2";
7016 break;
7019 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7021 #if SYSV386_COMPAT
7022 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7023 derived assemblers, confusingly reverse the direction of
7024 the operation for fsub{r} and fdiv{r} when the
7025 destination register is not st(0). The Intel assembler
7026 doesn't have this brain damage. Read !SYSV386_COMPAT to
7027 figure out what the hardware really does. */
7028 if (STACK_TOP_P (operands[0]))
7029 p = "{p\t%0, %2|rp\t%2, %0}";
7030 else
7031 p = "{rp\t%2, %0|p\t%0, %2}";
7032 #else
7033 if (STACK_TOP_P (operands[0]))
7034 /* As above for fmul/fadd, we can't store to st(0). */
7035 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7036 else
7037 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7038 #endif
7039 break;
7042 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7044 #if SYSV386_COMPAT
7045 if (STACK_TOP_P (operands[0]))
7046 p = "{rp\t%0, %1|p\t%1, %0}";
7047 else
7048 p = "{p\t%1, %0|rp\t%0, %1}";
7049 #else
7050 if (STACK_TOP_P (operands[0]))
7051 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7052 else
7053 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7054 #endif
7055 break;
7058 if (STACK_TOP_P (operands[0]))
7060 if (STACK_TOP_P (operands[1]))
7061 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7062 else
7063 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7064 break;
7066 else if (STACK_TOP_P (operands[1]))
7068 #if SYSV386_COMPAT
7069 p = "{\t%1, %0|r\t%0, %1}";
7070 #else
7071 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7072 #endif
7074 else
7076 #if SYSV386_COMPAT
7077 p = "{r\t%2, %0|\t%0, %2}";
7078 #else
7079 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7080 #endif
7082 break;
7084 default:
7085 abort ();
7088 strcat (buf, p);
7089 return buf;
7092 /* Output code to initialize control word copies used by
7093 trunc?f?i patterns.  NORMAL is set to the current control word, while
7094 ROUND_DOWN is set to a copy with the rounding control forced to truncation.  */
7095 void
7096 emit_i387_cw_initialization (rtx normal, rtx round_down)
7098 rtx reg = gen_reg_rtx (HImode);
7100 emit_insn (gen_x86_fnstcw_1 (normal));
7101 emit_move_insn (reg, normal);
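  /* Force the rounding control field (bits 10-11 of the control word,
     mask 0xc00) to 11, i.e. truncation toward zero.  */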
7102 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7103 && !TARGET_64BIT)
7104 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7105 else
7106 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7107 emit_move_insn (round_down, reg);
7110 /* Output code for INSN to convert a float to a signed int. OPERANDS
7111 are the insn operands. The output may be [HSD]Imode and the input
7112 operand may be [SDX]Fmode. */
7114 const char *
7115 output_fix_trunc (rtx insn, rtx *operands)
7117 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7118 int dimode_p = GET_MODE (operands[0]) == DImode;
7120 /* Jump through a hoop or two for DImode, since the hardware has no
7121 non-popping instruction. We used to do this a different way, but
7122 that was somewhat fragile and broke with post-reload splitters. */
7123 if (dimode_p && !stack_top_dies)
7124 output_asm_insn ("fld\t%y1", operands);
7126 if (!STACK_TOP_P (operands[1]))
7127 abort ();
7129 if (GET_CODE (operands[0]) != MEM)
7130 abort ();
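  /* Load the truncating control word (operand 3), perform the store,
     then restore the original control word (operand 2).  */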
7132 output_asm_insn ("fldcw\t%3", operands);
7133 if (stack_top_dies || dimode_p)
7134 output_asm_insn ("fistp%z0\t%0", operands);
7135 else
7136 output_asm_insn ("fist%z0\t%0", operands);
7137 output_asm_insn ("fldcw\t%2", operands);
7139 return "";
7142 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7143 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7144 when fucom should be used. */
7146 const char *
7147 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
7149 int stack_top_dies;
7150 rtx cmp_op0 = operands[0];
7151 rtx cmp_op1 = operands[1];
7152 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7154 if (eflags_p == 2)
7156 cmp_op0 = cmp_op1;
7157 cmp_op1 = operands[2];
7159 if (is_sse)
7161 if (GET_MODE (operands[0]) == SFmode)
7162 if (unordered_p)
7163 return "ucomiss\t{%1, %0|%0, %1}";
7164 else
7165 return "comiss\t{%1, %0|%0, %1}";
7166 else
7167 if (unordered_p)
7168 return "ucomisd\t{%1, %0|%0, %1}";
7169 else
7170 return "comisd\t{%1, %0|%0, %1}";
7173 if (! STACK_TOP_P (cmp_op0))
7174 abort ();
7176 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7178 if (STACK_REG_P (cmp_op1)
7179 && stack_top_dies
7180 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7181 && REGNO (cmp_op1) != FIRST_STACK_REG)
7183 /* If the top of the 387 stack dies, and the other operand is
7184 also a stack register that dies, then this must be a
7185 `fcompp' float compare.  */
7187 if (eflags_p == 1)
7189 /* There is no double popping fcomi variant. Fortunately,
7190 eflags is immune from the fstp's cc clobbering. */
7191 if (unordered_p)
7192 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7193 else
7194 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7195 return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
7197 else
7199 if (eflags_p == 2)
7201 if (unordered_p)
7202 return "fucompp\n\tfnstsw\t%0";
7203 else
7204 return "fcompp\n\tfnstsw\t%0";
7206 else
7208 if (unordered_p)
7209 return "fucompp";
7210 else
7211 return "fcompp";
7215 else
7217 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7219 static const char * const alt[24] =
7221 "fcom%z1\t%y1",
7222 "fcomp%z1\t%y1",
7223 "fucom%z1\t%y1",
7224 "fucomp%z1\t%y1",
7226 "ficom%z1\t%y1",
7227 "ficomp%z1\t%y1",
7228 NULL,
7229 NULL,
7231 "fcomi\t{%y1, %0|%0, %y1}",
7232 "fcomip\t{%y1, %0|%0, %y1}",
7233 "fucomi\t{%y1, %0|%0, %y1}",
7234 "fucomip\t{%y1, %0|%0, %y1}",
7236 NULL,
7237 NULL,
7238 NULL,
7239 NULL,
7241 "fcom%z2\t%y2\n\tfnstsw\t%0",
7242 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7243 "fucom%z2\t%y2\n\tfnstsw\t%0",
7244 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7246 "ficom%z2\t%y2\n\tfnstsw\t%0",
7247 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7248 NULL,
7249 NULL
7252 int mask;
7253 const char *ret;
7255 mask = eflags_p << 3;
7256 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7257 mask |= unordered_p << 1;
7258 mask |= stack_top_dies;
7260 if (mask >= 24)
7261 abort ();
7262 ret = alt[mask];
7263 if (ret == NULL)
7264 abort ();
7266 return ret;
7270 void
7271 ix86_output_addr_vec_elt (FILE *file, int value)
7273 const char *directive = ASM_LONG;
7275 if (TARGET_64BIT)
7277 #ifdef ASM_QUAD
7278 directive = ASM_QUAD;
7279 #else
7280 abort ();
7281 #endif
7284 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7287 void
7288 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
7290 if (TARGET_64BIT)
7291 fprintf (file, "%s%s%d-%s%d\n",
7292 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7293 else if (HAVE_AS_GOTOFF_IN_DATA)
7294 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7295 #if TARGET_MACHO
7296 else if (TARGET_MACHO)
7298 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
7299 machopic_output_function_base_name (file);
7300 fprintf(file, "\n");
7302 #endif
7303 else
7304 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7305 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7308 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7309 for the target. */
7311 void
7312 ix86_expand_clear (rtx dest)
7314 rtx tmp;
7316 /* We play register width games, which are only valid after reload. */
7317 if (!reload_completed)
7318 abort ();
7320 /* Avoid HImode and its attendant prefix byte. */
7321 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7322 dest = gen_rtx_REG (SImode, REGNO (dest));
7324 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7326 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7327 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7329 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7330 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7333 emit_insn (tmp);
7336 /* X is an unchanging MEM. If it is a constant pool reference, return
7337 the constant pool rtx, else NULL. */
rtx
5340 maybe_get_pool_constant (rtx x)
7342 x = ix86_delegitimize_address (XEXP (x, 0));
7344 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7345 return get_pool_constant (x);
7347 return NULL_RTX;
7350 void
7351 ix86_expand_move (enum machine_mode mode, rtx operands[])
7353 int strict = (reload_in_progress || reload_completed);
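  /* strict is nonzero during and after reload, when we must not force
     constants to memory or create new pseudos (see the FLOAT_MODE_P
     handling below).  */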
7354 rtx op0, op1;
7355 enum tls_model model;
7357 op0 = operands[0];
7358 op1 = operands[1];
7360 model = GET_CODE (op1) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (op1) : 0;
7361 if (model)
7363 op1 = legitimize_tls_address (op1, model, true);
7364 op1 = force_operand (op1, op0);
7365 if (op1 == op0)
7366 return;
7369 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7371 #if TARGET_MACHO
7372 if (MACHOPIC_PURE)
7374 rtx temp = ((reload_in_progress
7375 || ((op0 && GET_CODE (op0) == REG)
7376 && mode == Pmode))
7377 ? op0 : gen_reg_rtx (Pmode));
7378 op1 = machopic_indirect_data_reference (op1, temp);
7379 op1 = machopic_legitimize_pic_address (op1, mode,
7380 temp == op1 ? 0 : temp);
7382 else if (MACHOPIC_INDIRECT)
7383 op1 = machopic_indirect_data_reference (op1, 0);
7384 if (op0 == op1)
7385 return;
7386 #else
7387 if (GET_CODE (op0) == MEM)
7388 op1 = force_reg (Pmode, op1);
7389 else
7390 op1 = legitimize_address (op1, op1, Pmode);
7391 #endif /* TARGET_MACHO */
7393 else
7395 if (GET_CODE (op0) == MEM
7396 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7397 || !push_operand (op0, mode))
7398 && GET_CODE (op1) == MEM)
7399 op1 = force_reg (mode, op1);
7401 if (push_operand (op0, mode)
7402 && ! general_no_elim_operand (op1, mode))
7403 op1 = copy_to_mode_reg (mode, op1);
7405 /* Force large constants in 64-bit compilation into registers
7406 to get them CSEed. */
7407 if (TARGET_64BIT && mode == DImode
7408 && immediate_operand (op1, mode)
7409 && !x86_64_zext_immediate_operand (op1, VOIDmode)
7410 && !register_operand (op0, mode)
7411 && optimize && !reload_completed && !reload_in_progress)
7412 op1 = copy_to_mode_reg (mode, op1);
7414 if (FLOAT_MODE_P (mode))
7416 /* If we are loading a floating point constant to a register,
7417 force the value to memory now, since we'll get better code
7418 out the back end. */
7420 if (strict)
7422 else if (GET_CODE (op1) == CONST_DOUBLE)
7424 op1 = validize_mem (force_const_mem (mode, op1));
7425 if (!register_operand (op0, mode))
7427 rtx temp = gen_reg_rtx (mode);
7428 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
7429 emit_move_insn (op0, temp);
7430 return;
7436 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
7439 void
7440 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
7442 /* Force constants other than zero into memory. We do not know how
7443 the instructions used to build constants modify the upper 64 bits
7444 of the register; once we have that information we may be able
7445 to handle some of them more efficiently. */
7446 if ((reload_in_progress | reload_completed) == 0
7447 && register_operand (operands[0], mode)
7448 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
7449 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
7451 /* Make operand1 a register if it isn't already. */
7452 if (!no_new_pseudos
7453 && !register_operand (operands[0], mode)
7454 && !register_operand (operands[1], mode))
7456 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
7457 emit_move_insn (operands[0], temp);
7458 return;
7461 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
7464 /* Attempt to expand a binary operator.  Make the expansion closer to the
7465 actual machine than just general_operand, which would allow 3 separate
7466 memory references (one output, two input) in a single insn.  */
7468 void
7469 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
7470 rtx operands[])
7472 int matching_memory;
7473 rtx src1, src2, dst, op, clob;
7475 dst = operands[0];
7476 src1 = operands[1];
7477 src2 = operands[2];
7479 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7480 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7481 && (rtx_equal_p (dst, src2)
7482 || immediate_operand (src1, mode)))
7484 rtx temp = src1;
7485 src1 = src2;
7486 src2 = temp;
7489 /* If the destination is memory, and we do not have matching source
7490 operands, do things in registers. */
7491 matching_memory = 0;
7492 if (GET_CODE (dst) == MEM)
7494 if (rtx_equal_p (dst, src1))
7495 matching_memory = 1;
7496 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7497 && rtx_equal_p (dst, src2))
7498 matching_memory = 2;
7499 else
7500 dst = gen_reg_rtx (mode);
7503 /* Both source operands cannot be in memory. */
7504 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7506 if (matching_memory != 2)
7507 src2 = force_reg (mode, src2);
7508 else
7509 src1 = force_reg (mode, src1);
7512 /* If the operation is not commutable, source 1 cannot be a constant
7513 or non-matching memory. */
7514 if ((CONSTANT_P (src1)
7515 || (!matching_memory && GET_CODE (src1) == MEM))
7516 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
7517 src1 = force_reg (mode, src1);
7519 /* If optimizing, copy to regs to improve CSE */
7520 if (optimize && ! no_new_pseudos)
7522 if (GET_CODE (dst) == MEM)
7523 dst = gen_reg_rtx (mode);
7524 if (GET_CODE (src1) == MEM)
7525 src1 = force_reg (mode, src1);
7526 if (GET_CODE (src2) == MEM)
7527 src2 = force_reg (mode, src2);
7530 /* Emit the instruction. */
7532 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7533 if (reload_in_progress)
7535 /* Reload doesn't know about the flags register, and doesn't know that
7536 it doesn't want to clobber it. We can only do this with PLUS. */
7537 if (code != PLUS)
7538 abort ();
7539 emit_insn (op);
7541 else
7543 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7544 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7547 /* Fix up the destination if needed. */
7548 if (dst != operands[0])
7549 emit_move_insn (operands[0], dst);
7552 /* Return TRUE or FALSE depending on whether the binary operator meets the
7553 appropriate constraints. */
int
7556 ix86_binary_operator_ok (enum rtx_code code,
7557 enum machine_mode mode ATTRIBUTE_UNUSED,
7558 rtx operands[3])
7560 /* Both source operands cannot be in memory. */
7561 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7562 return 0;
7563 /* If the operation is not commutable, source 1 cannot be a constant. */
7564 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
7565 return 0;
7566 /* If the destination is memory, we must have a matching source operand. */
7567 if (GET_CODE (operands[0]) == MEM
7568 && ! (rtx_equal_p (operands[0], operands[1])
7569 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7570 && rtx_equal_p (operands[0], operands[2]))))
7571 return 0;
7572 /* If the operation is not commutable and the source 1 is memory, we must
7573 have a matching destination. */
7574 if (GET_CODE (operands[1]) == MEM
7575 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
7576 && ! rtx_equal_p (operands[0], operands[1]))
7577 return 0;
7578 return 1;
7581 /* Attempt to expand a unary operator.  Make the expansion closer to the
7582 actual machine than just general_operand, which would allow 2 separate
7583 memory references (one output, one input) in a single insn.  */
7585 void
7586 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
7587 rtx operands[])
7589 int matching_memory;
7590 rtx src, dst, op, clob;
7592 dst = operands[0];
7593 src = operands[1];
7595 /* If the destination is memory, and we do not have matching source
7596 operands, do things in registers. */
7597 matching_memory = 0;
7598 if (GET_CODE (dst) == MEM)
7600 if (rtx_equal_p (dst, src))
7601 matching_memory = 1;
7602 else
7603 dst = gen_reg_rtx (mode);
7606 /* When the source operand is memory, the destination must match.  */
7607 if (!matching_memory && GET_CODE (src) == MEM)
7608 src = force_reg (mode, src);
7610 /* If optimizing, copy to regs to improve CSE */
7611 if (optimize && ! no_new_pseudos)
7613 if (GET_CODE (dst) == MEM)
7614 dst = gen_reg_rtx (mode);
7615 if (GET_CODE (src) == MEM)
7616 src = force_reg (mode, src);
7619 /* Emit the instruction. */
7621 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7622 if (reload_in_progress || code == NOT)
7624 /* Reload doesn't know about the flags register, and doesn't know that
7625 it doesn't want to clobber it. */
7626 if (code != NOT)
7627 abort ();
7628 emit_insn (op);
7630 else
7632 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7633 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7636 /* Fix up the destination if needed. */
7637 if (dst != operands[0])
7638 emit_move_insn (operands[0], dst);
7641 /* Return TRUE or FALSE depending on whether the unary operator meets the
7642 appropriate constraints. */
int
7645 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
7646 enum machine_mode mode ATTRIBUTE_UNUSED,
7647 rtx operands[2] ATTRIBUTE_UNUSED)
7649 /* If one of the operands is memory, source and destination must match.  */
7650 if ((GET_CODE (operands[0]) == MEM
7651 || GET_CODE (operands[1]) == MEM)
7652 && ! rtx_equal_p (operands[0], operands[1]))
7653 return FALSE;
7654 return TRUE;
7657 /* Return TRUE or FALSE depending on whether the first SET in INSN
7658 has source and destination with matching CC modes, and that the
7659 CC mode is at least as constrained as REQ_MODE. */
int
7662 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
7664 rtx set;
7665 enum machine_mode set_mode;
7667 set = PATTERN (insn);
7668 if (GET_CODE (set) == PARALLEL)
7669 set = XVECEXP (set, 0, 0);
7670 if (GET_CODE (set) != SET)
7671 abort ();
7672 if (GET_CODE (SET_SRC (set)) != COMPARE)
7673 abort ();
7675 set_mode = GET_MODE (SET_DEST (set));
7676 switch (set_mode)
7678 case CCNOmode:
7679 if (req_mode != CCNOmode
7680 && (req_mode != CCmode
7681 || XEXP (SET_SRC (set), 1) != const0_rtx))
7682 return 0;
7683 break;
7684 case CCmode:
7685 if (req_mode == CCGCmode)
7686 return 0;
7687 /* FALLTHRU */
7688 case CCGCmode:
7689 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7690 return 0;
7691 /* FALLTHRU */
7692 case CCGOCmode:
7693 if (req_mode == CCZmode)
7694 return 0;
7695 /* FALLTHRU */
7696 case CCZmode:
7697 break;
7699 default:
7700 abort ();
7703 return (GET_MODE (SET_SRC (set)) == set_mode);
7706 /* Generate insn patterns to do an integer compare of OPERANDS. */
7708 static rtx
7709 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
7711 enum machine_mode cmpmode;
7712 rtx tmp, flags;
7714 cmpmode = SELECT_CC_MODE (code, op0, op1);
7715 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7717 /* This is very simple, but making the interface the same as in the
7718 FP case makes the rest of the code easier. */
7719 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7720 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7722 /* Return the test that should be put into the flags user, i.e.
7723 the bcc, scc, or cmov instruction. */
7724 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7727 /* Figure out whether to use ordered or unordered fp comparisons.
7728 Return the appropriate mode to use. */
7730 enum machine_mode
7731 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
7733 /* ??? In order to make all comparisons reversible, we do all comparisons
7734 non-trapping when compiling for IEEE.  Once gcc is able to distinguish
7735 between trapping and nontrapping forms of comparisons, we can make inequality
7736 comparisons trapping again, since it results in better code when using
7737 FCOM based compares. */
7738 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
7741 enum machine_mode
7742 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
7744 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7745 return ix86_fp_compare_mode (code);
7746 switch (code)
7748 /* Only zero flag is needed. */
7749 case EQ: /* ZF=0 */
7750 case NE: /* ZF!=0 */
7751 return CCZmode;
7752 /* Codes needing carry flag. */
7753 case GEU: /* CF=0 */
7754 case GTU: /* CF=0 & ZF=0 */
7755 case LTU: /* CF=1 */
7756 case LEU: /* CF=1 | ZF=1 */
7757 return CCmode;
7758 /* Codes possibly doable only with sign flag when
7759 comparing against zero. */
7760 case GE: /* SF=OF or SF=0 */
7761 case LT: /* SF<>OF or SF=1 */
7762 if (op1 == const0_rtx)
7763 return CCGOCmode;
7764 else
7765 /* For other cases Carry flag is not required. */
7766 return CCGCmode;
7767 /* Codes doable only with the sign flag when comparing
7768 against zero, but for which there is no jump instruction,
7769 so we need to use relational tests against overflow,
7770 which therefore needs to be zero. */
7771 case GT: /* ZF=0 & SF=OF */
7772 case LE: /* ZF=1 | SF<>OF */
7773 if (op1 == const0_rtx)
7774 return CCNOmode;
7775 else
7776 return CCGCmode;
7777 /* The strcmp pattern does (use flags), and combine may ask us for the proper
7778 mode. */
7779 case USE:
7780 return CCmode;
7781 default:
7782 abort ();
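/* A minimal standalone sketch (not part of this file) of why the CCGOCmode
   cases above say GE/LT need only the sign flag when comparing against zero:
   for a comparison with zero, the sign bit of the operand alone decides the
   result.  The helper name is invented for illustration and assumes a
   32-bit SImode operand.  */
static inline int
sketch_ge_zero_needs_only_sign_bit (int a)
{
  /* a >= 0 exactly when the sign bit (bit 31 for SImode) is clear.  */
  return ((unsigned int) a >> 31) == 0;
}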
7786 /* Return the fixed registers used for condition codes. */
7788 static bool
7789 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
7791 *p1 = FLAGS_REG;
7792 *p2 = FPSR_REG;
7793 return true;
7796 /* If two condition code modes are compatible, return a condition code
7797 mode which is compatible with both. Otherwise, return
7798 VOIDmode. */
7800 static enum machine_mode
7801 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
7803 if (m1 == m2)
7804 return m1;
7806 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
7807 return VOIDmode;
7809 if ((m1 == CCGCmode && m2 == CCGOCmode)
7810 || (m1 == CCGOCmode && m2 == CCGCmode))
7811 return CCGCmode;
7813 switch (m1)
7815 default:
7816 abort ();
7818 case CCmode:
7819 case CCGCmode:
7820 case CCGOCmode:
7821 case CCNOmode:
7822 case CCZmode:
7823 switch (m2)
7825 default:
7826 return VOIDmode;
7828 case CCmode:
7829 case CCGCmode:
7830 case CCGOCmode:
7831 case CCNOmode:
7832 case CCZmode:
7833 return CCmode;
7836 case CCFPmode:
7837 case CCFPUmode:
7838 /* These are only compatible with themselves, which we already
7839 checked above. */
7840 return VOIDmode;
7844 /* Return true if we should use an FCOMI instruction for this fp comparison. */
7847 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
7849 enum rtx_code swapped_code = swap_condition (code);
7850 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7851 || (ix86_fp_comparison_cost (swapped_code)
7852 == ix86_fp_comparison_fcomi_cost (swapped_code)));
7855 /* Swap, force into registers, or otherwise massage the two operands
7856 to a fp comparison. The operands are updated in place; the new
7857 comparison code is returned. */
7859 static enum rtx_code
7860 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
7862 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7863 rtx op0 = *pop0, op1 = *pop1;
7864 enum machine_mode op_mode = GET_MODE (op0);
7865 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
7867 /* All of the unordered compare instructions only work on registers.
7868 The same is true of the XFmode compare instructions. The same is
7869 true of the fcomi compare instructions. */
7871 if (!is_sse
7872 && (fpcmp_mode == CCFPUmode
7873 || op_mode == XFmode
7874 || ix86_use_fcomi_compare (code)))
7876 op0 = force_reg (op_mode, op0);
7877 op1 = force_reg (op_mode, op1);
7879 else
7881 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7882 things around if they appear profitable, otherwise force op0
7883 into a register. */
7885 if (standard_80387_constant_p (op0) == 0
7886 || (GET_CODE (op0) == MEM
7887 && ! (standard_80387_constant_p (op1) == 0
7888 || GET_CODE (op1) == MEM)))
7890 rtx tmp;
7891 tmp = op0, op0 = op1, op1 = tmp;
7892 code = swap_condition (code);
7895 if (GET_CODE (op0) != REG)
7896 op0 = force_reg (op_mode, op0);
7898 if (CONSTANT_P (op1))
7900 if (standard_80387_constant_p (op1))
7901 op1 = force_reg (op_mode, op1);
7902 else
7903 op1 = validize_mem (force_const_mem (op_mode, op1));
7907 /* Try to rearrange the comparison to make it cheaper. */
7908 if (ix86_fp_comparison_cost (code)
7909 > ix86_fp_comparison_cost (swap_condition (code))
7910 && (GET_CODE (op1) == REG || !no_new_pseudos))
7912 rtx tmp;
7913 tmp = op0, op0 = op1, op1 = tmp;
7914 code = swap_condition (code);
7915 if (GET_CODE (op0) != REG)
7916 op0 = force_reg (op_mode, op0);
7919 *pop0 = op0;
7920 *pop1 = op1;
7921 return code;
7924 /* Convert comparison codes we use to represent FP comparison to integer
7925 code that will result in proper branch. Return UNKNOWN if no such code
7926 is available. */
7928 enum rtx_code
7929 ix86_fp_compare_code_to_integer (enum rtx_code code)
7931 switch (code)
7933 case GT:
7934 return GTU;
7935 case GE:
7936 return GEU;
7937 case ORDERED:
7938 case UNORDERED:
7939 return code;
7940 break;
7941 case UNEQ:
7942 return EQ;
7943 break;
7944 case UNLT:
7945 return LTU;
7946 break;
7947 case UNLE:
7948 return LEU;
7949 break;
7950 case LTGT:
7951 return NE;
7952 break;
7953 default:
7954 return UNKNOWN;
7958 /* Split comparison code CODE into comparisons we can do using branch
7959 instructions.  BYPASS_CODE is the comparison code for the branch that will
7960 branch around FIRST_CODE and SECOND_CODE.  If one of the branches
7961 is not required, its value is set to UNKNOWN.
7962 We never require more than two branches. */
7964 void
7965 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
7966 enum rtx_code *first_code,
7967 enum rtx_code *second_code)
7969 *first_code = code;
7970 *bypass_code = UNKNOWN;
7971 *second_code = UNKNOWN;
7973 /* The fcomi comparison sets flags as follows:
7975 cmp ZF PF CF
7976 > 0 0 0
7977 < 0 0 1
7978 = 1 0 0
7979 un 1 1 1 */
7981 switch (code)
7983 case GT: /* GTU - CF=0 & ZF=0 */
7984 case GE: /* GEU - CF=0 */
7985 case ORDERED: /* PF=0 */
7986 case UNORDERED: /* PF=1 */
7987 case UNEQ: /* EQ - ZF=1 */
7988 case UNLT: /* LTU - CF=1 */
7989 case UNLE: /* LEU - CF=1 | ZF=1 */
7990 case LTGT: /* EQ - ZF=0 */
7991 break;
7992 case LT: /* LTU - CF=1 - fails on unordered */
7993 *first_code = UNLT;
7994 *bypass_code = UNORDERED;
7995 break;
7996 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
7997 *first_code = UNLE;
7998 *bypass_code = UNORDERED;
7999 break;
8000 case EQ: /* EQ - ZF=1 - fails on unordered */
8001 *first_code = UNEQ;
8002 *bypass_code = UNORDERED;
8003 break;
8004 case NE: /* NE - ZF=0 - fails on unordered */
8005 *first_code = LTGT;
8006 *second_code = UNORDERED;
8007 break;
8008 case UNGE: /* GEU - CF=0 - fails on unordered */
8009 *first_code = GE;
8010 *second_code = UNORDERED;
8011 break;
8012 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8013 *first_code = GT;
8014 *second_code = UNORDERED;
8015 break;
8016 default:
8017 abort ();
8019 if (!TARGET_IEEE_FP)
8021 *second_code = UNKNOWN;
8022 *bypass_code = UNKNOWN;
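/* A small standalone sketch (not part of this file) of the decomposition
   chosen above in IEEE mode: a plain LT branch is rewritten as a bypass
   branch on UNORDERED followed by an UNLT test, so a NaN operand never
   takes the "true" edge.  The function name is invented for illustration.  */
static int
sketch_fp_lt_with_unordered_bypass (double a, double b)
{
  if (a != a || b != b)		/* bypass_code == UNORDERED: a NaN is involved */
    return 0;			/* branch around the main test */
  return a < b;			/* first_code == UNLT, safe once ordered */
}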
8026 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
8027 All the following functions use the number of instructions as the cost metric.
8028 In the future this should be tweaked to compute bytes for optimize_size and to
8029 take into account the performance of various instructions on various CPUs. */
8030 static int
8031 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8033 if (!TARGET_IEEE_FP)
8034 return 4;
8035 /* The cost of code output by ix86_expand_fp_compare. */
8036 switch (code)
8038 case UNLE:
8039 case UNLT:
8040 case LTGT:
8041 case GT:
8042 case GE:
8043 case UNORDERED:
8044 case ORDERED:
8045 case UNEQ:
8046 return 4;
8047 break;
8048 case LT:
8049 case NE:
8050 case EQ:
8051 case UNGE:
8052 return 5;
8053 break;
8054 case LE:
8055 case UNGT:
8056 return 6;
8057 break;
8058 default:
8059 abort ();
8063 /* Return cost of comparison done using fcomi operation.
8064 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8065 static int
8066 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
8068 enum rtx_code bypass_code, first_code, second_code;
8069 /* Return an arbitrarily high cost when the instruction is not supported - this
8070 prevents gcc from using it. */
8071 if (!TARGET_CMOVE)
8072 return 1024;
8073 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8074 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
8077 /* Return cost of comparison done using sahf operation.
8078 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8079 static int
8080 ix86_fp_comparison_sahf_cost (enum rtx_code code)
8082 enum rtx_code bypass_code, first_code, second_code;
8083 /* Return an arbitrarily high cost when the instruction is not preferred - this
8084 keeps gcc from using it. */
8085 if (!TARGET_USE_SAHF && !optimize_size)
8086 return 1024;
8087 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8088 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
8091 /* Compute cost of the comparison done using any method.
8092 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8093 static int
8094 ix86_fp_comparison_cost (enum rtx_code code)
8096 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8097 int min;
8099 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8100 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8102 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8103 if (min > sahf_cost)
8104 min = sahf_cost;
8105 if (min > fcomi_cost)
8106 min = fcomi_cost;
8107 return min;
8110 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8112 static rtx
8113 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
8114 rtx *second_test, rtx *bypass_test)
8116 enum machine_mode fpcmp_mode, intcmp_mode;
8117 rtx tmp, tmp2;
8118 int cost = ix86_fp_comparison_cost (code);
8119 enum rtx_code bypass_code, first_code, second_code;
8121 fpcmp_mode = ix86_fp_compare_mode (code);
8122 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8124 if (second_test)
8125 *second_test = NULL_RTX;
8126 if (bypass_test)
8127 *bypass_test = NULL_RTX;
8129 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8131 /* Do fcomi/sahf based test when profitable. */
8132 if ((bypass_code == UNKNOWN || bypass_test)
8133 && (second_code == UNKNOWN || second_test)
8134 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8136 if (TARGET_CMOVE)
8138 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8139 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8140 tmp);
8141 emit_insn (tmp);
8143 else
8145 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8146 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8147 if (!scratch)
8148 scratch = gen_reg_rtx (HImode);
8149 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8150 emit_insn (gen_x86_sahf_1 (scratch));
8153 /* The FP codes work out to act like unsigned. */
8154 intcmp_mode = fpcmp_mode;
8155 code = first_code;
8156 if (bypass_code != UNKNOWN)
8157 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8158 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8159 const0_rtx);
8160 if (second_code != UNKNOWN)
8161 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8162 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8163 const0_rtx);
8165 else
8167 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8168 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8169 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8170 if (!scratch)
8171 scratch = gen_reg_rtx (HImode);
8172 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8174 /* In the unordered case, we have to check C2 for NaN's, which
8175 doesn't happen to work out to anything nice combination-wise.
8176 So do some bit twiddling on the value we've got in AH to come
8177 up with an appropriate set of condition codes. */
8179 intcmp_mode = CCNOmode;
8180 switch (code)
8182 case GT:
8183 case UNGT:
8184 if (code == GT || !TARGET_IEEE_FP)
8186 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8187 code = EQ;
8189 else
8191 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8192 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8193 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8194 intcmp_mode = CCmode;
8195 code = GEU;
8197 break;
8198 case LT:
8199 case UNLT:
8200 if (code == LT && TARGET_IEEE_FP)
8202 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8203 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8204 intcmp_mode = CCmode;
8205 code = EQ;
8207 else
8209 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8210 code = NE;
8212 break;
8213 case GE:
8214 case UNGE:
8215 if (code == GE || !TARGET_IEEE_FP)
8217 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8218 code = EQ;
8220 else
8222 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8223 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8224 GEN_INT (0x01)));
8225 code = NE;
8227 break;
8228 case LE:
8229 case UNLE:
8230 if (code == LE && TARGET_IEEE_FP)
8232 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8233 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8234 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8235 intcmp_mode = CCmode;
8236 code = LTU;
8238 else
8240 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8241 code = NE;
8243 break;
8244 case EQ:
8245 case UNEQ:
8246 if (code == EQ && TARGET_IEEE_FP)
8248 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8249 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8250 intcmp_mode = CCmode;
8251 code = EQ;
8253 else
8255 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8256 code = NE;
8257 break;
8259 break;
8260 case NE:
8261 case LTGT:
8262 if (code == NE && TARGET_IEEE_FP)
8264 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8265 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8266 GEN_INT (0x40)));
8267 code = NE;
8269 else
8271 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8272 code = EQ;
8274 break;
8276 case UNORDERED:
8277 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8278 code = NE;
8279 break;
8280 case ORDERED:
8281 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8282 code = EQ;
8283 break;
8285 default:
8286 abort ();
8290 /* Return the test that should be put into the flags user, i.e.
8291 the bcc, scc, or cmov instruction. */
8292 return gen_rtx_fmt_ee (code, VOIDmode,
8293 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8294 const0_rtx);
8298 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
8300 rtx op0, op1, ret;
8301 op0 = ix86_compare_op0;
8302 op1 = ix86_compare_op1;
8304 if (second_test)
8305 *second_test = NULL_RTX;
8306 if (bypass_test)
8307 *bypass_test = NULL_RTX;
8309 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8310 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8311 second_test, bypass_test);
8312 else
8313 ret = ix86_expand_int_compare (code, op0, op1);
8315 return ret;
8318 /* Return true if the CODE will result in a nontrivial jump sequence. */
8319 bool
8320 ix86_fp_jump_nontrivial_p (enum rtx_code code)
8322 enum rtx_code bypass_code, first_code, second_code;
8323 if (!TARGET_CMOVE)
8324 return true;
8325 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8326 return bypass_code != UNKNOWN || second_code != UNKNOWN;
8329 void
8330 ix86_expand_branch (enum rtx_code code, rtx label)
8332 rtx tmp;
8334 switch (GET_MODE (ix86_compare_op0))
8336 case QImode:
8337 case HImode:
8338 case SImode:
8339 simple:
8340 tmp = ix86_expand_compare (code, NULL, NULL);
8341 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8342 gen_rtx_LABEL_REF (VOIDmode, label),
8343 pc_rtx);
8344 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8345 return;
8347 case SFmode:
8348 case DFmode:
8349 case XFmode:
8351 rtvec vec;
8352 int use_fcomi;
8353 enum rtx_code bypass_code, first_code, second_code;
8355 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8356 &ix86_compare_op1);
8358 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8360 /* Check whether we will use the natural sequence with one jump.  If
8361 so, we can expand the jump early.  Otherwise delay expansion by
8362 creating a compound insn so as not to confuse the optimizers. */
8363 if (bypass_code == UNKNOWN && second_code == UNKNOWN
8364 && TARGET_CMOVE)
8366 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8367 gen_rtx_LABEL_REF (VOIDmode, label),
8368 pc_rtx, NULL_RTX);
8370 else
8372 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8373 ix86_compare_op0, ix86_compare_op1);
8374 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8375 gen_rtx_LABEL_REF (VOIDmode, label),
8376 pc_rtx);
8377 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8379 use_fcomi = ix86_use_fcomi_compare (code);
8380 vec = rtvec_alloc (3 + !use_fcomi);
8381 RTVEC_ELT (vec, 0) = tmp;
8382 RTVEC_ELT (vec, 1)
8383 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8384 RTVEC_ELT (vec, 2)
8385 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8386 if (! use_fcomi)
8387 RTVEC_ELT (vec, 3)
8388 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8390 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8392 return;
8395 case DImode:
8396 if (TARGET_64BIT)
8397 goto simple;
8398 /* Expand DImode branch into multiple compare+branch. */
8400 rtx lo[2], hi[2], label2;
8401 enum rtx_code code1, code2, code3;
8403 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8405 tmp = ix86_compare_op0;
8406 ix86_compare_op0 = ix86_compare_op1;
8407 ix86_compare_op1 = tmp;
8408 code = swap_condition (code);
8410 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8411 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8413 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8414 avoid two branches. This costs one extra insn, so disable when
8415 optimizing for size. */
8417 if ((code == EQ || code == NE)
8418 && (!optimize_size
8419 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8421 rtx xor0, xor1;
8423 xor1 = hi[0];
8424 if (hi[1] != const0_rtx)
8425 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8426 NULL_RTX, 0, OPTAB_WIDEN);
8428 xor0 = lo[0];
8429 if (lo[1] != const0_rtx)
8430 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8431 NULL_RTX, 0, OPTAB_WIDEN);
8433 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8434 NULL_RTX, 0, OPTAB_WIDEN);
8436 ix86_compare_op0 = tmp;
8437 ix86_compare_op1 = const0_rtx;
8438 ix86_expand_branch (code, label);
8439 return;
8442 /* Otherwise, if we are doing a less-than or greater-or-equal-than comparison,
8443 op1 is a constant, and the low word is zero, then we can just
8444 examine the high word. */
8446 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8447 switch (code)
8449 case LT: case LTU: case GE: case GEU:
8450 ix86_compare_op0 = hi[0];
8451 ix86_compare_op1 = hi[1];
8452 ix86_expand_branch (code, label);
8453 return;
8454 default:
8455 break;
8458 /* Otherwise, we need two or three jumps. */
8460 label2 = gen_label_rtx ();
8462 code1 = code;
8463 code2 = swap_condition (code);
8464 code3 = unsigned_condition (code);
8466 switch (code)
8468 case LT: case GT: case LTU: case GTU:
8469 break;
8471 case LE: code1 = LT; code2 = GT; break;
8472 case GE: code1 = GT; code2 = LT; break;
8473 case LEU: code1 = LTU; code2 = GTU; break;
8474 case GEU: code1 = GTU; code2 = LTU; break;
8476 case EQ: code1 = UNKNOWN; code2 = NE; break;
8477 case NE: code2 = UNKNOWN; break;
8479 default:
8480 abort ();
8484 * a < b =>
8485 * if (hi(a) < hi(b)) goto true;
8486 * if (hi(a) > hi(b)) goto false;
8487 * if (lo(a) < lo(b)) goto true;
8488 * false:
8491 ix86_compare_op0 = hi[0];
8492 ix86_compare_op1 = hi[1];
8494 if (code1 != UNKNOWN)
8495 ix86_expand_branch (code1, label);
8496 if (code2 != UNKNOWN)
8497 ix86_expand_branch (code2, label2);
8499 ix86_compare_op0 = lo[0];
8500 ix86_compare_op1 = lo[1];
8501 ix86_expand_branch (code3, label);
8503 if (code2 != UNKNOWN)
8504 emit_label (label2);
8505 return;
8508 default:
8509 abort ();
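/* A standalone sketch (not part of this file) of the EQ/NE shortcut used
   above for DImode on 32-bit targets: (hi0 ^ hi1) | (lo0 ^ lo1) is zero
   exactly when the two 64-bit values are equal, so a single test suffices
   instead of two compare-and-branch pairs.  Names are illustrative only.  */
static int
sketch_di_equal_via_xor_or (unsigned int lo0, unsigned int hi0,
			    unsigned int lo1, unsigned int hi1)
{
  return ((hi0 ^ hi1) | (lo0 ^ lo1)) == 0;
}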
8513 /* Split branch based on floating point condition. */
8514 void
8515 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
8516 rtx target1, rtx target2, rtx tmp)
8518 rtx second, bypass;
8519 rtx label = NULL_RTX;
8520 rtx condition;
8521 int bypass_probability = -1, second_probability = -1, probability = -1;
8522 rtx i;
8524 if (target2 != pc_rtx)
8526 rtx tmp = target2;
8527 code = reverse_condition_maybe_unordered (code);
8528 target2 = target1;
8529 target1 = tmp;
8532 condition = ix86_expand_fp_compare (code, op1, op2,
8533 tmp, &second, &bypass);
8535 if (split_branch_probability >= 0)
8537 /* Distribute the probabilities across the jumps.
8538 Assume that BYPASS and SECOND always test
8539 for UNORDERED. */
8540 probability = split_branch_probability;
8542 /* A value of 1 is low enough that the probability does not need
8543 to be updated.  Later we may run some experiments and see
8544 if unordered values are more frequent in practice. */
8545 if (bypass)
8546 bypass_probability = 1;
8547 if (second)
8548 second_probability = 1;
8550 if (bypass != NULL_RTX)
8552 label = gen_label_rtx ();
8553 i = emit_jump_insn (gen_rtx_SET
8554 (VOIDmode, pc_rtx,
8555 gen_rtx_IF_THEN_ELSE (VOIDmode,
8556 bypass,
8557 gen_rtx_LABEL_REF (VOIDmode,
8558 label),
8559 pc_rtx)));
8560 if (bypass_probability >= 0)
8561 REG_NOTES (i)
8562 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8563 GEN_INT (bypass_probability),
8564 REG_NOTES (i));
8566 i = emit_jump_insn (gen_rtx_SET
8567 (VOIDmode, pc_rtx,
8568 gen_rtx_IF_THEN_ELSE (VOIDmode,
8569 condition, target1, target2)));
8570 if (probability >= 0)
8571 REG_NOTES (i)
8572 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8573 GEN_INT (probability),
8574 REG_NOTES (i));
8575 if (second != NULL_RTX)
8577 i = emit_jump_insn (gen_rtx_SET
8578 (VOIDmode, pc_rtx,
8579 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8580 target2)));
8581 if (second_probability >= 0)
8582 REG_NOTES (i)
8583 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8584 GEN_INT (second_probability),
8585 REG_NOTES (i));
8587 if (label != NULL_RTX)
8588 emit_label (label);
8592 ix86_expand_setcc (enum rtx_code code, rtx dest)
8594 rtx ret, tmp, tmpreg, equiv;
8595 rtx second_test, bypass_test;
8597 if (GET_MODE (ix86_compare_op0) == DImode
8598 && !TARGET_64BIT)
8599 return 0; /* FAIL */
8601 if (GET_MODE (dest) != QImode)
8602 abort ();
8604 ret = ix86_expand_compare (code, &second_test, &bypass_test);
8605 PUT_MODE (ret, QImode);
8607 tmp = dest;
8608 tmpreg = dest;
8610 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
8611 if (bypass_test || second_test)
8613 rtx test = second_test;
8614 int bypass = 0;
8615 rtx tmp2 = gen_reg_rtx (QImode);
8616 if (bypass_test)
8618 if (second_test)
8619 abort ();
8620 test = bypass_test;
8621 bypass = 1;
8622 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8624 PUT_MODE (test, QImode);
8625 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8627 if (bypass)
8628 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8629 else
8630 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8633 /* Attach a REG_EQUAL note describing the comparison result. */
8634 equiv = simplify_gen_relational (code, QImode,
8635 GET_MODE (ix86_compare_op0),
8636 ix86_compare_op0, ix86_compare_op1);
8637 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
8639 return 1; /* DONE */
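/* A standalone sketch (not part of this file) of why a second test may be
   or-ed into the setcc result above: under IEEE semantics, NE decomposes
   into LTGT as the first test plus UNORDERED as the second (see
   ix86_fp_comparison_codes), and the two QImode results are combined with
   ior.  Names are invented for illustration.  */
static int
sketch_fp_setcc_ne (double a, double b)
{
  int ltgt = (a < b) || (a > b);		/* first test */
  int unordered = (a != a) || (b != b);		/* second test */
  return ltgt | unordered;			/* the gen_iorqi3 combination */
}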
8642 /* Expand a comparison setting or clearing the carry flag.  Return true when
8643 successful and set *POP to the comparison operation. */
8644 static bool
8645 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
8647 enum machine_mode mode =
8648 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
8650 /* Do not handle DImode compares that go through the special path.  Also we can't
8651 deal with FP compares yet; support for that could be added. */
8652 if ((mode == DImode && !TARGET_64BIT))
8653 return false;
8654 if (FLOAT_MODE_P (mode))
8656 rtx second_test = NULL, bypass_test = NULL;
8657 rtx compare_op, compare_seq;
8659 /* Shortcut: the following common codes never translate into carry flag compares. */
8660 if (code == EQ || code == NE || code == UNEQ || code == LTGT
8661 || code == ORDERED || code == UNORDERED)
8662 return false;
8664 /* These comparisons require the zero flag; swap the operands so they won't. */
8665 if ((code == GT || code == UNLE || code == LE || code == UNGT)
8666 && !TARGET_IEEE_FP)
8668 rtx tmp = op0;
8669 op0 = op1;
8670 op1 = tmp;
8671 code = swap_condition (code);
8674 /* Try to expand the comparison and verify that we end up with a carry-flag
8675 based comparison.  This fails only when we decide to expand the
8676 comparison using arithmetic, which is not a very common scenario. */
8677 start_sequence ();
8678 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8679 &second_test, &bypass_test);
8680 compare_seq = get_insns ();
8681 end_sequence ();
8683 if (second_test || bypass_test)
8684 return false;
8685 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
8686 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
8687 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
8688 else
8689 code = GET_CODE (compare_op);
8690 if (code != LTU && code != GEU)
8691 return false;
8692 emit_insn (compare_seq);
8693 *pop = compare_op;
8694 return true;
8696 if (!INTEGRAL_MODE_P (mode))
8697 return false;
8698 switch (code)
8700 case LTU:
8701 case GEU:
8702 break;
8704 /* Convert a==0 into (unsigned)a<1. */
8705 case EQ:
8706 case NE:
8707 if (op1 != const0_rtx)
8708 return false;
8709 op1 = const1_rtx;
8710 code = (code == EQ ? LTU : GEU);
8711 break;
8713 /* Convert a>b into b<a or a>=b-1. */
8714 case GTU:
8715 case LEU:
8716 if (GET_CODE (op1) == CONST_INT)
8718 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
8719 /* Bail out on overflow.  We could still swap the operands, but that
8720 would force loading of the constant into a register. */
8721 if (op1 == const0_rtx
8722 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
8723 return false;
8724 code = (code == GTU ? GEU : LTU);
8726 else
8728 rtx tmp = op1;
8729 op1 = op0;
8730 op0 = tmp;
8731 code = (code == GTU ? LTU : GEU);
8733 break;
8735 /* Convert a>=0 into (unsigned)a<0x80000000. */
8736 case LT:
8737 case GE:
8738 if (mode == DImode || op1 != const0_rtx)
8739 return false;
8740 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
8741 code = (code == LT ? GEU : LTU);
8742 break;
8743 case LE:
8744 case GT:
8745 if (mode == DImode || op1 != constm1_rtx)
8746 return false;
8747 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
8748 code = (code == LE ? GEU : LTU);
8749 break;
8751 default:
8752 return false;
8754 /* Swapping operands may cause a constant to appear as the first operand. */
8755 if (!nonimmediate_operand (op0, VOIDmode))
8757 if (no_new_pseudos)
8758 return false;
8759 op0 = force_reg (mode, op0);
8761 ix86_compare_op0 = op0;
8762 ix86_compare_op1 = op1;
8763 *pop = ix86_expand_compare (code, NULL, NULL);
8764 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
8765 abort ();
8766 return true;
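/* A standalone sketch (not part of this file) checking the unsigned
   rewrites used above so that the comparison ends up in the carry flag:
   a==0 becomes (unsigned)a<1, a>=0 becomes (unsigned)a<0x80000000, and
   a>b becomes a>=b+1 as long as b+1 does not wrap (the "bail out on
   overflow" case).  Names are invented for illustration and a 32-bit
   int is assumed, as on i386.  */
static int
sketch_carry_flag_rewrites_hold (int s, unsigned int a, unsigned int b)
{
  int ok = 1;
  ok &= ((a == 0) == (a < 1));				   /* EQ  -> LTU */
  ok &= ((s >= 0) == ((unsigned int) s < 0x80000000u));	   /* GE  -> LTU */
  if (b != 0xffffffffu)					   /* no wrap-around */
    ok &= ((a > b) == (a >= b + 1));			   /* GTU -> GEU */
  return ok;
}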
8770 ix86_expand_int_movcc (rtx operands[])
8772 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8773 rtx compare_seq, compare_op;
8774 rtx second_test, bypass_test;
8775 enum machine_mode mode = GET_MODE (operands[0]);
8776 bool sign_bit_compare_p = false;
8778 start_sequence ();
8779 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8780 compare_seq = get_insns ();
8781 end_sequence ();
8783 compare_code = GET_CODE (compare_op);
8785 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
8786 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
8787 sign_bit_compare_p = true;
8789 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8790 HImode insns, we'd be swallowed in word prefix ops. */
8792 if ((mode != HImode || TARGET_FAST_PREFIX)
8793 && (mode != DImode || TARGET_64BIT)
8794 && GET_CODE (operands[2]) == CONST_INT
8795 && GET_CODE (operands[3]) == CONST_INT)
8797 rtx out = operands[0];
8798 HOST_WIDE_INT ct = INTVAL (operands[2]);
8799 HOST_WIDE_INT cf = INTVAL (operands[3]);
8800 HOST_WIDE_INT diff;
8802 diff = ct - cf;
8803 /* Sign bit compares are better done using shifts than by using
8804 sbb. */
8805 if (sign_bit_compare_p
8806 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
8807 ix86_compare_op1, &compare_op))
8809 /* Detect overlap between destination and compare sources. */
8810 rtx tmp = out;
8812 if (!sign_bit_compare_p)
8814 bool fpcmp = false;
8816 compare_code = GET_CODE (compare_op);
8818 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
8819 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
8821 fpcmp = true;
8822 compare_code = ix86_fp_compare_code_to_integer (compare_code);
8825 /* To simplify the rest of the code, restrict to the GEU case. */
8826 if (compare_code == LTU)
8828 HOST_WIDE_INT tmp = ct;
8829 ct = cf;
8830 cf = tmp;
8831 compare_code = reverse_condition (compare_code);
8832 code = reverse_condition (code);
8834 else
8836 if (fpcmp)
8837 PUT_CODE (compare_op,
8838 reverse_condition_maybe_unordered
8839 (GET_CODE (compare_op)));
8840 else
8841 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
8843 diff = ct - cf;
8845 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
8846 || reg_overlap_mentioned_p (out, ix86_compare_op1))
8847 tmp = gen_reg_rtx (mode);
8849 if (mode == DImode)
8850 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
8851 else
8852 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
8854 else
8856 if (code == GT || code == GE)
8857 code = reverse_condition (code);
8858 else
8860 HOST_WIDE_INT tmp = ct;
8861 ct = cf;
8862 cf = tmp;
8863 diff = ct - cf;
8865 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
8866 ix86_compare_op1, VOIDmode, 0, -1);
8869 if (diff == 1)
8872 * cmpl op0,op1
8873 * sbbl dest,dest
8874 * [addl dest, ct]
8876 * Size 5 - 8.
8878 if (ct)
8879 tmp = expand_simple_binop (mode, PLUS,
8880 tmp, GEN_INT (ct),
8881 copy_rtx (tmp), 1, OPTAB_DIRECT);
8883 else if (cf == -1)
8886 * cmpl op0,op1
8887 * sbbl dest,dest
8888 * orl $ct, dest
8890 * Size 8.
8892 tmp = expand_simple_binop (mode, IOR,
8893 tmp, GEN_INT (ct),
8894 copy_rtx (tmp), 1, OPTAB_DIRECT);
8896 else if (diff == -1 && ct)
8899 * cmpl op0,op1
8900 * sbbl dest,dest
8901 * notl dest
8902 * [addl dest, cf]
8904 * Size 8 - 11.
8906 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
8907 if (cf)
8908 tmp = expand_simple_binop (mode, PLUS,
8909 copy_rtx (tmp), GEN_INT (cf),
8910 copy_rtx (tmp), 1, OPTAB_DIRECT);
8912 else
8915 * cmpl op0,op1
8916 * sbbl dest,dest
8917 * [notl dest]
8918 * andl cf - ct, dest
8919 * [addl dest, ct]
8921 * Size 8 - 11.
8924 if (cf == 0)
8926 cf = ct;
8927 ct = 0;
8928 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
8931 tmp = expand_simple_binop (mode, AND,
8932 copy_rtx (tmp),
8933 gen_int_mode (cf - ct, mode),
8934 copy_rtx (tmp), 1, OPTAB_DIRECT);
8935 if (ct)
8936 tmp = expand_simple_binop (mode, PLUS,
8937 copy_rtx (tmp), GEN_INT (ct),
8938 copy_rtx (tmp), 1, OPTAB_DIRECT);
8941 if (!rtx_equal_p (tmp, out))
8942 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
8944 return 1; /* DONE */
8947 if (diff < 0)
8949 HOST_WIDE_INT tmp;
8950 tmp = ct, ct = cf, cf = tmp;
8951 diff = -diff;
8952 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8954 /* We may be reversing an unordered compare to a normal compare, which
8955 is not valid in general (we may convert a non-trapping condition
8956 to a trapping one); however, on i386 we currently emit all
8957 comparisons unordered. */
8958 compare_code = reverse_condition_maybe_unordered (compare_code);
8959 code = reverse_condition_maybe_unordered (code);
8961 else
8963 compare_code = reverse_condition (compare_code);
8964 code = reverse_condition (code);
8968 compare_code = UNKNOWN;
8969 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
8970 && GET_CODE (ix86_compare_op1) == CONST_INT)
8972 if (ix86_compare_op1 == const0_rtx
8973 && (code == LT || code == GE))
8974 compare_code = code;
8975 else if (ix86_compare_op1 == constm1_rtx)
8977 if (code == LE)
8978 compare_code = LT;
8979 else if (code == GT)
8980 compare_code = GE;
8984 /* Optimize dest = (op0 < 0) ? -1 : cf. */
8985 if (compare_code != UNKNOWN
8986 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
8987 && (cf == -1 || ct == -1))
8989 /* If lea code below could be used, only optimize
8990 if it results in a 2 insn sequence. */
8992 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
8993 || diff == 3 || diff == 5 || diff == 9)
8994 || (compare_code == LT && ct == -1)
8995 || (compare_code == GE && cf == -1))
8998 * notl op1 (if necessary)
8999 * sarl $31, op1
9000 * orl cf, op1
9002 if (ct != -1)
9004 cf = ct;
9005 ct = -1;
9006 code = reverse_condition (code);
9009 out = emit_store_flag (out, code, ix86_compare_op0,
9010 ix86_compare_op1, VOIDmode, 0, -1);
9012 out = expand_simple_binop (mode, IOR,
9013 out, GEN_INT (cf),
9014 out, 1, OPTAB_DIRECT);
9015 if (out != operands[0])
9016 emit_move_insn (operands[0], out);
9018 return 1; /* DONE */
9023 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9024 || diff == 3 || diff == 5 || diff == 9)
9025 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9026 && (mode != DImode
9027 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
9030 * xorl dest,dest
9031 * cmpl op1,op2
9032 * setcc dest
9033 * lea cf(dest*(ct-cf)),dest
9035 * Size 14.
9037 * This also catches the degenerate setcc-only case.
9040 rtx tmp;
9041 int nops;
9043 out = emit_store_flag (out, code, ix86_compare_op0,
9044 ix86_compare_op1, VOIDmode, 0, 1);
9046 nops = 0;
9047 /* On x86_64 the lea instruction operates on Pmode, so we need
9048 to get the arithmetic done in the proper mode to match. */
9049 if (diff == 1)
9050 tmp = copy_rtx (out);
9051 else
9053 rtx out1;
9054 out1 = copy_rtx (out);
9055 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9056 nops++;
9057 if (diff & 1)
9059 tmp = gen_rtx_PLUS (mode, tmp, out1);
9060 nops++;
9063 if (cf != 0)
9065 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9066 nops++;
9068 if (!rtx_equal_p (tmp, out))
9070 if (nops == 1)
9071 out = force_operand (tmp, copy_rtx (out));
9072 else
9073 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9075 if (!rtx_equal_p (out, operands[0]))
9076 emit_move_insn (operands[0], copy_rtx (out));
9078 return 1; /* DONE */
9082 * General case: Jumpful:
9083 * xorl dest,dest cmpl op1, op2
9084 * cmpl op1, op2 movl ct, dest
9085 * setcc dest jcc 1f
9086 * decl dest movl cf, dest
9087 * andl (cf-ct),dest 1:
9088 * addl ct,dest
9090 * Size 20. Size 14.
9092 * This is reasonably steep, but branch mispredict costs are
9093 * high on modern cpus, so consider failing only if optimizing
9094 * for space.
9097 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9098 && BRANCH_COST >= 2)
9100 if (cf == 0)
9102 cf = ct;
9103 ct = 0;
9104 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9105 /* We may be reversing an unordered compare to a normal compare,
9106 which is not valid in general (we may convert a non-trapping
9107 condition to a trapping one); however, on i386 we currently
9108 emit all comparisons unordered. */
9109 code = reverse_condition_maybe_unordered (code);
9110 else
9112 code = reverse_condition (code);
9113 if (compare_code != UNKNOWN)
9114 compare_code = reverse_condition (compare_code);
9118 if (compare_code != UNKNOWN)
9120 /* notl op1 (if needed)
9121 sarl $31, op1
9122 andl (cf-ct), op1
9123 addl ct, op1
9125 For x < 0 (resp. x <= -1) there will be no notl,
9126 so if possible swap the constants to get rid of the
9127 complement.
9128 True/false will be -1/0 while code below (store flag
9129 followed by decrement) is 0/-1, so the constants need
9130 to be exchanged once more. */
9132 if (compare_code == GE || !cf)
9134 code = reverse_condition (code);
9135 compare_code = LT;
9137 else
9139 HOST_WIDE_INT tmp = cf;
9140 cf = ct;
9141 ct = tmp;
9144 out = emit_store_flag (out, code, ix86_compare_op0,
9145 ix86_compare_op1, VOIDmode, 0, -1);
9147 else
9149 out = emit_store_flag (out, code, ix86_compare_op0,
9150 ix86_compare_op1, VOIDmode, 0, 1);
9152 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9153 copy_rtx (out), 1, OPTAB_DIRECT);
9156 out = expand_simple_binop (mode, AND, copy_rtx (out),
9157 gen_int_mode (cf - ct, mode),
9158 copy_rtx (out), 1, OPTAB_DIRECT);
9159 if (ct)
9160 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9161 copy_rtx (out), 1, OPTAB_DIRECT);
9162 if (!rtx_equal_p (out, operands[0]))
9163 emit_move_insn (operands[0], copy_rtx (out));
9165 return 1; /* DONE */
9169 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9171 /* Try a few things more with specific constants and a variable. */
9173 optab op;
9174 rtx var, orig_out, out, tmp;
9176 if (BRANCH_COST <= 2)
9177 return 0; /* FAIL */
9179 /* If one of the two operands is an interesting constant, load a
9180 constant with the above and mask it in with a logical operation. */
9182 if (GET_CODE (operands[2]) == CONST_INT)
9184 var = operands[3];
9185 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
9186 operands[3] = constm1_rtx, op = and_optab;
9187 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
9188 operands[3] = const0_rtx, op = ior_optab;
9189 else
9190 return 0; /* FAIL */
9192 else if (GET_CODE (operands[3]) == CONST_INT)
9194 var = operands[2];
9195 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
9196 operands[2] = constm1_rtx, op = and_optab;
9197 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
9198 operands[2] = const0_rtx, op = ior_optab;
9199 else
9200 return 0; /* FAIL */
9202 else
9203 return 0; /* FAIL */
9205 orig_out = operands[0];
9206 tmp = gen_reg_rtx (mode);
9207 operands[0] = tmp;
9209 /* Recurse to get the constant loaded. */
9210 if (ix86_expand_int_movcc (operands) == 0)
9211 return 0; /* FAIL */
9213 /* Mask in the interesting variable. */
9214 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9215 OPTAB_WIDEN);
9216 if (!rtx_equal_p (out, orig_out))
9217 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
9219 return 1; /* DONE */
9223 * For comparison with above,
9225 * movl cf,dest
9226 * movl ct,tmp
9227 * cmpl op1,op2
9228 * cmovcc tmp,dest
9230 * Size 15.
9233 if (! nonimmediate_operand (operands[2], mode))
9234 operands[2] = force_reg (mode, operands[2]);
9235 if (! nonimmediate_operand (operands[3], mode))
9236 operands[3] = force_reg (mode, operands[3]);
9238 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9240 rtx tmp = gen_reg_rtx (mode);
9241 emit_move_insn (tmp, operands[3]);
9242 operands[3] = tmp;
9244 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9246 rtx tmp = gen_reg_rtx (mode);
9247 emit_move_insn (tmp, operands[2]);
9248 operands[2] = tmp;
9251 if (! register_operand (operands[2], VOIDmode)
9252 && (mode == QImode
9253 || ! register_operand (operands[3], VOIDmode)))
9254 operands[2] = force_reg (mode, operands[2]);
9256 if (mode == QImode
9257 && ! register_operand (operands[3], VOIDmode))
9258 operands[3] = force_reg (mode, operands[3]);
9260 emit_insn (compare_seq);
9261 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9262 gen_rtx_IF_THEN_ELSE (mode,
9263 compare_op, operands[2],
9264 operands[3])));
9265 if (bypass_test)
9266 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9267 gen_rtx_IF_THEN_ELSE (mode,
9268 bypass_test,
9269 copy_rtx (operands[3]),
9270 copy_rtx (operands[0]))));
9271 if (second_test)
9272 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9273 gen_rtx_IF_THEN_ELSE (mode,
9274 second_test,
9275 copy_rtx (operands[2]),
9276 copy_rtx (operands[0]))));
9278 return 1; /* DONE */
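/* A standalone sketch (not part of this file) of what the branchless
   sbb-based sequences in the function above compute: sbb materializes a
   0/-1 mask from the carry flag, and the mask then selects between the two
   constants with and/add.  The LTU form is assumed; names are illustrative.  */
static int
sketch_sbb_select (unsigned int a, unsigned int b, int ct, int cf)
{
  int mask = -(int) (a < b);		/* cmp a, b ; sbb mask, mask */
  return cf + (mask & (ct - cf));	/* the [notl]/andl/addl variant above */
}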
9282 ix86_expand_fp_movcc (rtx operands[])
9284 enum rtx_code code;
9285 rtx tmp;
9286 rtx compare_op, second_test, bypass_test;
9288 /* For SF/DFmode conditional moves based on comparisons
9289 in the same mode, we may want to use SSE min/max instructions. */
9290 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9291 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9292 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9293 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
9294 && (!TARGET_IEEE_FP
9295 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9296 /* We may be called from the post-reload splitter. */
9297 && (!REG_P (operands[0])
9298 || SSE_REG_P (operands[0])
9299 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9301 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9302 code = GET_CODE (operands[1]);
9304 /* See if we have a (cross) match between the comparison operands and
9305 the conditional move operands. */
9306 if (rtx_equal_p (operands[2], op1))
9308 rtx tmp = op0;
9309 op0 = op1;
9310 op1 = tmp;
9311 code = reverse_condition_maybe_unordered (code);
9313 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9315 /* Check for min operation. */
9316 if (code == LT || code == UNLE)
9318 if (code == UNLE)
9320 rtx tmp = op0;
9321 op0 = op1;
9322 op1 = tmp;
9324 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9325 if (memory_operand (op0, VOIDmode))
9326 op0 = force_reg (GET_MODE (operands[0]), op0);
9327 if (GET_MODE (operands[0]) == SFmode)
9328 emit_insn (gen_minsf3 (operands[0], op0, op1));
9329 else
9330 emit_insn (gen_mindf3 (operands[0], op0, op1));
9331 return 1;
9333 /* Check for max operation. */
9334 if (code == GT || code == UNGE)
9336 if (code == UNGE)
9338 rtx tmp = op0;
9339 op0 = op1;
9340 op1 = tmp;
9342 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9343 if (memory_operand (op0, VOIDmode))
9344 op0 = force_reg (GET_MODE (operands[0]), op0);
9345 if (GET_MODE (operands[0]) == SFmode)
9346 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9347 else
9348 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9349 return 1;
9352 /* Massage the condition to be an sse_comparison_operator.  When we are
9353 in non-IEEE mode, try to canonicalize the destination operand
9354 to be first in the comparison - this helps reload avoid extra
9355 moves. */
9356 if (!sse_comparison_operator (operands[1], VOIDmode)
9357 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9359 rtx tmp = ix86_compare_op0;
9360 ix86_compare_op0 = ix86_compare_op1;
9361 ix86_compare_op1 = tmp;
9362 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9363 VOIDmode, ix86_compare_op0,
9364 ix86_compare_op1);
9366 /* Similarly, try to arrange for the result to be the first operand of the
9367 conditional move.  We also don't support the NE comparison on SSE, so try to
9368 avoid it. */
9369 if ((rtx_equal_p (operands[0], operands[3])
9370 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9371 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9373 rtx tmp = operands[2];
9374 operands[2] = operands[3];
9375 operands[3] = tmp;
9376 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9377 (GET_CODE (operands[1])),
9378 VOIDmode, ix86_compare_op0,
9379 ix86_compare_op1);
9381 if (GET_MODE (operands[0]) == SFmode)
9382 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9383 operands[2], operands[3],
9384 ix86_compare_op0, ix86_compare_op1));
9385 else
9386 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9387 operands[2], operands[3],
9388 ix86_compare_op0, ix86_compare_op1));
9389 return 1;
9392 /* The floating point conditional move instructions don't directly
9393 support conditions resulting from a signed integer comparison. */
9395 code = GET_CODE (operands[1]);
9396 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9398 /* The floating point conditional move instructions don't directly
9399 support signed integer comparisons. */
9401 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9403 if (second_test != NULL || bypass_test != NULL)
9404 abort ();
9405 tmp = gen_reg_rtx (QImode);
9406 ix86_expand_setcc (code, tmp);
9407 code = NE;
9408 ix86_compare_op0 = tmp;
9409 ix86_compare_op1 = const0_rtx;
9410 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9412 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9414 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9415 emit_move_insn (tmp, operands[3]);
9416 operands[3] = tmp;
9418 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9420 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9421 emit_move_insn (tmp, operands[2]);
9422 operands[2] = tmp;
9425 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9426 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9427 compare_op,
9428 operands[2],
9429 operands[3])));
9430 if (bypass_test)
9431 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9432 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9433 bypass_test,
9434 operands[3],
9435 operands[0])));
9436 if (second_test)
9437 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9438 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9439 second_test,
9440 operands[2],
9441 operands[0])));
9443 return 1;
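/* A standalone sketch (not part of this file) of the min recognition in
   the function above: a conditional move of the form (a < b ? a : b) is a
   minimum, and like the SSE minss/minsd instructions it yields the second
   operand when the comparison is false or unordered.  The name is invented
   for illustration.  */
static double
sketch_sse_style_min (double a, double b)
{
  return a < b ? a : b;		/* a NaN in either operand selects b */
}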
9446 /* Expand a conditional increment or decrement using adc/sbb instructions.
9447 The default case using setcc followed by the conditional move can be
9448 done by generic code. */
9450 ix86_expand_int_addcc (rtx operands[])
9452 enum rtx_code code = GET_CODE (operands[1]);
9453 rtx compare_op;
9454 rtx val = const0_rtx;
9455 bool fpcmp = false;
9456 enum machine_mode mode = GET_MODE (operands[0]);
9458 if (operands[3] != const1_rtx
9459 && operands[3] != constm1_rtx)
9460 return 0;
9461 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9462 ix86_compare_op1, &compare_op))
9463 return 0;
9464 code = GET_CODE (compare_op);
9466 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9467 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9469 fpcmp = true;
9470 code = ix86_fp_compare_code_to_integer (code);
9473 if (code != LTU)
9475 val = constm1_rtx;
9476 if (fpcmp)
9477 PUT_CODE (compare_op,
9478 reverse_condition_maybe_unordered
9479 (GET_CODE (compare_op)));
9480 else
9481 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9483 PUT_MODE (compare_op, mode);
9485 /* Construct either adc or sbb insn. */
9486 if ((code == LTU) == (operands[3] == constm1_rtx))
9488 switch (GET_MODE (operands[0]))
9490 case QImode:
9491 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
9492 break;
9493 case HImode:
9494 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
9495 break;
9496 case SImode:
9497 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
9498 break;
9499 case DImode:
9500 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
9501 break;
9502 default:
9503 abort ();
9506 else
9508 switch (GET_MODE (operands[0]))
9510 case QImode:
9511 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
9512 break;
9513 case HImode:
9514 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
9515 break;
9516 case SImode:
9517 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
9518 break;
9519 case DImode:
9520 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
9521 break;
9522 default:
9523 abort ();
9526 return 1; /* DONE */
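/* A standalone sketch (not part of this file) of the conditional increment
   the expander above emits: the compare sets the carry flag, and adc/sbb
   folds the carry into the addition, so no branch or cmov is needed.
   Names are invented for illustration.  */
static unsigned int
sketch_conditional_increment (unsigned int x, unsigned int a, unsigned int b)
{
  /* Compare a with b, then add-with-carry 0 to x: adds 1 exactly when
     the unsigned comparison a < b sets the carry flag.  */
  return x + (a < b ? 1u : 0u);
}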
9530 /* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
9531 works for floating point parameters and non-offsettable memories.
9532 For pushes, it returns just stack offsets; the values will be saved
9533 in the right order.  At most three parts are generated. */
9535 static int
9536 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
9538 int size;
9540 if (!TARGET_64BIT)
9541 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
9542 else
9543 size = (GET_MODE_SIZE (mode) + 4) / 8;
9545 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9546 abort ();
9547 if (size < 2 || size > 3)
9548 abort ();
9550 /* Optimize constant pool references to immediates.  This is used by fp
9551 moves, which force all constants to memory to allow combining. */
9552 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
9554 rtx tmp = maybe_get_pool_constant (operand);
9555 if (tmp)
9556 operand = tmp;
9559 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9561 /* The only non-offsettable memories we handle are pushes. */
9562 if (! push_operand (operand, VOIDmode))
9563 abort ();
9565 operand = copy_rtx (operand);
9566 PUT_MODE (operand, Pmode);
9567 parts[0] = parts[1] = parts[2] = operand;
9569 else if (!TARGET_64BIT)
9571 if (mode == DImode)
9572 split_di (&operand, 1, &parts[0], &parts[1]);
9573 else
9575 if (REG_P (operand))
9577 if (!reload_completed)
9578 abort ();
9579 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9580 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9581 if (size == 3)
9582 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9584 else if (offsettable_memref_p (operand))
9586 operand = adjust_address (operand, SImode, 0);
9587 parts[0] = operand;
9588 parts[1] = adjust_address (operand, SImode, 4);
9589 if (size == 3)
9590 parts[2] = adjust_address (operand, SImode, 8);
9592 else if (GET_CODE (operand) == CONST_DOUBLE)
9594 REAL_VALUE_TYPE r;
9595 long l[4];
9597 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9598 switch (mode)
9600 case XFmode:
9601 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9602 parts[2] = gen_int_mode (l[2], SImode);
9603 break;
9604 case DFmode:
9605 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9606 break;
9607 default:
9608 abort ();
9610 parts[1] = gen_int_mode (l[1], SImode);
9611 parts[0] = gen_int_mode (l[0], SImode);
9613 else
9614 abort ();
9617 else
9619 if (mode == TImode)
9620 split_ti (&operand, 1, &parts[0], &parts[1]);
9621 if (mode == XFmode || mode == TFmode)
9623 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
9624 if (REG_P (operand))
9626 if (!reload_completed)
9627 abort ();
9628 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9629 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
9631 else if (offsettable_memref_p (operand))
9633 operand = adjust_address (operand, DImode, 0);
9634 parts[0] = operand;
9635 parts[1] = adjust_address (operand, upper_mode, 8);
9637 else if (GET_CODE (operand) == CONST_DOUBLE)
9639 REAL_VALUE_TYPE r;
9640 long l[3];
9642 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9643 real_to_target (l, &r, mode);
9644 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9645 if (HOST_BITS_PER_WIDE_INT >= 64)
9646 parts[0]
9647 = gen_int_mode
9648 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
9649 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
9650 DImode);
9651 else
9652 parts[0] = immed_double_const (l[0], l[1], DImode);
9653 if (upper_mode == SImode)
9654 parts[1] = gen_int_mode (l[2], SImode);
9655 else if (HOST_BITS_PER_WIDE_INT >= 64)
9656 parts[1]
9657 = gen_int_mode
9658 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
9659 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
9660 DImode);
9661 else
9662 parts[1] = immed_double_const (l[2], l[3], DImode);
9664 else
9665 abort ();
9669 return size;
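/* A standalone sketch (not part of this file) of the DImode case handled
   above on 32-bit targets: a 64-bit value is split into its low and high
   SImode words, low word first, matching split_di.  The name and the use of
   unsigned long long are illustrative only.  */
static void
sketch_split_di_words (unsigned long long v, unsigned int *lo, unsigned int *hi)
{
  *lo = (unsigned int) v;
  *hi = (unsigned int) (v >> 32);
}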
9672 /* Emit insns to perform a move or push of DI, DF, and XF values.
9673 Return false when normal moves are needed; true when all required
9674 insns have been emitted. Operands 2-4 contain the input values
9675 in the correct order; operands 5-7 contain the output values. */
9677 void
9678 ix86_split_long_move (rtx operands[])
9680 rtx part[2][3];
9681 int nparts;
9682 int push = 0;
9683 int collisions = 0;
9684 enum machine_mode mode = GET_MODE (operands[0]);
9686 /* The DFmode expanders may ask us to move a double.
9687 For a 64-bit target this is a single move.  By hiding that fact
9688 here we simplify the i386.md splitters. */
9689 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9691 /* Optimize constant pool references to immediates.  This is used by
9692 fp moves, which force all constants to memory to allow combining. */
9694 if (GET_CODE (operands[1]) == MEM
9695 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9696 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9697 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9698 if (push_operand (operands[0], VOIDmode))
9700 operands[0] = copy_rtx (operands[0]);
9701 PUT_MODE (operands[0], Pmode);
9703 else
9704 operands[0] = gen_lowpart (DImode, operands[0]);
9705 operands[1] = gen_lowpart (DImode, operands[1]);
9706 emit_move_insn (operands[0], operands[1]);
9707 return;
9710 /* The only non-offsettable memory we handle is push. */
9711 if (push_operand (operands[0], VOIDmode))
9712 push = 1;
9713 else if (GET_CODE (operands[0]) == MEM
9714 && ! offsettable_memref_p (operands[0]))
9715 abort ();
9717 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9718 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
9720 /* When emitting a push, take care of source operands on the stack. */
9721 if (push && GET_CODE (operands[1]) == MEM
9722 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9724 if (nparts == 3)
9725 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9726 XEXP (part[1][2], 0));
9727 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9728 XEXP (part[1][1], 0));
9731 /* We need to do the copy in the right order in case an address register
9732 of the source overlaps the destination. */
9733 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9735 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9736 collisions++;
9737 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9738 collisions++;
9739 if (nparts == 3
9740 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9741 collisions++;
9743 /* Collision in the middle part can be handled by reordering. */
9744 if (collisions == 1 && nparts == 3
9745 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9747 rtx tmp;
9748 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9749 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9752 /* If there are more collisions, we can't handle them by reordering.
9753 Do an lea to the last part and use only one colliding move. */
9754 else if (collisions > 1)
9756 rtx base;
9758 collisions = 1;
9760 base = part[0][nparts - 1];
9762 /* Handle the case when the last part isn't valid for lea.
9763 Happens in 64-bit mode storing the 12-byte XFmode. */
9764 if (GET_MODE (base) != Pmode)
9765 base = gen_rtx_REG (Pmode, REGNO (base));
9767 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
9768 part[1][0] = replace_equiv_address (part[1][0], base);
9769 part[1][1] = replace_equiv_address (part[1][1],
9770 plus_constant (base, UNITS_PER_WORD));
9771 if (nparts == 3)
9772 part[1][2] = replace_equiv_address (part[1][2],
9773 plus_constant (base, 8));
9777 if (push)
9779 if (!TARGET_64BIT)
9781 if (nparts == 3)
9783 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
9784 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
9785 emit_move_insn (part[0][2], part[1][2]);
9788 else
9790 /* In 64-bit mode we don't have a 32-bit push available.  If this is a
9791 register, that is OK - we will just use the larger counterpart.  We also
9792 retype memory - this comes from an attempt to avoid a REX prefix on
9793 moving the second half of a TFmode value. */
9794 if (GET_MODE (part[1][1]) == SImode)
9796 if (GET_CODE (part[1][1]) == MEM)
9797 part[1][1] = adjust_address (part[1][1], DImode, 0);
9798 else if (REG_P (part[1][1]))
9799 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9800 else
9801 abort ();
9802 if (GET_MODE (part[1][0]) == SImode)
9803 part[1][0] = part[1][1];
9806 emit_move_insn (part[0][1], part[1][1]);
9807 emit_move_insn (part[0][0], part[1][0]);
9808 return;
9811 /* Choose the correct order so we do not overwrite the source before it is copied. */
9812 if ((REG_P (part[0][0])
9813 && REG_P (part[1][1])
9814 && (REGNO (part[0][0]) == REGNO (part[1][1])
9815 || (nparts == 3
9816 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9817 || (collisions > 0
9818 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
9820 if (nparts == 3)
9822 operands[2] = part[0][2];
9823 operands[3] = part[0][1];
9824 operands[4] = part[0][0];
9825 operands[5] = part[1][2];
9826 operands[6] = part[1][1];
9827 operands[7] = part[1][0];
9829 else
9831 operands[2] = part[0][1];
9832 operands[3] = part[0][0];
9833 operands[5] = part[1][1];
9834 operands[6] = part[1][0];
9837 else
9839 if (nparts == 3)
9841 operands[2] = part[0][0];
9842 operands[3] = part[0][1];
9843 operands[4] = part[0][2];
9844 operands[5] = part[1][0];
9845 operands[6] = part[1][1];
9846 operands[7] = part[1][2];
9848 else
9850 operands[2] = part[0][0];
9851 operands[3] = part[0][1];
9852 operands[5] = part[1][0];
9853 operands[6] = part[1][1];
9856 emit_move_insn (operands[2], operands[5]);
9857 emit_move_insn (operands[3], operands[6]);
9858 if (nparts == 3)
9859 emit_move_insn (operands[4], operands[7]);
9861 return;
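/* Split a DImode shift left of OPERANDS[1] by OPERANDS[2] into SImode
operations on the two halves, storing the result in OPERANDS[0].
SCRATCH, if available, supplies the zero needed by the cmove-based
adjustment when a non-constant count turns out to be 32 or more. */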
9864 void
9865 ix86_split_ashldi (rtx *operands, rtx scratch)
9867 rtx low[2], high[2];
9868 int count;
9870 if (GET_CODE (operands[2]) == CONST_INT)
9872 split_di (operands, 2, low, high);
9873 count = INTVAL (operands[2]) & 63;
9875 if (count >= 32)
9877 emit_move_insn (high[0], low[1]);
9878 emit_move_insn (low[0], const0_rtx);
9880 if (count > 32)
9881 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
9883 else
9885 if (!rtx_equal_p (operands[0], operands[1]))
9886 emit_move_insn (operands[0], operands[1]);
9887 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
9888 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
9891 else
9893 if (!rtx_equal_p (operands[0], operands[1]))
9894 emit_move_insn (operands[0], operands[1]);
9896 split_di (operands, 1, low, high);
9898 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
9899 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
9901 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9903 if (! no_new_pseudos)
9904 scratch = force_reg (SImode, const0_rtx);
9905 else
9906 emit_move_insn (scratch, const0_rtx);
9908 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
9909 scratch));
9911 else
9912 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
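/* Split a DImode arithmetic shift right of OPERANDS[1] by OPERANDS[2] into
SImode operations on the two halves, storing the result in OPERANDS[0].
SCRATCH, if available, holds the sign-extended high word used by the
cmove-based adjustment when a non-constant count turns out to be 32 or more. */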
9916 void
9917 ix86_split_ashrdi (rtx *operands, rtx scratch)
9919 rtx low[2], high[2];
9920 int count;
9922 if (GET_CODE (operands[2]) == CONST_INT)
9924 split_di (operands, 2, low, high);
9925 count = INTVAL (operands[2]) & 63;
9927 if (count == 63)
9929 emit_move_insn (high[0], high[1]);
9930 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
9931 emit_move_insn (low[0], high[0]);
9934 else if (count >= 32)
9936 emit_move_insn (low[0], high[1]);
9938 if (! reload_completed)
9939 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
9940 else
9942 emit_move_insn (high[0], low[0]);
9943 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
9946 if (count > 32)
9947 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
9949 else
9951 if (!rtx_equal_p (operands[0], operands[1]))
9952 emit_move_insn (operands[0], operands[1]);
9953 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9954 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
9957 else
9959 if (!rtx_equal_p (operands[0], operands[1]))
9960 emit_move_insn (operands[0], operands[1]);
9962 split_di (operands, 1, low, high);
9964 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9965 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
9967 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9969 if (! no_new_pseudos)
9970 scratch = gen_reg_rtx (SImode);
9971 emit_move_insn (scratch, high[0]);
9972 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
9973 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9974 scratch));
9976 else
9977 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
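/* Split a DImode logical shift right of OPERANDS[1] by OPERANDS[2] into
SImode operations on the two halves, storing the result in OPERANDS[0].
SCRATCH, if available, supplies the zero needed by the cmove-based
adjustment when a non-constant count turns out to be 32 or more. */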
9981 void
9982 ix86_split_lshrdi (rtx *operands, rtx scratch)
9984 rtx low[2], high[2];
9985 int count;
9987 if (GET_CODE (operands[2]) == CONST_INT)
9989 split_di (operands, 2, low, high);
9990 count = INTVAL (operands[2]) & 63;
9992 if (count >= 32)
9994 emit_move_insn (low[0], high[1]);
9995 emit_move_insn (high[0], const0_rtx);
9997 if (count > 32)
9998 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10000 else
10002 if (!rtx_equal_p (operands[0], operands[1]))
10003 emit_move_insn (operands[0], operands[1]);
10004 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10005 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10008 else
10010 if (!rtx_equal_p (operands[0], operands[1]))
10011 emit_move_insn (operands[0], operands[1]);
10013 split_di (operands, 1, low, high);
10015 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10016 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10018 /* Heh. By reversing the arguments, we can reuse this pattern. */
10019 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10021 if (! no_new_pseudos)
10022 scratch = force_reg (SImode, const0_rtx);
10023 else
10024 emit_move_insn (scratch, const0_rtx);
10026 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10027 scratch));
10029 else
10030 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10034 /* Helper function for the string operations below. Test VARIABLE for whether
10035 it is aligned to VALUE bytes. If it is, jump to the label. */
10036 static rtx
10037 ix86_expand_aligntest (rtx variable, int value)
10039 rtx label = gen_label_rtx ();
10040 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10041 if (GET_MODE (variable) == DImode)
10042 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10043 else
10044 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10045 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10046 1, label);
10047 return label;
10050 /* Adjust COUNTER by the VALUE. */
10051 static void
10052 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
10054 if (GET_MODE (countreg) == DImode)
10055 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10056 else
10057 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10060 /* Zero-extend the possibly SImode EXP to a Pmode register. */
10062 ix86_zero_extend_to_Pmode (rtx exp)
10064 rtx r;
10065 if (GET_MODE (exp) == VOIDmode)
10066 return force_reg (Pmode, exp);
10067 if (GET_MODE (exp) == Pmode)
10068 return copy_to_mode_reg (Pmode, exp);
10069 r = gen_reg_rtx (Pmode);
10070 emit_insn (gen_zero_extendsidi2 (r, exp));
10071 return r;
10074 /* Expand string move (memcpy) operation. Use i386 string operations when
10075 profitable. ix86_expand_clrmem contains similar code. */
10077 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
10079 rtx srcreg, destreg, countreg, srcexp, destexp;
10080 enum machine_mode counter_mode;
10081 HOST_WIDE_INT align = 0;
10082 unsigned HOST_WIDE_INT count = 0;
10084 if (GET_CODE (align_exp) == CONST_INT)
10085 align = INTVAL (align_exp);
10087 /* Can't use any of this if the user has appropriated esi or edi. */
10088 if (global_regs[4] || global_regs[5])
10089 return 0;
10091 /* This simple hack avoids all inlining code and simplifies code below. */
10092 if (!TARGET_ALIGN_STRINGOPS)
10093 align = 64;
10095 if (GET_CODE (count_exp) == CONST_INT)
10097 count = INTVAL (count_exp);
10098 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10099 return 0;
10102 /* Figure out the proper mode for the counter. For 32 bits it is always SImode,
10103 for 64 bits use SImode when possible, otherwise DImode.
10104 Set count to the number of bytes copied when known at compile time. */
10105 if (!TARGET_64BIT
10106 || GET_MODE (count_exp) == SImode
10107 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
10108 counter_mode = SImode;
10109 else
10110 counter_mode = DImode;
10112 if (counter_mode != SImode && counter_mode != DImode)
10113 abort ();
10115 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10116 if (destreg != XEXP (dst, 0))
10117 dst = replace_equiv_address_nv (dst, destreg);
10118 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10119 if (srcreg != XEXP (src, 0))
10120 src = replace_equiv_address_nv (src, srcreg);
10122 /* When optimizing for size, emit a simple rep ; movsb instruction for
10123 counts not divisible by 4. */
10125 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10127 emit_insn (gen_cld ());
10128 countreg = ix86_zero_extend_to_Pmode (count_exp);
10129 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
10130 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
10131 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
10132 destexp, srcexp));
10135 /* For constant aligned (or small unaligned) copies use rep movsl
10136 followed by code copying the rest. For PentiumPro ensure 8-byte
10137 alignment to allow rep movsl acceleration. */
10139 else if (count != 0
10140 && (align >= 8
10141 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10142 || optimize_size || count < (unsigned int) 64))
10144 unsigned HOST_WIDE_INT offset = 0;
10145 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10146 rtx srcmem, dstmem;
10148 emit_insn (gen_cld ());
10149 if (count & ~(size - 1))
10151 countreg = copy_to_mode_reg (counter_mode,
10152 GEN_INT ((count >> (size == 4 ? 2 : 3))
10153 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10154 countreg = ix86_zero_extend_to_Pmode (countreg);
10156 destexp = gen_rtx_ASHIFT (Pmode, countreg,
10157 GEN_INT (size == 4 ? 2 : 3));
10158 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
10159 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10161 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
10162 countreg, destexp, srcexp));
10163 offset = count & ~(size - 1);
10165 if (size == 8 && (count & 0x04))
10167 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
10168 offset);
10169 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
10170 offset);
10171 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10172 offset += 4;
10174 if (count & 0x02)
10176 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
10177 offset);
10178 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
10179 offset);
10180 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10181 offset += 2;
10183 if (count & 0x01)
10185 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
10186 offset);
10187 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
10188 offset);
10189 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10192 /* The generic code based on the glibc implementation:
10193 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10194 allowing accelerated copying there)
10195 - copy the data using rep movsl
10196 - copy the rest. */
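/* The code below follows those three stages: an alignment prologue built
with ix86_expand_aligntest, the bulk rep move, and word/byte fixups for
the remaining tail. */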
10197 else
10199 rtx countreg2;
10200 rtx label = NULL;
10201 rtx srcmem, dstmem;
10202 int desired_alignment = (TARGET_PENTIUMPRO
10203 && (count == 0 || count >= (unsigned int) 260)
10204 ? 8 : UNITS_PER_WORD);
10205 /* Get rid of MEM_OFFSETs, they won't be accurate. */
10206 dst = change_address (dst, BLKmode, destreg);
10207 src = change_address (src, BLKmode, srcreg);
10209 /* In case we don't know anything about the alignment, default to the
10210 library version, since it is usually equally fast and results in
10211 shorter code.
10213 Also emit the call when we know that the count is large and call overhead
10214 will not be important. */
10215 if (!TARGET_INLINE_ALL_STRINGOPS
10216 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10217 return 0;
10219 if (TARGET_SINGLE_STRINGOP)
10220 emit_insn (gen_cld ());
10222 countreg2 = gen_reg_rtx (Pmode);
10223 countreg = copy_to_mode_reg (counter_mode, count_exp);
10225 /* We don't use loops to align the destination or to copy parts smaller
10226 than 4 bytes, because gcc is able to optimize such code better (when
10227 the destination or the count really is aligned, gcc is often able to
10228 predict the branches) and it is also friendlier to hardware branch
10229 prediction.
10231 Using loops would be beneficial for the generic case, because we could
10232 handle small counts using the loops. Many CPUs (such as Athlon)
10233 have large REP prefix setup costs.
10235 This is quite costly. Maybe we can revisit this decision later or
10236 add some customizability to this code. */
10238 if (count == 0 && align < desired_alignment)
10240 label = gen_label_rtx ();
10241 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10242 LEU, 0, counter_mode, 1, label);
10244 if (align <= 1)
10246 rtx label = ix86_expand_aligntest (destreg, 1);
10247 srcmem = change_address (src, QImode, srcreg);
10248 dstmem = change_address (dst, QImode, destreg);
10249 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10250 ix86_adjust_counter (countreg, 1);
10251 emit_label (label);
10252 LABEL_NUSES (label) = 1;
10254 if (align <= 2)
10256 rtx label = ix86_expand_aligntest (destreg, 2);
10257 srcmem = change_address (src, HImode, srcreg);
10258 dstmem = change_address (dst, HImode, destreg);
10259 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10260 ix86_adjust_counter (countreg, 2);
10261 emit_label (label);
10262 LABEL_NUSES (label) = 1;
10264 if (align <= 4 && desired_alignment > 4)
10266 rtx label = ix86_expand_aligntest (destreg, 4);
10267 srcmem = change_address (src, SImode, srcreg);
10268 dstmem = change_address (dst, SImode, destreg);
10269 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10270 ix86_adjust_counter (countreg, 4);
10271 emit_label (label);
10272 LABEL_NUSES (label) = 1;
10275 if (label && desired_alignment > 4 && !TARGET_64BIT)
10277 emit_label (label);
10278 LABEL_NUSES (label) = 1;
10279 label = NULL_RTX;
10281 if (!TARGET_SINGLE_STRINGOP)
10282 emit_insn (gen_cld ());
10283 if (TARGET_64BIT)
10285 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10286 GEN_INT (3)));
10287 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
10289 else
10291 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
10292 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
10294 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
10295 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10296 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
10297 countreg2, destexp, srcexp));
10299 if (label)
10301 emit_label (label);
10302 LABEL_NUSES (label) = 1;
10304 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10306 srcmem = change_address (src, SImode, srcreg);
10307 dstmem = change_address (dst, SImode, destreg);
10308 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10310 if ((align <= 4 || count == 0) && TARGET_64BIT)
10312 rtx label = ix86_expand_aligntest (countreg, 4);
10313 srcmem = change_address (src, SImode, srcreg);
10314 dstmem = change_address (dst, SImode, destreg);
10315 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10316 emit_label (label);
10317 LABEL_NUSES (label) = 1;
10319 if (align > 2 && count != 0 && (count & 2))
10321 srcmem = change_address (src, HImode, srcreg);
10322 dstmem = change_address (dst, HImode, destreg);
10323 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10325 if (align <= 2 || count == 0)
10327 rtx label = ix86_expand_aligntest (countreg, 2);
10328 srcmem = change_address (src, HImode, srcreg);
10329 dstmem = change_address (dst, HImode, destreg);
10330 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10331 emit_label (label);
10332 LABEL_NUSES (label) = 1;
10334 if (align > 1 && count != 0 && (count & 1))
10336 srcmem = change_address (src, QImode, srcreg);
10337 dstmem = change_address (dst, QImode, destreg);
10338 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10340 if (align <= 1 || count == 0)
10342 rtx label = ix86_expand_aligntest (countreg, 1);
10343 srcmem = change_address (src, QImode, srcreg);
10344 dstmem = change_address (dst, QImode, destreg);
10345 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10346 emit_label (label);
10347 LABEL_NUSES (label) = 1;
10351 return 1;
10354 /* Expand string clear operation (bzero). Use i386 string operations when
10355 profitable. ix86_expand_movmem contains similar code. */
10357 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
10359 rtx destreg, zeroreg, countreg, destexp;
10360 enum machine_mode counter_mode;
10361 HOST_WIDE_INT align = 0;
10362 unsigned HOST_WIDE_INT count = 0;
10364 if (GET_CODE (align_exp) == CONST_INT)
10365 align = INTVAL (align_exp);
10367 /* Can't use any of this if the user has appropriated esi. */
10368 if (global_regs[4])
10369 return 0;
10371 /* This simple hack avoids all inlining code and simplifies code below. */
10372 if (!TARGET_ALIGN_STRINGOPS)
10373 align = 32;
10375 if (GET_CODE (count_exp) == CONST_INT)
10377 count = INTVAL (count_exp);
10378 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10379 return 0;
10381 /* Figure out the proper mode for the counter. For 32 bits it is always SImode,
10382 for 64 bits use SImode when possible, otherwise DImode.
10383 Set count to the number of bytes copied when known at compile time. */
10384 if (!TARGET_64BIT
10385 || GET_MODE (count_exp) == SImode
10386 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
10387 counter_mode = SImode;
10388 else
10389 counter_mode = DImode;
10391 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10392 if (destreg != XEXP (dst, 0))
10393 dst = replace_equiv_address_nv (dst, destreg);
10396 /* When optimizing for size, emit a simple rep ; stosb instruction for
10397 counts not divisible by 4. The movl $N, %ecx; rep; stosb
10398 sequence is 7 bytes long, so if optimizing for size and the count is
10399 small enough that some stosl, stosw and stosb instructions without
10400 rep are shorter, fall back into the next if. */
10402 if ((!optimize || optimize_size)
10403 && (count == 0
10404 || ((count & 0x03)
10405 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
10407 emit_insn (gen_cld ());
10409 countreg = ix86_zero_extend_to_Pmode (count_exp);
10410 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10411 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
10412 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
10414 else if (count != 0
10415 && (align >= 8
10416 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10417 || optimize_size || count < (unsigned int) 64))
10419 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10420 unsigned HOST_WIDE_INT offset = 0;
10422 emit_insn (gen_cld ());
10424 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10425 if (count & ~(size - 1))
10427 unsigned HOST_WIDE_INT repcount;
10428 unsigned int max_nonrep;
10430 repcount = count >> (size == 4 ? 2 : 3);
10431 if (!TARGET_64BIT)
10432 repcount &= 0x3fffffff;
10434 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
10435 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
10436 bytes. In both cases the latter seems to be faster for small
10437 values of N. */
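/* Hence the break-even points below: up to 7 unrolled stosl (7 bytes, the
same as the rep form) and up to 4 unrolled stosq (8 bytes, the same as
the rep form) never cost extra space. */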
10438 max_nonrep = size == 4 ? 7 : 4;
10439 if (!optimize_size)
10440 switch (ix86_tune)
10442 case PROCESSOR_PENTIUM4:
10443 case PROCESSOR_NOCONA:
10444 max_nonrep = 3;
10445 break;
10446 default:
10447 break;
10450 if (repcount <= max_nonrep)
10451 while (repcount-- > 0)
10453 rtx mem = adjust_automodify_address_nv (dst,
10454 GET_MODE (zeroreg),
10455 destreg, offset);
10456 emit_insn (gen_strset (destreg, mem, zeroreg));
10457 offset += size;
10459 else
10461 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
10462 countreg = ix86_zero_extend_to_Pmode (countreg);
10463 destexp = gen_rtx_ASHIFT (Pmode, countreg,
10464 GEN_INT (size == 4 ? 2 : 3));
10465 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10466 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
10467 destexp));
10468 offset = count & ~(size - 1);
10471 if (size == 8 && (count & 0x04))
10473 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
10474 offset);
10475 emit_insn (gen_strset (destreg, mem,
10476 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10477 offset += 4;
10479 if (count & 0x02)
10481 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
10482 offset);
10483 emit_insn (gen_strset (destreg, mem,
10484 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10485 offset += 2;
10487 if (count & 0x01)
10489 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
10490 offset);
10491 emit_insn (gen_strset (destreg, mem,
10492 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10495 else
10497 rtx countreg2;
10498 rtx label = NULL;
10499 /* Compute desired alignment of the string operation. */
10500 int desired_alignment = (TARGET_PENTIUMPRO
10501 && (count == 0 || count >= (unsigned int) 260)
10502 ? 8 : UNITS_PER_WORD);
10504 /* In case we don't know anything about the alignment, default to the
10505 library version, since it is usually equally fast and results in
10506 shorter code.
10508 Also emit the call when we know that the count is large and call overhead
10509 will not be important. */
10510 if (!TARGET_INLINE_ALL_STRINGOPS
10511 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10512 return 0;
10514 if (TARGET_SINGLE_STRINGOP)
10515 emit_insn (gen_cld ());
10517 countreg2 = gen_reg_rtx (Pmode);
10518 countreg = copy_to_mode_reg (counter_mode, count_exp);
10519 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10520 /* Get rid of MEM_OFFSET, it won't be accurate. */
10521 dst = change_address (dst, BLKmode, destreg);
10523 if (count == 0 && align < desired_alignment)
10525 label = gen_label_rtx ();
10526 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10527 LEU, 0, counter_mode, 1, label);
10529 if (align <= 1)
10531 rtx label = ix86_expand_aligntest (destreg, 1);
10532 emit_insn (gen_strset (destreg, dst,
10533 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10534 ix86_adjust_counter (countreg, 1);
10535 emit_label (label);
10536 LABEL_NUSES (label) = 1;
10538 if (align <= 2)
10540 rtx label = ix86_expand_aligntest (destreg, 2);
10541 emit_insn (gen_strset (destreg, dst,
10542 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10543 ix86_adjust_counter (countreg, 2);
10544 emit_label (label);
10545 LABEL_NUSES (label) = 1;
10547 if (align <= 4 && desired_alignment > 4)
10549 rtx label = ix86_expand_aligntest (destreg, 4);
10550 emit_insn (gen_strset (destreg, dst,
10551 (TARGET_64BIT
10552 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10553 : zeroreg)));
10554 ix86_adjust_counter (countreg, 4);
10555 emit_label (label);
10556 LABEL_NUSES (label) = 1;
10559 if (label && desired_alignment > 4 && !TARGET_64BIT)
10561 emit_label (label);
10562 LABEL_NUSES (label) = 1;
10563 label = NULL_RTX;
10566 if (!TARGET_SINGLE_STRINGOP)
10567 emit_insn (gen_cld ());
10568 if (TARGET_64BIT)
10570 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10571 GEN_INT (3)));
10572 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
10574 else
10576 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
10577 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
10579 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10580 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
10582 if (label)
10584 emit_label (label);
10585 LABEL_NUSES (label) = 1;
10588 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10589 emit_insn (gen_strset (destreg, dst,
10590 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10591 if (TARGET_64BIT && (align <= 4 || count == 0))
10593 rtx label = ix86_expand_aligntest (countreg, 4);
10594 emit_insn (gen_strset (destreg, dst,
10595 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10596 emit_label (label);
10597 LABEL_NUSES (label) = 1;
10599 if (align > 2 && count != 0 && (count & 2))
10600 emit_insn (gen_strset (destreg, dst,
10601 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10602 if (align <= 2 || count == 0)
10604 rtx label = ix86_expand_aligntest (countreg, 2);
10605 emit_insn (gen_strset (destreg, dst,
10606 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10607 emit_label (label);
10608 LABEL_NUSES (label) = 1;
10610 if (align > 1 && count != 0 && (count & 1))
10611 emit_insn (gen_strset (destreg, dst,
10612 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10613 if (align <= 1 || count == 0)
10615 rtx label = ix86_expand_aligntest (countreg, 1);
10616 emit_insn (gen_strset (destreg, dst,
10617 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10618 emit_label (label);
10619 LABEL_NUSES (label) = 1;
10622 return 1;
10625 /* Expand strlen. */
10627 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
10629 rtx addr, scratch1, scratch2, scratch3, scratch4;
10631 /* The generic case of the strlen expander is long. Avoid expanding it
10632 unless TARGET_INLINE_ALL_STRINGOPS. */
10634 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10635 && !TARGET_INLINE_ALL_STRINGOPS
10636 && !optimize_size
10637 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10638 return 0;
10640 addr = force_reg (Pmode, XEXP (src, 0));
10641 scratch1 = gen_reg_rtx (Pmode);
10643 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10644 && !optimize_size)
10646 /* Well, it seems that some optimizer does not combine a call like
10647 foo(strlen(bar), strlen(bar));
10648 when the move and the subtraction are done here. It does calculate
10649 the length just once when these instructions are done inside
10650 output_strlen_unroll(). But I think that since &bar[strlen(bar)] is
10651 often used and I use one fewer register for the lifetime of
10652 output_strlen_unroll(), this is better. */
10654 emit_move_insn (out, addr);
10656 ix86_expand_strlensi_unroll_1 (out, src, align);
10658 /* strlensi_unroll_1 returns the address of the zero at the end of
10659 the string, like memchr(), so compute the length by subtracting
10660 the start address. */
10661 if (TARGET_64BIT)
10662 emit_insn (gen_subdi3 (out, out, addr));
10663 else
10664 emit_insn (gen_subsi3 (out, out, addr));
10666 else
10668 rtx unspec;
10669 scratch2 = gen_reg_rtx (Pmode);
10670 scratch3 = gen_reg_rtx (Pmode);
10671 scratch4 = force_reg (Pmode, constm1_rtx);
10673 emit_move_insn (scratch3, addr);
10674 eoschar = force_reg (QImode, eoschar);
10676 emit_insn (gen_cld ());
10677 src = replace_equiv_address_nv (src, scratch3);
10679 /* If .md starts supporting :P, this can be done in .md. */
10680 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
10681 scratch4), UNSPEC_SCAS);
10682 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
10683 if (TARGET_64BIT)
10685 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10686 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10688 else
10690 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10691 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10694 return 1;
10697 /* Expand the appropriate insns for doing strlen if not just doing
10698 repnz; scasb
10700 out = result, initialized with the start address
10701 align_rtx = alignment of the address.
10702 scratch = scratch register, initialized with the start address when
10703 not aligned, otherwise undefined
10705 This is just the body. It needs the initializations mentioned above and
10706 some address computing at the end. These things are done in i386.md. */
10708 static void
10709 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
10711 int align;
10712 rtx tmp;
10713 rtx align_2_label = NULL_RTX;
10714 rtx align_3_label = NULL_RTX;
10715 rtx align_4_label = gen_label_rtx ();
10716 rtx end_0_label = gen_label_rtx ();
10717 rtx mem;
10718 rtx tmpreg = gen_reg_rtx (SImode);
10719 rtx scratch = gen_reg_rtx (SImode);
10720 rtx cmp;
10722 align = 0;
10723 if (GET_CODE (align_rtx) == CONST_INT)
10724 align = INTVAL (align_rtx);
10726 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10728 /* Is there a known alignment and is it less than 4? */
10729 if (align < 4)
10731 rtx scratch1 = gen_reg_rtx (Pmode);
10732 emit_move_insn (scratch1, out);
10733 /* Is there a known alignment and is it not 2? */
10734 if (align != 2)
10736 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10737 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10739 /* Leave just the 2 lower bits. */
10740 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
10741 NULL_RTX, 0, OPTAB_WIDEN);
10743 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10744 Pmode, 1, align_4_label);
10745 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
10746 Pmode, 1, align_2_label);
10747 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
10748 Pmode, 1, align_3_label);
10750 else
10752 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10753 check whether it is aligned to a 4-byte boundary. */
10755 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
10756 NULL_RTX, 0, OPTAB_WIDEN);
10758 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10759 Pmode, 1, align_4_label);
10762 mem = change_address (src, QImode, out);
10764 /* Now compare the bytes. */
10766 /* Compare the first n unaligned bytes on a byte-per-byte basis. */
10767 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
10768 QImode, 1, end_0_label);
10770 /* Increment the address. */
10771 if (TARGET_64BIT)
10772 emit_insn (gen_adddi3 (out, out, const1_rtx));
10773 else
10774 emit_insn (gen_addsi3 (out, out, const1_rtx));
10776 /* Not needed with an alignment of 2 */
10777 if (align != 2)
10779 emit_label (align_2_label);
10781 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10782 end_0_label);
10784 if (TARGET_64BIT)
10785 emit_insn (gen_adddi3 (out, out, const1_rtx));
10786 else
10787 emit_insn (gen_addsi3 (out, out, const1_rtx));
10789 emit_label (align_3_label);
10792 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10793 end_0_label);
10795 if (TARGET_64BIT)
10796 emit_insn (gen_adddi3 (out, out, const1_rtx));
10797 else
10798 emit_insn (gen_addsi3 (out, out, const1_rtx));
10801 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
10802 align this loop; it only makes programs bigger and does not help
10803 speed them up. */
10804 emit_label (align_4_label);
10806 mem = change_address (src, SImode, out);
10807 emit_move_insn (scratch, mem);
10808 if (TARGET_64BIT)
10809 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
10810 else
10811 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
10813 /* This formula yields a nonzero result iff one of the bytes is zero.
10814 This saves three branches inside the loop and many cycles. */
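/* The computation below is tmpreg = (scratch - 0x01010101) & ~scratch
& 0x80808080, the classic zero-byte test; the lowest flag bit that ends
up set marks the first zero byte, which is what the fixup code below
relies on. */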
10816 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
10817 emit_insn (gen_one_cmplsi2 (scratch, scratch));
10818 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
10819 emit_insn (gen_andsi3 (tmpreg, tmpreg,
10820 gen_int_mode (0x80808080, SImode)));
10821 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
10822 align_4_label);
10824 if (TARGET_CMOVE)
10826 rtx reg = gen_reg_rtx (SImode);
10827 rtx reg2 = gen_reg_rtx (Pmode);
10828 emit_move_insn (reg, tmpreg);
10829 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
10831 /* If zero is not in the first two bytes, move two bytes forward. */
10832 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10833 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10834 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10835 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
10836 gen_rtx_IF_THEN_ELSE (SImode, tmp,
10837 reg,
10838 tmpreg)));
10839 /* Emit lea manually to avoid clobbering of flags. */
10840 emit_insn (gen_rtx_SET (SImode, reg2,
10841 gen_rtx_PLUS (Pmode, out, const2_rtx)));
10843 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10844 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10845 emit_insn (gen_rtx_SET (VOIDmode, out,
10846 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
10847 reg2,
10848 out)));
10851 else
10853 rtx end_2_label = gen_label_rtx ();
10854 /* Is zero in the first two bytes? */
10856 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10857 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10858 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
10859 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10860 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
10861 pc_rtx);
10862 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10863 JUMP_LABEL (tmp) = end_2_label;
10865 /* Not in the first two. Move two bytes forward. */
10866 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
10867 if (TARGET_64BIT)
10868 emit_insn (gen_adddi3 (out, out, const2_rtx));
10869 else
10870 emit_insn (gen_addsi3 (out, out, const2_rtx));
10872 emit_label (end_2_label);
10876 /* Avoid branch in fixing the byte. */
10877 tmpreg = gen_lowpart (QImode, tmpreg);
10878 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
10879 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
10880 if (TARGET_64BIT)
10881 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
10882 else
10883 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
10885 emit_label (end_0_label);
10888 void
10889 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
10890 rtx callarg2 ATTRIBUTE_UNUSED,
10891 rtx pop, int sibcall)
10893 rtx use = NULL, call;
10895 if (pop == const0_rtx)
10896 pop = NULL;
10897 if (TARGET_64BIT && pop)
10898 abort ();
10900 #if TARGET_MACHO
10901 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
10902 fnaddr = machopic_indirect_call_target (fnaddr);
10903 #else
10904 /* Static functions and indirect calls don't need the pic register. */
10905 if (! TARGET_64BIT && flag_pic
10906 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
10907 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
10908 use_reg (&use, pic_offset_table_rtx);
10910 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
10912 rtx al = gen_rtx_REG (QImode, 0);
10913 emit_move_insn (al, callarg2);
10914 use_reg (&use, al);
10916 #endif /* TARGET_MACHO */
10918 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
10920 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
10921 fnaddr = gen_rtx_MEM (QImode, fnaddr);
10923 if (sibcall && TARGET_64BIT
10924 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
10926 rtx addr;
10927 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
10928 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
10929 emit_move_insn (fnaddr, addr);
10930 fnaddr = gen_rtx_MEM (QImode, fnaddr);
10933 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
10934 if (retval)
10935 call = gen_rtx_SET (VOIDmode, retval, call);
10936 if (pop)
10938 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
10939 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
10940 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
10943 call = emit_call_insn (call);
10944 if (use)
10945 CALL_INSN_FUNCTION_USAGE (call) = use;
10949 /* Clear stack slot assignments remembered from previous functions.
10950 This is called from INIT_EXPANDERS once before RTL is emitted for each
10951 function. */
10953 static struct machine_function *
10954 ix86_init_machine_status (void)
10956 struct machine_function *f;
10958 f = ggc_alloc_cleared (sizeof (struct machine_function));
10959 f->use_fast_prologue_epilogue_nregs = -1;
10961 return f;
10964 /* Return a MEM corresponding to a stack slot with mode MODE.
10965 Allocate a new slot if necessary.
10967 The RTL for a function can have several slots available: N is
10968 which slot to use. */
10971 assign_386_stack_local (enum machine_mode mode, int n)
10973 struct stack_local_entry *s;
10975 if (n < 0 || n >= MAX_386_STACK_LOCALS)
10976 abort ();
10978 for (s = ix86_stack_locals; s; s = s->next)
10979 if (s->mode == mode && s->n == n)
10980 return s->rtl;
10982 s = (struct stack_local_entry *)
10983 ggc_alloc (sizeof (struct stack_local_entry));
10984 s->n = n;
10985 s->mode = mode;
10986 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
10988 s->next = ix86_stack_locals;
10989 ix86_stack_locals = s;
10990 return s->rtl;
10993 /* Construct the SYMBOL_REF for the tls_get_addr function. */
10995 static GTY(()) rtx ix86_tls_symbol;
10997 ix86_tls_get_addr (void)
11000 if (!ix86_tls_symbol)
11002 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11003 (TARGET_GNU_TLS && !TARGET_64BIT)
11004 ? "___tls_get_addr"
11005 : "__tls_get_addr");
11008 return ix86_tls_symbol;
11011 /* Calculate the length of the memory address in the instruction
11012 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11015 memory_address_length (rtx addr)
11017 struct ix86_address parts;
11018 rtx base, index, disp;
11019 int len;
11021 if (GET_CODE (addr) == PRE_DEC
11022 || GET_CODE (addr) == POST_INC
11023 || GET_CODE (addr) == PRE_MODIFY
11024 || GET_CODE (addr) == POST_MODIFY)
11025 return 0;
11027 if (! ix86_decompose_address (addr, &parts))
11028 abort ();
11030 base = parts.base;
11031 index = parts.index;
11032 disp = parts.disp;
11033 len = 0;
11035 /* Rule of thumb:
11036 - esp as the base always wants an index,
11037 - ebp as the base always wants a displacement. */
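/* In the ModRM encoding, r/m = 4 (esp) means that a SIB byte follows, so
esp as a base always needs the SIB byte; mod = 0 with r/m = 5 (ebp) means
disp32 with no base, so ebp as a base always needs at least an 8-bit
displacement. */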
11039 /* Register Indirect. */
11040 if (base && !index && !disp)
11042 /* esp (for its index) and ebp (for its displacement) need
11043 the two-byte modrm form. */
11044 if (addr == stack_pointer_rtx
11045 || addr == arg_pointer_rtx
11046 || addr == frame_pointer_rtx
11047 || addr == hard_frame_pointer_rtx)
11048 len = 1;
11051 /* Direct Addressing. */
11052 else if (disp && !base && !index)
11053 len = 4;
11055 else
11057 /* Find the length of the displacement constant. */
11058 if (disp)
11060 if (GET_CODE (disp) == CONST_INT
11061 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11062 && base)
11063 len = 1;
11064 else
11065 len = 4;
11067 /* ebp always wants a displacement. */
11068 else if (base == hard_frame_pointer_rtx)
11069 len = 1;
11071 /* An index requires the two-byte modrm form.... */
11072 if (index
11073 /* ...like esp, which always wants an index. */
11074 || base == stack_pointer_rtx
11075 || base == arg_pointer_rtx
11076 || base == frame_pointer_rtx)
11077 len += 1;
11080 return len;
11083 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
11084 is set, expect that the insn has an 8-bit immediate alternative. */
11086 ix86_attr_length_immediate_default (rtx insn, int shortform)
11088 int len = 0;
11089 int i;
11090 extract_insn_cached (insn);
11091 for (i = recog_data.n_operands - 1; i >= 0; --i)
11092 if (CONSTANT_P (recog_data.operand[i]))
11094 if (len)
11095 abort ();
11096 if (shortform
11097 && GET_CODE (recog_data.operand[i]) == CONST_INT
11098 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11099 len = 1;
11100 else
11102 switch (get_attr_mode (insn))
11104 case MODE_QI:
11105 len+=1;
11106 break;
11107 case MODE_HI:
11108 len+=2;
11109 break;
11110 case MODE_SI:
11111 len+=4;
11112 break;
11113 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
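/* Only the move of a 64-bit immediate into a register (movabs) takes a
full imm64; every other DImode immediate is the sign-extended 32-bit
form counted here. */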
11114 case MODE_DI:
11115 len+=4;
11116 break;
11117 default:
11118 fatal_insn ("unknown insn mode", insn);
11122 return len;
11124 /* Compute default value for "length_address" attribute. */
11126 ix86_attr_length_address_default (rtx insn)
11128 int i;
11130 if (get_attr_type (insn) == TYPE_LEA)
11132 rtx set = PATTERN (insn);
11133 if (GET_CODE (set) == SET)
11135 else if (GET_CODE (set) == PARALLEL
11136 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11137 set = XVECEXP (set, 0, 0);
11138 else
11140 #ifdef ENABLE_CHECKING
11141 abort ();
11142 #endif
11143 return 0;
11146 return memory_address_length (SET_SRC (set));
11149 extract_insn_cached (insn);
11150 for (i = recog_data.n_operands - 1; i >= 0; --i)
11151 if (GET_CODE (recog_data.operand[i]) == MEM)
11153 return memory_address_length (XEXP (recog_data.operand[i], 0));
11154 break;
11156 return 0;
11159 /* Return the maximum number of instructions a cpu can issue. */
11161 static int
11162 ix86_issue_rate (void)
11164 switch (ix86_tune)
11166 case PROCESSOR_PENTIUM:
11167 case PROCESSOR_K6:
11168 return 2;
11170 case PROCESSOR_PENTIUMPRO:
11171 case PROCESSOR_PENTIUM4:
11172 case PROCESSOR_ATHLON:
11173 case PROCESSOR_K8:
11174 case PROCESSOR_NOCONA:
11175 return 3;
11177 default:
11178 return 1;
11182 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11183 by DEP_INSN and nothing else set by DEP_INSN. */
11185 static int
11186 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11188 rtx set, set2;
11190 /* Simplify the test for uninteresting insns. */
11191 if (insn_type != TYPE_SETCC
11192 && insn_type != TYPE_ICMOV
11193 && insn_type != TYPE_FCMOV
11194 && insn_type != TYPE_IBR)
11195 return 0;
11197 if ((set = single_set (dep_insn)) != 0)
11199 set = SET_DEST (set);
11200 set2 = NULL_RTX;
11202 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11203 && XVECLEN (PATTERN (dep_insn), 0) == 2
11204 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11205 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11207 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11208 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
11210 else
11211 return 0;
11213 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11214 return 0;
11216 /* This test is true if the dependent insn reads the flags but
11217 not any other potentially set register. */
11218 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11219 return 0;
11221 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11222 return 0;
11224 return 1;
11227 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11228 address with operands set by DEP_INSN. */
11230 static int
11231 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11233 rtx addr;
11235 if (insn_type == TYPE_LEA
11236 && TARGET_PENTIUM)
11238 addr = PATTERN (insn);
11239 if (GET_CODE (addr) == SET)
11241 else if (GET_CODE (addr) == PARALLEL
11242 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11243 addr = XVECEXP (addr, 0, 0);
11244 else
11245 abort ();
11246 addr = SET_SRC (addr);
11248 else
11250 int i;
11251 extract_insn_cached (insn);
11252 for (i = recog_data.n_operands - 1; i >= 0; --i)
11253 if (GET_CODE (recog_data.operand[i]) == MEM)
11255 addr = XEXP (recog_data.operand[i], 0);
11256 goto found;
11258 return 0;
11259 found:;
11262 return modified_in_p (addr, dep_insn);
11265 static int
11266 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
11268 enum attr_type insn_type, dep_insn_type;
11269 enum attr_memory memory;
11270 rtx set, set2;
11271 int dep_insn_code_number;
11273 /* Anti and output dependencies have zero cost on all CPUs. */
11274 if (REG_NOTE_KIND (link) != 0)
11275 return 0;
11277 dep_insn_code_number = recog_memoized (dep_insn);
11279 /* If we can't recognize the insns, we can't really do anything. */
11280 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11281 return cost;
11283 insn_type = get_attr_type (insn);
11284 dep_insn_type = get_attr_type (dep_insn);
11286 switch (ix86_tune)
11288 case PROCESSOR_PENTIUM:
11289 /* Address Generation Interlock adds a cycle of latency. */
11290 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11291 cost += 1;
11293 /* ??? Compares pair with jump/setcc. */
11294 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11295 cost = 0;
11297 /* Floating point stores require value to be ready one cycle earlier. */
11298 if (insn_type == TYPE_FMOV
11299 && get_attr_memory (insn) == MEMORY_STORE
11300 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11301 cost += 1;
11302 break;
11304 case PROCESSOR_PENTIUMPRO:
11305 memory = get_attr_memory (insn);
11307 /* INT->FP conversion is expensive. */
11308 if (get_attr_fp_int_src (dep_insn))
11309 cost += 5;
11311 /* There is one cycle extra latency between an FP op and a store. */
11312 if (insn_type == TYPE_FMOV
11313 && (set = single_set (dep_insn)) != NULL_RTX
11314 && (set2 = single_set (insn)) != NULL_RTX
11315 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11316 && GET_CODE (SET_DEST (set2)) == MEM)
11317 cost += 1;
11319 /* Account for the ability of the reorder buffer to hide the latency of a
11320 load by executing it in parallel with the previous instruction, in case the
11321 previous instruction is not needed to compute the address. */
11322 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11323 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11325 /* Claim that moves take one cycle, as the core can issue one load
11326 at a time and the next load can start a cycle later. */
11327 if (dep_insn_type == TYPE_IMOV
11328 || dep_insn_type == TYPE_FMOV)
11329 cost = 1;
11330 else if (cost > 1)
11331 cost--;
11333 break;
11335 case PROCESSOR_K6:
11336 memory = get_attr_memory (insn);
11338 /* The esp dependency is resolved before the instruction is really
11339 finished. */
11340 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11341 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11342 return 1;
11344 /* INT->FP conversion is expensive. */
11345 if (get_attr_fp_int_src (dep_insn))
11346 cost += 5;
11348 /* Account for the ability of the reorder buffer to hide the latency of a
11349 load by executing it in parallel with the previous instruction, in case the
11350 previous instruction is not needed to compute the address. */
11351 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11352 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11354 /* Claim that moves take one cycle, as the core can issue one load
11355 at a time and the next load can start a cycle later. */
11356 if (dep_insn_type == TYPE_IMOV
11357 || dep_insn_type == TYPE_FMOV)
11358 cost = 1;
11359 else if (cost > 2)
11360 cost -= 2;
11361 else
11362 cost = 1;
11364 break;
11366 case PROCESSOR_ATHLON:
11367 case PROCESSOR_K8:
11368 memory = get_attr_memory (insn);
11370 /* Account for the ability of the reorder buffer to hide the latency of a
11371 load by executing it in parallel with the previous instruction, in case the
11372 previous instruction is not needed to compute the address. */
11373 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11374 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11376 enum attr_unit unit = get_attr_unit (insn);
11377 int loadcost = 3;
11379 /* Because of the difference between the length of integer and
11380 floating unit pipeline preparation stages, the memory operands
11381 for floating point are cheaper.
11383 ??? For Athlon the difference is most probably 2. */
11384 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
11385 loadcost = 3;
11386 else
11387 loadcost = TARGET_ATHLON ? 2 : 0;
11389 if (cost >= loadcost)
11390 cost -= loadcost;
11391 else
11392 cost = 0;
11395 default:
11396 break;
11399 return cost;
11402 /* How many alternative schedules to try. This should be as wide as the
11403 scheduling freedom in the DFA, but no wider. Making this value too
11404 large results in extra work for the scheduler. */
11406 static int
11407 ia32_multipass_dfa_lookahead (void)
11409 if (ix86_tune == PROCESSOR_PENTIUM)
11410 return 2;
11412 if (ix86_tune == PROCESSOR_PENTIUMPRO
11413 || ix86_tune == PROCESSOR_K6)
11414 return 1;
11416 else
11417 return 0;
11421 /* Compute the alignment given to a constant that is being placed in memory.
11422 EXP is the constant and ALIGN is the alignment that the object would
11423 ordinarily have.
11424 The value of this function is used instead of that alignment to align
11425 the object. */
11428 ix86_constant_alignment (tree exp, int align)
11430 if (TREE_CODE (exp) == REAL_CST)
11432 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11433 return 64;
11434 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11435 return 128;
11437 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
11438 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
11439 return BITS_PER_WORD;
11441 return align;
11444 /* Compute the alignment for a static variable.
11445 TYPE is the data type, and ALIGN is the alignment that
11446 the object would ordinarily have. The value of this function is used
11447 instead of that alignment to align the object. */
11450 ix86_data_alignment (tree type, int align)
11452 if (AGGREGATE_TYPE_P (type)
11453 && TYPE_SIZE (type)
11454 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11455 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11456 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11457 return 256;
11459 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
11460 to a 16-byte boundary. */
11461 if (TARGET_64BIT)
11463 if (AGGREGATE_TYPE_P (type)
11464 && TYPE_SIZE (type)
11465 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11466 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11467 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11468 return 128;
11471 if (TREE_CODE (type) == ARRAY_TYPE)
11473 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11474 return 64;
11475 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11476 return 128;
11478 else if (TREE_CODE (type) == COMPLEX_TYPE)
11481 if (TYPE_MODE (type) == DCmode && align < 64)
11482 return 64;
11483 if (TYPE_MODE (type) == XCmode && align < 128)
11484 return 128;
11486 else if ((TREE_CODE (type) == RECORD_TYPE
11487 || TREE_CODE (type) == UNION_TYPE
11488 || TREE_CODE (type) == QUAL_UNION_TYPE)
11489 && TYPE_FIELDS (type))
11491 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11492 return 64;
11493 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11494 return 128;
11496 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11497 || TREE_CODE (type) == INTEGER_TYPE)
11499 if (TYPE_MODE (type) == DFmode && align < 64)
11500 return 64;
11501 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11502 return 128;
11505 return align;
11508 /* Compute the alignment for a local variable.
11509 TYPE is the data type, and ALIGN is the alignment that
11510 the object would ordinarily have. The value of this macro is used
11511 instead of that alignment to align the object. */
11514 ix86_local_alignment (tree type, int align)
11516 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
11517 to a 16-byte boundary. */
11518 if (TARGET_64BIT)
11520 if (AGGREGATE_TYPE_P (type)
11521 && TYPE_SIZE (type)
11522 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11523 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11524 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11525 return 128;
11527 if (TREE_CODE (type) == ARRAY_TYPE)
11529 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11530 return 64;
11531 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11532 return 128;
11534 else if (TREE_CODE (type) == COMPLEX_TYPE)
11536 if (TYPE_MODE (type) == DCmode && align < 64)
11537 return 64;
11538 if (TYPE_MODE (type) == XCmode && align < 128)
11539 return 128;
11541 else if ((TREE_CODE (type) == RECORD_TYPE
11542 || TREE_CODE (type) == UNION_TYPE
11543 || TREE_CODE (type) == QUAL_UNION_TYPE)
11544 && TYPE_FIELDS (type))
11546 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11547 return 64;
11548 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11549 return 128;
11551 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11552 || TREE_CODE (type) == INTEGER_TYPE)
11555 if (TYPE_MODE (type) == DFmode && align < 64)
11556 return 64;
11557 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11558 return 128;
11560 return align;
11563 /* Emit RTL insns to initialize the variable parts of a trampoline.
11564 FNADDR is an RTX for the address of the function's pure code.
11565 CXT is an RTX for the static chain value for the function. */
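/* The 32-bit trampoline is "movl $CXT, %ecx; jmp FNADDR" (10 bytes). The
64-bit trampoline loads FNADDR into %r11 (with movl when it fits in a
zero-extended 32-bit immediate, with movabs otherwise), loads CXT into
%r10 with movabs, and ends with "jmp *%r11". */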
11566 void
11567 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
11569 if (!TARGET_64BIT)
11571 /* Compute offset from the end of the jmp to the target function. */
11572 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11573 plus_constant (tramp, 10),
11574 NULL_RTX, 1, OPTAB_DIRECT);
11575 emit_move_insn (gen_rtx_MEM (QImode, tramp),
11576 gen_int_mode (0xb9, QImode));
11577 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
11578 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
11579 gen_int_mode (0xe9, QImode));
11580 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
11582 else
11584 int offset = 0;
11585 /* Try to load the address using the shorter movl instead of movabs.
11586 We may want to support movq for kernel mode, but the kernel does not use
11587 trampolines at the moment. */
11588 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
11590 fnaddr = copy_to_mode_reg (DImode, fnaddr);
11591 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11592 gen_int_mode (0xbb41, HImode));
11593 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11594 gen_lowpart (SImode, fnaddr));
11595 offset += 6;
11597 else
11599 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11600 gen_int_mode (0xbb49, HImode));
11601 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11602 fnaddr);
11603 offset += 10;
11605 /* Load static chain using movabs to r10. */
11606 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11607 gen_int_mode (0xba49, HImode));
11608 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11609 cxt);
11610 offset += 10;
11611 /* Jump to r11. */
11612 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11613 gen_int_mode (0xff49, HImode));
11614 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
11615 gen_int_mode (0xe3, QImode));
11616 offset += 3;
11617 if (offset > TRAMPOLINE_SIZE)
11618 abort ();
11621 #ifdef ENABLE_EXECUTE_STACK
11622 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
11623 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
11624 #endif
11627 #define def_builtin(MASK, NAME, TYPE, CODE) \
11628 do { \
11629 if ((MASK) & target_flags \
11630 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
11631 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
11632 NULL, NULL_TREE); \
11633 } while (0)
11635 struct builtin_description
11637 const unsigned int mask;
11638 const enum insn_code icode;
11639 const char *const name;
11640 const enum ix86_builtins code;
11641 const enum rtx_code comparison;
11642 const unsigned int flag;
11645 static const struct builtin_description bdesc_comi[] =
11647 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
11648 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
11649 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
11650 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
11651 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
11652 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
11653 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
11654 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
11655 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
11656 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
11657 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
11658 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
11659 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
11660 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
11661 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
11662 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
11663 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
11664 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
11665 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
11666 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
11667 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
11668 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
11669 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
11670 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
11673 static const struct builtin_description bdesc_2arg[] =
11675 /* SSE */
11676 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11677 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11678 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11679 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11680 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11681 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11682 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11683 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11685 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11686 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11687 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11688 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11689 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11690 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11691 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11692 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11693 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11694 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11695 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11696 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11697 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11698 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11699 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
11700 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11701 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11702 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11703 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
11704 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11706 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11707 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11708 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11709 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11711 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
11712 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
11713 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
11714 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
11716 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11717 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11718 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11719 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11720 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
11722 /* MMX */
11723 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11724 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11725 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11726 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
11727 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11728 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11729 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11730 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
11732 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11733 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11734 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11735 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11736 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11737 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11738 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11739 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11741 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11742 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
11743 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
11745 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11746 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11747 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11748 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11750 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11751 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
11753 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11754 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11755 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11756 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11757 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11758 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
11760 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
11761 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
11762 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
11763 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
11765 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
11766 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
11767 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
11768 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
11769 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
11770 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
11772 /* Special. */
11773 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
11774 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
11775 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
11777 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
11778 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
11779 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
11781 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
11782 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
11783 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
11784 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
11785 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
11786 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
11788 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
11789 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
11790 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
11791 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
11792 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
11793 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
11795 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
11796 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
11797 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
11798 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
11800 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
11801 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
11803 /* SSE2 */
11804 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
11805 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
11806 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
11807 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
11808 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
11809 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
11810 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
11811 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
11813 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
11814 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
11815 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
11816 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
11817 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
11818 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
11819 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
11820 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
11821 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
11822 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
11823 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
11824 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
11825 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
11826 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
11827 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
11828 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
11829 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
11830 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
11831 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
11832 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
11834 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
11835 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
11836 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
11837 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
11839 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
11840 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
11841 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
11842 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
11844 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
11845 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
11846 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
11848 /* SSE2 MMX */
11849 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
11850 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
11851 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
11852 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
11853 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
11854 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
11855 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
11856 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
11858 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
11859 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
11860 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
11861 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
11862 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
11863 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
11864 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
11865 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
11867 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
11868 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
11870 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
11871 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
11872 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
11873 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
11875 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
11876 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
11878 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
11879 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
11880 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
11881 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
11882 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
11883 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
11885 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
11886 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
11887 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
11888 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
11890 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
11891 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
11892 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
11893 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
11894 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
11895 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
11896 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
11897 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
11899 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
11900 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
11901 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
11903 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
11904 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
11906 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
11907 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
11909 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
11910 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
11911 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
11912 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
11913 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
11914 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
11916 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
11917 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
11918 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
11919 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
11920 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
11921 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
11923 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
11924 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
11925 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
11926 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
11928 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
11930 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
11931 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
11932 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
11933 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
11935 /* SSE3 MMX */
11936 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
11937 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
11938 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
11939 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
11940 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
11941 { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
11942 };
11944 static const struct builtin_description bdesc_1arg[] =
11945 {
11946 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
11947 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
11949 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
11950 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
11951 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
11953 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
11954 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
11955 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
11956 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
11957 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
11958 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
11960 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
11961 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
11962 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
11963 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
11965 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
11967 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
11968 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
11970 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
11971 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
11972 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
11973 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
11974 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
11976 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
11978 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
11979 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
11980 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
11981 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
11983 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
11984 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
11985 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
11987 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
11989 /* SSE3 */
11990 { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
11991 { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
11992 { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
11993 };
11995 void
11996 ix86_init_builtins (void)
11997 {
11998 if (TARGET_MMX)
11999 ix86_init_mmx_sse_builtins ();
12000 }
12002 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12003 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12004 builtins. */
12005 static void
12006 ix86_init_mmx_sse_builtins (void)
12007 {
12008 const struct builtin_description * d;
12009 size_t i;
12011 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
12012 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
12013 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
12014 tree V2DI_type_node = build_vector_type_for_mode (intDI_type_node, V2DImode);
12015 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
12016 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
12017 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
12018 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
12019 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
12020 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
12022 tree pchar_type_node = build_pointer_type (char_type_node);
12023 tree pcchar_type_node = build_pointer_type (
12024 build_type_variant (char_type_node, 1, 0));
12025 tree pfloat_type_node = build_pointer_type (float_type_node);
12026 tree pcfloat_type_node = build_pointer_type (
12027 build_type_variant (float_type_node, 1, 0));
12028 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12029 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12030 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12032 /* Comparisons. */
12033 tree int_ftype_v4sf_v4sf
12034 = build_function_type_list (integer_type_node,
12035 V4SF_type_node, V4SF_type_node, NULL_TREE);
12036 tree v4si_ftype_v4sf_v4sf
12037 = build_function_type_list (V4SI_type_node,
12038 V4SF_type_node, V4SF_type_node, NULL_TREE);
12039 /* MMX/SSE/integer conversions. */
12040 tree int_ftype_v4sf
12041 = build_function_type_list (integer_type_node,
12042 V4SF_type_node, NULL_TREE);
12043 tree int64_ftype_v4sf
12044 = build_function_type_list (long_long_integer_type_node,
12045 V4SF_type_node, NULL_TREE);
12046 tree int_ftype_v8qi
12047 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12048 tree v4sf_ftype_v4sf_int
12049 = build_function_type_list (V4SF_type_node,
12050 V4SF_type_node, integer_type_node, NULL_TREE);
12051 tree v4sf_ftype_v4sf_int64
12052 = build_function_type_list (V4SF_type_node,
12053 V4SF_type_node, long_long_integer_type_node,
12054 NULL_TREE);
12055 tree v4sf_ftype_v4sf_v2si
12056 = build_function_type_list (V4SF_type_node,
12057 V4SF_type_node, V2SI_type_node, NULL_TREE);
12058 tree int_ftype_v4hi_int
12059 = build_function_type_list (integer_type_node,
12060 V4HI_type_node, integer_type_node, NULL_TREE);
12061 tree v4hi_ftype_v4hi_int_int
12062 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12063 integer_type_node, integer_type_node,
12064 NULL_TREE);
12065 /* Miscellaneous. */
12066 tree v8qi_ftype_v4hi_v4hi
12067 = build_function_type_list (V8QI_type_node,
12068 V4HI_type_node, V4HI_type_node, NULL_TREE);
12069 tree v4hi_ftype_v2si_v2si
12070 = build_function_type_list (V4HI_type_node,
12071 V2SI_type_node, V2SI_type_node, NULL_TREE);
12072 tree v4sf_ftype_v4sf_v4sf_int
12073 = build_function_type_list (V4SF_type_node,
12074 V4SF_type_node, V4SF_type_node,
12075 integer_type_node, NULL_TREE);
12076 tree v2si_ftype_v4hi_v4hi
12077 = build_function_type_list (V2SI_type_node,
12078 V4HI_type_node, V4HI_type_node, NULL_TREE);
12079 tree v4hi_ftype_v4hi_int
12080 = build_function_type_list (V4HI_type_node,
12081 V4HI_type_node, integer_type_node, NULL_TREE);
12082 tree v4hi_ftype_v4hi_di
12083 = build_function_type_list (V4HI_type_node,
12084 V4HI_type_node, long_long_unsigned_type_node,
12085 NULL_TREE);
12086 tree v2si_ftype_v2si_di
12087 = build_function_type_list (V2SI_type_node,
12088 V2SI_type_node, long_long_unsigned_type_node,
12089 NULL_TREE);
12090 tree void_ftype_void
12091 = build_function_type (void_type_node, void_list_node);
12092 tree void_ftype_unsigned
12093 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12094 tree void_ftype_unsigned_unsigned
12095 = build_function_type_list (void_type_node, unsigned_type_node,
12096 unsigned_type_node, NULL_TREE);
12097 tree void_ftype_pcvoid_unsigned_unsigned
12098 = build_function_type_list (void_type_node, const_ptr_type_node,
12099 unsigned_type_node, unsigned_type_node,
12100 NULL_TREE);
12101 tree unsigned_ftype_void
12102 = build_function_type (unsigned_type_node, void_list_node);
12103 tree di_ftype_void
12104 = build_function_type (long_long_unsigned_type_node, void_list_node);
12105 tree v4sf_ftype_void
12106 = build_function_type (V4SF_type_node, void_list_node);
12107 tree v2si_ftype_v4sf
12108 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12109 /* Loads/stores. */
12110 tree void_ftype_v8qi_v8qi_pchar
12111 = build_function_type_list (void_type_node,
12112 V8QI_type_node, V8QI_type_node,
12113 pchar_type_node, NULL_TREE);
12114 tree v4sf_ftype_pcfloat
12115 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
12116 /* @@@ the type is bogus */
12117 tree v4sf_ftype_v4sf_pv2si
12118 = build_function_type_list (V4SF_type_node,
12119 V4SF_type_node, pv2si_type_node, NULL_TREE);
12120 tree void_ftype_pv2si_v4sf
12121 = build_function_type_list (void_type_node,
12122 pv2si_type_node, V4SF_type_node, NULL_TREE);
12123 tree void_ftype_pfloat_v4sf
12124 = build_function_type_list (void_type_node,
12125 pfloat_type_node, V4SF_type_node, NULL_TREE);
12126 tree void_ftype_pdi_di
12127 = build_function_type_list (void_type_node,
12128 pdi_type_node, long_long_unsigned_type_node,
12129 NULL_TREE);
12130 tree void_ftype_pv2di_v2di
12131 = build_function_type_list (void_type_node,
12132 pv2di_type_node, V2DI_type_node, NULL_TREE);
12133 /* Normal vector unops. */
12134 tree v4sf_ftype_v4sf
12135 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12137 /* Normal vector binops. */
12138 tree v4sf_ftype_v4sf_v4sf
12139 = build_function_type_list (V4SF_type_node,
12140 V4SF_type_node, V4SF_type_node, NULL_TREE);
12141 tree v8qi_ftype_v8qi_v8qi
12142 = build_function_type_list (V8QI_type_node,
12143 V8QI_type_node, V8QI_type_node, NULL_TREE);
12144 tree v4hi_ftype_v4hi_v4hi
12145 = build_function_type_list (V4HI_type_node,
12146 V4HI_type_node, V4HI_type_node, NULL_TREE);
12147 tree v2si_ftype_v2si_v2si
12148 = build_function_type_list (V2SI_type_node,
12149 V2SI_type_node, V2SI_type_node, NULL_TREE);
12150 tree di_ftype_di_di
12151 = build_function_type_list (long_long_unsigned_type_node,
12152 long_long_unsigned_type_node,
12153 long_long_unsigned_type_node, NULL_TREE);
12155 tree v2si_ftype_v2sf
12156 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12157 tree v2sf_ftype_v2si
12158 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12159 tree v2si_ftype_v2si
12160 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12161 tree v2sf_ftype_v2sf
12162 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12163 tree v2sf_ftype_v2sf_v2sf
12164 = build_function_type_list (V2SF_type_node,
12165 V2SF_type_node, V2SF_type_node, NULL_TREE);
12166 tree v2si_ftype_v2sf_v2sf
12167 = build_function_type_list (V2SI_type_node,
12168 V2SF_type_node, V2SF_type_node, NULL_TREE);
12169 tree pint_type_node = build_pointer_type (integer_type_node);
12170 tree pcint_type_node = build_pointer_type (
12171 build_type_variant (integer_type_node, 1, 0));
12172 tree pdouble_type_node = build_pointer_type (double_type_node);
12173 tree pcdouble_type_node = build_pointer_type (
12174 build_type_variant (double_type_node, 1, 0));
12175 tree int_ftype_v2df_v2df
12176 = build_function_type_list (integer_type_node,
12177 V2DF_type_node, V2DF_type_node, NULL_TREE);
12179 tree ti_ftype_void
12180 = build_function_type (intTI_type_node, void_list_node);
12181 tree v2di_ftype_void
12182 = build_function_type (V2DI_type_node, void_list_node);
12183 tree ti_ftype_ti_ti
12184 = build_function_type_list (intTI_type_node,
12185 intTI_type_node, intTI_type_node, NULL_TREE);
12186 tree void_ftype_pcvoid
12187 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
12188 tree v2di_ftype_di
12189 = build_function_type_list (V2DI_type_node,
12190 long_long_unsigned_type_node, NULL_TREE);
12191 tree di_ftype_v2di
12192 = build_function_type_list (long_long_unsigned_type_node,
12193 V2DI_type_node, NULL_TREE);
12194 tree v4sf_ftype_v4si
12195 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12196 tree v4si_ftype_v4sf
12197 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12198 tree v2df_ftype_v4si
12199 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12200 tree v4si_ftype_v2df
12201 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12202 tree v2si_ftype_v2df
12203 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12204 tree v4sf_ftype_v2df
12205 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12206 tree v2df_ftype_v2si
12207 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12208 tree v2df_ftype_v4sf
12209 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12210 tree int_ftype_v2df
12211 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12212 tree int64_ftype_v2df
12213 = build_function_type_list (long_long_integer_type_node,
12214 V2DF_type_node, NULL_TREE);
12215 tree v2df_ftype_v2df_int
12216 = build_function_type_list (V2DF_type_node,
12217 V2DF_type_node, integer_type_node, NULL_TREE);
12218 tree v2df_ftype_v2df_int64
12219 = build_function_type_list (V2DF_type_node,
12220 V2DF_type_node, long_long_integer_type_node,
12221 NULL_TREE);
12222 tree v4sf_ftype_v4sf_v2df
12223 = build_function_type_list (V4SF_type_node,
12224 V4SF_type_node, V2DF_type_node, NULL_TREE);
12225 tree v2df_ftype_v2df_v4sf
12226 = build_function_type_list (V2DF_type_node,
12227 V2DF_type_node, V4SF_type_node, NULL_TREE);
12228 tree v2df_ftype_v2df_v2df_int
12229 = build_function_type_list (V2DF_type_node,
12230 V2DF_type_node, V2DF_type_node,
12231 integer_type_node,
12232 NULL_TREE);
12233 tree v2df_ftype_v2df_pv2si
12234 = build_function_type_list (V2DF_type_node,
12235 V2DF_type_node, pv2si_type_node, NULL_TREE);
12236 tree void_ftype_pv2si_v2df
12237 = build_function_type_list (void_type_node,
12238 pv2si_type_node, V2DF_type_node, NULL_TREE);
12239 tree void_ftype_pdouble_v2df
12240 = build_function_type_list (void_type_node,
12241 pdouble_type_node, V2DF_type_node, NULL_TREE);
12242 tree void_ftype_pint_int
12243 = build_function_type_list (void_type_node,
12244 pint_type_node, integer_type_node, NULL_TREE);
12245 tree void_ftype_v16qi_v16qi_pchar
12246 = build_function_type_list (void_type_node,
12247 V16QI_type_node, V16QI_type_node,
12248 pchar_type_node, NULL_TREE);
12249 tree v2df_ftype_pcdouble
12250 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
12251 tree v2df_ftype_v2df_v2df
12252 = build_function_type_list (V2DF_type_node,
12253 V2DF_type_node, V2DF_type_node, NULL_TREE);
12254 tree v16qi_ftype_v16qi_v16qi
12255 = build_function_type_list (V16QI_type_node,
12256 V16QI_type_node, V16QI_type_node, NULL_TREE);
12257 tree v8hi_ftype_v8hi_v8hi
12258 = build_function_type_list (V8HI_type_node,
12259 V8HI_type_node, V8HI_type_node, NULL_TREE);
12260 tree v4si_ftype_v4si_v4si
12261 = build_function_type_list (V4SI_type_node,
12262 V4SI_type_node, V4SI_type_node, NULL_TREE);
12263 tree v2di_ftype_v2di_v2di
12264 = build_function_type_list (V2DI_type_node,
12265 V2DI_type_node, V2DI_type_node, NULL_TREE);
12266 tree v2di_ftype_v2df_v2df
12267 = build_function_type_list (V2DI_type_node,
12268 V2DF_type_node, V2DF_type_node, NULL_TREE);
12269 tree v2df_ftype_v2df
12270 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12271 tree v2df_ftype_double
12272 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12273 tree v2df_ftype_double_double
12274 = build_function_type_list (V2DF_type_node,
12275 double_type_node, double_type_node, NULL_TREE);
12276 tree int_ftype_v8hi_int
12277 = build_function_type_list (integer_type_node,
12278 V8HI_type_node, integer_type_node, NULL_TREE);
12279 tree v8hi_ftype_v8hi_int_int
12280 = build_function_type_list (V8HI_type_node,
12281 V8HI_type_node, integer_type_node,
12282 integer_type_node, NULL_TREE);
12283 tree v2di_ftype_v2di_int
12284 = build_function_type_list (V2DI_type_node,
12285 V2DI_type_node, integer_type_node, NULL_TREE);
12286 tree v4si_ftype_v4si_int
12287 = build_function_type_list (V4SI_type_node,
12288 V4SI_type_node, integer_type_node, NULL_TREE);
12289 tree v8hi_ftype_v8hi_int
12290 = build_function_type_list (V8HI_type_node,
12291 V8HI_type_node, integer_type_node, NULL_TREE);
12292 tree v8hi_ftype_v8hi_v2di
12293 = build_function_type_list (V8HI_type_node,
12294 V8HI_type_node, V2DI_type_node, NULL_TREE);
12295 tree v4si_ftype_v4si_v2di
12296 = build_function_type_list (V4SI_type_node,
12297 V4SI_type_node, V2DI_type_node, NULL_TREE);
12298 tree v4si_ftype_v8hi_v8hi
12299 = build_function_type_list (V4SI_type_node,
12300 V8HI_type_node, V8HI_type_node, NULL_TREE);
12301 tree di_ftype_v8qi_v8qi
12302 = build_function_type_list (long_long_unsigned_type_node,
12303 V8QI_type_node, V8QI_type_node, NULL_TREE);
12304 tree di_ftype_v2si_v2si
12305 = build_function_type_list (long_long_unsigned_type_node,
12306 V2SI_type_node, V2SI_type_node, NULL_TREE);
12307 tree v2di_ftype_v16qi_v16qi
12308 = build_function_type_list (V2DI_type_node,
12309 V16QI_type_node, V16QI_type_node, NULL_TREE);
12310 tree v2di_ftype_v4si_v4si
12311 = build_function_type_list (V2DI_type_node,
12312 V4SI_type_node, V4SI_type_node, NULL_TREE);
12313 tree int_ftype_v16qi
12314 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12315 tree v16qi_ftype_pcchar
12316 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
12317 tree void_ftype_pchar_v16qi
12318 = build_function_type_list (void_type_node,
12319 pchar_type_node, V16QI_type_node, NULL_TREE);
12320 tree v4si_ftype_pcint
12321 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
12322 tree void_ftype_pcint_v4si
12323 = build_function_type_list (void_type_node,
12324 pcint_type_node, V4SI_type_node, NULL_TREE);
12325 tree v2di_ftype_v2di
12326 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
12328 tree float80_type;
12329 tree float128_type;
12331 /* The __float80 type. */
12332 if (TYPE_MODE (long_double_type_node) == XFmode)
12333 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
12334 "__float80");
12335 else
12336 {
12337 /* The __float80 type. */
12338 float80_type = make_node (REAL_TYPE);
12339 TYPE_PRECISION (float80_type) = 80;
12340 layout_type (float80_type);
12341 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
12342 }
12344 float128_type = make_node (REAL_TYPE);
12345 TYPE_PRECISION (float128_type) = 128;
12346 layout_type (float128_type);
12347 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
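/* Editorial note, not part of the original source: __float80 is the
   80-bit x87 extended type; when long double is already XFmode the
   existing node is simply re-registered under that name above, otherwise
   a separate 80-bit REAL_TYPE is built.  __float128 is always created
   here as a bare 128-bit REAL_TYPE.  */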
12349 /* Add all builtins that are more or less simple operations on two
12350 operands. */
12351 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12352 {
12353 /* Use one of the operands; the target can have a different mode for
12354 mask-generating compares. */
12355 enum machine_mode mode;
12356 tree type;
12358 if (d->name == 0)
12359 continue;
12360 mode = insn_data[d->icode].operand[1].mode;
12362 switch (mode)
12363 {
12364 case V16QImode:
12365 type = v16qi_ftype_v16qi_v16qi;
12366 break;
12367 case V8HImode:
12368 type = v8hi_ftype_v8hi_v8hi;
12369 break;
12370 case V4SImode:
12371 type = v4si_ftype_v4si_v4si;
12372 break;
12373 case V2DImode:
12374 type = v2di_ftype_v2di_v2di;
12375 break;
12376 case V2DFmode:
12377 type = v2df_ftype_v2df_v2df;
12378 break;
12379 case TImode:
12380 type = ti_ftype_ti_ti;
12381 break;
12382 case V4SFmode:
12383 type = v4sf_ftype_v4sf_v4sf;
12384 break;
12385 case V8QImode:
12386 type = v8qi_ftype_v8qi_v8qi;
12387 break;
12388 case V4HImode:
12389 type = v4hi_ftype_v4hi_v4hi;
12390 break;
12391 case V2SImode:
12392 type = v2si_ftype_v2si_v2si;
12393 break;
12394 case DImode:
12395 type = di_ftype_di_di;
12396 break;
12398 default:
12399 abort ();
12400 }
12402 /* Override for comparisons. */
12403 if (d->icode == CODE_FOR_maskcmpv4sf3
12404 || d->icode == CODE_FOR_maskncmpv4sf3
12405 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12406 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12407 type = v4si_ftype_v4sf_v4sf;
12409 if (d->icode == CODE_FOR_maskcmpv2df3
12410 || d->icode == CODE_FOR_maskncmpv2df3
12411 || d->icode == CODE_FOR_vmmaskcmpv2df3
12412 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12413 type = v2di_ftype_v2df_v2df;
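/* Editorial note, not part of the original source: the mask-generating
   compare patterns return an all-ones/all-zeros integer vector, so the
   compare builtins are given v4si/v2di return types here instead of the
   float vector type derived from operand 1 above.  */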
12415 def_builtin (d->mask, d->name, type, d->code);
12416 }
12418 /* Add the remaining MMX insns with somewhat more complicated types. */
12419 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12420 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12421 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12422 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12423 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12425 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12426 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12427 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12429 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12430 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12432 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12433 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12435 /* comi/ucomi insns. */
12436 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12437 if (d->mask == MASK_SSE2)
12438 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12439 else
12440 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12442 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12443 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12444 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12446 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12447 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12448 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12449 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12450 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12451 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
12452 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12453 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
12454 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12455 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12456 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
12458 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12459 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12461 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12463 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
12464 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
12465 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
12466 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12467 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12468 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12470 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12471 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12472 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12473 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12475 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12476 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12477 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12478 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12480 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12482 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12484 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12485 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12486 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12487 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12488 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12489 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
12491 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
12493 /* Original 3DNow! */
12494 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12495 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12496 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12497 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12498 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12499 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12500 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12501 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12502 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12503 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12504 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12505 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12506 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12507 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12508 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12509 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12510 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12511 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12512 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12513 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
12515 /* 3DNow! extension as used in the Athlon CPU. */
12516 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12517 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12518 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12519 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12520 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12521 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12523 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12525 /* SSE2 */
12526 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12527 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12529 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12530 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12531 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
12533 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
12534 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
12535 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
12536 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12537 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12538 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12540 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12541 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12542 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12543 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12545 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
12546 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
12547 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12548 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
12549 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
12551 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12552 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12553 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
12554 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
12556 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12557 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12559 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12561 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
12562 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
12564 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12565 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12566 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12567 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12568 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12570 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12572 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12573 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12574 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
12575 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
12577 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12578 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12579 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12581 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12582 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
12583 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12584 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12586 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12587 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12588 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12589 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
12590 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
12591 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12592 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12594 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
12595 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12596 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
12598 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
12599 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
12600 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
12601 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
12602 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
12603 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
12604 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
12606 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
12608 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
12609 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
12611 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12612 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12613 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12615 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12616 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12617 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12619 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12620 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12622 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
12623 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12624 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12625 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12627 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
12628 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12629 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12630 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12632 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12633 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12635 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
12637 /* Prescott New Instructions. */
12638 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
12639 void_ftype_pcvoid_unsigned_unsigned,
12640 IX86_BUILTIN_MONITOR);
12641 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
12642 void_ftype_unsigned_unsigned,
12643 IX86_BUILTIN_MWAIT);
12644 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
12645 v4sf_ftype_v4sf,
12646 IX86_BUILTIN_MOVSHDUP);
12647 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
12648 v4sf_ftype_v4sf,
12649 IX86_BUILTIN_MOVSLDUP);
12650 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
12651 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
12652 def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
12653 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
12654 def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
12655 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
12658 /* Errors in the source file can cause expand_expr to return const0_rtx
12659 where we expect a vector. To avoid crashing, use one of the vector
12660 clear instructions. */
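/* Illustration: if expand_expr handed back const0_rtx where, say, a
   V4SFmode vector operand was expected, the code below substitutes a
   freshly cleared SSE (or MMX) register, so the pattern generators still
   see an operand of the expected mode.  */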
12661 static rtx
12662 safe_vector_operand (rtx x, enum machine_mode mode)
12664 if (x != const0_rtx)
12665 return x;
12666 x = gen_reg_rtx (mode);
12668 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
12669 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12670 : gen_rtx_SUBREG (DImode, x, 0)));
12671 else
12672 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12673 : gen_rtx_SUBREG (V4SFmode, x, 0),
12674 CONST0_RTX (V4SFmode)));
12675 return x;
12678 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
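/* Rough sketch of the flow for a two-operand builtin dispatched here via
   the bdesc_2arg table: the two arguments are taken off the TREE_LIST,
   expanded to rtx, copied into registers whenever the insn's operand
   predicates reject them, and the insn pattern named by ICODE is emitted
   with TARGET (or a fresh register) as the destination.  */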
12680 static rtx
12681 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
12683 rtx pat;
12684 tree arg0 = TREE_VALUE (arglist);
12685 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12686 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12687 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12688 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12689 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12690 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12692 if (VECTOR_MODE_P (mode0))
12693 op0 = safe_vector_operand (op0, mode0);
12694 if (VECTOR_MODE_P (mode1))
12695 op1 = safe_vector_operand (op1, mode1);
12697 if (! target
12698 || GET_MODE (target) != tmode
12699 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12700 target = gen_reg_rtx (tmode);
12702 if (GET_MODE (op1) == SImode && mode1 == TImode)
12704 rtx x = gen_reg_rtx (V4SImode);
12705 emit_insn (gen_sse2_loadd (x, op1));
12706 op1 = gen_lowpart (TImode, x);
12709 /* In case the insn wants input operands in modes different from
12710 the result, abort. */
12711 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
12712 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
12713 abort ();
12715 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12716 op0 = copy_to_mode_reg (mode0, op0);
12717 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12718 op1 = copy_to_mode_reg (mode1, op1);
12720 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12721 yet one of the two must not be a memory. This is normally enforced
12722 by expanders, but we didn't bother to create one here. */
12723 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12724 op0 = copy_to_mode_reg (mode0, op0);
12726 pat = GEN_FCN (icode) (target, op0, op1);
12727 if (! pat)
12728 return 0;
12729 emit_insn (pat);
12730 return target;
12733 /* Subroutine of ix86_expand_builtin to take care of stores. */
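/* Illustration: for __builtin_ia32_storeups (IX86_BUILTIN_STOREUPS below)
   the first argument is a pointer, which is forced into a register and
   wrapped in a MEM of the insn's operand 0 mode; the value operand is
   copied to a register and the store pattern is emitted.  The builtin's
   value is ignored, hence the constant 0 return.  */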
12735 static rtx
12736 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
12738 rtx pat;
12739 tree arg0 = TREE_VALUE (arglist);
12740 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12741 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12742 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12743 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12744 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
12746 if (VECTOR_MODE_P (mode1))
12747 op1 = safe_vector_operand (op1, mode1);
12749 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12750 op1 = copy_to_mode_reg (mode1, op1);
12752 pat = GEN_FCN (icode) (op0, op1);
12753 if (pat)
12754 emit_insn (pat);
12755 return 0;
12758 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
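/* Illustration: DO_LOAD distinguishes value builtins from load builtins.
   With DO_LOAD nonzero (e.g. the LOADAPS/LOADUPS cases below) the single
   argument is a pointer and is turned into a MEM operand; otherwise the
   argument is used directly, subject to the insn's operand predicate.  */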
12760 static rtx
12761 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
12762 rtx target, int do_load)
12764 rtx pat;
12765 tree arg0 = TREE_VALUE (arglist);
12766 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12767 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12768 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12770 if (! target
12771 || GET_MODE (target) != tmode
12772 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12773 target = gen_reg_rtx (tmode);
12774 if (do_load)
12775 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12776 else
12778 if (VECTOR_MODE_P (mode0))
12779 op0 = safe_vector_operand (op0, mode0);
12781 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12782 op0 = copy_to_mode_reg (mode0, op0);
12785 pat = GEN_FCN (icode) (target, op0);
12786 if (! pat)
12787 return 0;
12788 emit_insn (pat);
12789 return target;
12792 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12793 sqrtss, rsqrtss, rcpss. */
12795 static rtx
12796 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
12798 rtx pat;
12799 tree arg0 = TREE_VALUE (arglist);
12800 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12801 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12802 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12804 if (! target
12805 || GET_MODE (target) != tmode
12806 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12807 target = gen_reg_rtx (tmode);
12809 if (VECTOR_MODE_P (mode0))
12810 op0 = safe_vector_operand (op0, mode0);
12812 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12813 op0 = copy_to_mode_reg (mode0, op0);
12815 op1 = op0;
12816 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12817 op1 = copy_to_mode_reg (mode0, op1);
12819 pat = GEN_FCN (icode) (target, op0, op1);
12820 if (! pat)
12821 return 0;
12822 emit_insn (pat);
12823 return target;
12826 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
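/* Illustration: table entries whose FLAG field is set describe comparisons
   the SSE compare instructions only implement with the operands the other
   way around (e.g. GT via LT), so the operands are swapped below before
   the masked-compare pattern is emitted with the comparison code as its
   final operand.  */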
12828 static rtx
12829 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
12830 rtx target)
12832 rtx pat;
12833 tree arg0 = TREE_VALUE (arglist);
12834 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12835 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12836 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12837 rtx op2;
12838 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
12839 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
12840 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
12841 enum rtx_code comparison = d->comparison;
12843 if (VECTOR_MODE_P (mode0))
12844 op0 = safe_vector_operand (op0, mode0);
12845 if (VECTOR_MODE_P (mode1))
12846 op1 = safe_vector_operand (op1, mode1);
12848 /* Swap operands if we have a comparison that isn't available in
12849 hardware. */
12850 if (d->flag)
12852 rtx tmp = gen_reg_rtx (mode1);
12853 emit_move_insn (tmp, op1);
12854 op1 = op0;
12855 op0 = tmp;
12858 if (! target
12859 || GET_MODE (target) != tmode
12860 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
12861 target = gen_reg_rtx (tmode);
12863 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
12864 op0 = copy_to_mode_reg (mode0, op0);
12865 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
12866 op1 = copy_to_mode_reg (mode1, op1);
12868 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12869 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
12870 if (! pat)
12871 return 0;
12872 emit_insn (pat);
12873 return target;
12876 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
12878 static rtx
12879 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
12880 rtx target)
12882 rtx pat;
12883 tree arg0 = TREE_VALUE (arglist);
12884 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12885 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12886 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12887 rtx op2;
12888 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
12889 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
12890 enum rtx_code comparison = d->comparison;
12892 if (VECTOR_MODE_P (mode0))
12893 op0 = safe_vector_operand (op0, mode0);
12894 if (VECTOR_MODE_P (mode1))
12895 op1 = safe_vector_operand (op1, mode1);
12897 /* Swap operands if we have a comparison that isn't available in
12898 hardware. */
12899 if (d->flag)
12901 rtx tmp = op1;
12902 op1 = op0;
12903 op0 = tmp;
12906 target = gen_reg_rtx (SImode);
12907 emit_move_insn (target, const0_rtx);
12908 target = gen_rtx_SUBREG (QImode, target, 0);
12910 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
12911 op0 = copy_to_mode_reg (mode0, op0);
12912 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
12913 op1 = copy_to_mode_reg (mode1, op1);
12915 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12916 pat = GEN_FCN (d->icode) (op0, op1);
12917 if (! pat)
12918 return 0;
12919 emit_insn (pat);
12920 emit_insn (gen_rtx_SET (VOIDmode,
12921 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
12922 gen_rtx_fmt_ee (comparison, QImode,
12923 SET_DEST (pat),
12924 const0_rtx)));
12926 return SUBREG_REG (target);
12929 /* Expand an expression EXP that calls a built-in function,
12930 with result going to TARGET if that's convenient
12931 (and in mode MODE if that's convenient).
12932 SUBTARGET may be used as the target for computing one of EXP's operands.
12933 IGNORE is nonzero if the value is to be ignored. */
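/* Illustration of the dispatch below: a call such as
   __builtin_ia32_loadups (p) arrives with FCODE == IX86_BUILTIN_LOADUPS
   and is handled by an explicit case in the switch, while simple one- and
   two-operand builtins without special operand handling fall through to
   the bdesc_2arg / bdesc_1arg / bdesc_comi table scans at the end.  */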
12935 rtx
12936 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
12937 enum machine_mode mode ATTRIBUTE_UNUSED,
12938 int ignore ATTRIBUTE_UNUSED)
12940 const struct builtin_description *d;
12941 size_t i;
12942 enum insn_code icode;
12943 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
12944 tree arglist = TREE_OPERAND (exp, 1);
12945 tree arg0, arg1, arg2;
12946 rtx op0, op1, op2, pat;
12947 enum machine_mode tmode, mode0, mode1, mode2;
12948 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12950 switch (fcode)
12952 case IX86_BUILTIN_EMMS:
12953 emit_insn (gen_emms ());
12954 return 0;
12956 case IX86_BUILTIN_SFENCE:
12957 emit_insn (gen_sfence ());
12958 return 0;
12960 case IX86_BUILTIN_PEXTRW:
12961 case IX86_BUILTIN_PEXTRW128:
12962 icode = (fcode == IX86_BUILTIN_PEXTRW
12963 ? CODE_FOR_mmx_pextrw
12964 : CODE_FOR_sse2_pextrw);
12965 arg0 = TREE_VALUE (arglist);
12966 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12967 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12968 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12969 tmode = insn_data[icode].operand[0].mode;
12970 mode0 = insn_data[icode].operand[1].mode;
12971 mode1 = insn_data[icode].operand[2].mode;
12973 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12974 op0 = copy_to_mode_reg (mode0, op0);
12975 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12977 error ("selector must be an integer constant in the range 0..%i",
12978 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
12979 return gen_reg_rtx (tmode);
12981 if (target == 0
12982 || GET_MODE (target) != tmode
12983 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12984 target = gen_reg_rtx (tmode);
12985 pat = GEN_FCN (icode) (target, op0, op1);
12986 if (! pat)
12987 return 0;
12988 emit_insn (pat);
12989 return target;
12991 case IX86_BUILTIN_PINSRW:
12992 case IX86_BUILTIN_PINSRW128:
12993 icode = (fcode == IX86_BUILTIN_PINSRW
12994 ? CODE_FOR_mmx_pinsrw
12995 : CODE_FOR_sse2_pinsrw);
12996 arg0 = TREE_VALUE (arglist);
12997 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12998 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12999 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13000 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13001 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13002 tmode = insn_data[icode].operand[0].mode;
13003 mode0 = insn_data[icode].operand[1].mode;
13004 mode1 = insn_data[icode].operand[2].mode;
13005 mode2 = insn_data[icode].operand[3].mode;
13007 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13008 op0 = copy_to_mode_reg (mode0, op0);
13009 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13010 op1 = copy_to_mode_reg (mode1, op1);
13011 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13013 error ("selector must be an integer constant in the range 0..%i",
13014 fcode == IX86_BUILTIN_PINSRW ? 15:255);
13015 return const0_rtx;
13017 if (target == 0
13018 || GET_MODE (target) != tmode
13019 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13020 target = gen_reg_rtx (tmode);
13021 pat = GEN_FCN (icode) (target, op0, op1, op2);
13022 if (! pat)
13023 return 0;
13024 emit_insn (pat);
13025 return target;
13027 case IX86_BUILTIN_MASKMOVQ:
13028 case IX86_BUILTIN_MASKMOVDQU:
13029 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13030 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13031 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13032 : CODE_FOR_sse2_maskmovdqu));
13033 /* Note the arg order is different from the operand order. */
13034 arg1 = TREE_VALUE (arglist);
13035 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13036 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13037 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13038 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13039 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13040 mode0 = insn_data[icode].operand[0].mode;
13041 mode1 = insn_data[icode].operand[1].mode;
13042 mode2 = insn_data[icode].operand[2].mode;
13044 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13045 op0 = copy_to_mode_reg (mode0, op0);
13046 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13047 op1 = copy_to_mode_reg (mode1, op1);
13048 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13049 op2 = copy_to_mode_reg (mode2, op2);
13050 pat = GEN_FCN (icode) (op0, op1, op2);
13051 if (! pat)
13052 return 0;
13053 emit_insn (pat);
13054 return 0;
13056 case IX86_BUILTIN_SQRTSS:
13057 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13058 case IX86_BUILTIN_RSQRTSS:
13059 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13060 case IX86_BUILTIN_RCPSS:
13061 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13063 case IX86_BUILTIN_LOADAPS:
13064 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13066 case IX86_BUILTIN_LOADUPS:
13067 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13069 case IX86_BUILTIN_STOREAPS:
13070 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13072 case IX86_BUILTIN_STOREUPS:
13073 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13075 case IX86_BUILTIN_LOADSS:
13076 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13078 case IX86_BUILTIN_STORESS:
13079 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13081 case IX86_BUILTIN_LOADHPS:
13082 case IX86_BUILTIN_LOADLPS:
13083 case IX86_BUILTIN_LOADHPD:
13084 case IX86_BUILTIN_LOADLPD:
13085 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13086 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13087 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13088 : CODE_FOR_sse2_movsd);
13089 arg0 = TREE_VALUE (arglist);
13090 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13091 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13092 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13093 tmode = insn_data[icode].operand[0].mode;
13094 mode0 = insn_data[icode].operand[1].mode;
13095 mode1 = insn_data[icode].operand[2].mode;
13097 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13098 op0 = copy_to_mode_reg (mode0, op0);
13099 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13100 if (target == 0
13101 || GET_MODE (target) != tmode
13102 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13103 target = gen_reg_rtx (tmode);
13104 pat = GEN_FCN (icode) (target, op0, op1);
13105 if (! pat)
13106 return 0;
13107 emit_insn (pat);
13108 return target;
13110 case IX86_BUILTIN_STOREHPS:
13111 case IX86_BUILTIN_STORELPS:
13112 case IX86_BUILTIN_STOREHPD:
13113 case IX86_BUILTIN_STORELPD:
13114 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13115 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13116 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13117 : CODE_FOR_sse2_movsd);
13118 arg0 = TREE_VALUE (arglist);
13119 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13120 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13121 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13122 mode0 = insn_data[icode].operand[1].mode;
13123 mode1 = insn_data[icode].operand[2].mode;
13125 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13126 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13127 op1 = copy_to_mode_reg (mode1, op1);
13129 pat = GEN_FCN (icode) (op0, op0, op1);
13130 if (! pat)
13131 return 0;
13132 emit_insn (pat);
13133 return 0;
13135 case IX86_BUILTIN_MOVNTPS:
13136 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13137 case IX86_BUILTIN_MOVNTQ:
13138 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13140 case IX86_BUILTIN_LDMXCSR:
13141 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13142 target = assign_386_stack_local (SImode, 0);
13143 emit_move_insn (target, op0);
13144 emit_insn (gen_ldmxcsr (target));
13145 return 0;
13147 case IX86_BUILTIN_STMXCSR:
13148 target = assign_386_stack_local (SImode, 0);
13149 emit_insn (gen_stmxcsr (target));
13150 return copy_to_mode_reg (SImode, target);
13152 case IX86_BUILTIN_SHUFPS:
13153 case IX86_BUILTIN_SHUFPD:
13154 icode = (fcode == IX86_BUILTIN_SHUFPS
13155 ? CODE_FOR_sse_shufps
13156 : CODE_FOR_sse2_shufpd);
13157 arg0 = TREE_VALUE (arglist);
13158 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13159 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13160 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13161 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13162 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13163 tmode = insn_data[icode].operand[0].mode;
13164 mode0 = insn_data[icode].operand[1].mode;
13165 mode1 = insn_data[icode].operand[2].mode;
13166 mode2 = insn_data[icode].operand[3].mode;
13168 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13169 op0 = copy_to_mode_reg (mode0, op0);
13170 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13171 op1 = copy_to_mode_reg (mode1, op1);
13172 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13174 /* @@@ better error message */
13175 error ("mask must be an immediate");
13176 return gen_reg_rtx (tmode);
13178 if (target == 0
13179 || GET_MODE (target) != tmode
13180 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13181 target = gen_reg_rtx (tmode);
13182 pat = GEN_FCN (icode) (target, op0, op1, op2);
13183 if (! pat)
13184 return 0;
13185 emit_insn (pat);
13186 return target;
13188 case IX86_BUILTIN_PSHUFW:
13189 case IX86_BUILTIN_PSHUFD:
13190 case IX86_BUILTIN_PSHUFHW:
13191 case IX86_BUILTIN_PSHUFLW:
13192 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13193 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13194 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13195 : CODE_FOR_mmx_pshufw);
13196 arg0 = TREE_VALUE (arglist);
13197 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13198 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13199 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13200 tmode = insn_data[icode].operand[0].mode;
13201 mode1 = insn_data[icode].operand[1].mode;
13202 mode2 = insn_data[icode].operand[2].mode;
13204 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13205 op0 = copy_to_mode_reg (mode1, op0);
13206 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13208 /* @@@ better error message */
13209 error ("mask must be an immediate");
13210 return const0_rtx;
13212 if (target == 0
13213 || GET_MODE (target) != tmode
13214 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13215 target = gen_reg_rtx (tmode);
13216 pat = GEN_FCN (icode) (target, op0, op1);
13217 if (! pat)
13218 return 0;
13219 emit_insn (pat);
13220 return target;
13222 case IX86_BUILTIN_PSLLDQI128:
13223 case IX86_BUILTIN_PSRLDQI128:
13224 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
13225 : CODE_FOR_sse2_lshrti3);
13226 arg0 = TREE_VALUE (arglist);
13227 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13228 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13229 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13230 tmode = insn_data[icode].operand[0].mode;
13231 mode1 = insn_data[icode].operand[1].mode;
13232 mode2 = insn_data[icode].operand[2].mode;
13234 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13236 op0 = copy_to_reg (op0);
13237 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
13239 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13241 error ("shift must be an immediate");
13242 return const0_rtx;
13244 target = gen_reg_rtx (V2DImode);
13245 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
13246 if (! pat)
13247 return 0;
13248 emit_insn (pat);
13249 return target;
13251 case IX86_BUILTIN_FEMMS:
13252 emit_insn (gen_femms ());
13253 return NULL_RTX;
13255 case IX86_BUILTIN_PAVGUSB:
13256 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13258 case IX86_BUILTIN_PF2ID:
13259 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13261 case IX86_BUILTIN_PFACC:
13262 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13264 case IX86_BUILTIN_PFADD:
13265 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13267 case IX86_BUILTIN_PFCMPEQ:
13268 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13270 case IX86_BUILTIN_PFCMPGE:
13271 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13273 case IX86_BUILTIN_PFCMPGT:
13274 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13276 case IX86_BUILTIN_PFMAX:
13277 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13279 case IX86_BUILTIN_PFMIN:
13280 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13282 case IX86_BUILTIN_PFMUL:
13283 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13285 case IX86_BUILTIN_PFRCP:
13286 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13288 case IX86_BUILTIN_PFRCPIT1:
13289 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13291 case IX86_BUILTIN_PFRCPIT2:
13292 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13294 case IX86_BUILTIN_PFRSQIT1:
13295 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13297 case IX86_BUILTIN_PFRSQRT:
13298 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13300 case IX86_BUILTIN_PFSUB:
13301 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13303 case IX86_BUILTIN_PFSUBR:
13304 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13306 case IX86_BUILTIN_PI2FD:
13307 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13309 case IX86_BUILTIN_PMULHRW:
13310 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13312 case IX86_BUILTIN_PF2IW:
13313 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13315 case IX86_BUILTIN_PFNACC:
13316 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13318 case IX86_BUILTIN_PFPNACC:
13319 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13321 case IX86_BUILTIN_PI2FW:
13322 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13324 case IX86_BUILTIN_PSWAPDSI:
13325 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13327 case IX86_BUILTIN_PSWAPDSF:
13328 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13330 case IX86_BUILTIN_SSE_ZERO:
13331 target = gen_reg_rtx (V4SFmode);
13332 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
13333 return target;
13335 case IX86_BUILTIN_MMX_ZERO:
13336 target = gen_reg_rtx (DImode);
13337 emit_insn (gen_mmx_clrdi (target));
13338 return target;
13340 case IX86_BUILTIN_CLRTI:
13341 target = gen_reg_rtx (V2DImode);
13342 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
13343 return target;
13346 case IX86_BUILTIN_SQRTSD:
13347 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13348 case IX86_BUILTIN_LOADAPD:
13349 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13350 case IX86_BUILTIN_LOADUPD:
13351 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13353 case IX86_BUILTIN_STOREAPD:
13354 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13355 case IX86_BUILTIN_STOREUPD:
13356 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13358 case IX86_BUILTIN_LOADSD:
13359 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13361 case IX86_BUILTIN_STORESD:
13362 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13364 case IX86_BUILTIN_SETPD1:
13365 target = assign_386_stack_local (DFmode, 0);
13366 arg0 = TREE_VALUE (arglist);
13367 emit_move_insn (adjust_address (target, DFmode, 0),
13368 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13369 op0 = gen_reg_rtx (V2DFmode);
13370 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13371 emit_insn (gen_sse2_shufpd (op0, op0, op0, const0_rtx));
13372 return op0;
13374 case IX86_BUILTIN_SETPD:
13375 target = assign_386_stack_local (V2DFmode, 0);
13376 arg0 = TREE_VALUE (arglist);
13377 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13378 emit_move_insn (adjust_address (target, DFmode, 0),
13379 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13380 emit_move_insn (adjust_address (target, DFmode, 8),
13381 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13382 op0 = gen_reg_rtx (V2DFmode);
13383 emit_insn (gen_sse2_movapd (op0, target));
13384 return op0;
13386 case IX86_BUILTIN_LOADRPD:
13387 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13388 gen_reg_rtx (V2DFmode), 1);
13389 emit_insn (gen_sse2_shufpd (target, target, target, const1_rtx));
13390 return target;
13392 case IX86_BUILTIN_LOADPD1:
13393 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13394 gen_reg_rtx (V2DFmode), 1);
13395 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13396 return target;
13398 case IX86_BUILTIN_STOREPD1:
13399 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13400 case IX86_BUILTIN_STORERPD:
13401 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13403 case IX86_BUILTIN_CLRPD:
13404 target = gen_reg_rtx (V2DFmode);
13405 emit_insn (gen_sse_clrv2df (target));
13406 return target;
13408 case IX86_BUILTIN_MFENCE:
13409 emit_insn (gen_sse2_mfence ());
13410 return 0;
13411 case IX86_BUILTIN_LFENCE:
13412 emit_insn (gen_sse2_lfence ());
13413 return 0;
13415 case IX86_BUILTIN_CLFLUSH:
13416 arg0 = TREE_VALUE (arglist);
13417 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13418 icode = CODE_FOR_sse2_clflush;
13419 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
13420 op0 = copy_to_mode_reg (Pmode, op0);
13422 emit_insn (gen_sse2_clflush (op0));
13423 return 0;
13425 case IX86_BUILTIN_MOVNTPD:
13426 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13427 case IX86_BUILTIN_MOVNTDQ:
13428 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13429 case IX86_BUILTIN_MOVNTI:
13430 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13432 case IX86_BUILTIN_LOADDQA:
13433 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
13434 case IX86_BUILTIN_LOADDQU:
13435 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
13436 case IX86_BUILTIN_LOADD:
13437 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
13439 case IX86_BUILTIN_STOREDQA:
13440 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
13441 case IX86_BUILTIN_STOREDQU:
13442 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
13443 case IX86_BUILTIN_STORED:
13444 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
13446 case IX86_BUILTIN_MONITOR:
13447 arg0 = TREE_VALUE (arglist);
13448 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13449 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13450 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13451 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13452 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13453 if (!REG_P (op0))
13454 op0 = copy_to_mode_reg (SImode, op0);
13455 if (!REG_P (op1))
13456 op1 = copy_to_mode_reg (SImode, op1);
13457 if (!REG_P (op2))
13458 op2 = copy_to_mode_reg (SImode, op2);
13459 emit_insn (gen_monitor (op0, op1, op2));
13460 return 0;
13462 case IX86_BUILTIN_MWAIT:
13463 arg0 = TREE_VALUE (arglist);
13464 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13465 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13466 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13467 if (!REG_P (op0))
13468 op0 = copy_to_mode_reg (SImode, op0);
13469 if (!REG_P (op1))
13470 op1 = copy_to_mode_reg (SImode, op1);
13471 emit_insn (gen_mwait (op0, op1));
13472 return 0;
13474 case IX86_BUILTIN_LOADDDUP:
13475 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
13477 case IX86_BUILTIN_LDDQU:
13478 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
13481 default:
13482 break;
13485 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13486 if (d->code == fcode)
13488 /* Compares are treated specially. */
13489 if (d->icode == CODE_FOR_maskcmpv4sf3
13490 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13491 || d->icode == CODE_FOR_maskncmpv4sf3
13492 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13493 || d->icode == CODE_FOR_maskcmpv2df3
13494 || d->icode == CODE_FOR_vmmaskcmpv2df3
13495 || d->icode == CODE_FOR_maskncmpv2df3
13496 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13497 return ix86_expand_sse_compare (d, arglist, target);
13499 return ix86_expand_binop_builtin (d->icode, arglist, target);
13502 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13503 if (d->code == fcode)
13504 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13506 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13507 if (d->code == fcode)
13508 return ix86_expand_sse_comi (d, arglist, target);
13510 /* @@@ Should really do something sensible here. */
13511 return 0;
13514 /* Store OPERAND to the memory after reload is completed. This means
13515 that we can't easily use assign_stack_local. */
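/* Summary of the strategies used below: with a red zone (64-bit) the
   operand is stored into the area just below the stack pointer without
   adjusting it; on 64-bit without a red zone it is pushed as a DImode
   value; on 32-bit it is pushed as one or two SImode words (DImode is
   split).  The returned MEM addresses the value so written.  */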
13516 rtx
13517 ix86_force_to_memory (enum machine_mode mode, rtx operand)
13519 rtx result;
13520 if (!reload_completed)
13521 abort ();
13522 if (TARGET_RED_ZONE)
13524 result = gen_rtx_MEM (mode,
13525 gen_rtx_PLUS (Pmode,
13526 stack_pointer_rtx,
13527 GEN_INT (-RED_ZONE_SIZE)));
13528 emit_move_insn (result, operand);
13530 else if (!TARGET_RED_ZONE && TARGET_64BIT)
13532 switch (mode)
13534 case HImode:
13535 case SImode:
13536 operand = gen_lowpart (DImode, operand);
13537 /* FALLTHRU */
13538 case DImode:
13539 emit_insn (
13540 gen_rtx_SET (VOIDmode,
13541 gen_rtx_MEM (DImode,
13542 gen_rtx_PRE_DEC (DImode,
13543 stack_pointer_rtx)),
13544 operand));
13545 break;
13546 default:
13547 abort ();
13549 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13551 else
13553 switch (mode)
13555 case DImode:
13557 rtx operands[2];
13558 split_di (&operand, 1, operands, operands + 1);
13559 emit_insn (
13560 gen_rtx_SET (VOIDmode,
13561 gen_rtx_MEM (SImode,
13562 gen_rtx_PRE_DEC (Pmode,
13563 stack_pointer_rtx)),
13564 operands[1]));
13565 emit_insn (
13566 gen_rtx_SET (VOIDmode,
13567 gen_rtx_MEM (SImode,
13568 gen_rtx_PRE_DEC (Pmode,
13569 stack_pointer_rtx)),
13570 operands[0]));
13572 break;
13573 case HImode:
13574 /* It is better to store HImodes as SImodes. */
13575 if (!TARGET_PARTIAL_REG_STALL)
13576 operand = gen_lowpart (SImode, operand);
13577 /* FALLTHRU */
13578 case SImode:
13579 emit_insn (
13580 gen_rtx_SET (VOIDmode,
13581 gen_rtx_MEM (GET_MODE (operand),
13582 gen_rtx_PRE_DEC (SImode,
13583 stack_pointer_rtx)),
13584 operand));
13585 break;
13586 default:
13587 abort ();
13589 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13591 return result;
13594 /* Free the memory allocated for the operand by ix86_force_to_memory. */
13595 void
13596 ix86_free_from_memory (enum machine_mode mode)
13598 if (!TARGET_RED_ZONE)
13600 int size;
13602 if (mode == DImode || TARGET_64BIT)
13603 size = 8;
13604 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13605 size = 2;
13606 else
13607 size = 4;
13608 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13609 to a pop or add instruction if registers are available. */
13610 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13611 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13612 GEN_INT (size))));
13616 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13617 QImode must go into class Q_REGS.
13618 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
13619 movdf to do mem-to-mem moves through integer regs. */
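/* Illustration: reloading a DFmode CONST_DOUBLE into an SSE class yields
   NO_REGS, forcing the constant to the memory pool, while 0.0 and 1.0
   (per standard_80387_constant_p) may still go to the 387 stack.  A
   QImode pseudo is steered into Q_REGS so the byte registers are used.  */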
13620 enum reg_class
13621 ix86_preferred_reload_class (rtx x, enum reg_class class)
13623 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
13624 return NO_REGS;
13625 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13627 /* SSE can't load any constant directly yet. */
13628 if (SSE_CLASS_P (class))
13629 return NO_REGS;
13630 /* Floats can load 0 and 1. */
13631 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13633 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13634 if (MAYBE_SSE_CLASS_P (class))
13635 return (reg_class_subset_p (class, GENERAL_REGS)
13636 ? GENERAL_REGS : FLOAT_REGS);
13637 else
13638 return class;
13640 /* General regs can load everything. */
13641 if (reg_class_subset_p (class, GENERAL_REGS))
13642 return GENERAL_REGS;
13643 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13644 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
13645 return NO_REGS;
13647 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
13648 return NO_REGS;
13649 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13650 return Q_REGS;
13651 return class;
13654 /* If we are copying between general and FP registers, we need a memory
13655 location. The same is true for SSE and MMX registers.
13657 The macro can't work reliably when one of the CLASSES is a class containing
13658 registers from multiple units (SSE, MMX, integer). We avoid this by never
13659 combining those units in a single alternative in the machine description.
13660 Ensure that this constraint holds to avoid unexpected surprises.
13662 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13663 enforce these sanity checks. */
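/* Illustration: an SFmode move between FLOAT_REGS and GENERAL_REGS always
   needs a stack slot, as does an SSE <-> integer or MMX <-> integer move
   unless it is a plain SImode (or 64-bit DImode) transfer and direct
   inter-unit moves are enabled (or we are optimizing for size).  */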
13665 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
13666 enum machine_mode mode, int strict)
13668 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
13669 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
13670 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
13671 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
13672 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
13673 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
13675 if (strict)
13676 abort ();
13677 else
13678 return 1;
13680 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
13681 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
13682 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
13683 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
13684 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
13686 /* Return the cost of moving data from a register in class CLASS1 to
13687 one in class CLASS2.
13689 It is not required that the cost always equal 2 when FROM is the same as TO;
13690 on some machines it is expensive to move between registers if they are not
13691 general registers. */
13693 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
13694 enum reg_class class2)
13696 /* In case we require secondary memory, compute the cost of the store followed
13697 by the load. In order to avoid bad register allocation choices, we need
13698 this to be *at least* as high as the symmetric MEMORY_MOVE_COST.
13700 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
13702 int cost = 1;
13704 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
13705 MEMORY_MOVE_COST (mode, class1, 1));
13706 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
13707 MEMORY_MOVE_COST (mode, class2, 1));
13709 /* In case of copying from a general purpose register we may emit multiple
13710 stores followed by a single load, causing a memory size mismatch stall.
13711 Count this as an arbitrarily high cost of 20.
13712 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
13713 cost += 20;
13715 /* In the case of FP/MMX moves, the registers actually overlap, and we
13716 have to switch modes in order to treat them differently. */
13717 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
13718 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
13719 cost += 20;
13721 return cost;
13724 /* Moves between SSE/MMX and integer unit are expensive. */
13725 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13726 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
13727 return ix86_cost->mmxsse_to_integer;
13728 if (MAYBE_FLOAT_CLASS_P (class1))
13729 return ix86_cost->fp_move;
13730 if (MAYBE_SSE_CLASS_P (class1))
13731 return ix86_cost->sse_move;
13732 if (MAYBE_MMX_CLASS_P (class1))
13733 return ix86_cost->mmx_move;
13734 return 2;
13737 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
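/* Illustration: only the flags register takes CCmode; x87 registers take
   the FP modes, SSE registers the SSE vector and scalar modes (when SSE is
   enabled), and MMX registers the 64-bit vector modes.  QImode is allowed
   in a non-QI register only on 64-bit targets or when partial register
   stalls are not a concern.  */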
13739 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
13741 /* Flags, and only flags, can hold CCmode values. */
13742 if (CC_REGNO_P (regno))
13743 return GET_MODE_CLASS (mode) == MODE_CC;
13744 if (GET_MODE_CLASS (mode) == MODE_CC
13745 || GET_MODE_CLASS (mode) == MODE_RANDOM
13746 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
13747 return 0;
13748 if (FP_REGNO_P (regno))
13749 return VALID_FP_MODE_P (mode);
13750 if (SSE_REGNO_P (regno))
13751 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
13752 if (MMX_REGNO_P (regno))
13753 return (TARGET_MMX
13754 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
13755 /* We handle both integers and floats in the general purpose registers.
13756 In the future we should be able to handle vector modes as well. */
13757 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
13758 return 0;
13759 /* Take care for QImode values - they can be in non-QI regs, but then
13760 they do cause partial register stalls. */
13761 if (regno < 4 || mode != QImode || TARGET_64BIT)
13762 return 1;
13763 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
13766 /* Return the cost of moving data of mode M between a
13767 register and memory. A value of 2 is the default; this cost is
13768 relative to those in `REGISTER_MOVE_COST'.
13770 If moving between registers and memory is more expensive than
13771 between two registers, you should define this macro to express the
13772 relative cost.
13774 Also model the increased cost of moving QImode registers in non
13775 Q_REGS classes.
13778 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
13780 if (FLOAT_CLASS_P (class))
13782 int index;
13783 switch (mode)
13785 case SFmode:
13786 index = 0;
13787 break;
13788 case DFmode:
13789 index = 1;
13790 break;
13791 case XFmode:
13792 index = 2;
13793 break;
13794 default:
13795 return 100;
13797 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
13799 if (SSE_CLASS_P (class))
13801 int index;
13802 switch (GET_MODE_SIZE (mode))
13804 case 4:
13805 index = 0;
13806 break;
13807 case 8:
13808 index = 1;
13809 break;
13810 case 16:
13811 index = 2;
13812 break;
13813 default:
13814 return 100;
13816 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
13818 if (MMX_CLASS_P (class))
13820 int index;
13821 switch (GET_MODE_SIZE (mode))
13823 case 4:
13824 index = 0;
13825 break;
13826 case 8:
13827 index = 1;
13828 break;
13829 default:
13830 return 100;
13832 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
13834 switch (GET_MODE_SIZE (mode))
13836 case 1:
13837 if (in)
13838 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
13839 : ix86_cost->movzbl_load);
13840 else
13841 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
13842 : ix86_cost->int_store[0] + 4);
13843 break;
13844 case 2:
13845 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
13846 default:
13847 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
13848 if (mode == TFmode)
13849 mode = XFmode;
13850 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
13851 * (((int) GET_MODE_SIZE (mode)
13852 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
13856 /* Compute a (partial) cost for rtx X. Return true if the complete
13857 cost has been computed, and false if subexpressions should be
13858 scanned. In either case, *TOTAL contains the cost result. */
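/* Worked example: for (plus:SI (mult:SI (reg) (const_int 4)) (reg)) the
   PLUS case below recognizes the scaled-index form and charges a single
   lea, whereas a DImode addition on a 32-bit target falls through to the
   logical-op case and is charged as two adds plus the cost of its
   operands.  */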
13860 static bool
13861 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
13863 enum machine_mode mode = GET_MODE (x);
13865 switch (code)
13867 case CONST_INT:
13868 case CONST:
13869 case LABEL_REF:
13870 case SYMBOL_REF:
13871 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
13872 *total = 3;
13873 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
13874 *total = 2;
13875 else if (flag_pic && SYMBOLIC_CONST (x)
13876 && (!TARGET_64BIT
13877 || (GET_CODE (x) != LABEL_REF
13878 && (GET_CODE (x) != SYMBOL_REF
13879 || !SYMBOL_REF_LOCAL_P (x)))))
13880 *total = 1;
13881 else
13882 *total = 0;
13883 return true;
13885 case CONST_DOUBLE:
13886 if (mode == VOIDmode)
13887 *total = 0;
13888 else
13889 switch (standard_80387_constant_p (x))
13891 case 1: /* 0.0 */
13892 *total = 1;
13893 break;
13894 default: /* Other constants */
13895 *total = 2;
13896 break;
13897 case 0:
13898 case -1:
13899 /* Start with (MEM (SYMBOL_REF)), since that's where
13900 it'll probably end up. Add a penalty for size. */
13901 *total = (COSTS_N_INSNS (1)
13902 + (flag_pic != 0 && !TARGET_64BIT)
13903 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
13904 break;
13906 return true;
13908 case ZERO_EXTEND:
13909 /* The zero extension is often completely free on x86_64, so make
13910 it as cheap as possible. */
13911 if (TARGET_64BIT && mode == DImode
13912 && GET_MODE (XEXP (x, 0)) == SImode)
13913 *total = 1;
13914 else if (TARGET_ZERO_EXTEND_WITH_AND)
13915 *total = COSTS_N_INSNS (ix86_cost->add);
13916 else
13917 *total = COSTS_N_INSNS (ix86_cost->movzx);
13918 return false;
13920 case SIGN_EXTEND:
13921 *total = COSTS_N_INSNS (ix86_cost->movsx);
13922 return false;
13924 case ASHIFT:
13925 if (GET_CODE (XEXP (x, 1)) == CONST_INT
13926 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
13928 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
13929 if (value == 1)
13931 *total = COSTS_N_INSNS (ix86_cost->add);
13932 return false;
13934 if ((value == 2 || value == 3)
13935 && ix86_cost->lea <= ix86_cost->shift_const)
13937 *total = COSTS_N_INSNS (ix86_cost->lea);
13938 return false;
13941 /* FALLTHRU */
13943 case ROTATE:
13944 case ASHIFTRT:
13945 case LSHIFTRT:
13946 case ROTATERT:
13947 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
13949 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
13951 if (INTVAL (XEXP (x, 1)) > 32)
13952 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
13953 else
13954 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
13956 else
13958 if (GET_CODE (XEXP (x, 1)) == AND)
13959 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
13960 else
13961 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
13964 else
13966 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
13967 *total = COSTS_N_INSNS (ix86_cost->shift_const);
13968 else
13969 *total = COSTS_N_INSNS (ix86_cost->shift_var);
13971 return false;
13973 case MULT:
13974 if (FLOAT_MODE_P (mode))
13976 *total = COSTS_N_INSNS (ix86_cost->fmul);
13977 return false;
13979 else
13981 rtx op0 = XEXP (x, 0);
13982 rtx op1 = XEXP (x, 1);
13983 int nbits;
13984 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
13986 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
13987 for (nbits = 0; value != 0; value &= value - 1)
13988 nbits++;
13990 else
13991 /* This is arbitrary. */
13992 nbits = 7;
13994 /* Compute costs correctly for widening multiplication. */
13995 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
13996 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
13997 == GET_MODE_SIZE (mode))
13999 int is_mulwiden = 0;
14000 enum machine_mode inner_mode = GET_MODE (op0);
14002 if (GET_CODE (op0) == GET_CODE (op1))
14003 is_mulwiden = 1, op1 = XEXP (op1, 0);
14004 else if (GET_CODE (op1) == CONST_INT)
14006 if (GET_CODE (op0) == SIGN_EXTEND)
14007 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
14008 == INTVAL (op1);
14009 else
14010 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
14013 if (is_mulwiden)
14014 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
14017 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14018 + nbits * ix86_cost->mult_bit)
14019 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
14021 return true;
14024 case DIV:
14025 case UDIV:
14026 case MOD:
14027 case UMOD:
14028 if (FLOAT_MODE_P (mode))
14029 *total = COSTS_N_INSNS (ix86_cost->fdiv);
14030 else
14031 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
14032 return false;
14034 case PLUS:
14035 if (FLOAT_MODE_P (mode))
14036 *total = COSTS_N_INSNS (ix86_cost->fadd);
14037 else if (GET_MODE_CLASS (mode) == MODE_INT
14038 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
14040 if (GET_CODE (XEXP (x, 0)) == PLUS
14041 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14042 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
14043 && CONSTANT_P (XEXP (x, 1)))
14045 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
14046 if (val == 2 || val == 4 || val == 8)
14048 *total = COSTS_N_INSNS (ix86_cost->lea);
14049 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14050 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
14051 outer_code);
14052 *total += rtx_cost (XEXP (x, 1), outer_code);
14053 return true;
14056 else if (GET_CODE (XEXP (x, 0)) == MULT
14057 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
14059 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
14060 if (val == 2 || val == 4 || val == 8)
14062 *total = COSTS_N_INSNS (ix86_cost->lea);
14063 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14064 *total += rtx_cost (XEXP (x, 1), outer_code);
14065 return true;
14068 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14070 *total = COSTS_N_INSNS (ix86_cost->lea);
14071 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14072 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14073 *total += rtx_cost (XEXP (x, 1), outer_code);
14074 return true;
14077 /* FALLTHRU */
14079 case MINUS:
14080 if (FLOAT_MODE_P (mode))
14082 *total = COSTS_N_INSNS (ix86_cost->fadd);
14083 return false;
14085 /* FALLTHRU */
14087 case AND:
14088 case IOR:
14089 case XOR:
14090 if (!TARGET_64BIT && mode == DImode)
14092 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
14093 + (rtx_cost (XEXP (x, 0), outer_code)
14094 << (GET_MODE (XEXP (x, 0)) != DImode))
14095 + (rtx_cost (XEXP (x, 1), outer_code)
14096 << (GET_MODE (XEXP (x, 1)) != DImode)));
14097 return true;
14099 /* FALLTHRU */
14101 case NEG:
14102 if (FLOAT_MODE_P (mode))
14104 *total = COSTS_N_INSNS (ix86_cost->fchs);
14105 return false;
14107 /* FALLTHRU */
14109 case NOT:
14110 if (!TARGET_64BIT && mode == DImode)
14111 *total = COSTS_N_INSNS (ix86_cost->add * 2);
14112 else
14113 *total = COSTS_N_INSNS (ix86_cost->add);
14114 return false;
14116 case FLOAT_EXTEND:
14117 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
14118 *total = 0;
14119 return false;
14121 case ABS:
14122 if (FLOAT_MODE_P (mode))
14123 *total = COSTS_N_INSNS (ix86_cost->fabs);
14124 return false;
14126 case SQRT:
14127 if (FLOAT_MODE_P (mode))
14128 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
14129 return false;
14131 case UNSPEC:
14132 if (XINT (x, 1) == UNSPEC_TP)
14133 *total = 0;
14134 return false;
14136 default:
14137 return false;
14141 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
14142 static void
14143 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
14145 init_section ();
14146 fputs ("\tpushl $", asm_out_file);
14147 assemble_name (asm_out_file, XSTR (symbol, 0));
14148 fputc ('\n', asm_out_file);
14150 #endif
14152 #if TARGET_MACHO
14154 static int current_machopic_label_num;
14156 /* Given a symbol name and its associated stub, write out the
14157 definition of the stub. */
14159 void
14160 machopic_output_stub (FILE *file, const char *symb, const char *stub)
14162 unsigned int length;
14163 char *binder_name, *symbol_name, lazy_ptr_name[32];
14164 int label = ++current_machopic_label_num;
14166 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
14167 symb = (*targetm.strip_name_encoding) (symb);
14169 length = strlen (stub);
14170 binder_name = alloca (length + 32);
14171 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
14173 length = strlen (symb);
14174 symbol_name = alloca (length + 32);
14175 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
14177 sprintf (lazy_ptr_name, "L%d$lz", label);
14179 if (MACHOPIC_PURE)
14180 machopic_picsymbol_stub_section ();
14181 else
14182 machopic_symbol_stub_section ();
14184 fprintf (file, "%s:\n", stub);
14185 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14187 if (MACHOPIC_PURE)
14189 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
14190 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
14191 fprintf (file, "\tjmp %%edx\n");
14193 else
14194 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
14196 fprintf (file, "%s:\n", binder_name);
14198 if (MACHOPIC_PURE)
14200 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
14201 fprintf (file, "\tpushl %%eax\n");
14203 else
14204 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
14206 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
14208 machopic_lazy_symbol_ptr_section ();
14209 fprintf (file, "%s:\n", lazy_ptr_name);
14210 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14211 fprintf (file, "\t.long %s\n", binder_name);
14213 #endif /* TARGET_MACHO */
14215 /* Order the registers for the register allocator. */
14217 void
14218 x86_order_regs_for_local_alloc (void)
14220 int pos = 0;
14221 int i;
14223 /* First allocate the local general purpose registers. */
14224 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14225 if (GENERAL_REGNO_P (i) && call_used_regs[i])
14226 reg_alloc_order [pos++] = i;
14228 /* Global general purpose registers. */
14229 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14230 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14231 reg_alloc_order [pos++] = i;
14233 /* x87 registers come first in case we are doing FP math
14234 using them. */
14235 if (!TARGET_SSE_MATH)
14236 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14237 reg_alloc_order [pos++] = i;
14239 /* SSE registers. */
14240 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14241 reg_alloc_order [pos++] = i;
14242 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14243 reg_alloc_order [pos++] = i;
14245 /* x87 registers. */
14246 if (TARGET_SSE_MATH)
14247 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14248 reg_alloc_order [pos++] = i;
14250 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14251 reg_alloc_order [pos++] = i;
14253 /* Initialize the rest of the array, as we do not allocate some registers
14254 at all. */
14255 while (pos < FIRST_PSEUDO_REGISTER)
14256 reg_alloc_order [pos++] = 0;
14259 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
14260 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
14261 #endif
14263 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
14264 struct attribute_spec.handler. */
14265 static tree
14266 ix86_handle_struct_attribute (tree *node, tree name,
14267 tree args ATTRIBUTE_UNUSED,
14268 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
14270 tree *type = NULL;
14271 if (DECL_P (*node))
14273 if (TREE_CODE (*node) == TYPE_DECL)
14274 type = &TREE_TYPE (*node);
14276 else
14277 type = node;
14279 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
14280 || TREE_CODE (*type) == UNION_TYPE)))
14282 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
14283 *no_add_attrs = true;
14286 else if ((is_attribute_p ("ms_struct", name)
14287 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
14288 || ((is_attribute_p ("gcc_struct", name)
14289 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
14291 warning ("`%s' incompatible attribute ignored",
14292 IDENTIFIER_POINTER (name));
14293 *no_add_attrs = true;
14296 return NULL_TREE;
14299 static bool
14300 ix86_ms_bitfield_layout_p (tree record_type)
14302 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
14303 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
14304 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
14307 /* Returns an expression indicating where the this parameter is
14308 located on entry to the FUNCTION. */
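/* Illustration: on 64-bit targets `this' arrives in %rdi, or in %rsi when
   the return value is an aggregate passed via a hidden pointer.  On 32-bit
   targets it is normally at 4(%esp) (8(%esp) with such a hidden pointer),
   in %eax for regparm functions, and in %ecx for fastcall.  */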
14310 static rtx
14311 x86_this_parameter (tree function)
14313 tree type = TREE_TYPE (function);
14315 if (TARGET_64BIT)
14317 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
14318 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
14321 if (ix86_function_regparm (type, function) > 0)
14323 tree parm;
14325 parm = TYPE_ARG_TYPES (type);
14326 /* Figure out whether or not the function has a variable number of
14327 arguments. */
14328 for (; parm; parm = TREE_CHAIN (parm))
14329 if (TREE_VALUE (parm) == void_type_node)
14330 break;
14331 /* If not, the this parameter is in the first argument. */
14332 if (parm)
14334 int regno = 0;
14335 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
14336 regno = 2;
14337 return gen_rtx_REG (SImode, regno);
14341 if (aggregate_value_p (TREE_TYPE (type), type))
14342 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
14343 else
14344 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
14347 /* Determine whether x86_output_mi_thunk can succeed. */
14349 static bool
14350 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
14351 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
14352 HOST_WIDE_INT vcall_offset, tree function)
14354 /* 64-bit can handle anything. */
14355 if (TARGET_64BIT)
14356 return true;
14358 /* For 32-bit, everything's fine if we have one free register. */
14359 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
14360 return true;
14362 /* Need a free register for vcall_offset. */
14363 if (vcall_offset)
14364 return false;
14366 /* Need a free register for GOT references. */
14367 if (flag_pic && !(*targetm.binds_local_p) (function))
14368 return false;
14370 /* Otherwise ok. */
14371 return true;
14374 /* Output the assembler code for a thunk function. THUNK_DECL is the
14375 declaration for the thunk function itself, FUNCTION is the decl for
14376 the target function. DELTA is an immediate constant offset to be
14377 added to THIS. If VCALL_OFFSET is nonzero, the word at
14378 *(*this + vcall_offset) should be added to THIS. */
14380 static void
14381 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
14382 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
14383 HOST_WIDE_INT vcall_offset, tree function)
14385 rtx xops[3];
14386 rtx this = x86_this_parameter (function);
14387 rtx this_reg, tmp;
14389 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
14390 pull it in now and let DELTA benefit. */
14391 if (REG_P (this))
14392 this_reg = this;
14393 else if (vcall_offset)
14395 /* Put the this parameter into %eax. */
14396 xops[0] = this;
14397 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
14398 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14400 else
14401 this_reg = NULL_RTX;
14403 /* Adjust the this parameter by a fixed constant. */
14404 if (delta)
14406 xops[0] = GEN_INT (delta);
14407 xops[1] = this_reg ? this_reg : this;
14408 if (TARGET_64BIT)
14410 if (!x86_64_general_operand (xops[0], DImode))
14412 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14413 xops[1] = tmp;
14414 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
14415 xops[0] = tmp;
14416 xops[1] = this;
14418 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14420 else
14421 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14424 /* Adjust the this parameter by a value stored in the vtable. */
14425 if (vcall_offset)
14427 if (TARGET_64BIT)
14428 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14429 else
14431 int tmp_regno = 2 /* ECX */;
14432 if (lookup_attribute ("fastcall",
14433 TYPE_ATTRIBUTES (TREE_TYPE (function))))
14434 tmp_regno = 0 /* EAX */;
14435 tmp = gen_rtx_REG (SImode, tmp_regno);
14438 xops[0] = gen_rtx_MEM (Pmode, this_reg);
14439 xops[1] = tmp;
14440 if (TARGET_64BIT)
14441 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14442 else
14443 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14445 /* Adjust the this parameter. */
14446 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
14447 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
14449 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
14450 xops[0] = GEN_INT (vcall_offset);
14451 xops[1] = tmp2;
14452 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14453 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
14455 xops[1] = this_reg;
14456 if (TARGET_64BIT)
14457 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14458 else
14459 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14462 /* If necessary, drop THIS back to its stack slot. */
14463 if (this_reg && this_reg != this)
14465 xops[0] = this_reg;
14466 xops[1] = this;
14467 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14470 xops[0] = XEXP (DECL_RTL (function), 0);
14471 if (TARGET_64BIT)
14473 if (!flag_pic || (*targetm.binds_local_p) (function))
14474 output_asm_insn ("jmp\t%P0", xops);
14475 else
14477 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
14478 tmp = gen_rtx_CONST (Pmode, tmp);
14479 tmp = gen_rtx_MEM (QImode, tmp);
14480 xops[0] = tmp;
14481 output_asm_insn ("jmp\t%A0", xops);
14484 else
14486 if (!flag_pic || (*targetm.binds_local_p) (function))
14487 output_asm_insn ("jmp\t%P0", xops);
14488 else
14489 #if TARGET_MACHO
14490 if (TARGET_MACHO)
14492 rtx sym_ref = XEXP (DECL_RTL (function), 0);
14493 tmp = (gen_rtx_SYMBOL_REF
14494 (Pmode,
14495 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
14496 tmp = gen_rtx_MEM (QImode, tmp);
14497 xops[0] = tmp;
14498 output_asm_insn ("jmp\t%0", xops);
14500 else
14501 #endif /* TARGET_MACHO */
14503 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14504 output_set_got (tmp);
14506 xops[1] = tmp;
14507 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
14508 output_asm_insn ("jmp\t{*}%1", xops);
14513 static void
14514 x86_file_start (void)
14516 default_file_start ();
14517 if (X86_FILE_START_VERSION_DIRECTIVE)
14518 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
14519 if (X86_FILE_START_FLTUSED)
14520 fputs ("\t.global\t__fltused\n", asm_out_file);
14521 if (ix86_asm_dialect == ASM_INTEL)
14522 fputs ("\t.intel_syntax\n", asm_out_file);
14525 int
14526 x86_field_alignment (tree field, int computed)
14528 enum machine_mode mode;
14529 tree type = TREE_TYPE (field);
14531 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
14532 return computed;
14533 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
14534 ? get_inner_array_type (type) : type);
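/* Without -malign-double, the traditional ia32 ABI presumably caps the
alignment of doubles, long longs and similar types at 32 bits within
structures; 64-bit targets keep the natural alignment.  */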
14535 if (mode == DFmode || mode == DCmode
14536 || GET_MODE_CLASS (mode) == MODE_INT
14537 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
14538 return MIN (32, computed);
14539 return computed;
14542 /* Output assembler code to FILE to increment profiler label # LABELNO
14543 for profiling a function entry. */
14544 void
14545 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
14547 if (TARGET_64BIT)
14548 if (flag_pic)
14550 #ifndef NO_PROFILE_COUNTERS
14551 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
14552 #endif
14553 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
14555 else
14557 #ifndef NO_PROFILE_COUNTERS
14558 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
14559 #endif
14560 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14562 else if (flag_pic)
14564 #ifndef NO_PROFILE_COUNTERS
14565 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
14566 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
14567 #endif
14568 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
14570 else
14572 #ifndef NO_PROFILE_COUNTERS
14573 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
14574 PROFILE_COUNT_REGISTER);
14575 #endif
14576 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14580 /* We don't have exact information about the insn sizes, but we may assume
14581 quite safely that we are informed about all 1-byte insns and memory
14582 address sizes. This is enough to eliminate unnecessary padding in
14583 99% of cases. */
14585 static int
14586 min_insn_size (rtx insn)
14588 int l = 0;
14590 if (!INSN_P (insn) || !active_insn_p (insn))
14591 return 0;
14593 /* Discard alignments we've emitted ourselves, as well as jump tables. */
14594 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
14595 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
14596 return 0;
14597 if (GET_CODE (insn) == JUMP_INSN
14598 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
14599 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
14600 return 0;
14602 /* Important case - calls are always 5 bytes.
14603 It is common to have many calls in a row. */
14604 if (GET_CODE (insn) == CALL_INSN
14605 && symbolic_reference_mentioned_p (PATTERN (insn))
14606 && !SIBLING_CALL_P (insn))
14607 return 5;
14608 if (get_attr_length (insn) <= 1)
14609 return 1;
14611 /* For normal instructions we may rely on the sizes of addresses
14612 and the presence of a symbol to require 4 bytes of encoding.
14613 This is not the case for jumps, where references are PC-relative. */
14614 if (GET_CODE (insn) != JUMP_INSN)
14616 l = get_attr_length_address (insn);
14617 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
14618 l = 4;
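/* A rough lower bound: one opcode byte plus the address bytes when an
address size is known, otherwise assume two bytes.  */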
14620 if (l)
14621 return 1+l;
14622 else
14623 return 2;
14626 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
14627 16-byte window. */
14629 static void
14630 ix86_avoid_jump_misspredicts (void)
14632 rtx insn, start = get_insns ();
14633 int nbytes = 0, njumps = 0;
14634 int isjump = 0;
14636 /* Look for all minimal intervals of instructions containing 4 jumps.
14637 The intervals are bounded by START and INSN. NBYTES is the total
14638 size of instructions in the interval including INSN and not including
14639 START. When NBYTES is smaller than 16 bytes, it is possible
14640 that the ends of START and INSN land in the same 16-byte page.
14642 The smallest offset in the page at which INSN can start is the case where
14643 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
14644 We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN). */
14646 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14649 nbytes += min_insn_size (insn);
14650 if (dump_file)
14651 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
14652 INSN_UID (insn), min_insn_size (insn));
14653 if ((GET_CODE (insn) == JUMP_INSN
14654 && GET_CODE (PATTERN (insn)) != ADDR_VEC
14655 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
14656 || GET_CODE (insn) == CALL_INSN)
14657 njumps++;
14658 else
14659 continue;
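/* Once more than three jumps have accumulated, slide START forward until
at most three remain, keeping NBYTES and the jump count in sync.  */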
14661 while (njumps > 3)
14663 start = NEXT_INSN (start);
14664 if ((GET_CODE (start) == JUMP_INSN
14665 && GET_CODE (PATTERN (start)) != ADDR_VEC
14666 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
14667 || GET_CODE (start) == CALL_INSN)
14668 njumps--, isjump = 1;
14669 else
14670 isjump = 0;
14671 nbytes -= min_insn_size (start);
14673 if (njumps < 0)
14674 abort ();
14675 if (dump_file)
14676 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
14677 INSN_UID (start), INSN_UID (insn), nbytes);
14679 if (njumps == 3 && isjump && nbytes < 16)
14681 int padsize = 15 - nbytes + min_insn_size (insn);
14683 if (dump_file)
14684 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
14685 INSN_UID (insn), padsize);
14686 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
14691 /* AMD Athlon works faster
14692 when RET is not the destination of a conditional jump or directly preceded
14693 by another jump instruction. We avoid the penalty by inserting a NOP just
14694 before the RET instruction in such cases. */
14695 static void
14696 ix86_pad_returns (void)
14698 edge e;
14700 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
14702 basic_block bb = e->src;
14703 rtx ret = BB_END (bb);
14704 rtx prev;
14705 bool replace = false;
14707 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
14708 || !maybe_hot_bb_p (bb))
14709 continue;
14710 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
14711 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
14712 break;
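/* A label immediately before the return means the return may be reached
by a jump; check whether any incoming edge is a taken (non-fallthru)
branch.  */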
14713 if (prev && GET_CODE (prev) == CODE_LABEL)
14715 edge e;
14716 for (e = bb->pred; e; e = e->pred_next)
14717 if (EDGE_FREQUENCY (e) && e->src->index >= 0
14718 && !(e->flags & EDGE_FALLTHRU))
14719 replace = true;
14721 if (!replace)
14723 prev = prev_active_insn (ret);
14724 if (prev
14725 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
14726 || GET_CODE (prev) == CALL_INSN))
14727 replace = true;
14728 /* Empty functions get a branch mispredict even when the jump destination
14729 is not visible to us. */
14730 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
14731 replace = true;
14733 if (replace)
14735 emit_insn_before (gen_return_internal_long (), ret);
14736 delete_insn (ret);
14741 /* Implement machine specific optimizations. We implement padding of returns
14742 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
14743 static void
14744 ix86_reorg (void)
14746 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
14747 ix86_pad_returns ();
14748 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
14749 ix86_avoid_jump_misspredicts ();
14752 /* Return nonzero when a QImode register that must be represented via a REX
14753 prefix is used. */
14754 bool
14755 x86_extended_QIreg_mentioned_p (rtx insn)
14757 int i;
14758 extract_insn_cached (insn);
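/* Hard registers 0-3 (%ax, %dx, %cx, %bx) have legacy 8-bit forms; any
register from 4 up needs a REX prefix to be accessed in QImode.  */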
14759 for (i = 0; i < recog_data.n_operands; i++)
14760 if (REG_P (recog_data.operand[i])
14761 && REGNO (recog_data.operand[i]) >= 4)
14762 return true;
14763 return false;
14766 /* Return nonzero when P points to a register encoded via a REX prefix.
14767 Called via for_each_rtx. */
14768 static int
14769 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
14771 unsigned int regno;
14772 if (!REG_P (*p))
14773 return 0;
14774 regno = REGNO (*p);
14775 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
14778 /* Return true when INSN mentions a register that must be encoded using a
14779 REX prefix. */
14780 bool
14781 x86_extended_reg_mentioned_p (rtx insn)
14783 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
14786 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
14787 optabs would emit if we didn't have TFmode patterns. */
14789 void
14790 x86_emit_floatuns (rtx operands[2])
14792 rtx neglab, donelab, i0, i1, f0, in, out;
14793 enum machine_mode mode, inmode;
14795 inmode = GET_MODE (operands[1]);
14796 if (inmode != SImode
14797 && inmode != DImode)
14798 abort ();
14800 out = operands[0];
14801 in = force_reg (inmode, operands[1]);
14802 mode = GET_MODE (out);
14803 neglab = gen_label_rtx ();
14804 donelab = gen_label_rtx ();
14805 i1 = gen_reg_rtx (Pmode);
14806 f0 = gen_reg_rtx (mode);
14808 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
14810 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
14811 emit_jump_insn (gen_jump (donelab));
14812 emit_barrier ();
14814 emit_label (neglab);
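/* The input has its top bit set: halve it, OR the lost low bit back in so
rounding is preserved, convert as a signed value, then double the result.  */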
14816 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
14817 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
14818 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
14819 expand_float (f0, i0, 0);
14820 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
14822 emit_label (donelab);
14825 /* Initialize vector TARGET via VALS. */
14826 void
14827 ix86_expand_vector_init (rtx target, rtx vals)
14829 enum machine_mode mode = GET_MODE (target);
14830 int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
14831 int n_elts = (GET_MODE_SIZE (mode) / elt_size);
14832 int i;
14834 for (i = n_elts - 1; i >= 0; i--)
14835 if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
14836 && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
14837 break;
14839 /* A few special cases first...
14840 ... constants are best loaded from the constant pool. */
14841 if (i < 0)
14843 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
14844 return;
14847 /* ... values where only the first field is non-constant are best loaded
14848 from the pool and overwritten via a move later. */
14849 if (!i)
14851 rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
14852 GET_MODE_INNER (mode), 0);
14854 op = force_reg (mode, op);
14855 XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
14856 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
14857 switch (GET_MODE (target))
14859 case V2DFmode:
14860 emit_insn (gen_sse2_movsd (target, target, op));
14861 break;
14862 case V4SFmode:
14863 emit_insn (gen_sse_movss (target, target, op));
14864 break;
14865 default:
14866 break;
14868 return;
14871 /* And the general sequence: build the vector with unpack/interleave operations. */
14872 switch (GET_MODE (target))
14874 case V2DFmode:
14876 rtx vecop0 =
14877 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
14878 rtx vecop1 =
14879 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
14881 vecop0 = force_reg (V2DFmode, vecop0);
14882 vecop1 = force_reg (V2DFmode, vecop1);
14883 emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
14885 break;
14886 case V4SFmode:
14888 rtx vecop0 =
14889 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
14890 rtx vecop1 =
14891 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
14892 rtx vecop2 =
14893 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
14894 rtx vecop3 =
14895 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
14896 rtx tmp1 = gen_reg_rtx (V4SFmode);
14897 rtx tmp2 = gen_reg_rtx (V4SFmode);
14899 vecop0 = force_reg (V4SFmode, vecop0);
14900 vecop1 = force_reg (V4SFmode, vecop1);
14901 vecop2 = force_reg (V4SFmode, vecop2);
14902 vecop3 = force_reg (V4SFmode, vecop3);
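/* Two interleave steps: unpcklps pairs up (v1,v3) and (v0,v2), and a final
unpcklps of those results yields {v0, v1, v2, v3}.  */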
14903 emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
14904 emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
14905 emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
14907 break;
14908 default:
14909 abort ();
14913 /* Worker function for TARGET_MD_ASM_CLOBBERS. Add the flags, FP status
14915 word and direction flag to the clobber list of every asm statement;
14916 we do this to maintain source compatibility with the old cc0-based compiler. */
14918 static tree
14919 ix86_md_asm_clobbers (tree clobbers)
14921 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
14922 clobbers);
14923 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
14924 clobbers);
14925 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
14926 clobbers);
14927 return clobbers;
14930 /* Worker function for REVERSE_CONDITION. */
14932 enum rtx_code
14933 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
14935 return (mode != CCFPmode && mode != CCFPUmode
14936 ? reverse_condition (code)
14937 : reverse_condition_maybe_unordered (code));
14940 /* Output code to perform an x87 FP register move, from OPERANDS[1]
14941 to OPERANDS[0]. */
14943 const char *
14944 output_387_reg_move (rtx insn, rtx *operands)
14946 if (REG_P (operands[1])
14947 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
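/* The source register dies here, so pop it off the register stack instead
of copying it.  */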
14949 if (REGNO (operands[0]) == FIRST_STACK_REG
14950 && TARGET_USE_FFREEP)
14951 return "ffreep\t%y0";
14952 return "fstp\t%y0";
14954 if (STACK_TOP_P (operands[0]))
14955 return "fld%z1\t%y1";
14956 return "fst\t%y0";
14959 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
14960 the FP status register is set. */
14962 void
14963 ix86_emit_fp_unordered_jump (rtx label)
14965 rtx reg = gen_reg_rtx (HImode);
14966 rtx temp;
14968 emit_insn (gen_x86_fnstsw_1 (reg));
14970 if (TARGET_USE_SAHF)
14972 emit_insn (gen_x86_sahf_1 (reg));
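/* sahf has copied the high byte of the status word into EFLAGS; C2 should
now appear as the parity flag, which the UNORDERED test below inspects.  */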
14974 temp = gen_rtx_REG (CCmode, FLAGS_REG);
14975 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
14977 else
14979 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
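/* The test above checks 0x04 in the high byte of the saved status word,
i.e. bit 10, which is the C2 flag.  */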
14981 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14982 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
14985 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
14986 gen_rtx_LABEL_REF (VOIDmode, label),
14987 pc_rtx);
14988 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
14989 emit_jump_insn (temp);
14992 /* Output code to perform a log1p XFmode calculation. */
14994 void ix86_emit_i387_log1p (rtx op0, rtx op1)
14996 rtx label1 = gen_label_rtx ();
14997 rtx label2 = gen_label_rtx ();
14999 rtx tmp = gen_reg_rtx (XFmode);
15000 rtx tmp2 = gen_reg_rtx (XFmode);
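/* fyl2xp1 is only valid when |x| is below roughly 1 - sqrt(2)/2 (about
0.29289), so compare |op1| against that bound and branch to the fyl2x
fallback on 1 + op1 when it is too large.  */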
15002 emit_insn (gen_absxf2 (tmp, op1));
15003 emit_insn (gen_cmpxf (tmp,
15004 CONST_DOUBLE_FROM_REAL_VALUE (
15005 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
15006 XFmode)));
15007 emit_jump_insn (gen_bge (label1));
15009 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
15010 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
15011 emit_jump (label2);
15013 emit_label (label1);
15014 emit_move_insn (tmp, CONST1_RTX (XFmode));
15015 emit_insn (gen_addxf3 (tmp, op1, tmp));
15016 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
15017 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
15019 emit_label (label2);
15022 #include "gt-i386.h"