1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
52 #ifndef CHECK_STACK_LIMIT
53 #define CHECK_STACK_LIMIT (-1)
54 #endif
56 /* Return index of given mode in mult and division cost tables. */
57 #define MODE_INDEX(mode) \
58 ((mode) == QImode ? 0 \
59 : (mode) == HImode ? 1 \
60 : (mode) == SImode ? 2 \
61 : (mode) == DImode ? 3 \
62 : 4)
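/* Illustrative use (a sketch, not from the original source): MODE_INDEX
   selects the per-mode column of the cost arrays below, so the cost of a
   DImode divide on the current target would be read as

     ix86_cost->divide[MODE_INDEX (DImode)]

   assuming the array member is named `divide' as declared in i386.h; one
   cost unit equals the cost of an add.  */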
64 /* Processor costs (relative to an add) */
65 static const
66 struct processor_costs size_cost = { /* costs for tuning for size */
67 2, /* cost of an add instruction */
68 3, /* cost of a lea instruction */
69 2, /* variable shift costs */
70 3, /* constant shift costs */
71 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
72 0, /* cost of multiply per each bit set */
73 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
74 3, /* cost of movsx */
75 3, /* cost of movzx */
76 0, /* "large" insn */
77 2, /* MOVE_RATIO */
78 2, /* cost for loading QImode using movzbl */
79 {2, 2, 2}, /* cost of loading integer registers
80 in QImode, HImode and SImode.
81 Relative to reg-reg move (2). */
82 {2, 2, 2}, /* cost of storing integer registers */
83 2, /* cost of reg,reg fld/fst */
84 {2, 2, 2}, /* cost of loading fp registers
85 in SFmode, DFmode and XFmode */
86 {2, 2, 2}, /* cost of loading integer registers */
87 3, /* cost of moving MMX register */
88 {3, 3}, /* cost of loading MMX registers
89 in SImode and DImode */
90 {3, 3}, /* cost of storing MMX registers
91 in SImode and DImode */
92 3, /* cost of moving SSE register */
93 {3, 3, 3}, /* cost of loading SSE registers
94 in SImode, DImode and TImode */
95 {3, 3, 3}, /* cost of storing SSE registers
96 in SImode, DImode and TImode */
97 3, /* MMX or SSE register to integer */
98 0, /* size of prefetch block */
99 0, /* number of parallel prefetches */
100 1, /* Branch cost */
101 2, /* cost of FADD and FSUB insns. */
102 2, /* cost of FMUL instruction. */
103 2, /* cost of FDIV instruction. */
104 2, /* cost of FABS instruction. */
105 2, /* cost of FCHS instruction. */
106 2, /* cost of FSQRT instruction. */
109 /* Processor costs (relative to an add) */
110 static const
111 struct processor_costs i386_cost = { /* 386 specific costs */
112 1, /* cost of an add instruction */
113 1, /* cost of a lea instruction */
114 3, /* variable shift costs */
115 2, /* constant shift costs */
116 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
117 1, /* cost of multiply per each bit set */
118 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
119 3, /* cost of movsx */
120 2, /* cost of movzx */
121 15, /* "large" insn */
122 3, /* MOVE_RATIO */
123 4, /* cost for loading QImode using movzbl */
124 {2, 4, 2}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {8, 8, 8}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
131 {8, 8, 8}, /* cost of loading integer registers */
132 2, /* cost of moving MMX register */
133 {4, 8}, /* cost of loading MMX registers
134 in SImode and DImode */
135 {4, 8}, /* cost of storing MMX registers
136 in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
139 in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
141 in SImode, DImode and TImode */
142 3, /* MMX or SSE register to integer */
143 0, /* size of prefetch block */
144 0, /* number of parallel prefetches */
145 1, /* Branch cost */
146 23, /* cost of FADD and FSUB insns. */
147 27, /* cost of FMUL instruction. */
148 88, /* cost of FDIV instruction. */
149 22, /* cost of FABS instruction. */
150 24, /* cost of FCHS instruction. */
151 122, /* cost of FSQRT instruction. */
154 static const
155 struct processor_costs i486_cost = { /* 486 specific costs */
156 1, /* cost of an add instruction */
157 1, /* cost of a lea instruction */
158 3, /* variable shift costs */
159 2, /* constant shift costs */
160 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
161 1, /* cost of multiply per each bit set */
162 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
163 3, /* cost of movsx */
164 2, /* cost of movzx */
165 15, /* "large" insn */
166 3, /* MOVE_RATIO */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of loading integer registers */
176 2, /* cost of moving MMX register */
177 {4, 8}, /* cost of loading MMX registers
178 in SImode and DImode */
179 {4, 8}, /* cost of storing MMX registers
180 in SImode and DImode */
181 2, /* cost of moving SSE register */
182 {4, 8, 16}, /* cost of loading SSE registers
183 in SImode, DImode and TImode */
184 {4, 8, 16}, /* cost of storing SSE registers
185 in SImode, DImode and TImode */
186 3, /* MMX or SSE register to integer */
187 0, /* size of prefetch block */
188 0, /* number of parallel prefetches */
189 1, /* Branch cost */
190 8, /* cost of FADD and FSUB insns. */
191 16, /* cost of FMUL instruction. */
192 73, /* cost of FDIV instruction. */
193 3, /* cost of FABS instruction. */
194 3, /* cost of FCHS instruction. */
195 83, /* cost of FSQRT instruction. */
198 static const
199 struct processor_costs pentium_cost = {
200 1, /* cost of an add instruction */
201 1, /* cost of a lea instruction */
202 4, /* variable shift costs */
203 1, /* constant shift costs */
204 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
205 0, /* cost of multiply per each bit set */
206 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
207 3, /* cost of movsx */
208 2, /* cost of movzx */
209 8, /* "large" insn */
210 6, /* MOVE_RATIO */
211 6, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {2, 2, 6}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
219 {4, 4, 6}, /* cost of loading integer registers */
220 8, /* cost of moving MMX register */
221 {8, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {8, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
233 2, /* Branch cost */
234 3, /* cost of FADD and FSUB insns. */
235 3, /* cost of FMUL instruction. */
236 39, /* cost of FDIV instruction. */
237 1, /* cost of FABS instruction. */
238 1, /* cost of FCHS instruction. */
239 70, /* cost of FSQRT instruction. */
242 static const
243 struct processor_costs pentiumpro_cost = {
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
246 1, /* variable shift costs */
247 1, /* constant shift costs */
248 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
249 0, /* cost of multiply per each bit set */
250 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
251 1, /* cost of movsx */
252 1, /* cost of movzx */
253 8, /* "large" insn */
254 6, /* MOVE_RATIO */
255 2, /* cost for loading QImode using movzbl */
256 {4, 4, 4}, /* cost of loading integer registers
257 in QImode, HImode and SImode.
258 Relative to reg-reg move (2). */
259 {2, 2, 2}, /* cost of storing integer registers */
260 2, /* cost of reg,reg fld/fst */
261 {2, 2, 6}, /* cost of loading fp registers
262 in SFmode, DFmode and XFmode */
263 {4, 4, 6}, /* cost of loading integer registers */
264 2, /* cost of moving MMX register */
265 {2, 2}, /* cost of loading MMX registers
266 in SImode and DImode */
267 {2, 2}, /* cost of storing MMX registers
268 in SImode and DImode */
269 2, /* cost of moving SSE register */
270 {2, 2, 8}, /* cost of loading SSE registers
271 in SImode, DImode and TImode */
272 {2, 2, 8}, /* cost of storing SSE registers
273 in SImode, DImode and TImode */
274 3, /* MMX or SSE register to integer */
275 32, /* size of prefetch block */
276 6, /* number of parallel prefetches */
277 2, /* Branch cost */
278 3, /* cost of FADD and FSUB insns. */
279 5, /* cost of FMUL instruction. */
280 56, /* cost of FDIV instruction. */
281 2, /* cost of FABS instruction. */
282 2, /* cost of FCHS instruction. */
283 56, /* cost of FSQRT instruction. */
286 static const
287 struct processor_costs k6_cost = {
288 1, /* cost of an add instruction */
289 2, /* cost of a lea instruction */
290 1, /* variable shift costs */
291 1, /* constant shift costs */
292 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
293 0, /* cost of multiply per each bit set */
294 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
295 2, /* cost of movsx */
296 2, /* cost of movzx */
297 8, /* "large" insn */
298 4, /* MOVE_RATIO */
299 3, /* cost for loading QImode using movzbl */
300 {4, 5, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 3, 2}, /* cost of storing integer registers */
304 4, /* cost of reg,reg fld/fst */
305 {6, 6, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
307 {4, 4, 4}, /* cost of loading integer registers */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 6, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 1, /* number of parallel prefetches */
321 1, /* Branch cost */
322 2, /* cost of FADD and FSUB insns. */
323 2, /* cost of FMUL instruction. */
324 56, /* cost of FDIV instruction. */
325 2, /* cost of FABS instruction. */
326 2, /* cost of FCHS instruction. */
327 56, /* cost of FSQRT instruction. */
330 static const
331 struct processor_costs athlon_cost = {
332 1, /* cost of an add instruction */
333 2, /* cost of a lea instruction */
334 1, /* variable shift costs */
335 1, /* constant shift costs */
336 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
337 0, /* cost of multiply per each bit set */
338 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
339 1, /* cost of movsx */
340 1, /* cost of movzx */
341 8, /* "large" insn */
342 9, /* MOVE_RATIO */
343 4, /* cost for loading QImode using movzbl */
344 {3, 4, 3}, /* cost of loading integer registers
345 in QImode, HImode and SImode.
346 Relative to reg-reg move (2). */
347 {3, 4, 3}, /* cost of storing integer registers */
348 4, /* cost of reg,reg fld/fst */
349 {4, 4, 12}, /* cost of loading fp registers
350 in SFmode, DFmode and XFmode */
351 {6, 6, 8}, /* cost of loading integer registers */
352 2, /* cost of moving MMX register */
353 {4, 4}, /* cost of loading MMX registers
354 in SImode and DImode */
355 {4, 4}, /* cost of storing MMX registers
356 in SImode and DImode */
357 2, /* cost of moving SSE register */
358 {4, 4, 6}, /* cost of loading SSE registers
359 in SImode, DImode and TImode */
360 {4, 4, 5}, /* cost of storing SSE registers
361 in SImode, DImode and TImode */
362 5, /* MMX or SSE register to integer */
363 64, /* size of prefetch block */
364 6, /* number of parallel prefetches */
365 2, /* Branch cost */
366 4, /* cost of FADD and FSUB insns. */
367 4, /* cost of FMUL instruction. */
368 24, /* cost of FDIV instruction. */
369 2, /* cost of FABS instruction. */
370 2, /* cost of FCHS instruction. */
371 35, /* cost of FSQRT instruction. */
374 static const
375 struct processor_costs k8_cost = {
376 1, /* cost of an add instruction */
377 2, /* cost of a lea instruction */
378 1, /* variable shift costs */
379 1, /* constant shift costs */
380 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
381 0, /* cost of multiply per each bit set */
382 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
383 1, /* cost of movsx */
384 1, /* cost of movzx */
385 8, /* "large" insn */
386 9, /* MOVE_RATIO */
387 4, /* cost for loading QImode using movzbl */
388 {3, 4, 3}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {3, 4, 3}, /* cost of storing integer registers */
392 4, /* cost of reg,reg fld/fst */
393 {4, 4, 12}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {6, 6, 8}, /* cost of loading integer registers */
396 2, /* cost of moving MMX register */
397 {3, 3}, /* cost of loading MMX registers
398 in SImode and DImode */
399 {4, 4}, /* cost of storing MMX registers
400 in SImode and DImode */
401 2, /* cost of moving SSE register */
402 {4, 3, 6}, /* cost of loading SSE registers
403 in SImode, DImode and TImode */
404 {4, 4, 5}, /* cost of storing SSE registers
405 in SImode, DImode and TImode */
406 5, /* MMX or SSE register to integer */
407 64, /* size of prefetch block */
408 6, /* number of parallel prefetches */
409 2, /* Branch cost */
410 4, /* cost of FADD and FSUB insns. */
411 4, /* cost of FMUL instruction. */
412 19, /* cost of FDIV instruction. */
413 2, /* cost of FABS instruction. */
414 2, /* cost of FCHS instruction. */
415 35, /* cost of FSQRT instruction. */
418 static const
419 struct processor_costs pentium4_cost = {
420 1, /* cost of an add instruction */
421 3, /* cost of a lea instruction */
422 4, /* variable shift costs */
423 4, /* constant shift costs */
424 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
425 0, /* cost of multiply per each bit set */
426 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
427 1, /* cost of movsx */
428 1, /* cost of movzx */
429 16, /* "large" insn */
430 6, /* MOVE_RATIO */
431 2, /* cost for loading QImode using movzbl */
432 {4, 5, 4}, /* cost of loading integer registers
433 in QImode, HImode and SImode.
434 Relative to reg-reg move (2). */
435 {2, 3, 2}, /* cost of storing integer registers */
436 2, /* cost of reg,reg fld/fst */
437 {2, 2, 6}, /* cost of loading fp registers
438 in SFmode, DFmode and XFmode */
439 {4, 4, 6}, /* cost of loading integer registers */
440 2, /* cost of moving MMX register */
441 {2, 2}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {2, 2}, /* cost of storing MMX registers
444 in SImode and DImode */
445 12, /* cost of moving SSE register */
446 {12, 12, 12}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {2, 2, 8}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 10, /* MMX or SSE register to integer */
451 64, /* size of prefetch block */
452 6, /* number of parallel prefetches */
453 2, /* Branch cost */
454 5, /* cost of FADD and FSUB insns. */
455 7, /* cost of FMUL instruction. */
456 43, /* cost of FDIV instruction. */
457 2, /* cost of FABS instruction. */
458 2, /* cost of FCHS instruction. */
459 43, /* cost of FSQRT instruction. */
462 static const
463 struct processor_costs nocona_cost = {
464 1, /* cost of an add instruction */
465 1, /* cost of a lea instruction */
466 1, /* variable shift costs */
467 1, /* constant shift costs */
468 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
469 0, /* cost of multiply per each bit set */
470 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
471 1, /* cost of movsx */
472 1, /* cost of movzx */
473 16, /* "large" insn */
474 9, /* MOVE_RATIO */
475 4, /* cost for loading QImode using movzbl */
476 {4, 4, 4}, /* cost of loading integer registers
477 in QImode, HImode and SImode.
478 Relative to reg-reg move (2). */
479 {4, 4, 4}, /* cost of storing integer registers */
480 3, /* cost of reg,reg fld/fst */
481 {12, 12, 12}, /* cost of loading fp registers
482 in SFmode, DFmode and XFmode */
483 {4, 4, 4}, /* cost of loading integer registers */
484 6, /* cost of moving MMX register */
485 {12, 12}, /* cost of loading MMX registers
486 in SImode and DImode */
487 {12, 12}, /* cost of storing MMX registers
488 in SImode and DImode */
489 6, /* cost of moving SSE register */
490 {12, 12, 12}, /* cost of loading SSE registers
491 in SImode, DImode and TImode */
492 {12, 12, 12}, /* cost of storing SSE registers
493 in SImode, DImode and TImode */
494 8, /* MMX or SSE register to integer */
495 128, /* size of prefetch block */
496 8, /* number of parallel prefetches */
497 1, /* Branch cost */
498 6, /* cost of FADD and FSUB insns. */
499 8, /* cost of FMUL instruction. */
500 40, /* cost of FDIV instruction. */
501 3, /* cost of FABS instruction. */
502 3, /* cost of FCHS instruction. */
503 44, /* cost of FSQRT instruction. */
506 const struct processor_costs *ix86_cost = &pentium_cost;
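/* Note: override_options below re-points ix86_cost, selecting &size_cost
   when optimizing for size and processor_target_table[ix86_tune].cost
   otherwise, so &pentium_cost only serves as the initial default.  */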
508 /* Processor feature/optimization bitmasks. */
509 #define m_386 (1<<PROCESSOR_I386)
510 #define m_486 (1<<PROCESSOR_I486)
511 #define m_PENT (1<<PROCESSOR_PENTIUM)
512 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
513 #define m_K6 (1<<PROCESSOR_K6)
514 #define m_ATHLON (1<<PROCESSOR_ATHLON)
515 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
516 #define m_K8 (1<<PROCESSOR_K8)
517 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
518 #define m_NOCONA (1<<PROCESSOR_NOCONA)
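/* Sketch of how these masks are consumed (mirroring the tests later in this
   file): a tuning flag such as x86_use_leave is active when
   (x86_use_leave & (1 << ix86_tune)) is nonzero, while architecture flags
   are tested against (1 << ix86_arch), as override_options does for
   x86_arch_always_fancy_math_387.  */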
520 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
521 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
522 const int x86_zero_extend_with_and = m_486 | m_PENT;
523 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
524 const int x86_double_with_add = ~m_386;
525 const int x86_use_bit_test = m_386;
526 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
527 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
528 const int x86_3dnow_a = m_ATHLON_K8;
529 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
530 const int x86_branch_hints = m_PENT4 | m_NOCONA;
531 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
532 const int x86_partial_reg_stall = m_PPRO;
533 const int x86_use_loop = m_K6;
534 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
535 const int x86_use_mov0 = m_K6;
536 const int x86_use_cltd = ~(m_PENT | m_K6);
537 const int x86_read_modify_write = ~m_PENT;
538 const int x86_read_modify = ~(m_PENT | m_PPRO);
539 const int x86_split_long_moves = m_PPRO;
540 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
541 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
542 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
543 const int x86_qimode_math = ~(0);
544 const int x86_promote_qi_regs = 0;
545 const int x86_himode_math = ~(m_PPRO);
546 const int x86_promote_hi_regs = m_PPRO;
547 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
548 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
549 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
550 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
551 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
552 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
553 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
554 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
555 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
556 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
557 const int x86_decompose_lea = m_PENT4 | m_NOCONA;
558 const int x86_shift1 = ~m_486;
559 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
560 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
561 /* Set for machines where the type and dependencies are resolved on SSE register
562 parts instead of whole registers, so we may maintain just the lower part of
563 scalar values in the proper format and leave the upper part undefined. */
564 const int x86_sse_partial_regs = m_ATHLON_K8;
565 /* Athlon optimizes the partial-register FPS special case, thus avoiding the
566 need for extra instructions beforehand. */
567 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
568 const int x86_sse_typeless_stores = m_ATHLON_K8;
569 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
570 const int x86_use_ffreep = m_ATHLON_K8;
571 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
572 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
573 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
574 /* Some CPU cores are not able to predict more than 4 branch instructions in
575 the 16 byte window. */
576 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
578 /* In case the average insn count for single function invocation is
579 lower than this constant, emit fast (but longer) prologue and
580 epilogue code. */
581 #define FAST_PROLOGUE_INSN_COUNT 20
583 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
584 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
585 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
586 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
588 /* Array of the smallest class containing reg number REGNO, indexed by
589 REGNO. Used by REGNO_REG_CLASS in i386.h. */
591 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
593 /* ax, dx, cx, bx */
594 AREG, DREG, CREG, BREG,
595 /* si, di, bp, sp */
596 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
597 /* FP registers */
598 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
599 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
600 /* arg pointer */
601 NON_Q_REGS,
602 /* flags, fpsr, dirflag, frame */
603 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
604 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
605 SSE_REGS, SSE_REGS,
606 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
607 MMX_REGS, MMX_REGS,
608 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
609 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
610 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
611 SSE_REGS, SSE_REGS,
614 /* The "default" register map used in 32bit mode. */
616 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
618 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
619 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
620 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
621 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
622 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
623 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
624 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
627 static int const x86_64_int_parameter_registers[6] =
629 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
630 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
633 static int const x86_64_int_return_registers[4] =
635 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
638 /* The "default" register map used in 64bit mode. */
639 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
641 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
642 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
643 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
644 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
645 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
646 8,9,10,11,12,13,14,15, /* extended integer registers */
647 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
650 /* Define the register numbers to be used in Dwarf debugging information.
651 The SVR4 reference port C compiler uses the following register numbers
652 in its Dwarf output code:
653 0 for %eax (gcc regno = 0)
654 1 for %ecx (gcc regno = 2)
655 2 for %edx (gcc regno = 1)
656 3 for %ebx (gcc regno = 3)
657 4 for %esp (gcc regno = 7)
658 5 for %ebp (gcc regno = 6)
659 6 for %esi (gcc regno = 4)
660 7 for %edi (gcc regno = 5)
661 The following three DWARF register numbers are never generated by
662 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
663 believes these numbers have these meanings.
664 8 for %eip (no gcc equivalent)
665 9 for %eflags (gcc regno = 17)
666 10 for %trapno (no gcc equivalent)
667 It is not at all clear how we should number the FP stack registers
668 for the x86 architecture. If the version of SDB on x86/svr4 were
669 a bit less brain dead with respect to floating-point then we would
670 have a precedent to follow with respect to DWARF register numbers
671 for x86 FP registers, but the SDB on x86/svr4 is so completely
672 broken with respect to FP registers that it is hardly worth thinking
673 of it as something to strive for compatibility with.
674 The version of x86/svr4 SDB I have at the moment does (partially)
675 seem to believe that DWARF register number 11 is associated with
676 the x86 register %st(0), but that's about all. Higher DWARF
677 register numbers don't seem to be associated with anything in
678 particular, and even for DWARF regno 11, SDB only seems to under-
679 stand that it should say that a variable lives in %st(0) (when
680 asked via an `=' command) if we said it was in DWARF regno 11,
681 but SDB still prints garbage when asked for the value of the
682 variable in question (via a `/' command).
683 (Also note that the labels SDB prints for various FP stack regs
684 when doing an `x' command are all wrong.)
685 Note that these problems generally don't affect the native SVR4
686 C compiler because it doesn't allow the use of -O with -g and
687 because when it is *not* optimizing, it allocates a memory
688 location for each floating-point variable, and the memory
689 location is what gets described in the DWARF AT_location
690 attribute for the variable in question.
691 Regardless of the severe mental illness of the x86/svr4 SDB, we
692 do something sensible here and we use the following DWARF
693 register numbers. Note that these are all stack-top-relative
694 numbers.
695 11 for %st(0) (gcc regno = 8)
696 12 for %st(1) (gcc regno = 9)
697 13 for %st(2) (gcc regno = 10)
698 14 for %st(3) (gcc regno = 11)
699 15 for %st(4) (gcc regno = 12)
700 16 for %st(5) (gcc regno = 13)
701 17 for %st(6) (gcc regno = 14)
702 18 for %st(7) (gcc regno = 15)
704 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
706 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
707 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
708 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
709 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
710 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
711 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
712 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
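/* Example of the mapping above: gcc regno 4 is %esi and
   svr4_dbx_register_map[4] == 6, matching "6 for %esi (gcc regno = 4)" in
   the SVR4 DWARF numbering; entries of -1 mark registers with no DWARF
   number in this map.  */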
715 /* Test and compare insns in i386.md store the information needed to
716 generate branch and scc insns here. */
718 rtx ix86_compare_op0 = NULL_RTX;
719 rtx ix86_compare_op1 = NULL_RTX;
721 #define MAX_386_STACK_LOCALS 3
722 /* Size of the register save area. */
723 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
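/* Worked example (assuming the 64-bit values REGPARM_MAX == 6,
   UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8 from i386.h): the register
   save area is 6 * 8 + 8 * 16 = 176 bytes, matching the x86-64 psABI
   layout.  */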
725 /* Define the structure for the machine field in struct function. */
727 struct stack_local_entry GTY(())
729 unsigned short mode;
730 unsigned short n;
731 rtx rtl;
732 struct stack_local_entry *next;
735 /* Structure describing stack frame layout.
736 Stack grows downward:
738 [arguments]
739 <- ARG_POINTER
740 saved pc
742 saved frame pointer if frame_pointer_needed
743 <- HARD_FRAME_POINTER
744 [saved regs]
746 [padding1] \
748 [va_arg registers] (
749 > to_allocate <- FRAME_POINTER
750 [frame] (
752 [padding2] /
754 struct ix86_frame
756 int nregs;
757 int padding1;
758 int va_arg_size;
759 HOST_WIDE_INT frame;
760 int padding2;
761 int outgoing_arguments_size;
762 int red_zone_size;
764 HOST_WIDE_INT to_allocate;
765 /* The offsets relative to ARG_POINTER. */
766 HOST_WIDE_INT frame_pointer_offset;
767 HOST_WIDE_INT hard_frame_pointer_offset;
768 HOST_WIDE_INT stack_pointer_offset;
770 /* When save_regs_using_mov is set, emit prologue using
771 move instead of push instructions. */
772 bool save_regs_using_mov;
775 /* Used to enable/disable debugging features. */
776 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
777 /* Code model option as passed by user. */
778 const char *ix86_cmodel_string;
779 /* Parsed value. */
780 enum cmodel ix86_cmodel;
781 /* Asm dialect. */
782 const char *ix86_asm_string;
783 enum asm_dialect ix86_asm_dialect = ASM_ATT;
784 /* TLS dialect. */
785 const char *ix86_tls_dialect_string;
786 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
788 /* Which unit we are generating floating point math for. */
789 enum fpmath_unit ix86_fpmath;
791 /* Which cpu are we scheduling for. */
792 enum processor_type ix86_tune;
793 /* Which instruction set architecture to use. */
794 enum processor_type ix86_arch;
796 /* Strings to hold which cpu and instruction set architecture to use. */
797 const char *ix86_tune_string; /* for -mtune=<xxx> */
798 const char *ix86_arch_string; /* for -march=<xxx> */
799 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
801 /* # of registers to use to pass arguments. */
802 const char *ix86_regparm_string;
804 /* true if sse prefetch instruction is not NOOP. */
805 int x86_prefetch_sse;
807 /* ix86_regparm_string as a number */
808 int ix86_regparm;
810 /* Alignment to use for loops and jumps: */
812 /* Power of two alignment for loops. */
813 const char *ix86_align_loops_string;
815 /* Power of two alignment for non-loop jumps. */
816 const char *ix86_align_jumps_string;
818 /* Power of two alignment for stack boundary in bytes. */
819 const char *ix86_preferred_stack_boundary_string;
821 /* Preferred alignment for stack boundary in bits. */
822 unsigned int ix86_preferred_stack_boundary;
824 /* Values 1-5: see jump.c */
825 int ix86_branch_cost;
826 const char *ix86_branch_cost_string;
828 /* Power of two alignment for functions. */
829 const char *ix86_align_funcs_string;
831 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
832 char internal_label_prefix[16];
833 int internal_label_prefix_len;
835 static void output_pic_addr_const (FILE *, rtx, int);
836 static void put_condition_code (enum rtx_code, enum machine_mode,
837 int, int, FILE *);
838 static const char *get_some_local_dynamic_name (void);
839 static int get_some_local_dynamic_name_1 (rtx *, void *);
840 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
841 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
842 rtx *);
843 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
844 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
845 enum machine_mode);
846 static rtx get_thread_pointer (int);
847 static rtx legitimize_tls_address (rtx, enum tls_model, int);
848 static void get_pc_thunk_name (char [32], unsigned int);
849 static rtx gen_push (rtx);
850 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
851 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
852 static struct machine_function * ix86_init_machine_status (void);
853 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
854 static int ix86_nsaved_regs (void);
855 static void ix86_emit_save_regs (void);
856 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
857 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
858 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
859 static HOST_WIDE_INT ix86_GOT_alias_set (void);
860 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
861 static rtx ix86_expand_aligntest (rtx, int);
862 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
863 static int ix86_issue_rate (void);
864 static int ix86_adjust_cost (rtx, rtx, rtx, int);
865 static int ia32_multipass_dfa_lookahead (void);
866 static void ix86_init_mmx_sse_builtins (void);
867 static rtx x86_this_parameter (tree);
868 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
869 HOST_WIDE_INT, tree);
870 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
871 static void x86_file_start (void);
872 static void ix86_reorg (void);
873 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
874 static tree ix86_build_builtin_va_list (void);
875 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
876 tree, int *, int);
877 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
879 static int ix86_address_cost (rtx);
880 static bool ix86_cannot_force_const_mem (rtx);
881 static rtx ix86_delegitimize_address (rtx);
883 struct builtin_description;
884 static rtx ix86_expand_sse_comi (const struct builtin_description *,
885 tree, rtx);
886 static rtx ix86_expand_sse_compare (const struct builtin_description *,
887 tree, rtx);
888 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
889 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
890 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
891 static rtx ix86_expand_store_builtin (enum insn_code, tree);
892 static rtx safe_vector_operand (rtx, enum machine_mode);
893 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
894 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
895 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
896 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
897 static int ix86_fp_comparison_cost (enum rtx_code code);
898 static unsigned int ix86_select_alt_pic_regnum (void);
899 static int ix86_save_reg (unsigned int, int);
900 static void ix86_compute_frame_layout (struct ix86_frame *);
901 static int ix86_comp_type_attributes (tree, tree);
902 static int ix86_function_regparm (tree, tree);
903 const struct attribute_spec ix86_attribute_table[];
904 static bool ix86_function_ok_for_sibcall (tree, tree);
905 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
906 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
907 static int ix86_value_regno (enum machine_mode);
908 static bool contains_128bit_aligned_vector_p (tree);
909 static rtx ix86_struct_value_rtx (tree, int);
910 static bool ix86_ms_bitfield_layout_p (tree);
911 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
912 static int extended_reg_mentioned_1 (rtx *, void *);
913 static bool ix86_rtx_costs (rtx, int, int, int *);
914 static int min_insn_size (rtx);
915 static tree ix86_md_asm_clobbers (tree clobbers);
916 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
917 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
918 tree, bool);
920 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
921 static void ix86_svr3_asm_out_constructor (rtx, int);
922 #endif
924 /* Register class used for passing a given 64-bit part of an argument.
925 These represent the classes documented by the psABI, with the exception
926 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
927 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
929 Similarly, we play games with the INTEGERSI_CLASS to use cheaper SImode moves
930 whenever possible (the upper half contains only padding).
932 enum x86_64_reg_class
934 X86_64_NO_CLASS,
935 X86_64_INTEGER_CLASS,
936 X86_64_INTEGERSI_CLASS,
937 X86_64_SSE_CLASS,
938 X86_64_SSESF_CLASS,
939 X86_64_SSEDF_CLASS,
940 X86_64_SSEUP_CLASS,
941 X86_64_X87_CLASS,
942 X86_64_X87UP_CLASS,
943 X86_64_MEMORY_CLASS
945 static const char * const x86_64_reg_class_name[] =
946 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
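/* Illustrative classification (a sketch, not from the original source):
   a struct { double d; int i; } spans two eightbytes; the first holds only
   the double and classifies as X86_64_SSEDF_CLASS, the second holds the int
   plus padding and classifies as X86_64_INTEGERSI_CLASS, so the aggregate is
   passed in one SSE register and one integer register.  */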
948 #define MAX_CLASSES 4
949 static int classify_argument (enum machine_mode, tree,
950 enum x86_64_reg_class [MAX_CLASSES], int);
951 static int examine_argument (enum machine_mode, tree, int, int *, int *);
952 static rtx construct_container (enum machine_mode, tree, int, int, int,
953 const int *, int);
954 static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
955 enum x86_64_reg_class);
957 /* Table of constants used by fldpi, fldln2, etc.... */
958 static REAL_VALUE_TYPE ext_80387_constants_table [5];
959 static bool ext_80387_constants_init = 0;
960 static void init_ext_80387_constants (void);
962 /* Initialize the GCC target structure. */
963 #undef TARGET_ATTRIBUTE_TABLE
964 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
965 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
966 # undef TARGET_MERGE_DECL_ATTRIBUTES
967 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
968 #endif
970 #undef TARGET_COMP_TYPE_ATTRIBUTES
971 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
973 #undef TARGET_INIT_BUILTINS
974 #define TARGET_INIT_BUILTINS ix86_init_builtins
976 #undef TARGET_EXPAND_BUILTIN
977 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
979 #undef TARGET_ASM_FUNCTION_EPILOGUE
980 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
982 #undef TARGET_ASM_OPEN_PAREN
983 #define TARGET_ASM_OPEN_PAREN ""
984 #undef TARGET_ASM_CLOSE_PAREN
985 #define TARGET_ASM_CLOSE_PAREN ""
987 #undef TARGET_ASM_ALIGNED_HI_OP
988 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
989 #undef TARGET_ASM_ALIGNED_SI_OP
990 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
991 #ifdef ASM_QUAD
992 #undef TARGET_ASM_ALIGNED_DI_OP
993 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
994 #endif
996 #undef TARGET_ASM_UNALIGNED_HI_OP
997 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
998 #undef TARGET_ASM_UNALIGNED_SI_OP
999 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1000 #undef TARGET_ASM_UNALIGNED_DI_OP
1001 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1003 #undef TARGET_SCHED_ADJUST_COST
1004 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1005 #undef TARGET_SCHED_ISSUE_RATE
1006 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1007 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1008 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1009 ia32_multipass_dfa_lookahead
1011 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1012 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1014 #ifdef HAVE_AS_TLS
1015 #undef TARGET_HAVE_TLS
1016 #define TARGET_HAVE_TLS true
1017 #endif
1018 #undef TARGET_CANNOT_FORCE_CONST_MEM
1019 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1021 #undef TARGET_DELEGITIMIZE_ADDRESS
1022 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1024 #undef TARGET_MS_BITFIELD_LAYOUT_P
1025 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1027 #undef TARGET_ASM_OUTPUT_MI_THUNK
1028 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1029 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1030 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1032 #undef TARGET_ASM_FILE_START
1033 #define TARGET_ASM_FILE_START x86_file_start
1035 #undef TARGET_RTX_COSTS
1036 #define TARGET_RTX_COSTS ix86_rtx_costs
1037 #undef TARGET_ADDRESS_COST
1038 #define TARGET_ADDRESS_COST ix86_address_cost
1040 #undef TARGET_FIXED_CONDITION_CODE_REGS
1041 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1042 #undef TARGET_CC_MODES_COMPATIBLE
1043 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1045 #undef TARGET_MACHINE_DEPENDENT_REORG
1046 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1048 #undef TARGET_BUILD_BUILTIN_VA_LIST
1049 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1051 #undef TARGET_MD_ASM_CLOBBERS
1052 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1054 #undef TARGET_PROMOTE_PROTOTYPES
1055 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1056 #undef TARGET_STRUCT_VALUE_RTX
1057 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1058 #undef TARGET_SETUP_INCOMING_VARARGS
1059 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1060 #undef TARGET_MUST_PASS_IN_STACK
1061 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1062 #undef TARGET_PASS_BY_REFERENCE
1063 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1065 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1066 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1068 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1069 #undef TARGET_INSERT_ATTRIBUTES
1070 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1071 #endif
1073 struct gcc_target targetm = TARGET_INITIALIZER;
1076 /* The svr4 ABI for the i386 says that records and unions are returned
1077 in memory. */
1078 #ifndef DEFAULT_PCC_STRUCT_RETURN
1079 #define DEFAULT_PCC_STRUCT_RETURN 1
1080 #endif
1082 /* Sometimes certain combinations of command options do not make
1083 sense on a particular target machine. You can define a macro
1084 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1085 defined, is executed once just after all the command options have
1086 been parsed.
1088 Don't use this macro to turn on various extra optimizations for
1089 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1091 void
1092 override_options (void)
1094 int i;
1095 int ix86_tune_defaulted = 0;
1097 /* Comes from final.c -- no real reason to change it. */
1098 #define MAX_CODE_ALIGN 16
1100 static struct ptt
1102 const struct processor_costs *cost; /* Processor costs */
1103 const int target_enable; /* Target flags to enable. */
1104 const int target_disable; /* Target flags to disable. */
1105 const int align_loop; /* Default alignments. */
1106 const int align_loop_max_skip;
1107 const int align_jump;
1108 const int align_jump_max_skip;
1109 const int align_func;
1111 const processor_target_table[PROCESSOR_max] =
1113 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1114 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1115 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1116 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1117 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1118 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1119 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1120 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1121 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
1124 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1125 static struct pta
1127 const char *const name; /* processor name or nickname. */
1128 const enum processor_type processor;
1129 const enum pta_flags
1131 PTA_SSE = 1,
1132 PTA_SSE2 = 2,
1133 PTA_SSE3 = 4,
1134 PTA_MMX = 8,
1135 PTA_PREFETCH_SSE = 16,
1136 PTA_3DNOW = 32,
1137 PTA_3DNOW_A = 64,
1138 PTA_64BIT = 128
1139 } flags;
1141 const processor_alias_table[] =
1143 {"i386", PROCESSOR_I386, 0},
1144 {"i486", PROCESSOR_I486, 0},
1145 {"i586", PROCESSOR_PENTIUM, 0},
1146 {"pentium", PROCESSOR_PENTIUM, 0},
1147 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1148 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1149 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1150 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1151 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1152 {"i686", PROCESSOR_PENTIUMPRO, 0},
1153 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1154 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1155 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1156 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1157 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1158 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1159 | PTA_MMX | PTA_PREFETCH_SSE},
1160 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1161 | PTA_MMX | PTA_PREFETCH_SSE},
1162 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1163 | PTA_MMX | PTA_PREFETCH_SSE},
1164 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1165 | PTA_MMX | PTA_PREFETCH_SSE},
1166 {"k6", PROCESSOR_K6, PTA_MMX},
1167 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1168 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1169 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1170 | PTA_3DNOW_A},
1171 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1172 | PTA_3DNOW | PTA_3DNOW_A},
1173 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1174 | PTA_3DNOW_A | PTA_SSE},
1175 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1176 | PTA_3DNOW_A | PTA_SSE},
1177 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1178 | PTA_3DNOW_A | PTA_SSE},
1179 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1180 | PTA_SSE | PTA_SSE2 },
1181 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1182 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1183 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1184 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1185 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1186 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1187 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1188 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1191 int const pta_size = ARRAY_SIZE (processor_alias_table);
1193 /* Set the default values for switches whose default depends on TARGET_64BIT
1194 in case they weren't overwritten by command line options. */
1195 if (TARGET_64BIT)
1197 if (flag_omit_frame_pointer == 2)
1198 flag_omit_frame_pointer = 1;
1199 if (flag_asynchronous_unwind_tables == 2)
1200 flag_asynchronous_unwind_tables = 1;
1201 if (flag_pcc_struct_return == 2)
1202 flag_pcc_struct_return = 0;
1204 else
1206 if (flag_omit_frame_pointer == 2)
1207 flag_omit_frame_pointer = 0;
1208 if (flag_asynchronous_unwind_tables == 2)
1209 flag_asynchronous_unwind_tables = 0;
1210 if (flag_pcc_struct_return == 2)
1211 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1214 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1215 SUBTARGET_OVERRIDE_OPTIONS;
1216 #endif
1218 if (!ix86_tune_string && ix86_arch_string)
1219 ix86_tune_string = ix86_arch_string;
1220 if (!ix86_tune_string)
1222 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1223 ix86_tune_defaulted = 1;
1225 if (!ix86_arch_string)
1226 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1228 if (ix86_cmodel_string != 0)
1230 if (!strcmp (ix86_cmodel_string, "small"))
1231 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1232 else if (flag_pic)
1233 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1234 else if (!strcmp (ix86_cmodel_string, "32"))
1235 ix86_cmodel = CM_32;
1236 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1237 ix86_cmodel = CM_KERNEL;
1238 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1239 ix86_cmodel = CM_MEDIUM;
1240 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1241 ix86_cmodel = CM_LARGE;
1242 else
1243 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1245 else
1247 ix86_cmodel = CM_32;
1248 if (TARGET_64BIT)
1249 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1251 if (ix86_asm_string != 0)
1253 if (!strcmp (ix86_asm_string, "intel"))
1254 ix86_asm_dialect = ASM_INTEL;
1255 else if (!strcmp (ix86_asm_string, "att"))
1256 ix86_asm_dialect = ASM_ATT;
1257 else
1258 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1260 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1261 error ("code model `%s' not supported in the %s bit mode",
1262 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1263 if (ix86_cmodel == CM_LARGE)
1264 sorry ("code model `large' not supported yet");
1265 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1266 sorry ("%i-bit mode not compiled in",
1267 (target_flags & MASK_64BIT) ? 64 : 32);
1269 for (i = 0; i < pta_size; i++)
1270 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1272 ix86_arch = processor_alias_table[i].processor;
1273 /* Default cpu tuning to the architecture. */
1274 ix86_tune = ix86_arch;
1275 if (processor_alias_table[i].flags & PTA_MMX
1276 && !(target_flags_explicit & MASK_MMX))
1277 target_flags |= MASK_MMX;
1278 if (processor_alias_table[i].flags & PTA_3DNOW
1279 && !(target_flags_explicit & MASK_3DNOW))
1280 target_flags |= MASK_3DNOW;
1281 if (processor_alias_table[i].flags & PTA_3DNOW_A
1282 && !(target_flags_explicit & MASK_3DNOW_A))
1283 target_flags |= MASK_3DNOW_A;
1284 if (processor_alias_table[i].flags & PTA_SSE
1285 && !(target_flags_explicit & MASK_SSE))
1286 target_flags |= MASK_SSE;
1287 if (processor_alias_table[i].flags & PTA_SSE2
1288 && !(target_flags_explicit & MASK_SSE2))
1289 target_flags |= MASK_SSE2;
1290 if (processor_alias_table[i].flags & PTA_SSE3
1291 && !(target_flags_explicit & MASK_SSE3))
1292 target_flags |= MASK_SSE3;
1293 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1294 x86_prefetch_sse = true;
1295 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1297 if (ix86_tune_defaulted)
1299 ix86_tune_string = "x86-64";
1300 for (i = 0; i < pta_size; i++)
1301 if (! strcmp (ix86_tune_string,
1302 processor_alias_table[i].name))
1303 break;
1304 ix86_tune = processor_alias_table[i].processor;
1306 else
1307 error ("CPU you selected does not support x86-64 "
1308 "instruction set");
1310 break;
1313 if (i == pta_size)
1314 error ("bad value (%s) for -march= switch", ix86_arch_string);
1316 for (i = 0; i < pta_size; i++)
1317 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1319 ix86_tune = processor_alias_table[i].processor;
1320 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1321 error ("CPU you selected does not support x86-64 instruction set");
1323 /* Intel CPUs have always interpreted SSE prefetch instructions as
1324 NOPs; so, we can enable SSE prefetch instructions even when
1325 -mtune (rather than -march) points us to a processor that has them.
1326 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1327 higher processors. */
1328 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1329 x86_prefetch_sse = true;
1330 break;
1332 if (i == pta_size)
1333 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1335 if (optimize_size)
1336 ix86_cost = &size_cost;
1337 else
1338 ix86_cost = processor_target_table[ix86_tune].cost;
1339 target_flags |= processor_target_table[ix86_tune].target_enable;
1340 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1342 /* Arrange to set up i386_stack_locals for all functions. */
1343 init_machine_status = ix86_init_machine_status;
1345 /* Validate -mregparm= value. */
1346 if (ix86_regparm_string)
1348 i = atoi (ix86_regparm_string);
1349 if (i < 0 || i > REGPARM_MAX)
1350 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1351 else
1352 ix86_regparm = i;
1354 else
1355 if (TARGET_64BIT)
1356 ix86_regparm = REGPARM_MAX;
1358 /* If the user has provided any of the -malign-* options,
1359 warn and use that value only if -falign-* is not set.
1360 Remove this code in GCC 3.2 or later. */
1361 if (ix86_align_loops_string)
1363 warning ("-malign-loops is obsolete, use -falign-loops");
1364 if (align_loops == 0)
1366 i = atoi (ix86_align_loops_string);
1367 if (i < 0 || i > MAX_CODE_ALIGN)
1368 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1369 else
1370 align_loops = 1 << i;
1374 if (ix86_align_jumps_string)
1376 warning ("-malign-jumps is obsolete, use -falign-jumps");
1377 if (align_jumps == 0)
1379 i = atoi (ix86_align_jumps_string);
1380 if (i < 0 || i > MAX_CODE_ALIGN)
1381 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1382 else
1383 align_jumps = 1 << i;
1387 if (ix86_align_funcs_string)
1389 warning ("-malign-functions is obsolete, use -falign-functions");
1390 if (align_functions == 0)
1392 i = atoi (ix86_align_funcs_string);
1393 if (i < 0 || i > MAX_CODE_ALIGN)
1394 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1395 else
1396 align_functions = 1 << i;
1400 /* Default align_* from the processor table. */
1401 if (align_loops == 0)
1403 align_loops = processor_target_table[ix86_tune].align_loop;
1404 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1406 if (align_jumps == 0)
1408 align_jumps = processor_target_table[ix86_tune].align_jump;
1409 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1411 if (align_functions == 0)
1413 align_functions = processor_target_table[ix86_tune].align_func;
1416 /* Validate -mpreferred-stack-boundary= value, or provide default.
1417 The default of 128 bits is for Pentium III's SSE __m128, but we
1418 don't want additional code to keep the stack aligned when
1419 optimizing for code size. */
1420 ix86_preferred_stack_boundary = (optimize_size
1421 ? TARGET_64BIT ? 128 : 32
1422 : 128);
1423 if (ix86_preferred_stack_boundary_string)
1425 i = atoi (ix86_preferred_stack_boundary_string);
1426 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1427 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1428 TARGET_64BIT ? 4 : 2);
1429 else
1430 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
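/* Worked example: -mpreferred-stack-boundary=4 gives (1 << 4) * 8 = 128
   bits, i.e. a 16-byte aligned stack (BITS_PER_UNIT is 8 here); 4 is also
   the minimum accepted in 64-bit mode per the check above.  */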
1433 /* Validate -mbranch-cost= value, or provide default. */
1434 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1435 if (ix86_branch_cost_string)
1437 i = atoi (ix86_branch_cost_string);
1438 if (i < 0 || i > 5)
1439 error ("-mbranch-cost=%d is not between 0 and 5", i);
1440 else
1441 ix86_branch_cost = i;
1444 if (ix86_tls_dialect_string)
1446 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1447 ix86_tls_dialect = TLS_DIALECT_GNU;
1448 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1449 ix86_tls_dialect = TLS_DIALECT_SUN;
1450 else
1451 error ("bad value (%s) for -mtls-dialect= switch",
1452 ix86_tls_dialect_string);
1455 /* Keep nonleaf frame pointers. */
1456 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1457 flag_omit_frame_pointer = 1;
1459 /* If we're doing fast math, we don't care about comparison order
1460 wrt NaNs. This lets us use a shorter comparison sequence. */
1461 if (flag_unsafe_math_optimizations)
1462 target_flags &= ~MASK_IEEE_FP;
1464 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1465 since the insns won't need emulation. */
1466 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1467 target_flags &= ~MASK_NO_FANCY_MATH_387;
1469 /* Turn on SSE2 builtins for -msse3. */
1470 if (TARGET_SSE3)
1471 target_flags |= MASK_SSE2;
1473 /* Turn on SSE builtins for -msse2. */
1474 if (TARGET_SSE2)
1475 target_flags |= MASK_SSE;
1477 if (TARGET_64BIT)
1479 if (TARGET_ALIGN_DOUBLE)
1480 error ("-malign-double makes no sense in the 64bit mode");
1481 if (TARGET_RTD)
1482 error ("-mrtd calling convention not supported in the 64bit mode");
1483 /* Enable by default the SSE and MMX builtins. */
1484 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1485 ix86_fpmath = FPMATH_SSE;
1487 else
1489 ix86_fpmath = FPMATH_387;
1490 /* The i386 ABI does not specify a red zone. It still makes sense to use one
1491 when the programmer takes care to keep the stack from being destroyed. */
1492 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1493 target_flags |= MASK_NO_RED_ZONE;
1496 if (ix86_fpmath_string != 0)
1498 if (! strcmp (ix86_fpmath_string, "387"))
1499 ix86_fpmath = FPMATH_387;
1500 else if (! strcmp (ix86_fpmath_string, "sse"))
1502 if (!TARGET_SSE)
1504 warning ("SSE instruction set disabled, using 387 arithmetics");
1505 ix86_fpmath = FPMATH_387;
1507 else
1508 ix86_fpmath = FPMATH_SSE;
1510 else if (! strcmp (ix86_fpmath_string, "387,sse")
1511 || ! strcmp (ix86_fpmath_string, "sse,387"))
1513 if (!TARGET_SSE)
1515 warning ("SSE instruction set disabled, using 387 arithmetics");
1516 ix86_fpmath = FPMATH_387;
1518 else if (!TARGET_80387)
1520 warning ("387 instruction set disabled, using SSE arithmetics");
1521 ix86_fpmath = FPMATH_SSE;
1523 else
1524 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1526 else
1527 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1530 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1531 on by -msse. */
1532 if (TARGET_SSE)
1534 target_flags |= MASK_MMX;
1535 x86_prefetch_sse = true;
1538 /* If it has 3DNow!, it also has MMX, so MMX is also turned on by -m3dnow. */
1539 if (TARGET_3DNOW)
1541 target_flags |= MASK_MMX;
1542 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1543 extensions it adds. */
1544 if (x86_3dnow_a & (1 << ix86_arch))
1545 target_flags |= MASK_3DNOW_A;
1547 if ((x86_accumulate_outgoing_args & TUNEMASK)
1548 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1549 && !optimize_size)
1550 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1552 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1554 char *p;
1555 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1556 p = strchr (internal_label_prefix, 'X');
1557 internal_label_prefix_len = p - internal_label_prefix;
1558 *p = '\0';
1562 void
1563 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1565 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1566 make the problem with not enough registers even worse. */
1567 #ifdef INSN_SCHEDULING
1568 if (level > 1)
1569 flag_schedule_insns = 0;
1570 #endif
1572 /* The default values of these switches depend on TARGET_64BIT,
1573 which is not known at this moment. Mark these values with 2 and
1574 let the user override them. In case there is no command line option
1575 specifying them, we will set the defaults in override_options. */
1576 if (optimize >= 1)
1577 flag_omit_frame_pointer = 2;
1578 flag_pcc_struct_return = 2;
1579 flag_asynchronous_unwind_tables = 2;
1582 /* Table of valid machine attributes. */
1583 const struct attribute_spec ix86_attribute_table[] =
1585 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1586 /* Stdcall attribute says callee is responsible for popping arguments
1587 if they are not variable. */
1588 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1589 /* Fastcall attribute says callee is responsible for popping arguments
1590 if they are not variable. */
1591 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1592 /* Cdecl attribute says the callee is a normal C declaration */
1593 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1594 /* Regparm attribute specifies how many integer arguments are to be
1595 passed in registers. */
1596 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1597 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1598 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
1599 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
1600 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1601 #endif
1602 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1603 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1604 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1605 SUBTARGET_ATTRIBUTE_TABLE,
1606 #endif
1607 { NULL, 0, 0, false, false, false, NULL }
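/* Illustrative sketch, not part of GCC: how user code requests the calling
   conventions listed in the table above.  The function names are made up.  */
#if 0
/* Callee pops its fixed arguments (see ix86_return_pops_args below).  */
int __attribute__ ((stdcall)) win_callback (int a, int b);

/* First two integer arguments arrive in %ecx and %edx.  */
int __attribute__ ((fastcall)) hot_path (int a, int b);

/* Up to three integer arguments passed in %eax, %edx and %ecx.  */
int __attribute__ ((regparm (3))) small_helper (int a, int b, int c);

/* Keep the default convention even under -mrtd.  */
int __attribute__ ((cdecl)) printf_like (const char *fmt, ...);
#endif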
1610 /* Decide whether we can make a sibling call to a function. DECL is the
1611 declaration of the function being targeted by the call and EXP is the
1612 CALL_EXPR representing the call. */
1614 static bool
1615 ix86_function_ok_for_sibcall (tree decl, tree exp)
1617 /* If we are generating position-independent code, we cannot sibcall
1618 optimize any indirect call, or a direct call to a global function,
1619 as the PLT requires %ebx be live. */
1620 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1621 return false;
1623 /* If we are returning floats on the 80387 register stack, we cannot
1624 make a sibcall from a function that doesn't return a float to a
1625 function that does or, conversely, from a function that does return
1626 a float to a function that doesn't; the necessary stack adjustment
1627 would not be executed. */
1628 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1629 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1630 return false;
1632 /* If this call is indirect, we'll need to be able to use a call-clobbered
1633 register for the address of the target function. Make sure that all
1634 such registers are not used for passing parameters. */
1635 if (!decl && !TARGET_64BIT)
1637 tree type;
1639 /* We're looking at the CALL_EXPR, we need the type of the function. */
1640 type = TREE_OPERAND (exp, 0); /* pointer expression */
1641 type = TREE_TYPE (type); /* pointer type */
1642 type = TREE_TYPE (type); /* function type */
1644 if (ix86_function_regparm (type, NULL) >= 3)
1646 /* ??? Need to count the actual number of registers to be used,
1647 not the possible number of registers. Fix later. */
1648 return false;
1652 /* Otherwise okay. That also includes certain types of indirect calls. */
1653 return true;
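/* Illustrative sketch, not part of GCC: a call in tail position that the
   test above rejects on ia32 with -fPIC, because a call to a global
   function goes through the PLT and therefore needs %ebx live.  Names are
   hypothetical.  */
#if 0
extern int global_worker (int);

int
wrapper (int x)
{
  return global_worker (x + 1);   /* not turned into a sibcall under -fPIC */
}
#endif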
1656 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1657 arguments as in struct attribute_spec.handler. */
1658 static tree
1659 ix86_handle_cdecl_attribute (tree *node, tree name,
1660 tree args ATTRIBUTE_UNUSED,
1661 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1663 if (TREE_CODE (*node) != FUNCTION_TYPE
1664 && TREE_CODE (*node) != METHOD_TYPE
1665 && TREE_CODE (*node) != FIELD_DECL
1666 && TREE_CODE (*node) != TYPE_DECL)
1668 warning ("`%s' attribute only applies to functions",
1669 IDENTIFIER_POINTER (name));
1670 *no_add_attrs = true;
1672 else
1674 if (is_attribute_p ("fastcall", name))
1676 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1678 error ("fastcall and stdcall attributes are not compatible");
1680 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1682 error ("fastcall and regparm attributes are not compatible");
1685 else if (is_attribute_p ("stdcall", name))
1687 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1689 error ("fastcall and stdcall attributes are not compatible");
1694 if (TARGET_64BIT)
1696 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1697 *no_add_attrs = true;
1700 return NULL_TREE;
1703 /* Handle a "regparm" attribute;
1704 arguments as in struct attribute_spec.handler. */
1705 static tree
1706 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1707 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1709 if (TREE_CODE (*node) != FUNCTION_TYPE
1710 && TREE_CODE (*node) != METHOD_TYPE
1711 && TREE_CODE (*node) != FIELD_DECL
1712 && TREE_CODE (*node) != TYPE_DECL)
1714 warning ("`%s' attribute only applies to functions",
1715 IDENTIFIER_POINTER (name));
1716 *no_add_attrs = true;
1718 else
1720 tree cst;
1722 cst = TREE_VALUE (args);
1723 if (TREE_CODE (cst) != INTEGER_CST)
1725 warning ("`%s' attribute requires an integer constant argument",
1726 IDENTIFIER_POINTER (name));
1727 *no_add_attrs = true;
1729 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1731 warning ("argument to `%s' attribute larger than %d",
1732 IDENTIFIER_POINTER (name), REGPARM_MAX);
1733 *no_add_attrs = true;
1736 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1738 error ("fastcall and regparm attributes are not compatible");
1742 return NULL_TREE;
1745 /* Return 0 if the attributes for two types are incompatible, 1 if they
1746 are compatible, and 2 if they are nearly compatible (which causes a
1747 warning to be generated). */
1749 static int
1750 ix86_comp_type_attributes (tree type1, tree type2)
1752 /* Check for mismatch of non-default calling convention. */
1753 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1755 if (TREE_CODE (type1) != FUNCTION_TYPE)
1756 return 1;
1758 /* Check for mismatched fastcall types */
1759 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1760 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1761 return 0;
1763 /* Check for mismatched return types (cdecl vs stdcall). */
1764 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1765 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1766 return 0;
1767 if (ix86_function_regparm (type1, NULL)
1768 != ix86_function_regparm (type2, NULL))
1769 return 0;
1770 return 1;
1773 /* Return the regparm value for a function with the indicated TYPE and DECL.
1774 DECL may be NULL when calling the function indirectly
1775 or considering a libcall. */
1777 static int
1778 ix86_function_regparm (tree type, tree decl)
1780 tree attr;
1781 int regparm = ix86_regparm;
1782 bool user_convention = false;
1784 if (!TARGET_64BIT)
1786 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1787 if (attr)
1789 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1790 user_convention = true;
1793 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1795 regparm = 2;
1796 user_convention = true;
1799 /* Use register calling convention for local functions when possible. */
1800 if (!TARGET_64BIT && !user_convention && decl
1801 && flag_unit_at_a_time && !profile_flag)
1803 struct cgraph_local_info *i = cgraph_local_info (decl);
1804 if (i && i->local)
1806 /* We can't use regparm(3) for nested functions as these use the
1807 static chain pointer in the third argument. */
1808 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1809 regparm = 2;
1810 else
1811 regparm = 3;
1815 return regparm;
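/* Illustrative sketch, not part of GCC: a static function that is only
   called directly is "local" in cgraph terms, so with -funit-at-a-time the
   code above may promote it to regparm(3) without any attribute (regparm(2)
   if it is nested and needs the static chain).  Names are hypothetical.  */
#if 0
static int
scale (int x, int y, int z)     /* x, y, z may arrive in %eax, %edx, %ecx */
{
  return x * y + z;
}
#endif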
1818 /* Return true if EAX is live at the start of the function. Used by
1819 ix86_expand_prologue to determine if we need special help before
1820 calling allocate_stack_worker. */
1822 static bool
1823 ix86_eax_live_at_start_p (void)
1825 /* Cheat. Don't bother working forward from ix86_function_regparm
1826 to the function type to whether an actual argument is located in
1827 eax. Instead just look at cfg info, which is still close enough
1828 to correct at this point. This gives false positives for broken
1829 functions that might use uninitialized data that happens to be
1830 allocated in eax, but who cares? */
1831 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1834 /* Value is the number of bytes of arguments automatically
1835 popped when returning from a subroutine call.
1836 FUNDECL is the declaration node of the function (as a tree),
1837 FUNTYPE is the data type of the function (as a tree),
1838 or for a library call it is an identifier node for the subroutine name.
1839 SIZE is the number of bytes of arguments passed on the stack.
1841 On the 80386, the RTD insn may be used to pop them if the number
1842 of args is fixed, but if the number is variable then the caller
1843 must pop them all. RTD can't be used for library calls now
1844 because the library is compiled with the Unix compiler.
1845 Use of RTD is a selectable option, since it is incompatible with
1846 standard Unix calling sequences. If the option is not selected,
1847 the caller must always pop the args.
1849 The attribute stdcall is equivalent to RTD on a per module basis. */
1852 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1854 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1856 /* Cdecl functions override -mrtd, and never pop the stack. */
1857 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1859 /* Stdcall and fastcall functions will pop the stack if not
1860 variable args. */
1861 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1862 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1863 rtd = 1;
1865 if (rtd
1866 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1867 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1868 == void_type_node)))
1869 return size;
1872 /* Lose any fake structure return argument if it is passed on the stack. */
1873 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1874 && !TARGET_64BIT)
1876 int nregs = ix86_function_regparm (funtype, fundecl);
1878 if (!nregs)
1879 return GET_MODE_SIZE (Pmode);
1882 return 0;
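/* Illustrative sketch, not part of GCC: for a stdcall function with fixed
   arguments, ix86_return_pops_args returns SIZE, so the callee pops its own
   8 bytes of arguments with `ret $8'; a cdecl callee uses a plain `ret' and
   leaves the stack adjustment to the caller.  Names are hypothetical.  */
#if 0
int __attribute__ ((stdcall))
sum2 (int a, int b)
{
  return a + b;                  /* epilogue ends in `ret $8' */
}

int
call_it (void)
{
  return sum2 (1, 2);            /* the caller does not pop the 8 bytes */
}
#endif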
1885 /* Argument support functions. */
1887 /* Return true when register may be used to pass function parameters. */
1888 bool
1889 ix86_function_arg_regno_p (int regno)
1891 int i;
1892 if (!TARGET_64BIT)
1893 return (regno < REGPARM_MAX
1894 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1895 if (SSE_REGNO_P (regno) && TARGET_SSE)
1896 return true;
1897 /* RAX is used as a hidden argument to varargs functions. */
1898 if (!regno)
1899 return true;
1900 for (i = 0; i < REGPARM_MAX; i++)
1901 if (regno == x86_64_int_parameter_registers[i])
1902 return true;
1903 return false;
1906 /* Return true if we do not know how to pass TYPE solely in registers. */
1908 static bool
1909 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
1911 if (must_pass_in_stack_var_size_or_pad (mode, type))
1912 return true;
1913 return (!TARGET_64BIT && type && mode == TImode);
1916 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1917 for a call to a function whose data type is FNTYPE.
1918 For a library call, FNTYPE is 0. */
1920 void
1921 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1922 tree fntype, /* tree ptr for function decl */
1923 rtx libname, /* SYMBOL_REF of library name or 0 */
1924 tree fndecl)
1926 static CUMULATIVE_ARGS zero_cum;
1927 tree param, next_param;
1929 if (TARGET_DEBUG_ARG)
1931 fprintf (stderr, "\ninit_cumulative_args (");
1932 if (fntype)
1933 fprintf (stderr, "fntype code = %s, ret code = %s",
1934 tree_code_name[(int) TREE_CODE (fntype)],
1935 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1936 else
1937 fprintf (stderr, "no fntype");
1939 if (libname)
1940 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1943 *cum = zero_cum;
1945 /* Set up the number of registers to use for passing arguments. */
1946 if (fntype)
1947 cum->nregs = ix86_function_regparm (fntype, fndecl);
1948 else
1949 cum->nregs = ix86_regparm;
1950 if (TARGET_SSE)
1951 cum->sse_nregs = SSE_REGPARM_MAX;
1952 if (TARGET_MMX)
1953 cum->mmx_nregs = MMX_REGPARM_MAX;
1954 cum->warn_sse = true;
1955 cum->warn_mmx = true;
1956 cum->maybe_vaarg = false;
1958 /* Use the ecx and edx registers if the function has the fastcall attribute. */
1959 if (fntype && !TARGET_64BIT)
1961 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1963 cum->nregs = 2;
1964 cum->fastcall = 1;
1968 /* Determine if this function has variable arguments. This is
1969 indicated by the last argument being 'void_type_node' if there
1970 are no variable arguments. If there are variable arguments, then
1971 we won't pass anything in registers in 32-bit mode. */
1973 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
1975 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1976 param != 0; param = next_param)
1978 next_param = TREE_CHAIN (param);
1979 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1981 if (!TARGET_64BIT)
1983 cum->nregs = 0;
1984 cum->sse_nregs = 0;
1985 cum->mmx_nregs = 0;
1986 cum->warn_sse = 0;
1987 cum->warn_mmx = 0;
1988 cum->fastcall = 0;
1990 cum->maybe_vaarg = true;
1994 if ((!fntype && !libname)
1995 || (fntype && !TYPE_ARG_TYPES (fntype)))
1996 cum->maybe_vaarg = 1;
1998 if (TARGET_DEBUG_ARG)
1999 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2001 return;
2004 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
2005 of this code is to classify each eightbyte of the incoming argument by register
2006 class and assign registers accordingly. */
2008 /* Return the union class of CLASS1 and CLASS2.
2009 See the x86-64 PS ABI for details. */
2011 static enum x86_64_reg_class
2012 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2014 /* Rule #1: If both classes are equal, this is the resulting class. */
2015 if (class1 == class2)
2016 return class1;
2018 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2019 the other class. */
2020 if (class1 == X86_64_NO_CLASS)
2021 return class2;
2022 if (class2 == X86_64_NO_CLASS)
2023 return class1;
2025 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2026 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2027 return X86_64_MEMORY_CLASS;
2029 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2030 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2031 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2032 return X86_64_INTEGERSI_CLASS;
2033 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2034 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2035 return X86_64_INTEGER_CLASS;
2037 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
2038 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
2039 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
2040 return X86_64_MEMORY_CLASS;
2042 /* Rule #6: Otherwise class SSE is used. */
2043 return X86_64_SSE_CLASS;
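/* Illustrative worked example, not part of GCC: classifying
   `struct { float f; int i; }' on x86-64.  Both fields fall into the first
   eightbyte; F classifies as SSESF and I as INTEGER, and by rule #4 the
   merge yields INTEGER, so the whole struct travels in one general purpose
   register.  */
#if 0
struct mixed { float f; int i; };   /* 8 bytes: one INTEGER eightbyte, e.g. %rdi */
#endif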
2046 /* Classify the argument of type TYPE and mode MODE.
2047 CLASSES will be filled by the register class used to pass each word
2048 of the operand. The number of words is returned. In case the parameter
2049 should be passed in memory, 0 is returned. As a special case for zero
2050 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2052 BIT_OFFSET is used internally for handling records and specifies the
2053 offset in bits, modulo 256, to avoid overflow cases.
2055 See the x86-64 PS ABI for details.
2058 static int
2059 classify_argument (enum machine_mode mode, tree type,
2060 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2062 HOST_WIDE_INT bytes =
2063 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2064 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2066 /* Variable sized entities are always passed/returned in memory. */
2067 if (bytes < 0)
2068 return 0;
2070 if (mode != VOIDmode
2071 && targetm.calls.must_pass_in_stack (mode, type))
2072 return 0;
2074 if (type && AGGREGATE_TYPE_P (type))
2076 int i;
2077 tree field;
2078 enum x86_64_reg_class subclasses[MAX_CLASSES];
2080 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2081 if (bytes > 16)
2082 return 0;
2084 for (i = 0; i < words; i++)
2085 classes[i] = X86_64_NO_CLASS;
2087 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2088 signal the memory class, so handle this as a special case. */
2089 if (!words)
2091 classes[0] = X86_64_NO_CLASS;
2092 return 1;
2095 /* Classify each field of record and merge classes. */
2096 if (TREE_CODE (type) == RECORD_TYPE)
2098 /* For C++ classes, first merge in the fields of the base classes. */
2099 if (TYPE_BINFO (type))
2101 tree binfo, base_binfo;
2102 int i;
2104 for (binfo = TYPE_BINFO (type), i = 0;
2105 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2107 int num;
2108 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2109 tree type = BINFO_TYPE (base_binfo);
2111 num = classify_argument (TYPE_MODE (type),
2112 type, subclasses,
2113 (offset + bit_offset) % 256);
2114 if (!num)
2115 return 0;
2116 for (i = 0; i < num; i++)
2118 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2119 classes[i + pos] =
2120 merge_classes (subclasses[i], classes[i + pos]);
2124 /* And now merge the fields of the structure. */
2125 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2127 if (TREE_CODE (field) == FIELD_DECL)
2129 int num;
2131 /* Bitfields are always classified as integer. Handle them
2132 early, since later code would consider them to be
2133 misaligned integers. */
2134 if (DECL_BIT_FIELD (field))
2136 for (i = int_bit_position (field) / 8 / 8;
2137 i < (int_bit_position (field)
2138 + tree_low_cst (DECL_SIZE (field), 0)
2139 + 63) / 8 / 8; i++)
2140 classes[i] =
2141 merge_classes (X86_64_INTEGER_CLASS,
2142 classes[i]);
2144 else
2146 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2147 TREE_TYPE (field), subclasses,
2148 (int_bit_position (field)
2149 + bit_offset) % 256);
2150 if (!num)
2151 return 0;
2152 for (i = 0; i < num; i++)
2154 int pos =
2155 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2156 classes[i + pos] =
2157 merge_classes (subclasses[i], classes[i + pos]);
2163 /* Arrays are handled as small records. */
2164 else if (TREE_CODE (type) == ARRAY_TYPE)
2166 int num;
2167 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2168 TREE_TYPE (type), subclasses, bit_offset);
2169 if (!num)
2170 return 0;
2172 /* The partial classes are now full classes. */
2173 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2174 subclasses[0] = X86_64_SSE_CLASS;
2175 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2176 subclasses[0] = X86_64_INTEGER_CLASS;
2178 for (i = 0; i < words; i++)
2179 classes[i] = subclasses[i % num];
2181 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2182 else if (TREE_CODE (type) == UNION_TYPE
2183 || TREE_CODE (type) == QUAL_UNION_TYPE)
2185 /* For C++ classes, first merge in the fields of the base classes. */
2186 if (TYPE_BINFO (type))
2188 tree binfo, base_binfo;
2189 int i;
2191 for (binfo = TYPE_BINFO (type), i = 0;
2192 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2194 int num;
2195 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2196 tree type = BINFO_TYPE (base_binfo);
2198 num = classify_argument (TYPE_MODE (type),
2199 type, subclasses,
2200 (offset + (bit_offset % 64)) % 256);
2201 if (!num)
2202 return 0;
2203 for (i = 0; i < num; i++)
2205 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2206 classes[i + pos] =
2207 merge_classes (subclasses[i], classes[i + pos]);
2211 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2213 if (TREE_CODE (field) == FIELD_DECL)
2215 int num;
2216 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2217 TREE_TYPE (field), subclasses,
2218 bit_offset);
2219 if (!num)
2220 return 0;
2221 for (i = 0; i < num; i++)
2222 classes[i] = merge_classes (subclasses[i], classes[i]);
2226 else if (TREE_CODE (type) == SET_TYPE)
2228 if (bytes <= 4)
2230 classes[0] = X86_64_INTEGERSI_CLASS;
2231 return 1;
2233 else if (bytes <= 8)
2235 classes[0] = X86_64_INTEGER_CLASS;
2236 return 1;
2238 else if (bytes <= 12)
2240 classes[0] = X86_64_INTEGER_CLASS;
2241 classes[1] = X86_64_INTEGERSI_CLASS;
2242 return 2;
2244 else
2246 classes[0] = X86_64_INTEGER_CLASS;
2247 classes[1] = X86_64_INTEGER_CLASS;
2248 return 2;
2251 else
2252 abort ();
2254 /* Final merger cleanup. */
2255 for (i = 0; i < words; i++)
2257 /* If one class is MEMORY, everything should be passed in
2258 memory. */
2259 if (classes[i] == X86_64_MEMORY_CLASS)
2260 return 0;
2262 /* The X86_64_SSEUP_CLASS should always be preceded by
2263 X86_64_SSE_CLASS. */
2264 if (classes[i] == X86_64_SSEUP_CLASS
2265 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2266 classes[i] = X86_64_SSE_CLASS;
2268 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2269 if (classes[i] == X86_64_X87UP_CLASS
2270 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2271 classes[i] = X86_64_SSE_CLASS;
2273 return words;
2276 /* Compute the alignment needed. We align all types to their natural boundaries,
2277 with the exception of XFmode, which is aligned to 64 bits. */
2278 if (mode != VOIDmode && mode != BLKmode)
2280 int mode_alignment = GET_MODE_BITSIZE (mode);
2282 if (mode == XFmode)
2283 mode_alignment = 128;
2284 else if (mode == XCmode)
2285 mode_alignment = 256;
2286 if (COMPLEX_MODE_P (mode))
2287 mode_alignment /= 2;
2288 /* Misaligned fields are always returned in memory. */
2289 if (bit_offset % mode_alignment)
2290 return 0;
2293 /* For V1xx modes, just use the base mode. */
2294 if (VECTOR_MODE_P (mode)
2295 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
2296 mode = GET_MODE_INNER (mode);
2298 /* Classification of atomic types. */
2299 switch (mode)
2301 case DImode:
2302 case SImode:
2303 case HImode:
2304 case QImode:
2305 case CSImode:
2306 case CHImode:
2307 case CQImode:
2308 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2309 classes[0] = X86_64_INTEGERSI_CLASS;
2310 else
2311 classes[0] = X86_64_INTEGER_CLASS;
2312 return 1;
2313 case CDImode:
2314 case TImode:
2315 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2316 return 2;
2317 case CTImode:
2318 return 0;
2319 case SFmode:
2320 if (!(bit_offset % 64))
2321 classes[0] = X86_64_SSESF_CLASS;
2322 else
2323 classes[0] = X86_64_SSE_CLASS;
2324 return 1;
2325 case DFmode:
2326 classes[0] = X86_64_SSEDF_CLASS;
2327 return 1;
2328 case XFmode:
2329 classes[0] = X86_64_X87_CLASS;
2330 classes[1] = X86_64_X87UP_CLASS;
2331 return 2;
2332 case TFmode:
2333 classes[0] = X86_64_SSE_CLASS;
2334 classes[1] = X86_64_SSEUP_CLASS;
2335 return 2;
2336 case SCmode:
2337 classes[0] = X86_64_SSE_CLASS;
2338 return 1;
2339 case DCmode:
2340 classes[0] = X86_64_SSEDF_CLASS;
2341 classes[1] = X86_64_SSEDF_CLASS;
2342 return 2;
2343 case XCmode:
2344 case TCmode:
2345 /* These modes are larger than 16 bytes. */
2346 return 0;
2347 case V4SFmode:
2348 case V4SImode:
2349 case V16QImode:
2350 case V8HImode:
2351 case V2DFmode:
2352 case V2DImode:
2353 classes[0] = X86_64_SSE_CLASS;
2354 classes[1] = X86_64_SSEUP_CLASS;
2355 return 2;
2356 case V2SFmode:
2357 case V2SImode:
2358 case V4HImode:
2359 case V8QImode:
2360 classes[0] = X86_64_SSE_CLASS;
2361 return 1;
2362 case BLKmode:
2363 case VOIDmode:
2364 return 0;
2365 default:
2366 if (VECTOR_MODE_P (mode))
2368 if (bytes > 16)
2369 return 0;
2370 if (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT)
2372 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2373 classes[0] = X86_64_INTEGERSI_CLASS;
2374 else
2375 classes[0] = X86_64_INTEGER_CLASS;
2376 classes[1] = X86_64_INTEGER_CLASS;
2377 return 1 + (bytes > 8);
2380 abort ();
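/* Illustrative worked example, not part of GCC: classifying
   `struct { double d; long l; }' (16 bytes, two eightbytes).  The double
   fills eightbyte 0 and classifies as SSEDF; the long fills eightbyte 1 and
   classifies as INTEGER.  classify_argument therefore returns 2 with
   classes = { SSEDF, INTEGER }, so the struct is passed with the double in
   an SSE register and the long in a general purpose register.  */
#if 0
struct pair { double d; long l; };   /* e.g. d in %xmm0, l in %rdi */
#endif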
2384 /* Examine the argument and set the number of registers required in each
2385 class. Return 0 iff the parameter should be passed in memory. */
2386 static int
2387 examine_argument (enum machine_mode mode, tree type, int in_return,
2388 int *int_nregs, int *sse_nregs)
2390 enum x86_64_reg_class class[MAX_CLASSES];
2391 int n = classify_argument (mode, type, class, 0);
2393 *int_nregs = 0;
2394 *sse_nregs = 0;
2395 if (!n)
2396 return 0;
2397 for (n--; n >= 0; n--)
2398 switch (class[n])
2400 case X86_64_INTEGER_CLASS:
2401 case X86_64_INTEGERSI_CLASS:
2402 (*int_nregs)++;
2403 break;
2404 case X86_64_SSE_CLASS:
2405 case X86_64_SSESF_CLASS:
2406 case X86_64_SSEDF_CLASS:
2407 (*sse_nregs)++;
2408 break;
2409 case X86_64_NO_CLASS:
2410 case X86_64_SSEUP_CLASS:
2411 break;
2412 case X86_64_X87_CLASS:
2413 case X86_64_X87UP_CLASS:
2414 if (!in_return)
2415 return 0;
2416 break;
2417 case X86_64_MEMORY_CLASS:
2418 abort ();
2420 return 1;
2422 /* Construct container for the argument used by GCC interface. See
2423 FUNCTION_ARG for the detailed description. */
2424 static rtx
2425 construct_container (enum machine_mode mode, tree type, int in_return,
2426 int nintregs, int nsseregs, const int * intreg,
2427 int sse_regno)
2429 enum machine_mode tmpmode;
2430 int bytes =
2431 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2432 enum x86_64_reg_class class[MAX_CLASSES];
2433 int n;
2434 int i;
2435 int nexps = 0;
2436 int needed_sseregs, needed_intregs;
2437 rtx exp[MAX_CLASSES];
2438 rtx ret;
2440 n = classify_argument (mode, type, class, 0);
2441 if (TARGET_DEBUG_ARG)
2443 if (!n)
2444 fprintf (stderr, "Memory class\n");
2445 else
2447 fprintf (stderr, "Classes:");
2448 for (i = 0; i < n; i++)
2450 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2452 fprintf (stderr, "\n");
2455 if (!n)
2456 return NULL;
2457 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2458 return NULL;
2459 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2460 return NULL;
2462 /* First construct simple cases. Avoid SCmode, since we want to use
2463 single register to pass this type. */
2464 if (n == 1 && mode != SCmode)
2465 switch (class[0])
2467 case X86_64_INTEGER_CLASS:
2468 case X86_64_INTEGERSI_CLASS:
2469 return gen_rtx_REG (mode, intreg[0]);
2470 case X86_64_SSE_CLASS:
2471 case X86_64_SSESF_CLASS:
2472 case X86_64_SSEDF_CLASS:
2473 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2474 case X86_64_X87_CLASS:
2475 return gen_rtx_REG (mode, FIRST_STACK_REG);
2476 case X86_64_NO_CLASS:
2477 /* Zero sized array, struct or class. */
2478 return NULL;
2479 default:
2480 abort ();
2482 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2483 && mode != BLKmode)
2484 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2485 if (n == 2
2486 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2487 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2488 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2489 && class[1] == X86_64_INTEGER_CLASS
2490 && (mode == CDImode || mode == TImode || mode == TFmode)
2491 && intreg[0] + 1 == intreg[1])
2492 return gen_rtx_REG (mode, intreg[0]);
2493 if (n == 4
2494 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2495 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS
2496 && mode != BLKmode)
2497 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2499 /* Otherwise figure out the entries of the PARALLEL. */
2500 for (i = 0; i < n; i++)
2502 switch (class[i])
2504 case X86_64_NO_CLASS:
2505 break;
2506 case X86_64_INTEGER_CLASS:
2507 case X86_64_INTEGERSI_CLASS:
2508 /* Merge TImodes on aligned occasions here too. */
2509 if (i * 8 + 8 > bytes)
2510 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2511 else if (class[i] == X86_64_INTEGERSI_CLASS)
2512 tmpmode = SImode;
2513 else
2514 tmpmode = DImode;
2515 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
2516 if (tmpmode == BLKmode)
2517 tmpmode = DImode;
2518 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2519 gen_rtx_REG (tmpmode, *intreg),
2520 GEN_INT (i*8));
2521 intreg++;
2522 break;
2523 case X86_64_SSESF_CLASS:
2524 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2525 gen_rtx_REG (SFmode,
2526 SSE_REGNO (sse_regno)),
2527 GEN_INT (i*8));
2528 sse_regno++;
2529 break;
2530 case X86_64_SSEDF_CLASS:
2531 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2532 gen_rtx_REG (DFmode,
2533 SSE_REGNO (sse_regno)),
2534 GEN_INT (i*8));
2535 sse_regno++;
2536 break;
2537 case X86_64_SSE_CLASS:
2538 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2539 tmpmode = TImode;
2540 else
2541 tmpmode = DImode;
2542 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2543 gen_rtx_REG (tmpmode,
2544 SSE_REGNO (sse_regno)),
2545 GEN_INT (i*8));
2546 if (tmpmode == TImode)
2547 i++;
2548 sse_regno++;
2549 break;
2550 default:
2551 abort ();
2554 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2555 for (i = 0; i < nexps; i++)
2556 XVECEXP (ret, 0, i) = exp [i];
2557 return ret;
2560 /* Update the data in CUM to advance over an argument
2561 of mode MODE and data type TYPE.
2562 (TYPE is null for libcalls where that information may not be available.) */
2564 void
2565 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2566 enum machine_mode mode, /* current arg mode */
2567 tree type, /* type of the argument or 0 if lib support */
2568 int named) /* whether or not the argument was named */
2570 int bytes =
2571 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2572 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2574 if (TARGET_DEBUG_ARG)
2575 fprintf (stderr,
2576 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2577 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
2578 if (TARGET_64BIT)
2580 int int_nregs, sse_nregs;
2581 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2582 cum->words += words;
2583 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2585 cum->nregs -= int_nregs;
2586 cum->sse_nregs -= sse_nregs;
2587 cum->regno += int_nregs;
2588 cum->sse_regno += sse_nregs;
2590 else
2591 cum->words += words;
2593 else
2595 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2596 && (!type || !AGGREGATE_TYPE_P (type)))
2598 cum->sse_words += words;
2599 cum->sse_nregs -= 1;
2600 cum->sse_regno += 1;
2601 if (cum->sse_nregs <= 0)
2603 cum->sse_nregs = 0;
2604 cum->sse_regno = 0;
2607 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2608 && (!type || !AGGREGATE_TYPE_P (type)))
2610 cum->mmx_words += words;
2611 cum->mmx_nregs -= 1;
2612 cum->mmx_regno += 1;
2613 if (cum->mmx_nregs <= 0)
2615 cum->mmx_nregs = 0;
2616 cum->mmx_regno = 0;
2619 else
2621 cum->words += words;
2622 cum->nregs -= words;
2623 cum->regno += words;
2625 if (cum->nregs <= 0)
2627 cum->nregs = 0;
2628 cum->regno = 0;
2632 return;
2635 /* Define where to put the arguments to a function.
2636 Value is zero to push the argument on the stack,
2637 or a hard register in which to store the argument.
2639 MODE is the argument's machine mode.
2640 TYPE is the data type of the argument (as a tree).
2641 This is null for libcalls where that information may
2642 not be available.
2643 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2644 the preceding args and about the function being called.
2645 NAMED is nonzero if this argument is a named parameter
2646 (otherwise it is an extra parameter matching an ellipsis). */
2649 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2650 enum machine_mode mode, /* current arg mode */
2651 tree type, /* type of the argument or 0 if lib support */
2652 int named) /* != 0 for normal args, == 0 for ... args */
2654 rtx ret = NULL_RTX;
2655 int bytes =
2656 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2657 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2658 static bool warnedsse, warnedmmx;
2660 /* To simplify the code below, represent vector types with a vector mode
2661 even if MMX/SSE are not active. */
2662 if (type
2663 && TREE_CODE (type) == VECTOR_TYPE
2664 && (bytes == 8 || bytes == 16)
2665 && GET_MODE_CLASS (TYPE_MODE (type)) != MODE_VECTOR_INT
2666 && GET_MODE_CLASS (TYPE_MODE (type)) != MODE_VECTOR_FLOAT)
2668 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2669 enum machine_mode newmode
2670 = TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
2671 ? MIN_MODE_VECTOR_FLOAT : MIN_MODE_VECTOR_INT;
2673 /* Get the mode which has this inner mode and number of units. */
2674 for (; newmode != VOIDmode; newmode = GET_MODE_WIDER_MODE (newmode))
2675 if (GET_MODE_NUNITS (newmode) == TYPE_VECTOR_SUBPARTS (type)
2676 && GET_MODE_INNER (newmode) == innermode)
2678 mode = newmode;
2679 break;
2683 /* Handle a hidden AL argument containing the number of registers for varargs
2684 x86-64 functions. For the i386 ABI just return constm1_rtx to avoid
2685 any AL settings. */
2686 if (mode == VOIDmode)
2688 if (TARGET_64BIT)
2689 return GEN_INT (cum->maybe_vaarg
2690 ? (cum->sse_nregs < 0
2691 ? SSE_REGPARM_MAX
2692 : cum->sse_regno)
2693 : -1);
2694 else
2695 return constm1_rtx;
2697 if (TARGET_64BIT)
2698 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2699 &x86_64_int_parameter_registers [cum->regno],
2700 cum->sse_regno);
2701 else
2702 switch (mode)
2704 /* For now, pass fp/complex values on the stack. */
2705 default:
2706 break;
2708 case BLKmode:
2709 if (bytes < 0)
2710 break;
2711 /* FALLTHRU */
2712 case DImode:
2713 case SImode:
2714 case HImode:
2715 case QImode:
2716 if (words <= cum->nregs)
2718 int regno = cum->regno;
2720 /* Fastcall allocates the first two DWORD (SImode) or
2721 smaller arguments to ECX and EDX. */
2722 if (cum->fastcall)
2724 if (mode == BLKmode || mode == DImode)
2725 break;
2727 /* ECX, not EAX, is the first allocated register. */
2728 if (regno == 0)
2729 regno = 2;
2731 ret = gen_rtx_REG (mode, regno);
2733 break;
2734 case TImode:
2735 case V16QImode:
2736 case V8HImode:
2737 case V4SImode:
2738 case V2DImode:
2739 case V4SFmode:
2740 case V2DFmode:
2741 if (!type || !AGGREGATE_TYPE_P (type))
2743 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2745 warnedsse = true;
2746 warning ("SSE vector argument without SSE enabled "
2747 "changes the ABI");
2749 if (cum->sse_nregs)
2750 ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
2752 break;
2753 case V8QImode:
2754 case V4HImode:
2755 case V2SImode:
2756 case V2SFmode:
2757 if (!type || !AGGREGATE_TYPE_P (type))
2759 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2761 warnedmmx = true;
2762 warning ("MMX vector argument without MMX enabled "
2763 "changes the ABI");
2765 if (cum->mmx_nregs)
2766 ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
2768 break;
2771 if (TARGET_DEBUG_ARG)
2773 fprintf (stderr,
2774 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2775 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2777 if (ret)
2778 print_simple_rtl (stderr, ret);
2779 else
2780 fprintf (stderr, ", stack");
2782 fprintf (stderr, " )\n");
2785 return ret;
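/* Illustrative sketch, not part of GCC: on ia32, a 16-byte vector argument
   is handed out in an SSE register by the code above when SSE is enabled;
   compiled without -msse the same prototype still works, but the warning
   above fires because the argument then goes to the stack and the function's
   ABI changes.  Names are hypothetical.  */
#if 0
typedef int v4si __attribute__ ((vector_size (16)));

extern v4si vadd (v4si a, v4si b);   /* with -msse: a in %xmm0, b in %xmm1 */
#endif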
2788 /* A C expression that indicates when an argument must be passed by
2789 reference. If nonzero for an argument, a copy of that argument is
2790 made in memory and a pointer to the argument is passed instead of
2791 the argument itself. The pointer is passed in whatever way is
2792 appropriate for passing a pointer to that type. */
2794 static bool
2795 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2796 enum machine_mode mode ATTRIBUTE_UNUSED,
2797 tree type, bool named ATTRIBUTE_UNUSED)
2799 if (!TARGET_64BIT)
2800 return 0;
2802 if (type && int_size_in_bytes (type) == -1)
2804 if (TARGET_DEBUG_ARG)
2805 fprintf (stderr, "function_arg_pass_by_reference\n");
2806 return 1;
2809 return 0;
2812 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument passing
2813 ABI. Only called if TARGET_SSE. */
2814 static bool
2815 contains_128bit_aligned_vector_p (tree type)
2817 enum machine_mode mode = TYPE_MODE (type);
2818 if (SSE_REG_MODE_P (mode)
2819 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2820 return true;
2821 if (TYPE_ALIGN (type) < 128)
2822 return false;
2824 if (AGGREGATE_TYPE_P (type))
2826 /* Walk the aggregates recursively. */
2827 if (TREE_CODE (type) == RECORD_TYPE
2828 || TREE_CODE (type) == UNION_TYPE
2829 || TREE_CODE (type) == QUAL_UNION_TYPE)
2831 tree field;
2833 if (TYPE_BINFO (type))
2835 tree binfo, base_binfo;
2836 int i;
2838 for (binfo = TYPE_BINFO (type), i = 0;
2839 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2840 if (contains_128bit_aligned_vector_p (BINFO_TYPE (base_binfo)))
2841 return true;
2843 /* And now check the fields of the structure. */
2844 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2846 if (TREE_CODE (field) == FIELD_DECL
2847 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2848 return true;
2851 /* Just for use if some languages pass arrays by value. */
2852 else if (TREE_CODE (type) == ARRAY_TYPE)
2854 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2855 return true;
2857 else
2858 abort ();
2860 return false;
2863 /* Gives the alignment boundary, in bits, of an argument with the
2864 specified mode and type. */
2867 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2869 int align;
2870 if (type)
2871 align = TYPE_ALIGN (type);
2872 else
2873 align = GET_MODE_ALIGNMENT (mode);
2874 if (align < PARM_BOUNDARY)
2875 align = PARM_BOUNDARY;
2876 if (!TARGET_64BIT)
2878 /* The i386 ABI defines all arguments to be 4 byte aligned. We have to
2879 make an exception for SSE modes since these require 128-bit
2880 alignment.
2882 The handling here differs from field_alignment. ICC aligns MMX
2883 arguments to 4 byte boundaries, while structure fields are aligned
2884 to 8 byte boundaries. */
2885 if (!TARGET_SSE)
2886 align = PARM_BOUNDARY;
2887 else if (!type)
2889 if (!SSE_REG_MODE_P (mode))
2890 align = PARM_BOUNDARY;
2892 else
2894 if (!contains_128bit_aligned_vector_p (type))
2895 align = PARM_BOUNDARY;
2898 if (align > 128)
2899 align = 128;
2900 return align;
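/* Illustrative sketch, not part of GCC: boundaries computed above for a few
   argument types on ia32 with -msse, in bits.  A double stays at the 32-bit
   PARM_BOUNDARY, while SSE vectors (and aggregates containing them) keep
   their 128-bit alignment.  The typedef name is made up.  */
#if 0
typedef float v4sf __attribute__ ((vector_size (16)));
/*   double                       ->  32   */
/*   v4sf                         -> 128   */
/*   struct { v4sf v; int tag; }  -> 128   */
#endif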
2903 /* Return true if REGNO is a possible register number for a function value. */
2904 bool
2905 ix86_function_value_regno_p (int regno)
2907 if (!TARGET_64BIT)
2909 return ((regno) == 0
2910 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2911 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2913 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2914 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2915 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2918 /* Define how to find the value returned by a function.
2919 VALTYPE is the data type of the value (as a tree).
2920 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2921 otherwise, FUNC is 0. */
2923 ix86_function_value (tree valtype)
2925 if (TARGET_64BIT)
2927 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2928 REGPARM_MAX, SSE_REGPARM_MAX,
2929 x86_64_int_return_registers, 0);
2930 /* For zero sized structures, construct_container returns NULL, but we need
2931 to keep the rest of the compiler happy by returning a meaningful value. */
2932 if (!ret)
2933 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2934 return ret;
2936 else
2937 return gen_rtx_REG (TYPE_MODE (valtype),
2938 ix86_value_regno (TYPE_MODE (valtype)));
2941 /* Return nonzero iff TYPE is returned in memory. */
2943 ix86_return_in_memory (tree type)
2945 int needed_intregs, needed_sseregs, size;
2946 enum machine_mode mode = TYPE_MODE (type);
2948 if (TARGET_64BIT)
2949 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2951 if (mode == BLKmode)
2952 return 1;
2954 size = int_size_in_bytes (type);
2956 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2957 return 0;
2959 if (VECTOR_MODE_P (mode) || mode == TImode)
2961 /* User-created vectors small enough to fit in EAX. */
2962 if (size < 8)
2963 return 0;
2965 /* MMX/3dNow values are returned on the stack, since we've
2966 got to EMMS/FEMMS before returning. */
2967 if (size == 8)
2968 return 1;
2970 /* SSE values are returned in XMM0, except when SSE is not enabled. */
2971 if (size == 16)
2972 return (TARGET_SSE ? 0 : 1);
2975 if (mode == XFmode)
2976 return 0;
2978 if (size > 12)
2979 return 1;
2980 return 0;
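/* Illustrative sketch, not part of GCC: what the ia32 branch above decides
   for a few return types.  An 8-byte MMX-style vector is returned in memory
   (we would have to EMMS first); a 16-byte SSE vector is returned in %xmm0
   only when SSE is enabled; long double (XFmode) always comes back in
   %st(0).  The typedefs are made up.  */
#if 0
typedef int v2si __attribute__ ((vector_size (8)));    /* memory           */
typedef int v4si __attribute__ ((vector_size (16)));   /* %xmm0 with -msse */
/* long double                                            %st(0)           */
#endif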
2983 /* When returning SSE vector types, we have a choice of either
2984 (1) being abi incompatible with a -march switch, or
2985 (2) generating an error.
2986 Given no good solution, I think the safest thing is one warning.
2987 The user won't be able to use -Werror, but....
2989 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
2990 called in response to actually generating a caller or callee that
2991 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
2992 via aggregate_value_p for general type probing from tree-ssa. */
2994 static rtx
2995 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
2997 static bool warned;
2999 if (!TARGET_SSE && type && !warned)
3001 /* Look at the return type of the function, not the function type. */
3002 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3004 if (mode == TImode
3005 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3007 warned = true;
3008 warning ("SSE vector return without SSE enabled changes the ABI");
3012 return NULL;
3015 /* Define how to find the value returned by a library function
3016 assuming the value has mode MODE. */
3018 ix86_libcall_value (enum machine_mode mode)
3020 if (TARGET_64BIT)
3022 switch (mode)
3024 case SFmode:
3025 case SCmode:
3026 case DFmode:
3027 case DCmode:
3028 case TFmode:
3029 return gen_rtx_REG (mode, FIRST_SSE_REG);
3030 case XFmode:
3031 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3032 case XCmode:
3033 case TCmode:
3034 return NULL;
3035 default:
3036 return gen_rtx_REG (mode, 0);
3039 else
3040 return gen_rtx_REG (mode, ix86_value_regno (mode));
3043 /* Given a mode, return the register to use for a return value. */
3045 static int
3046 ix86_value_regno (enum machine_mode mode)
3048 /* Floating point return values in %st(0). */
3049 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
3050 return FIRST_FLOAT_REG;
3051 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3052 we prevent this case when SSE is not available. */
3053 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3054 return FIRST_SSE_REG;
3055 /* Everything else in %eax. */
3056 return 0;
3059 /* Create the va_list data type. */
3061 static tree
3062 ix86_build_builtin_va_list (void)
3064 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3066 /* For i386 we use a plain pointer to the argument area. */
3067 if (!TARGET_64BIT)
3068 return build_pointer_type (char_type_node);
3070 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3071 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3073 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3074 unsigned_type_node);
3075 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3076 unsigned_type_node);
3077 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3078 ptr_type_node);
3079 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3080 ptr_type_node);
3082 DECL_FIELD_CONTEXT (f_gpr) = record;
3083 DECL_FIELD_CONTEXT (f_fpr) = record;
3084 DECL_FIELD_CONTEXT (f_ovf) = record;
3085 DECL_FIELD_CONTEXT (f_sav) = record;
3087 TREE_CHAIN (record) = type_decl;
3088 TYPE_NAME (record) = type_decl;
3089 TYPE_FIELDS (record) = f_gpr;
3090 TREE_CHAIN (f_gpr) = f_fpr;
3091 TREE_CHAIN (f_fpr) = f_ovf;
3092 TREE_CHAIN (f_ovf) = f_sav;
3094 layout_type (record);
3096 /* The correct type is an array type of one element. */
3097 return build_array_type (record, build_index_type (size_zero_node));
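/* Illustrative sketch, not part of GCC: the C-level equivalent of the
   x86-64 va_list type built above.  The typedef names are made up to avoid
   clashing with the real <stdarg.h> declarations.  */
#if 0
typedef struct sketch_va_list_tag
{
  unsigned int gp_offset;      /* byte offset of the next GPR in reg_save_area     */
  unsigned int fp_offset;      /* byte offset of the next SSE reg in reg_save_area */
  void *overflow_arg_area;     /* next stack-passed argument                       */
  void *reg_save_area;         /* start of the register save block                 */
} sketch_va_list_tag;

typedef sketch_va_list_tag sketch_va_list[1];   /* array of one element, as above */
#endif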
3100 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3102 static void
3103 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3104 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3105 int no_rtl)
3107 CUMULATIVE_ARGS next_cum;
3108 rtx save_area = NULL_RTX, mem;
3109 rtx label;
3110 rtx label_ref;
3111 rtx tmp_reg;
3112 rtx nsse_reg;
3113 int set;
3114 tree fntype;
3115 int stdarg_p;
3116 int i;
3118 if (!TARGET_64BIT)
3119 return;
3121 /* Indicate that we need to allocate space on the stack for the varargs save area. */
3122 ix86_save_varrargs_registers = 1;
3124 cfun->stack_alignment_needed = 128;
3126 fntype = TREE_TYPE (current_function_decl);
3127 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3128 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3129 != void_type_node));
3131 /* For varargs, we do not want to skip the dummy va_dcl argument.
3132 For stdargs, we do want to skip the last named argument. */
3133 next_cum = *cum;
3134 if (stdarg_p)
3135 function_arg_advance (&next_cum, mode, type, 1);
3137 if (!no_rtl)
3138 save_area = frame_pointer_rtx;
3140 set = get_varargs_alias_set ();
3142 for (i = next_cum.regno; i < ix86_regparm; i++)
3144 mem = gen_rtx_MEM (Pmode,
3145 plus_constant (save_area, i * UNITS_PER_WORD));
3146 set_mem_alias_set (mem, set);
3147 emit_move_insn (mem, gen_rtx_REG (Pmode,
3148 x86_64_int_parameter_registers[i]));
3151 if (next_cum.sse_nregs)
3153 /* Now emit code to save SSE registers. The AX parameter contains the number
3154 of SSE parameter registers used to call this function. We use the
3155 sse_prologue_save insn template, which produces a computed jump across
3156 the SSE saves. We need some preparation work to get this working. */
3158 label = gen_label_rtx ();
3159 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3161 /* Compute the address to jump to:
3162 label - eax*4 + nnamed_sse_arguments*4 */
3163 tmp_reg = gen_reg_rtx (Pmode);
3164 nsse_reg = gen_reg_rtx (Pmode);
3165 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3166 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3167 gen_rtx_MULT (Pmode, nsse_reg,
3168 GEN_INT (4))));
3169 if (next_cum.sse_regno)
3170 emit_move_insn
3171 (nsse_reg,
3172 gen_rtx_CONST (DImode,
3173 gen_rtx_PLUS (DImode,
3174 label_ref,
3175 GEN_INT (next_cum.sse_regno * 4))));
3176 else
3177 emit_move_insn (nsse_reg, label_ref);
3178 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3180 /* Compute the address of the memory block we save into. We always use a pointer
3181 pointing 127 bytes after the first byte to store - this is needed to keep
3182 the instruction size limited to 4 bytes. */
3183 tmp_reg = gen_reg_rtx (Pmode);
3184 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3185 plus_constant (save_area,
3186 8 * REGPARM_MAX + 127)));
3187 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3188 set_mem_alias_set (mem, set);
3189 set_mem_align (mem, BITS_PER_WORD);
3191 /* And finally do the dirty job! */
3192 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3193 GEN_INT (next_cum.sse_regno), label));
3198 /* Implement va_start. */
3200 void
3201 ix86_va_start (tree valist, rtx nextarg)
3203 HOST_WIDE_INT words, n_gpr, n_fpr;
3204 tree f_gpr, f_fpr, f_ovf, f_sav;
3205 tree gpr, fpr, ovf, sav, t;
3207 /* Only the 64-bit target needs something special. */
3208 if (!TARGET_64BIT)
3210 std_expand_builtin_va_start (valist, nextarg);
3211 return;
3214 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3215 f_fpr = TREE_CHAIN (f_gpr);
3216 f_ovf = TREE_CHAIN (f_fpr);
3217 f_sav = TREE_CHAIN (f_ovf);
3219 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3220 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3221 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3222 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3223 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3225 /* Count number of gp and fp argument registers used. */
3226 words = current_function_args_info.words;
3227 n_gpr = current_function_args_info.regno;
3228 n_fpr = current_function_args_info.sse_regno;
3230 if (TARGET_DEBUG_ARG)
3231 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3232 (int) words, (int) n_gpr, (int) n_fpr);
3234 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3235 build_int_cst (NULL_TREE, n_gpr * 8, 0));
3236 TREE_SIDE_EFFECTS (t) = 1;
3237 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3239 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3240 build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX, 0));
3241 TREE_SIDE_EFFECTS (t) = 1;
3242 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3244 /* Find the overflow area. */
3245 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3246 if (words != 0)
3247 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3248 build_int_cst (NULL_TREE, words * UNITS_PER_WORD, 0));
3249 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3250 TREE_SIDE_EFFECTS (t) = 1;
3251 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3253 /* Find the register save area.
3254 The prologue of the function saves it right above the stack frame. */
3255 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3256 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3257 TREE_SIDE_EFFECTS (t) = 1;
3258 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
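/* Illustrative worked example, not part of GCC: for a variadic function such
   as `int f (int a, double b, ...)', one integer and one SSE register are
   consumed by the named arguments, so the code above initializes
   gp_offset = 1 * 8 = 8 and fp_offset = 1 * 16 + 8 * REGPARM_MAX = 64
   (REGPARM_MAX being 6 on x86-64), while overflow_arg_area points just past
   any named arguments that were passed on the stack.  */
#if 0
int f (int a, double b, ...);   /* at va_start: gp_offset == 8, fp_offset == 64 */
#endif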
3261 /* Implement va_arg. */
3263 tree
3264 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3266 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3267 tree f_gpr, f_fpr, f_ovf, f_sav;
3268 tree gpr, fpr, ovf, sav, t;
3269 int size, rsize;
3270 tree lab_false, lab_over = NULL_TREE;
3271 tree addr, t2;
3272 rtx container;
3273 int indirect_p = 0;
3274 tree ptrtype;
3276 /* Only the 64-bit target needs something special. */
3277 if (!TARGET_64BIT)
3278 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3280 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3281 f_fpr = TREE_CHAIN (f_gpr);
3282 f_ovf = TREE_CHAIN (f_fpr);
3283 f_sav = TREE_CHAIN (f_ovf);
3285 valist = build_fold_indirect_ref (valist);
3286 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3287 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3288 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3289 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3291 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
3292 if (indirect_p)
3293 type = build_pointer_type (type);
3294 size = int_size_in_bytes (type);
3295 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3297 container = construct_container (TYPE_MODE (type), type, 0,
3298 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3300 /* Pull the value out of the saved registers ... */
3303 addr = create_tmp_var (ptr_type_node, "addr");
3304 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3306 if (container)
3308 int needed_intregs, needed_sseregs;
3309 bool need_temp;
3310 tree int_addr, sse_addr;
3312 lab_false = create_artificial_label ();
3313 lab_over = create_artificial_label ();
3315 examine_argument (TYPE_MODE (type), type, 0,
3316 &needed_intregs, &needed_sseregs);
3318 need_temp = (!REG_P (container)
3319 && ((needed_intregs && TYPE_ALIGN (type) > 64)
3320 || TYPE_ALIGN (type) > 128));
3322 /* In case we are passing a structure, verify that it is a consecutive block
3323 in the register save area. If not, we need to do moves. */
3324 if (!need_temp && !REG_P (container))
3326 /* Verify that all registers are strictly consecutive */
3327 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3329 int i;
3331 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3333 rtx slot = XVECEXP (container, 0, i);
3334 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3335 || INTVAL (XEXP (slot, 1)) != i * 16)
3336 need_temp = 1;
3339 else
3341 int i;
3343 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3345 rtx slot = XVECEXP (container, 0, i);
3346 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3347 || INTVAL (XEXP (slot, 1)) != i * 8)
3348 need_temp = 1;
3352 if (!need_temp)
3354 int_addr = addr;
3355 sse_addr = addr;
3357 else
3359 int_addr = create_tmp_var (ptr_type_node, "int_addr");
3360 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3361 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3362 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3364 /* First ensure that we fit completely in registers. */
3365 if (needed_intregs)
3367 t = build_int_cst (TREE_TYPE (gpr),
3368 (REGPARM_MAX - needed_intregs + 1) * 8, 0);
3369 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
3370 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3371 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3372 gimplify_and_add (t, pre_p);
3374 if (needed_sseregs)
3376 t = build_int_cst (TREE_TYPE (fpr),
3377 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
3378 + REGPARM_MAX * 8, 0);
3379 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
3380 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3381 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3382 gimplify_and_add (t, pre_p);
3385 /* Compute index to start of area used for integer regs. */
3386 if (needed_intregs)
3388 /* int_addr = gpr + sav; */
3389 t = build2 (PLUS_EXPR, ptr_type_node, sav, gpr);
3390 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
3391 gimplify_and_add (t, pre_p);
3393 if (needed_sseregs)
3395 /* sse_addr = fpr + sav; */
3396 t = build2 (PLUS_EXPR, ptr_type_node, sav, fpr);
3397 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
3398 gimplify_and_add (t, pre_p);
3400 if (need_temp)
3402 int i;
3403 tree temp = create_tmp_var (type, "va_arg_tmp");
3405 /* addr = &temp; */
3406 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
3407 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
3408 gimplify_and_add (t, pre_p);
3410 for (i = 0; i < XVECLEN (container, 0); i++)
3412 rtx slot = XVECEXP (container, 0, i);
3413 rtx reg = XEXP (slot, 0);
3414 enum machine_mode mode = GET_MODE (reg);
3415 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
3416 tree addr_type = build_pointer_type (piece_type);
3417 tree src_addr, src;
3418 int src_offset;
3419 tree dest_addr, dest;
3421 if (SSE_REGNO_P (REGNO (reg)))
3423 src_addr = sse_addr;
3424 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3426 else
3428 src_addr = int_addr;
3429 src_offset = REGNO (reg) * 8;
3431 src_addr = fold_convert (addr_type, src_addr);
3432 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
3433 size_int (src_offset)));
3434 src = build_fold_indirect_ref (src_addr);
3436 dest_addr = fold_convert (addr_type, addr);
3437 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
3438 size_int (INTVAL (XEXP (slot, 1)))));
3439 dest = build_fold_indirect_ref (dest_addr);
3441 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
3442 gimplify_and_add (t, pre_p);
3446 if (needed_intregs)
3448 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3449 build_int_cst (NULL_TREE, needed_intregs * 8, 0));
3450 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3451 gimplify_and_add (t, pre_p);
3453 if (needed_sseregs)
3455 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3456 build_int_cst (NULL_TREE, needed_sseregs * 16, 0));
3457 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3458 gimplify_and_add (t, pre_p);
3461 t = build1 (GOTO_EXPR, void_type_node, lab_over);
3462 gimplify_and_add (t, pre_p);
3464 t = build1 (LABEL_EXPR, void_type_node, lab_false);
3465 append_to_statement_list (t, pre_p);
3468 /* ... otherwise out of the overflow area. */
3470 /* Care for on-stack alignment if needed. */
3471 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3472 t = ovf;
3473 else
3475 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3476 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf,
3477 build_int_cst (NULL_TREE, align - 1, 0));
3478 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3479 build_int_cst (NULL_TREE, -align, -1));
3481 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
3483 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
3484 gimplify_and_add (t2, pre_p);
3486 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
3487 build_int_cst (NULL_TREE, rsize * UNITS_PER_WORD, 0));
3488 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3489 gimplify_and_add (t, pre_p);
3491 if (container)
3493 t = build1 (LABEL_EXPR, void_type_node, lab_over);
3494 append_to_statement_list (t, pre_p);
3497 ptrtype = build_pointer_type (type);
3498 addr = fold_convert (ptrtype, addr);
3500 if (indirect_p)
3501 addr = build_fold_indirect_ref (addr);
3502 return build_fold_indirect_ref (addr);
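/* Illustrative note, not part of the original source: this code assumes the
   register save area addressed through SAV consists of REGPARM_MAX 8-byte
   slots for the integer argument registers followed by SSE_REGPARM_MAX
   16-byte slots for the SSE argument registers, which is why integer pieces
   are copied from int_addr + regno*8 and SSE pieces from
   sse_addr + (regno - FIRST_SSE_REG)*16 above.  */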
3505 /* Return nonzero if OPNUM's MEM should be matched
3506 in movabs* patterns. */
3509 ix86_check_movabs (rtx insn, int opnum)
3511 rtx set, mem;
3513 set = PATTERN (insn);
3514 if (GET_CODE (set) == PARALLEL)
3515 set = XVECEXP (set, 0, 0);
3516 if (GET_CODE (set) != SET)
3517 abort ();
3518 mem = XEXP (set, opnum);
3519 while (GET_CODE (mem) == SUBREG)
3520 mem = SUBREG_REG (mem);
3521 if (GET_CODE (mem) != MEM)
3522 abort ();
3523 return (volatile_ok || !MEM_VOLATILE_P (mem));
3526 /* Initialize the table of extra 80387 mathematical constants. */
3528 static void
3529 init_ext_80387_constants (void)
3531 static const char * cst[5] =
3533 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
3534 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
3535 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
3536 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
3537 "3.1415926535897932385128089594061862044", /* 4: fldpi */
3539 int i;
3541 for (i = 0; i < 5; i++)
3543 real_from_string (&ext_80387_constants_table[i], cst[i]);
3544 /* Ensure each constant is rounded to XFmode precision. */
3545 real_convert (&ext_80387_constants_table[i],
3546 XFmode, &ext_80387_constants_table[i]);
3549 ext_80387_constants_init = 1;
3552 /* Return a nonzero code identifying the special instruction that can load
3553 the constant X, 0 if there is none, or -1 if X is not an FP CONST_DOUBLE. */
3556 standard_80387_constant_p (rtx x)
3558 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3559 return -1;
3561 if (x == CONST0_RTX (GET_MODE (x)))
3562 return 1;
3563 if (x == CONST1_RTX (GET_MODE (x)))
3564 return 2;
3566 /* For XFmode constants, try to find a special 80387 instruction when
3567 optimizing for size or on those CPUs that benefit from them. */
3568 if (GET_MODE (x) == XFmode
3569 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
3571 REAL_VALUE_TYPE r;
3572 int i;
3574 if (! ext_80387_constants_init)
3575 init_ext_80387_constants ();
3577 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3578 for (i = 0; i < 5; i++)
3579 if (real_identical (&r, &ext_80387_constants_table[i]))
3580 return i + 3;
3583 return 0;
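/* For reference (comment added for illustration): the return values of
   standard_80387_constant_p map onto the opcodes emitted by
   standard_80387_constant_opcode below:
     1 -> fldz   2 -> fld1   3 -> fldlg2   4 -> fldln2
     5 -> fldl2e 6 -> fldl2t 7 -> fldpi  */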
3586 /* Return the opcode of the special instruction to be used to load
3587 the constant X. */
3589 const char *
3590 standard_80387_constant_opcode (rtx x)
3592 switch (standard_80387_constant_p (x))
3594 case 1:
3595 return "fldz";
3596 case 2:
3597 return "fld1";
3598 case 3:
3599 return "fldlg2";
3600 case 4:
3601 return "fldln2";
3602 case 5:
3603 return "fldl2e";
3604 case 6:
3605 return "fldl2t";
3606 case 7:
3607 return "fldpi";
3609 abort ();
3612 /* Return the CONST_DOUBLE representing the 80387 constant that is
3613 loaded by the specified special instruction. The argument IDX
3614 matches the return value from standard_80387_constant_p. */
3617 standard_80387_constant_rtx (int idx)
3619 int i;
3621 if (! ext_80387_constants_init)
3622 init_ext_80387_constants ();
3624 switch (idx)
3626 case 3:
3627 case 4:
3628 case 5:
3629 case 6:
3630 case 7:
3631 i = idx - 3;
3632 break;
3634 default:
3635 abort ();
3638 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
3639 XFmode);
3642 /* Return 1 if X is an FP constant that we can load into an SSE register without using memory. */
3645 standard_sse_constant_p (rtx x)
3647 if (x == const0_rtx)
3648 return 1;
3649 return (x == CONST0_RTX (GET_MODE (x)));
3652 /* Return 1 if OP contains a symbol reference. */
3655 symbolic_reference_mentioned_p (rtx op)
3657 const char *fmt;
3658 int i;
3660 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3661 return 1;
3663 fmt = GET_RTX_FORMAT (GET_CODE (op));
3664 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3666 if (fmt[i] == 'E')
3668 int j;
3670 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3671 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3672 return 1;
3675 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3676 return 1;
3679 return 0;
3682 /* Return 1 if it is appropriate to emit `ret' instructions in the
3683 body of a function. Do this only if the epilogue is simple, needing a
3684 couple of insns. Prior to reloading, we can't tell how many registers
3685 must be saved, so return 0 then. Return 0 if there is no frame
3686 marker to de-allocate.
3688 If NON_SAVING_SETJMP is defined and true, then it is not possible
3689 for the epilogue to be simple, so return 0. This is a special case
3690 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3691 until final, but jump_optimize may need to know sooner if a
3692 `return' is OK. */
3695 ix86_can_use_return_insn_p (void)
3697 struct ix86_frame frame;
3699 #ifdef NON_SAVING_SETJMP
3700 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3701 return 0;
3702 #endif
3704 if (! reload_completed || frame_pointer_needed)
3705 return 0;
3707 /* Don't allow more than 32K bytes to be popped, since that's all
3708 we can do with one instruction. */
3709 if (current_function_pops_args
3710 && current_function_args_size >= 32768)
3711 return 0;
3713 ix86_compute_frame_layout (&frame);
3714 return frame.to_allocate == 0 && frame.nregs == 0;
3717 /* Value should be nonzero if functions must have frame pointers.
3718 Zero means the frame pointer need not be set up (and parms may
3719 be accessed via the stack pointer) in functions that seem suitable. */
3722 ix86_frame_pointer_required (void)
3724 /* If we accessed previous frames, then the generated code expects
3725 to be able to access the saved ebp value in our frame. */
3726 if (cfun->machine->accesses_prev_frame)
3727 return 1;
3729 /* Several x86 OSes need a frame pointer for other reasons,
3730 usually pertaining to setjmp. */
3731 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3732 return 1;
3734 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3735 the frame pointer by default. Turn it back on now if we've not
3736 got a leaf function. */
3737 if (TARGET_OMIT_LEAF_FRAME_POINTER
3738 && (!current_function_is_leaf))
3739 return 1;
3741 if (current_function_profile)
3742 return 1;
3744 return 0;
3747 /* Record that the current function accesses previous call frames. */
3749 void
3750 ix86_setup_frame_addresses (void)
3752 cfun->machine->accesses_prev_frame = 1;
3755 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3756 # define USE_HIDDEN_LINKONCE 1
3757 #else
3758 # define USE_HIDDEN_LINKONCE 0
3759 #endif
3761 static int pic_labels_used;
3763 /* Fills in the label name that should be used for a pc thunk for
3764 the given register. */
3766 static void
3767 get_pc_thunk_name (char name[32], unsigned int regno)
3769 if (USE_HIDDEN_LINKONCE)
3770 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3771 else
3772 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
3776 /* Emit the PC thunks used by -fpic code: each one loads its register with
3777 the return address of the caller and then returns. */
3779 void
3780 ix86_file_end (void)
3782 rtx xops[2];
3783 int regno;
3785 for (regno = 0; regno < 8; ++regno)
3787 char name[32];
3789 if (! ((pic_labels_used >> regno) & 1))
3790 continue;
3792 get_pc_thunk_name (name, regno);
3794 if (USE_HIDDEN_LINKONCE)
3796 tree decl;
3798 decl = build_decl (FUNCTION_DECL, get_identifier (name),
3799 error_mark_node);
3800 TREE_PUBLIC (decl) = 1;
3801 TREE_STATIC (decl) = 1;
3802 DECL_ONE_ONLY (decl) = 1;
3804 (*targetm.asm_out.unique_section) (decl, 0);
3805 named_section (decl, NULL, 0);
3807 (*targetm.asm_out.globalize_label) (asm_out_file, name);
3808 fputs ("\t.hidden\t", asm_out_file);
3809 assemble_name (asm_out_file, name);
3810 fputc ('\n', asm_out_file);
3811 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
3813 else
3815 text_section ();
3816 ASM_OUTPUT_LABEL (asm_out_file, name);
3819 xops[0] = gen_rtx_REG (SImode, regno);
3820 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3821 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3822 output_asm_insn ("ret", xops);
3825 if (NEED_INDICATE_EXEC_STACK)
3826 file_end_indicate_exec_stack ();
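/* A sketch of the thunk emitted above for %ebx (illustrative assembly, not
   copied from actual compiler output):
       __i686.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret
   i.e. the thunk loads the caller's return address - the address of the
   instruction following the call - into the register and returns.  */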
3829 /* Emit code for the SET_GOT patterns. */
3831 const char *
3832 output_set_got (rtx dest)
3834 rtx xops[3];
3836 xops[0] = dest;
3837 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
3839 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
3841 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
3843 if (!flag_pic)
3844 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3845 else
3846 output_asm_insn ("call\t%a2", xops);
3848 #if TARGET_MACHO
3849 /* Output the "canonical" label name ("Lxx$pb") here too. This
3850 is what will be referred to by the Mach-O PIC subsystem. */
3851 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
3852 #endif
3853 (*targetm.asm_out.internal_label) (asm_out_file, "L",
3854 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
3856 if (flag_pic)
3857 output_asm_insn ("pop{l}\t%0", xops);
3859 else
3861 char name[32];
3862 get_pc_thunk_name (name, REGNO (dest));
3863 pic_labels_used |= 1 << REGNO (dest);
3865 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
3866 xops[2] = gen_rtx_MEM (QImode, xops[2]);
3867 output_asm_insn ("call\t%X2", xops);
3870 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
3871 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
3872 else if (!TARGET_MACHO)
3873 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
3875 return "";
3878 /* Generate an "push" pattern for input ARG. */
3880 static rtx
3881 gen_push (rtx arg)
3883 return gen_rtx_SET (VOIDmode,
3884 gen_rtx_MEM (Pmode,
3885 gen_rtx_PRE_DEC (Pmode,
3886 stack_pointer_rtx)),
3887 arg);
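/* E.g. (illustrative) gen_push (gen_rtx_REG (SImode, 0)) on a 32-bit target
   builds the rtl
       (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI ax))
   which the push patterns in i386.md are expected to match.  */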
3890 /* Return >= 0 if there is an unused call-clobbered register available
3891 for the entire function. */
3893 static unsigned int
3894 ix86_select_alt_pic_regnum (void)
3896 if (current_function_is_leaf && !current_function_profile)
3898 int i;
3899 for (i = 2; i >= 0; --i)
3900 if (!regs_ever_live[i])
3901 return i;
3904 return INVALID_REGNUM;
3907 /* Return 1 if we need to save REGNO. */
3908 static int
3909 ix86_save_reg (unsigned int regno, int maybe_eh_return)
3911 if (pic_offset_table_rtx
3912 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
3913 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
3914 || current_function_profile
3915 || current_function_calls_eh_return
3916 || current_function_uses_const_pool))
3918 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
3919 return 0;
3920 return 1;
3923 if (current_function_calls_eh_return && maybe_eh_return)
3925 unsigned i;
3926 for (i = 0; ; i++)
3928 unsigned test = EH_RETURN_DATA_REGNO (i);
3929 if (test == INVALID_REGNUM)
3930 break;
3931 if (test == regno)
3932 return 1;
3936 return (regs_ever_live[regno]
3937 && !call_used_regs[regno]
3938 && !fixed_regs[regno]
3939 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
3942 /* Return number of registers to be saved on the stack. */
3944 static int
3945 ix86_nsaved_regs (void)
3947 int nregs = 0;
3948 int regno;
3950 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
3951 if (ix86_save_reg (regno, true))
3952 nregs++;
3953 return nregs;
3956 /* Return the offset between two registers, one to be eliminated, and the other
3957 its replacement, at the start of a routine. */
3959 HOST_WIDE_INT
3960 ix86_initial_elimination_offset (int from, int to)
3962 struct ix86_frame frame;
3963 ix86_compute_frame_layout (&frame);
3965 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3966 return frame.hard_frame_pointer_offset;
3967 else if (from == FRAME_POINTER_REGNUM
3968 && to == HARD_FRAME_POINTER_REGNUM)
3969 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
3970 else
3972 if (to != STACK_POINTER_REGNUM)
3973 abort ();
3974 else if (from == ARG_POINTER_REGNUM)
3975 return frame.stack_pointer_offset;
3976 else if (from != FRAME_POINTER_REGNUM)
3977 abort ();
3978 else
3979 return frame.stack_pointer_offset - frame.frame_pointer_offset;
3983 /* Fill in the ix86_frame structure describing the frame of the current function. */
3985 static void
3986 ix86_compute_frame_layout (struct ix86_frame *frame)
3988 HOST_WIDE_INT total_size;
3989 unsigned int stack_alignment_needed;
3990 HOST_WIDE_INT offset;
3991 unsigned int preferred_alignment;
3992 HOST_WIDE_INT size = get_frame_size ();
3994 frame->nregs = ix86_nsaved_regs ();
3995 total_size = size;
3997 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
3998 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4000 /* During reload iterations the number of registers saved can change.
4001 Recompute the value as needed. Do not recompute when the number of
4002 registers didn't change, as reload makes multiple calls to this function
4003 and does not expect the decision to change within a single iteration. */
4004 if (!optimize_size
4005 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4007 int count = frame->nregs;
4009 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4010 /* The fast prologue uses move instead of push to save registers. This
4011 is significantly longer, but also executes faster, as modern hardware
4012 can execute the moves in parallel but can't do that for push/pop.
4014 Be careful about choosing which prologue to emit: when a function takes
4015 many instructions to execute, we may use the slow version, as we do when
4016 the function is known to be outside a hot spot (this is known only with
4017 profile feedback). Weight the size of the function by the number of
4018 registers to save, as it is cheap to use one or two push instructions
4019 but very slow to use many of them. */
4020 if (count)
4021 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4022 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4023 || (flag_branch_probabilities
4024 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4025 cfun->machine->use_fast_prologue_epilogue = false;
4026 else
4027 cfun->machine->use_fast_prologue_epilogue
4028 = !expensive_function_p (count);
4030 if (TARGET_PROLOGUE_USING_MOVE
4031 && cfun->machine->use_fast_prologue_epilogue)
4032 frame->save_regs_using_mov = true;
4033 else
4034 frame->save_regs_using_mov = false;
4037 /* Skip return address and saved base pointer. */
4038 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4040 frame->hard_frame_pointer_offset = offset;
4042 /* Do some sanity checking of stack_alignment_needed and
4043 preferred_alignment, since the i386 port is the only one using these
4044 features, and they may break easily. */
4046 if (size && !stack_alignment_needed)
4047 abort ();
4048 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4049 abort ();
4050 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4051 abort ();
4052 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4053 abort ();
4055 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4056 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4058 /* Register save area */
4059 offset += frame->nregs * UNITS_PER_WORD;
4061 /* Va-arg area */
4062 if (ix86_save_varrargs_registers)
4064 offset += X86_64_VARARGS_SIZE;
4065 frame->va_arg_size = X86_64_VARARGS_SIZE;
4067 else
4068 frame->va_arg_size = 0;
4070 /* Align start of frame for local function. */
4071 frame->padding1 = ((offset + stack_alignment_needed - 1)
4072 & -stack_alignment_needed) - offset;
4074 offset += frame->padding1;
4076 /* Frame pointer points here. */
4077 frame->frame_pointer_offset = offset;
4079 offset += size;
4081 /* Add the outgoing arguments area. It can be skipped if we eliminated
4082 all the function calls as dead code.
4083 Skipping is, however, impossible when the function calls alloca: the
4084 alloca expander assumes that the last current_function_outgoing_args_size
4085 bytes of the stack frame are unused. */
4086 if (ACCUMULATE_OUTGOING_ARGS
4087 && (!current_function_is_leaf || current_function_calls_alloca))
4089 offset += current_function_outgoing_args_size;
4090 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4092 else
4093 frame->outgoing_arguments_size = 0;
4095 /* Align stack boundary. Only needed if we're calling another function
4096 or using alloca. */
4097 if (!current_function_is_leaf || current_function_calls_alloca)
4098 frame->padding2 = ((offset + preferred_alignment - 1)
4099 & -preferred_alignment) - offset;
4100 else
4101 frame->padding2 = 0;
4103 offset += frame->padding2;
4105 /* We've reached end of stack frame. */
4106 frame->stack_pointer_offset = offset;
4108 /* Size the prologue needs to allocate. */
4109 frame->to_allocate =
4110 (size + frame->padding1 + frame->padding2
4111 + frame->outgoing_arguments_size + frame->va_arg_size);
4113 if ((!frame->to_allocate && frame->nregs <= 1)
4114 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
4115 frame->save_regs_using_mov = false;
4117 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
4118 && current_function_is_leaf)
4120 frame->red_zone_size = frame->to_allocate;
4121 if (frame->save_regs_using_mov)
4122 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
4123 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4124 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4126 else
4127 frame->red_zone_size = 0;
4128 frame->to_allocate -= frame->red_zone_size;
4129 frame->stack_pointer_offset -= frame->red_zone_size;
4130 #if 0
4131 fprintf (stderr, "nregs: %i\n", frame->nregs);
4132 fprintf (stderr, "size: %i\n", size);
4133 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4134 fprintf (stderr, "padding1: %i\n", frame->padding1);
4135 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4136 fprintf (stderr, "padding2: %i\n", frame->padding2);
4137 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4138 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4139 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4140 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4141 frame->hard_frame_pointer_offset);
4142 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4143 #endif
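/* Illustrative summary of the layout computed above, from higher to lower
   addresses (comment added for clarity, not part of the original source):
     return address
     saved %ebp (if frame_pointer_needed)    <- hard_frame_pointer_offset
     register save area (nregs words)
     va_arg register save area
     padding1
     local variables (get_frame_size ())     <- frame_pointer_offset
     outgoing arguments
     padding2                                <- stack_pointer_offset  */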
4146 /* Emit code to save registers in the prologue. */
4148 static void
4149 ix86_emit_save_regs (void)
4151 int regno;
4152 rtx insn;
4154 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4155 if (ix86_save_reg (regno, true))
4157 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4158 RTX_FRAME_RELATED_P (insn) = 1;
4162 /* Emit code to save registers using MOV insns. The first register
4163 is saved at POINTER + OFFSET. */
4164 static void
4165 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
4167 int regno;
4168 rtx insn;
4170 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4171 if (ix86_save_reg (regno, true))
4173 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4174 Pmode, offset),
4175 gen_rtx_REG (Pmode, regno));
4176 RTX_FRAME_RELATED_P (insn) = 1;
4177 offset += UNITS_PER_WORD;
4181 /* Expand prologue or epilogue stack adjustment.
4182 The pattern exists to put a dependency on all ebp-based memory accesses.
4183 STYLE should be negative if instructions should be marked as frame related,
4184 zero if the %r11 register is live and cannot be freely used, and positive
4185 otherwise. */
4187 static void
4188 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
4190 rtx insn;
4192 if (! TARGET_64BIT)
4193 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
4194 else if (x86_64_immediate_operand (offset, DImode))
4195 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
4196 else
4198 rtx r11;
4199 /* r11 is used by indirect sibcall return as well, set before the
4200 epilogue and used after the epilogue. ATM indirect sibcall
4201 shouldn't be used together with huge frame sizes in one
4202 function because of the frame_size check in sibcall.c. */
4203 if (style == 0)
4204 abort ();
4205 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4206 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
4207 if (style < 0)
4208 RTX_FRAME_RELATED_P (insn) = 1;
4209 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
4210 offset));
4212 if (style < 0)
4213 RTX_FRAME_RELATED_P (insn) = 1;
4216 /* Expand the prologue into a bunch of separate insns. */
4218 void
4219 ix86_expand_prologue (void)
4221 rtx insn;
4222 bool pic_reg_used;
4223 struct ix86_frame frame;
4224 HOST_WIDE_INT allocate;
4226 ix86_compute_frame_layout (&frame);
4228 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4229 slower on all targets. Also sdb doesn't like it. */
4231 if (frame_pointer_needed)
4233 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4234 RTX_FRAME_RELATED_P (insn) = 1;
4236 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4237 RTX_FRAME_RELATED_P (insn) = 1;
4240 allocate = frame.to_allocate;
4242 if (!frame.save_regs_using_mov)
4243 ix86_emit_save_regs ();
4244 else
4245 allocate += frame.nregs * UNITS_PER_WORD;
4247 /* When using the red zone we may start saving registers before allocating
4248 the stack frame, saving one cycle of the prologue. */
4249 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
4250 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
4251 : stack_pointer_rtx,
4252 -frame.nregs * UNITS_PER_WORD);
4254 if (allocate == 0)
4256 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4257 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4258 GEN_INT (-allocate), -1);
4259 else
4261 /* Only valid for Win32. */
4262 rtx eax = gen_rtx_REG (SImode, 0);
4263 bool eax_live = ix86_eax_live_at_start_p ();
4265 if (TARGET_64BIT)
4266 abort ();
4268 if (eax_live)
4270 emit_insn (gen_push (eax));
4271 allocate -= 4;
4274 insn = emit_move_insn (eax, GEN_INT (allocate));
4275 RTX_FRAME_RELATED_P (insn) = 1;
4277 insn = emit_insn (gen_allocate_stack_worker (eax));
4278 RTX_FRAME_RELATED_P (insn) = 1;
4280 if (eax_live)
4282 rtx t;
4283 if (frame_pointer_needed)
4284 t = plus_constant (hard_frame_pointer_rtx,
4285 allocate
4286 - frame.to_allocate
4287 - frame.nregs * UNITS_PER_WORD);
4288 else
4289 t = plus_constant (stack_pointer_rtx, allocate);
4290 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
4294 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
4296 if (!frame_pointer_needed || !frame.to_allocate)
4297 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4298 else
4299 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4300 -frame.nregs * UNITS_PER_WORD);
4303 pic_reg_used = false;
4304 if (pic_offset_table_rtx
4305 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4306 || current_function_profile))
4308 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4310 if (alt_pic_reg_used != INVALID_REGNUM)
4311 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4313 pic_reg_used = true;
4316 if (pic_reg_used)
4318 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4320 /* Even with accurate pre-reload life analysis, we can wind up
4321 deleting all references to the pic register after reload.
4322 Consider if cross-jumping unifies two sides of a branch
4323 controlled by a comparison vs the only read from a global.
4324 In which case, allow the set_got to be deleted, though we're
4325 too late to do anything about the ebx save in the prologue. */
4326 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4329 /* Prevent function calls from being scheduled before the call to mcount.
4330 In the pic_reg_used case, make sure that the GOT load isn't deleted. */
4331 if (current_function_profile)
4332 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
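/* For a typical 32-bit function with a frame pointer, the expansion above
   yields code along these lines (illustrative, not actual compiler output):
       pushl   %ebp
       movl    %esp, %ebp
       pushl   %ebx              ; register saves, or movl stores when
       subl    $N, %esp          ;   frame.save_regs_using_mov is set
   followed by the set_got sequence when the PIC register is needed.  */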
4335 /* Emit code to restore saved registers using MOV insns. First register
4336 is restored from POINTER + OFFSET. */
4337 static void
4338 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
4339 int maybe_eh_return)
4341 int regno;
4342 rtx base_address = gen_rtx_MEM (Pmode, pointer);
4344 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4345 if (ix86_save_reg (regno, maybe_eh_return))
4347 /* Ensure that adjust_address won't be forced to produce a pointer
4348 outside the range allowed by the x86-64 instruction set. */
4349 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
4351 rtx r11;
4353 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4354 emit_move_insn (r11, GEN_INT (offset));
4355 emit_insn (gen_adddi3 (r11, r11, pointer));
4356 base_address = gen_rtx_MEM (Pmode, r11);
4357 offset = 0;
4359 emit_move_insn (gen_rtx_REG (Pmode, regno),
4360 adjust_address (base_address, Pmode, offset));
4361 offset += UNITS_PER_WORD;
4365 /* Restore function stack, frame, and registers. */
4367 void
4368 ix86_expand_epilogue (int style)
4370 int regno;
4371 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4372 struct ix86_frame frame;
4373 HOST_WIDE_INT offset;
4375 ix86_compute_frame_layout (&frame);
4377 /* Calculate start of saved registers relative to ebp. Special care
4378 must be taken for the normal return case of a function using
4379 eh_return: the eax and edx registers are marked as saved, but not
4380 restored along this path. */
4381 offset = frame.nregs;
4382 if (current_function_calls_eh_return && style != 2)
4383 offset -= 2;
4384 offset *= -UNITS_PER_WORD;
4386 /* If we're only restoring one register and sp is not valid, then
4387 use a move instruction to restore the register, since it's
4388 less work than reloading sp and popping the register.
4390 The default code results in a stack adjustment using an add/lea instruction,
4391 while this code results in a LEAVE instruction (or discrete equivalent),
4392 so it is profitable in some other cases as well, especially when there
4393 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4394 and there is exactly one register to pop. This heuristic may need some
4395 tuning in the future. */
4396 if ((!sp_valid && frame.nregs <= 1)
4397 || (TARGET_EPILOGUE_USING_MOVE
4398 && cfun->machine->use_fast_prologue_epilogue
4399 && (frame.nregs > 1 || frame.to_allocate))
4400 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4401 || (frame_pointer_needed && TARGET_USE_LEAVE
4402 && cfun->machine->use_fast_prologue_epilogue
4403 && frame.nregs == 1)
4404 || current_function_calls_eh_return)
4406 /* Restore registers. We can use ebp or esp to address the memory
4407 locations. If both are available, default to ebp, since offsets
4408 are known to be small. The only exception is when esp points directly
4409 to the end of the block of saved registers, where we may simplify the
4410 addressing mode. */
4412 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4413 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4414 frame.to_allocate, style == 2);
4415 else
4416 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4417 offset, style == 2);
4419 /* eh_return epilogues need %ecx added to the stack pointer. */
4420 if (style == 2)
4422 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4424 if (frame_pointer_needed)
4426 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4427 tmp = plus_constant (tmp, UNITS_PER_WORD);
4428 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4430 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4431 emit_move_insn (hard_frame_pointer_rtx, tmp);
4433 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
4434 const0_rtx, style);
4436 else
4438 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4439 tmp = plus_constant (tmp, (frame.to_allocate
4440 + frame.nregs * UNITS_PER_WORD));
4441 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4444 else if (!frame_pointer_needed)
4445 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4446 GEN_INT (frame.to_allocate
4447 + frame.nregs * UNITS_PER_WORD),
4448 style);
4449 /* If not an i386, mov & pop is faster than "leave". */
4450 else if (TARGET_USE_LEAVE || optimize_size
4451 || !cfun->machine->use_fast_prologue_epilogue)
4452 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4453 else
4455 pro_epilogue_adjust_stack (stack_pointer_rtx,
4456 hard_frame_pointer_rtx,
4457 const0_rtx, style);
4458 if (TARGET_64BIT)
4459 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4460 else
4461 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4464 else
4466 /* First step is to deallocate the stack frame so that we can
4467 pop the registers. */
4468 if (!sp_valid)
4470 if (!frame_pointer_needed)
4471 abort ();
4472 pro_epilogue_adjust_stack (stack_pointer_rtx,
4473 hard_frame_pointer_rtx,
4474 GEN_INT (offset), style);
4476 else if (frame.to_allocate)
4477 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4478 GEN_INT (frame.to_allocate), style);
4480 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4481 if (ix86_save_reg (regno, false))
4483 if (TARGET_64BIT)
4484 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4485 else
4486 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4488 if (frame_pointer_needed)
4490 /* Leave results in shorter dependency chains on CPUs that are
4491 able to grok it fast. */
4492 if (TARGET_USE_LEAVE)
4493 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4494 else if (TARGET_64BIT)
4495 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4496 else
4497 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4501 /* Sibcall epilogues don't want a return instruction. */
4502 if (style == 0)
4503 return;
4505 if (current_function_pops_args && current_function_args_size)
4507 rtx popc = GEN_INT (current_function_pops_args);
4509 /* i386 can only pop 64K bytes. If asked to pop more, pop the
4510 return address, do an explicit add, and jump indirectly to the
4511 caller. */
4513 if (current_function_pops_args >= 65536)
4515 rtx ecx = gen_rtx_REG (SImode, 2);
4517 /* There is no "pascal" calling convention in the 64-bit ABI. */
4518 if (TARGET_64BIT)
4519 abort ();
4521 emit_insn (gen_popsi1 (ecx));
4522 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4523 emit_jump_insn (gen_return_indirect_internal (ecx));
4525 else
4526 emit_jump_insn (gen_return_pop_internal (popc));
4528 else
4529 emit_jump_insn (gen_return_internal ());
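/* Illustrative summary, not part of the original source: the first branch
   above restores registers with moves and rebuilds %esp/%ebp, e.g.
       movl    -4(%ebp), %ebx
       leave
       ret
   while the second branch deallocates the frame and pops the registers:
       addl    $N, %esp
       popl    %ebx
       popl    %ebp              ; or "leave" when TARGET_USE_LEAVE
       ret  */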
4532 /* Reset state the current function may have changed: restore the PIC
register to its real hard register number. */
4534 static void
4535 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4536 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4538 if (pic_offset_table_rtx)
4539 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4542 /* Extract the parts of an RTL expression that is a valid memory address
4543 for an instruction. Return 0 if the structure of the address is
4544 grossly off. Return -1 if the address contains ASHIFT, so it is not
4545 strictly valid, but still used for computing the length of the lea instruction. */
4548 ix86_decompose_address (rtx addr, struct ix86_address *out)
4550 rtx base = NULL_RTX;
4551 rtx index = NULL_RTX;
4552 rtx disp = NULL_RTX;
4553 HOST_WIDE_INT scale = 1;
4554 rtx scale_rtx = NULL_RTX;
4555 int retval = 1;
4556 enum ix86_address_seg seg = SEG_DEFAULT;
4558 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
4559 base = addr;
4560 else if (GET_CODE (addr) == PLUS)
4562 rtx addends[4], op;
4563 int n = 0, i;
4565 op = addr;
4568 if (n >= 4)
4569 return 0;
4570 addends[n++] = XEXP (op, 1);
4571 op = XEXP (op, 0);
4573 while (GET_CODE (op) == PLUS);
4574 if (n >= 4)
4575 return 0;
4576 addends[n] = op;
4578 for (i = n; i >= 0; --i)
4580 op = addends[i];
4581 switch (GET_CODE (op))
4583 case MULT:
4584 if (index)
4585 return 0;
4586 index = XEXP (op, 0);
4587 scale_rtx = XEXP (op, 1);
4588 break;
4590 case UNSPEC:
4591 if (XINT (op, 1) == UNSPEC_TP
4592 && TARGET_TLS_DIRECT_SEG_REFS
4593 && seg == SEG_DEFAULT)
4594 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
4595 else
4596 return 0;
4597 break;
4599 case REG:
4600 case SUBREG:
4601 if (!base)
4602 base = op;
4603 else if (!index)
4604 index = op;
4605 else
4606 return 0;
4607 break;
4609 case CONST:
4610 case CONST_INT:
4611 case SYMBOL_REF:
4612 case LABEL_REF:
4613 if (disp)
4614 return 0;
4615 disp = op;
4616 break;
4618 default:
4619 return 0;
4623 else if (GET_CODE (addr) == MULT)
4625 index = XEXP (addr, 0); /* index*scale */
4626 scale_rtx = XEXP (addr, 1);
4628 else if (GET_CODE (addr) == ASHIFT)
4630 rtx tmp;
4632 /* We're called for lea too, which implements ashift on occasion. */
4633 index = XEXP (addr, 0);
4634 tmp = XEXP (addr, 1);
4635 if (GET_CODE (tmp) != CONST_INT)
4636 return 0;
4637 scale = INTVAL (tmp);
4638 if ((unsigned HOST_WIDE_INT) scale > 3)
4639 return 0;
4640 scale = 1 << scale;
4641 retval = -1;
4643 else
4644 disp = addr; /* displacement */
4646 /* Extract the integral value of scale. */
4647 if (scale_rtx)
4649 if (GET_CODE (scale_rtx) != CONST_INT)
4650 return 0;
4651 scale = INTVAL (scale_rtx);
4654 /* Allow the arg pointer and stack pointer as an index if there is no scaling. */
4655 if (base && index && scale == 1
4656 && (index == arg_pointer_rtx
4657 || index == frame_pointer_rtx
4658 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
4660 rtx tmp = base;
4661 base = index;
4662 index = tmp;
4665 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4666 if ((base == hard_frame_pointer_rtx
4667 || base == frame_pointer_rtx
4668 || base == arg_pointer_rtx) && !disp)
4669 disp = const0_rtx;
4671 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4672 Avoid this by transforming to [%esi+0]. */
4673 if (ix86_tune == PROCESSOR_K6 && !optimize_size
4674 && base && !index && !disp
4675 && REG_P (base)
4676 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4677 disp = const0_rtx;
4679 /* Special case: encode reg+reg instead of reg*2. */
4680 if (!base && index && scale && scale == 2)
4681 base = index, scale = 1;
4683 /* Special case: scaling cannot be encoded without base or displacement. */
4684 if (!base && !disp && index && scale != 1)
4685 disp = const0_rtx;
4687 out->base = base;
4688 out->index = index;
4689 out->disp = disp;
4690 out->scale = scale;
4691 out->seg = seg;
4693 return retval;
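/* For illustration (not part of the original source): given an address such
   as
       (plus:SI (plus:SI (mult:SI (reg:SI ax) (const_int 4)) (reg:SI bx))
                (const_int 16))
   this function returns 1 and fills OUT with base = %ebx, index = %eax,
   scale = 4, disp = (const_int 16) and seg = SEG_DEFAULT.  */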
4696 /* Return the cost of the memory address X.
4697 For i386, it is better to use a complex address than to let gcc copy
4698 the address into a reg and make a new pseudo. But not if the address
4699 requires two regs - that would mean more pseudos with longer
4700 lifetimes. */
4701 static int
4702 ix86_address_cost (rtx x)
4704 struct ix86_address parts;
4705 int cost = 1;
4707 if (!ix86_decompose_address (x, &parts))
4708 abort ();
4710 /* More complex memory references are better. */
4711 if (parts.disp && parts.disp != const0_rtx)
4712 cost--;
4713 if (parts.seg != SEG_DEFAULT)
4714 cost--;
4716 /* Attempt to minimize number of registers in the address. */
4717 if ((parts.base
4718 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4719 || (parts.index
4720 && (!REG_P (parts.index)
4721 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4722 cost++;
4724 if (parts.base
4725 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4726 && parts.index
4727 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4728 && parts.base != parts.index)
4729 cost++;
4731 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
4732 since its predecode logic can't detect the length of such instructions
4733 and they degenerate to vector decoding. Increase the cost of such
4734 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4735 to split such addresses or even refuse such addresses at all.
4737 The following addressing modes are affected:
4738 [base+scale*index]
4739 [scale*index+disp]
4740 [base+index]
4742 The first and last cases may be avoidable by explicitly coding the zero
4743 into the memory address, but I don't have an AMD K6 machine handy to
4744 check this theory. */
4746 if (TARGET_K6
4747 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4748 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4749 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4750 cost += 10;
4752 return cost;
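/* E.g. (illustrative) an address that is just the frame pointer costs 1,
   while (plus (reg bp) (const_int 8)) costs 0; lower is cheaper, so the
   more complex reference is preferred, as the comment above explains.  */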
4755 /* If X is a machine specific address (i.e. a symbol or label being
4756 referenced as a displacement from the GOT implemented using an
4757 UNSPEC), then return the base term. Otherwise return X. */
4760 ix86_find_base_term (rtx x)
4762 rtx term;
4764 if (TARGET_64BIT)
4766 if (GET_CODE (x) != CONST)
4767 return x;
4768 term = XEXP (x, 0);
4769 if (GET_CODE (term) == PLUS
4770 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4771 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4772 term = XEXP (term, 0);
4773 if (GET_CODE (term) != UNSPEC
4774 || XINT (term, 1) != UNSPEC_GOTPCREL)
4775 return x;
4777 term = XVECEXP (term, 0, 0);
4779 if (GET_CODE (term) != SYMBOL_REF
4780 && GET_CODE (term) != LABEL_REF)
4781 return x;
4783 return term;
4786 term = ix86_delegitimize_address (x);
4788 if (GET_CODE (term) != SYMBOL_REF
4789 && GET_CODE (term) != LABEL_REF)
4790 return x;
4792 return term;
4795 /* Determine if a given RTX is a valid constant. We already know this
4796 satisfies CONSTANT_P. */
4798 bool
4799 legitimate_constant_p (rtx x)
4801 rtx inner;
4803 switch (GET_CODE (x))
4805 case SYMBOL_REF:
4806 /* TLS symbols are not constant. */
4807 if (tls_symbolic_operand (x, Pmode))
4808 return false;
4809 break;
4811 case CONST:
4812 inner = XEXP (x, 0);
4814 /* Offsets of TLS symbols are never valid.
4815 Discourage CSE from creating them. */
4816 if (GET_CODE (inner) == PLUS
4817 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
4818 return false;
4820 if (GET_CODE (inner) == PLUS
4821 || GET_CODE (inner) == MINUS)
4823 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
4824 return false;
4825 inner = XEXP (inner, 0);
4828 /* Only some unspecs are valid as "constants". */
4829 if (GET_CODE (inner) == UNSPEC)
4830 switch (XINT (inner, 1))
4832 case UNSPEC_TPOFF:
4833 case UNSPEC_NTPOFF:
4834 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4835 case UNSPEC_DTPOFF:
4836 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4837 default:
4838 return false;
4840 break;
4842 default:
4843 break;
4846 /* Otherwise we handle everything else in the move patterns. */
4847 return true;
4850 /* Determine if it's legal to put X into the constant pool. This
4851 is not possible for the address of thread-local symbols, which
4852 is checked above. */
4854 static bool
4855 ix86_cannot_force_const_mem (rtx x)
4857 return !legitimate_constant_p (x);
4860 /* Determine if a given RTX is a valid constant address. */
4862 bool
4863 constant_address_p (rtx x)
4865 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
4868 /* Nonzero if the constant value X is a legitimate general operand
4869 when generating PIC code. It is given that flag_pic is on and
4870 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
4872 bool
4873 legitimate_pic_operand_p (rtx x)
4875 rtx inner;
4877 switch (GET_CODE (x))
4879 case CONST:
4880 inner = XEXP (x, 0);
4882 /* Only some unspecs are valid as "constants". */
4883 if (GET_CODE (inner) == UNSPEC)
4884 switch (XINT (inner, 1))
4886 case UNSPEC_TPOFF:
4887 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4888 default:
4889 return false;
4891 /* FALLTHRU */
4893 case SYMBOL_REF:
4894 case LABEL_REF:
4895 return legitimate_pic_address_disp_p (x);
4897 default:
4898 return true;
4902 /* Determine if a given CONST RTX is a valid memory displacement
4903 in PIC mode. */
4906 legitimate_pic_address_disp_p (rtx disp)
4908 bool saw_plus;
4910 /* In 64bit mode we can allow direct addresses of symbols and labels
4911 when they are not dynamic symbols. */
4912 if (TARGET_64BIT)
4914 /* TLS references should always be enclosed in UNSPEC. */
4915 if (tls_symbolic_operand (disp, GET_MODE (disp)))
4916 return 0;
4917 if (GET_CODE (disp) == SYMBOL_REF
4918 && ix86_cmodel == CM_SMALL_PIC
4919 && SYMBOL_REF_LOCAL_P (disp))
4920 return 1;
4921 if (GET_CODE (disp) == LABEL_REF)
4922 return 1;
4923 if (GET_CODE (disp) == CONST
4924 && GET_CODE (XEXP (disp, 0)) == PLUS)
4926 rtx op0 = XEXP (XEXP (disp, 0), 0);
4927 rtx op1 = XEXP (XEXP (disp, 0), 1);
4929 /* TLS references should always be enclosed in UNSPEC. */
4930 if (tls_symbolic_operand (op0, GET_MODE (op0)))
4931 return 0;
4932 if (((GET_CODE (op0) == SYMBOL_REF
4933 && ix86_cmodel == CM_SMALL_PIC
4934 && SYMBOL_REF_LOCAL_P (op0))
4935 || GET_CODE (op0) == LABEL_REF)
4936 && GET_CODE (op1) == CONST_INT
4937 && INTVAL (op1) < 16*1024*1024
4938 && INTVAL (op1) >= -16*1024*1024)
4939 return 1;
4942 if (GET_CODE (disp) != CONST)
4943 return 0;
4944 disp = XEXP (disp, 0);
4946 if (TARGET_64BIT)
4948 /* It is unsafe to allow PLUS expressions here; that would exceed the
4949 limited reach of GOT references. We should not need these anyway. */
4950 if (GET_CODE (disp) != UNSPEC
4951 || XINT (disp, 1) != UNSPEC_GOTPCREL)
4952 return 0;
4954 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4955 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
4956 return 0;
4957 return 1;
4960 saw_plus = false;
4961 if (GET_CODE (disp) == PLUS)
4963 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
4964 return 0;
4965 disp = XEXP (disp, 0);
4966 saw_plus = true;
4969 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
4970 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
4972 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
4973 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
4974 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
4976 const char *sym_name = XSTR (XEXP (disp, 1), 0);
4977 if (! strcmp (sym_name, "<pic base>"))
4978 return 1;
4982 if (GET_CODE (disp) != UNSPEC)
4983 return 0;
4985 switch (XINT (disp, 1))
4987 case UNSPEC_GOT:
4988 if (saw_plus)
4989 return false;
4990 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
4991 case UNSPEC_GOTOFF:
4992 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
4993 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
4994 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4995 return false;
4996 case UNSPEC_GOTTPOFF:
4997 case UNSPEC_GOTNTPOFF:
4998 case UNSPEC_INDNTPOFF:
4999 if (saw_plus)
5000 return false;
5001 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5002 case UNSPEC_NTPOFF:
5003 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5004 case UNSPEC_DTPOFF:
5005 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5008 return 0;
5011 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5012 memory address for an instruction. The MODE argument is the machine mode
5013 for the MEM expression that wants to use this address.
5015 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5016 convert common non-canonical forms to canonical form so that they will
5017 be recognized. */
5020 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5022 struct ix86_address parts;
5023 rtx base, index, disp;
5024 HOST_WIDE_INT scale;
5025 const char *reason = NULL;
5026 rtx reason_rtx = NULL_RTX;
5028 if (TARGET_DEBUG_ADDR)
5030 fprintf (stderr,
5031 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5032 GET_MODE_NAME (mode), strict);
5033 debug_rtx (addr);
5036 if (ix86_decompose_address (addr, &parts) <= 0)
5038 reason = "decomposition failed";
5039 goto report_error;
5042 base = parts.base;
5043 index = parts.index;
5044 disp = parts.disp;
5045 scale = parts.scale;
5047 /* Validate the base register.
5049 Don't allow SUBREGs here; they can lead to spill failures when the base
5050 is one word out of a two word structure, which is represented internally
5051 as a DImode int. */
5053 if (base)
5055 reason_rtx = base;
5057 if (GET_CODE (base) != REG)
5059 reason = "base is not a register";
5060 goto report_error;
5063 if (GET_MODE (base) != Pmode)
5065 reason = "base is not in Pmode";
5066 goto report_error;
5069 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
5070 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
5072 reason = "base is not valid";
5073 goto report_error;
5077 /* Validate the index register.
5079 Don't allow SUBREGs here; they can lead to spill failures when the index
5080 is one word out of a two word structure, which is represented internally
5081 as a DImode int. */
5083 if (index)
5085 reason_rtx = index;
5087 if (GET_CODE (index) != REG)
5089 reason = "index is not a register";
5090 goto report_error;
5093 if (GET_MODE (index) != Pmode)
5095 reason = "index is not in Pmode";
5096 goto report_error;
5099 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
5100 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
5102 reason = "index is not valid";
5103 goto report_error;
5107 /* Validate scale factor. */
5108 if (scale != 1)
5110 reason_rtx = GEN_INT (scale);
5111 if (!index)
5113 reason = "scale without index";
5114 goto report_error;
5117 if (scale != 2 && scale != 4 && scale != 8)
5119 reason = "scale is not a valid multiplier";
5120 goto report_error;
5124 /* Validate displacement. */
5125 if (disp)
5127 reason_rtx = disp;
5129 if (GET_CODE (disp) == CONST
5130 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5131 switch (XINT (XEXP (disp, 0), 1))
5133 case UNSPEC_GOT:
5134 case UNSPEC_GOTOFF:
5135 case UNSPEC_GOTPCREL:
5136 if (!flag_pic)
5137 abort ();
5138 goto is_legitimate_pic;
5140 case UNSPEC_GOTTPOFF:
5141 case UNSPEC_GOTNTPOFF:
5142 case UNSPEC_INDNTPOFF:
5143 case UNSPEC_NTPOFF:
5144 case UNSPEC_DTPOFF:
5145 break;
5147 default:
5148 reason = "invalid address unspec";
5149 goto report_error;
5152 else if (flag_pic && (SYMBOLIC_CONST (disp)
5153 #if TARGET_MACHO
5154 && !machopic_operand_p (disp)
5155 #endif
5158 is_legitimate_pic:
5159 if (TARGET_64BIT && (index || base))
5161 /* foo@dtpoff(%rX) is ok. */
5162 if (GET_CODE (disp) != CONST
5163 || GET_CODE (XEXP (disp, 0)) != PLUS
5164 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5165 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5166 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5167 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5169 reason = "non-constant pic memory reference";
5170 goto report_error;
5173 else if (! legitimate_pic_address_disp_p (disp))
5175 reason = "displacement is an invalid pic construct";
5176 goto report_error;
5179 /* This code used to verify that a symbolic pic displacement
5180 includes the pic_offset_table_rtx register.
5182 While this is a good idea, unfortunately these constructs may
5183 be created by the "adds using lea" optimization for incorrect
5184 code like:
5186 int a;
5187 int foo(int i)
5189 { return *(&a+i); }
5192 This code is nonsensical, but results in addressing the
5193 GOT table with a pic_offset_table_rtx base. We can't
5194 just refuse it easily, since it gets matched by the
5195 "addsi3" pattern, which later gets split to lea in case
5196 the output register differs from the input. While this
5197 could be handled by a separate addsi pattern for this case
5198 that never results in lea, disabling this test seems to be
5199 the easier and correct fix for the crash. */
5201 else if (GET_CODE (disp) != LABEL_REF
5202 && GET_CODE (disp) != CONST_INT
5203 && (GET_CODE (disp) != CONST
5204 || !legitimate_constant_p (disp))
5205 && (GET_CODE (disp) != SYMBOL_REF
5206 || !legitimate_constant_p (disp)))
5208 reason = "displacement is not constant";
5209 goto report_error;
5211 else if (TARGET_64BIT
5212 && !x86_64_immediate_operand (disp, VOIDmode))
5214 reason = "displacement is out of range";
5215 goto report_error;
5219 /* Everything looks valid. */
5220 if (TARGET_DEBUG_ADDR)
5221 fprintf (stderr, "Success.\n");
5222 return TRUE;
5224 report_error:
5225 if (TARGET_DEBUG_ADDR)
5227 fprintf (stderr, "Error: %s\n", reason);
5228 debug_rtx (reason_rtx);
5230 return FALSE;
5233 /* Return a unique alias set for the GOT. */
5235 static HOST_WIDE_INT
5236 ix86_GOT_alias_set (void)
5238 static HOST_WIDE_INT set = -1;
5239 if (set == -1)
5240 set = new_alias_set ();
5241 return set;
5244 /* Return a legitimate reference for ORIG (an address) using the
5245 register REG. If REG is 0, a new pseudo is generated.
5247 There are two types of references that must be handled:
5249 1. Global data references must load the address from the GOT, via
5250 the PIC reg. An insn is emitted to do this load, and the reg is
5251 returned.
5253 2. Static data references, constant pool addresses, and code labels
5254 compute the address as an offset from the GOT, whose base is in
5255 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
5256 differentiate them from global data objects. The returned
5257 address is the PIC reg + an unspec constant.
5259 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5260 reg also appears in the address. */
5262 static rtx
5263 legitimize_pic_address (rtx orig, rtx reg)
5265 rtx addr = orig;
5266 rtx new = orig;
5267 rtx base;
5269 #if TARGET_MACHO
5270 if (reg == 0)
5271 reg = gen_reg_rtx (Pmode);
5272 /* Use the generic Mach-O PIC machinery. */
5273 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5274 #endif
5276 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5277 new = addr;
5278 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5280 /* This symbol may be referenced via a displacement from the PIC
5281 base address (@GOTOFF). */
5283 if (reload_in_progress)
5284 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5285 if (GET_CODE (addr) == CONST)
5286 addr = XEXP (addr, 0);
5287 if (GET_CODE (addr) == PLUS)
5289 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
5290 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
5292 else
5293 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5294 new = gen_rtx_CONST (Pmode, new);
5295 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5297 if (reg != 0)
5299 emit_move_insn (reg, new);
5300 new = reg;
5303 else if (GET_CODE (addr) == SYMBOL_REF)
5305 if (TARGET_64BIT)
5307 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5308 new = gen_rtx_CONST (Pmode, new);
5309 new = gen_rtx_MEM (Pmode, new);
5310 MEM_READONLY_P (new) = 1;
5311 set_mem_alias_set (new, ix86_GOT_alias_set ());
5313 if (reg == 0)
5314 reg = gen_reg_rtx (Pmode);
5315 /* Use gen_movsi directly; otherwise the address is loaded
5316 into a register for CSE. We don't want to CSE these addresses;
5317 instead we CSE the addresses loaded from the GOT table, so skip this. */
5318 emit_insn (gen_movsi (reg, new));
5319 new = reg;
5321 else
5323 /* This symbol must be referenced via a load from the
5324 Global Offset Table (@GOT). */
5326 if (reload_in_progress)
5327 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5328 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5329 new = gen_rtx_CONST (Pmode, new);
5330 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5331 new = gen_rtx_MEM (Pmode, new);
5332 MEM_READONLY_P (new) = 1;
5333 set_mem_alias_set (new, ix86_GOT_alias_set ());
5335 if (reg == 0)
5336 reg = gen_reg_rtx (Pmode);
5337 emit_move_insn (reg, new);
5338 new = reg;
5341 else
5343 if (GET_CODE (addr) == CONST)
5345 addr = XEXP (addr, 0);
5347 /* We must match stuff we generate before. Assume the only
5348 unspecs that can get here are ours. Not that we could do
5349 anything with them anyway.... */
5350 if (GET_CODE (addr) == UNSPEC
5351 || (GET_CODE (addr) == PLUS
5352 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5353 return orig;
5354 if (GET_CODE (addr) != PLUS)
5355 abort ();
5357 if (GET_CODE (addr) == PLUS)
5359 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5361 /* Check first to see if this is a constant offset from a @GOTOFF
5362 symbol reference. */
5363 if (local_symbolic_operand (op0, Pmode)
5364 && GET_CODE (op1) == CONST_INT)
5366 if (!TARGET_64BIT)
5368 if (reload_in_progress)
5369 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5370 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5371 UNSPEC_GOTOFF);
5372 new = gen_rtx_PLUS (Pmode, new, op1);
5373 new = gen_rtx_CONST (Pmode, new);
5374 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5376 if (reg != 0)
5378 emit_move_insn (reg, new);
5379 new = reg;
5382 else
5384 if (INTVAL (op1) < -16*1024*1024
5385 || INTVAL (op1) >= 16*1024*1024)
5386 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
5389 else
5391 base = legitimize_pic_address (XEXP (addr, 0), reg);
5392 new = legitimize_pic_address (XEXP (addr, 1),
5393 base == reg ? NULL_RTX : reg);
5395 if (GET_CODE (new) == CONST_INT)
5396 new = plus_constant (base, INTVAL (new));
5397 else
5399 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5401 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5402 new = XEXP (new, 1);
5404 new = gen_rtx_PLUS (Pmode, base, new);
5409 return new;
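/* For illustration (not in the original source), on 32-bit targets a local
   symbol S is legitimized to
       (plus pic_offset_table_rtx (const (unspec [S] UNSPEC_GOTOFF)))
   while a global symbol G becomes a load of its address from the GOT:
       (mem (plus pic_offset_table_rtx (const (unspec [G] UNSPEC_GOT))))  */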
5412 /* Load the thread pointer. If TO_REG is true, force it into a register. */
5414 static rtx
5415 get_thread_pointer (int to_reg)
5417 rtx tp, reg, insn;
5419 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5420 if (!to_reg)
5421 return tp;
5423 reg = gen_reg_rtx (Pmode);
5424 insn = gen_rtx_SET (VOIDmode, reg, tp);
5425 insn = emit_insn (insn);
5427 return reg;
5430 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
5431 false if we expect this to be used for a memory address and true if
5432 we expect to load the address into a register. */
5434 static rtx
5435 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
5437 rtx dest, base, off, pic;
5438 int type;
5440 switch (model)
5442 case TLS_MODEL_GLOBAL_DYNAMIC:
5443 dest = gen_reg_rtx (Pmode);
5444 if (TARGET_64BIT)
5446 rtx rax = gen_rtx_REG (Pmode, 0), insns;
5448 start_sequence ();
5449 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
5450 insns = get_insns ();
5451 end_sequence ();
5453 emit_libcall_block (insns, dest, rax, x);
5455 else
5456 emit_insn (gen_tls_global_dynamic_32 (dest, x));
5457 break;
5459 case TLS_MODEL_LOCAL_DYNAMIC:
5460 base = gen_reg_rtx (Pmode);
5461 if (TARGET_64BIT)
5463 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
5465 start_sequence ();
5466 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
5467 insns = get_insns ();
5468 end_sequence ();
5470 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
5471 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
5472 emit_libcall_block (insns, base, rax, note);
5474 else
5475 emit_insn (gen_tls_local_dynamic_base_32 (base));
5477 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5478 off = gen_rtx_CONST (Pmode, off);
5480 return gen_rtx_PLUS (Pmode, base, off);
5482 case TLS_MODEL_INITIAL_EXEC:
5483 if (TARGET_64BIT)
5485 pic = NULL;
5486 type = UNSPEC_GOTNTPOFF;
5488 else if (flag_pic)
5490 if (reload_in_progress)
5491 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5492 pic = pic_offset_table_rtx;
5493 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
5495 else if (!TARGET_GNU_TLS)
5497 pic = gen_reg_rtx (Pmode);
5498 emit_insn (gen_set_got (pic));
5499 type = UNSPEC_GOTTPOFF;
5501 else
5503 pic = NULL;
5504 type = UNSPEC_INDNTPOFF;
5507 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
5508 off = gen_rtx_CONST (Pmode, off);
5509 if (pic)
5510 off = gen_rtx_PLUS (Pmode, pic, off);
5511 off = gen_rtx_MEM (Pmode, off);
5512 MEM_READONLY_P (off) = 1;
5513 set_mem_alias_set (off, ix86_GOT_alias_set ());
5515 if (TARGET_64BIT || TARGET_GNU_TLS)
5517 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5518 off = force_reg (Pmode, off);
5519 return gen_rtx_PLUS (Pmode, base, off);
5521 else
5523 base = get_thread_pointer (true);
5524 dest = gen_reg_rtx (Pmode);
5525 emit_insn (gen_subsi3 (dest, base, off));
5527 break;
5529 case TLS_MODEL_LOCAL_EXEC:
5530 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5531 (TARGET_64BIT || TARGET_GNU_TLS)
5532 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5533 off = gen_rtx_CONST (Pmode, off);
5535 if (TARGET_64BIT || TARGET_GNU_TLS)
5537 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5538 return gen_rtx_PLUS (Pmode, base, off);
5540 else
5542 base = get_thread_pointer (true);
5543 dest = gen_reg_rtx (Pmode);
5544 emit_insn (gen_subsi3 (dest, base, off));
5546 break;
5548 default:
5549 abort ();
5552 return dest;
5555 /* Try machine-dependent ways of modifying an illegitimate address
5556 to be legitimate. If we find one, return the new, valid address.
5557 This macro is used in only one place: `memory_address' in explow.c.
5559 OLDX is the address as it was before break_out_memory_refs was called.
5560 In some cases it is useful to look at this to decide what needs to be done.
5562 MODE and WIN are passed so that this macro can use
5563 GO_IF_LEGITIMATE_ADDRESS.
5565 It is always safe for this macro to do nothing. It exists to recognize
5566 opportunities to optimize the output.
5568 For the 80386, we handle X+REG by loading X into a register R and
5569 using R+REG. R will go in a general reg and indexing will be used.
5570 However, if REG is a broken-out memory address or multiplication,
5571 nothing needs to be done because REG can certainly go in a general reg.
5573 When -fpic is used, special handling is needed for symbolic references.
5574 See comments by legitimize_pic_address in i386.c for details. */
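/* A small worked example of the canonicalizations below: an address of the
   form (plus (reg A) (mult (reg B) (const_int 4))) has the multiply moved
   to operand 0 so it matches the base + index*scale shape the addressing
   code expects, and a left shift by a small constant is rewritten as a
   multiply so scaled-index addressing can be used.  */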
5577 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
5579 int changed = 0;
5580 unsigned log;
5582 if (TARGET_DEBUG_ADDR)
5584 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5585 GET_MODE_NAME (mode));
5586 debug_rtx (x);
5589 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
5590 if (log)
5591 return legitimize_tls_address (x, log, false);
5592 if (GET_CODE (x) == CONST
5593 && GET_CODE (XEXP (x, 0)) == PLUS
5594 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
5595 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
5597 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
5598 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
5601 if (flag_pic && SYMBOLIC_CONST (x))
5602 return legitimize_pic_address (x, 0);
5604 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5605 if (GET_CODE (x) == ASHIFT
5606 && GET_CODE (XEXP (x, 1)) == CONST_INT
5607 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5609 changed = 1;
5610 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5611 GEN_INT (1 << log));
5614 if (GET_CODE (x) == PLUS)
5616 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5618 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5619 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5620 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5622 changed = 1;
5623 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5624 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5625 GEN_INT (1 << log));
5628 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5629 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5630 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5632 changed = 1;
5633 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5634 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5635 GEN_INT (1 << log));
5638 /* Put multiply first if it isn't already. */
5639 if (GET_CODE (XEXP (x, 1)) == MULT)
5641 rtx tmp = XEXP (x, 0);
5642 XEXP (x, 0) = XEXP (x, 1);
5643 XEXP (x, 1) = tmp;
5644 changed = 1;
5647 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5648 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5649 created by virtual register instantiation, register elimination, and
5650 similar optimizations. */
5651 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5653 changed = 1;
5654 x = gen_rtx_PLUS (Pmode,
5655 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5656 XEXP (XEXP (x, 1), 0)),
5657 XEXP (XEXP (x, 1), 1));
5660 /* Canonicalize
5661 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5662 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5663 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5664 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5665 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5666 && CONSTANT_P (XEXP (x, 1)))
5668 rtx constant;
5669 rtx other = NULL_RTX;
5671 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5673 constant = XEXP (x, 1);
5674 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5676 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5678 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5679 other = XEXP (x, 1);
5681 else
5682 constant = 0;
5684 if (constant)
5686 changed = 1;
5687 x = gen_rtx_PLUS (Pmode,
5688 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5689 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5690 plus_constant (other, INTVAL (constant)));
5694 if (changed && legitimate_address_p (mode, x, FALSE))
5695 return x;
5697 if (GET_CODE (XEXP (x, 0)) == MULT)
5699 changed = 1;
5700 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5703 if (GET_CODE (XEXP (x, 1)) == MULT)
5705 changed = 1;
5706 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5709 if (changed
5710 && GET_CODE (XEXP (x, 1)) == REG
5711 && GET_CODE (XEXP (x, 0)) == REG)
5712 return x;
5714 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5716 changed = 1;
5717 x = legitimize_pic_address (x, 0);
5720 if (changed && legitimate_address_p (mode, x, FALSE))
5721 return x;
5723 if (GET_CODE (XEXP (x, 0)) == REG)
5725 rtx temp = gen_reg_rtx (Pmode);
5726 rtx val = force_operand (XEXP (x, 1), temp);
5727 if (val != temp)
5728 emit_move_insn (temp, val);
5730 XEXP (x, 1) = temp;
5731 return x;
5734 else if (GET_CODE (XEXP (x, 1)) == REG)
5736 rtx temp = gen_reg_rtx (Pmode);
5737 rtx val = force_operand (XEXP (x, 0), temp);
5738 if (val != temp)
5739 emit_move_insn (temp, val);
5741 XEXP (x, 0) = temp;
5742 return x;
5746 return x;
5749 /* Print an integer constant expression in assembler syntax. Addition
5750 and subtraction are the only arithmetic that may appear in these
5751 expressions. FILE is the stdio stream to write to, X is the rtx, and
5752 CODE is the operand print code from the output string. */
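/* For example, an operand of the form
   (const (plus (unspec [(symbol_ref "sym")] UNSPEC_GOTOFF) (const_int 4)))
   is printed as "sym@GOTOFF+4", and a bare (unspec [sym] UNSPEC_GOT)
   prints as "sym@GOT".  */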
5754 static void
5755 output_pic_addr_const (FILE *file, rtx x, int code)
5757 char buf[256];
5759 switch (GET_CODE (x))
5761 case PC:
5762 if (flag_pic)
5763 putc ('.', file);
5764 else
5765 abort ();
5766 break;
5768 case SYMBOL_REF:
5769 /* Mark the decl as referenced so that cgraph will output the function. */
5770 if (SYMBOL_REF_DECL (x))
5771 mark_decl_referenced (SYMBOL_REF_DECL (x));
5773 assemble_name (file, XSTR (x, 0));
5774 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
5775 fputs ("@PLT", file);
5776 break;
5778 case LABEL_REF:
5779 x = XEXP (x, 0);
5780 /* FALLTHRU */
5781 case CODE_LABEL:
5782 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5783 assemble_name (asm_out_file, buf);
5784 break;
5786 case CONST_INT:
5787 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5788 break;
5790 case CONST:
5791 /* This used to output parentheses around the expression,
5792 but that does not work on the 386 (either ATT or BSD assembler). */
5793 output_pic_addr_const (file, XEXP (x, 0), code);
5794 break;
5796 case CONST_DOUBLE:
5797 if (GET_MODE (x) == VOIDmode)
5799 /* We can use %d if the number is <32 bits and positive. */
5800 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5801 fprintf (file, "0x%lx%08lx",
5802 (unsigned long) CONST_DOUBLE_HIGH (x),
5803 (unsigned long) CONST_DOUBLE_LOW (x));
5804 else
5805 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5807 else
5808 /* We can't handle floating point constants;
5809 PRINT_OPERAND must handle them. */
5810 output_operand_lossage ("floating constant misused");
5811 break;
5813 case PLUS:
5814 /* Some assemblers need integer constants to appear first. */
5815 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5817 output_pic_addr_const (file, XEXP (x, 0), code);
5818 putc ('+', file);
5819 output_pic_addr_const (file, XEXP (x, 1), code);
5821 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5823 output_pic_addr_const (file, XEXP (x, 1), code);
5824 putc ('+', file);
5825 output_pic_addr_const (file, XEXP (x, 0), code);
5827 else
5828 abort ();
5829 break;
5831 case MINUS:
5832 if (!TARGET_MACHO)
5833 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5834 output_pic_addr_const (file, XEXP (x, 0), code);
5835 putc ('-', file);
5836 output_pic_addr_const (file, XEXP (x, 1), code);
5837 if (!TARGET_MACHO)
5838 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5839 break;
5841 case UNSPEC:
5842 if (XVECLEN (x, 0) != 1)
5843 abort ();
5844 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5845 switch (XINT (x, 1))
5847 case UNSPEC_GOT:
5848 fputs ("@GOT", file);
5849 break;
5850 case UNSPEC_GOTOFF:
5851 fputs ("@GOTOFF", file);
5852 break;
5853 case UNSPEC_GOTPCREL:
5854 fputs ("@GOTPCREL(%rip)", file);
5855 break;
5856 case UNSPEC_GOTTPOFF:
5857 /* FIXME: This might be @TPOFF in Sun ld too. */
5858 fputs ("@GOTTPOFF", file);
5859 break;
5860 case UNSPEC_TPOFF:
5861 fputs ("@TPOFF", file);
5862 break;
5863 case UNSPEC_NTPOFF:
5864 if (TARGET_64BIT)
5865 fputs ("@TPOFF", file);
5866 else
5867 fputs ("@NTPOFF", file);
5868 break;
5869 case UNSPEC_DTPOFF:
5870 fputs ("@DTPOFF", file);
5871 break;
5872 case UNSPEC_GOTNTPOFF:
5873 if (TARGET_64BIT)
5874 fputs ("@GOTTPOFF(%rip)", file);
5875 else
5876 fputs ("@GOTNTPOFF", file);
5877 break;
5878 case UNSPEC_INDNTPOFF:
5879 fputs ("@INDNTPOFF", file);
5880 break;
5881 default:
5882 output_operand_lossage ("invalid UNSPEC as operand");
5883 break;
5885 break;
5887 default:
5888 output_operand_lossage ("invalid expression as operand");
5892 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5893 We need to handle our special PIC relocations. */
5895 void
5896 i386_dwarf_output_addr_const (FILE *file, rtx x)
5898 #ifdef ASM_QUAD
5899 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
5900 #else
5901 if (TARGET_64BIT)
5902 abort ();
5903 fprintf (file, "%s", ASM_LONG);
5904 #endif
5905 if (flag_pic)
5906 output_pic_addr_const (file, x, '\0');
5907 else
5908 output_addr_const (file, x);
5909 fputc ('\n', file);
5912 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
5913 We need to emit DTP-relative relocations. */
5915 void
5916 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
5918 fputs (ASM_LONG, file);
5919 output_addr_const (file, x);
5920 fputs ("@DTPOFF", file);
5921 switch (size)
5923 case 4:
5924 break;
5925 case 8:
5926 fputs (", 0", file);
5927 break;
5928 default:
5929 abort ();
5933 /* In the name of slightly smaller debug output, and to cater to
5934 general assembler lossage, recognize PIC+GOTOFF and turn it back
5935 into a direct symbol reference. */
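/* E.g. the PIC expression
   (plus (reg %ebx) (const (unspec [(symbol_ref "sym")] UNSPEC_GOTOFF)))
   is turned back into plain (symbol_ref "sym"), and on x86-64 a
   (mem (const (unspec [sym] UNSPEC_GOTPCREL))) load collapses to the
   symbol itself.  */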
5937 static rtx
5938 ix86_delegitimize_address (rtx orig_x)
5940 rtx x = orig_x, y;
5942 if (GET_CODE (x) == MEM)
5943 x = XEXP (x, 0);
5945 if (TARGET_64BIT)
5947 if (GET_CODE (x) != CONST
5948 || GET_CODE (XEXP (x, 0)) != UNSPEC
5949 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
5950 || GET_CODE (orig_x) != MEM)
5951 return orig_x;
5952 return XVECEXP (XEXP (x, 0), 0, 0);
5955 if (GET_CODE (x) != PLUS
5956 || GET_CODE (XEXP (x, 1)) != CONST)
5957 return orig_x;
5959 if (GET_CODE (XEXP (x, 0)) == REG
5960 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5961 /* %ebx + GOT/GOTOFF */
5962 y = NULL;
5963 else if (GET_CODE (XEXP (x, 0)) == PLUS)
5965 /* %ebx + %reg * scale + GOT/GOTOFF */
5966 y = XEXP (x, 0);
5967 if (GET_CODE (XEXP (y, 0)) == REG
5968 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
5969 y = XEXP (y, 1);
5970 else if (GET_CODE (XEXP (y, 1)) == REG
5971 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
5972 y = XEXP (y, 0);
5973 else
5974 return orig_x;
5975 if (GET_CODE (y) != REG
5976 && GET_CODE (y) != MULT
5977 && GET_CODE (y) != ASHIFT)
5978 return orig_x;
5980 else
5981 return orig_x;
5983 x = XEXP (XEXP (x, 1), 0);
5984 if (GET_CODE (x) == UNSPEC
5985 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
5986 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
5988 if (y)
5989 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
5990 return XVECEXP (x, 0, 0);
5993 if (GET_CODE (x) == PLUS
5994 && GET_CODE (XEXP (x, 0)) == UNSPEC
5995 && GET_CODE (XEXP (x, 1)) == CONST_INT
5996 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
5997 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
5998 && GET_CODE (orig_x) != MEM)))
6000 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6001 if (y)
6002 return gen_rtx_PLUS (Pmode, y, x);
6003 return x;
6006 return orig_x;
6009 static void
6010 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6011 int fp, FILE *file)
6013 const char *suffix;
6015 if (mode == CCFPmode || mode == CCFPUmode)
6017 enum rtx_code second_code, bypass_code;
6018 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6019 if (bypass_code != NIL || second_code != NIL)
6020 abort ();
6021 code = ix86_fp_compare_code_to_integer (code);
6022 mode = CCmode;
6024 if (reverse)
6025 code = reverse_condition (code);
6027 switch (code)
6029 case EQ:
6030 suffix = "e";
6031 break;
6032 case NE:
6033 suffix = "ne";
6034 break;
6035 case GT:
6036 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6037 abort ();
6038 suffix = "g";
6039 break;
6040 case GTU:
6041 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6042 Those same assemblers have the same but opposite lossage on cmov. */
6043 if (mode != CCmode)
6044 abort ();
6045 suffix = fp ? "nbe" : "a";
6046 break;
6047 case LT:
6048 if (mode == CCNOmode || mode == CCGOCmode)
6049 suffix = "s";
6050 else if (mode == CCmode || mode == CCGCmode)
6051 suffix = "l";
6052 else
6053 abort ();
6054 break;
6055 case LTU:
6056 if (mode != CCmode)
6057 abort ();
6058 suffix = "b";
6059 break;
6060 case GE:
6061 if (mode == CCNOmode || mode == CCGOCmode)
6062 suffix = "ns";
6063 else if (mode == CCmode || mode == CCGCmode)
6064 suffix = "ge";
6065 else
6066 abort ();
6067 break;
6068 case GEU:
6069 /* ??? As above. */
6070 if (mode != CCmode)
6071 abort ();
6072 suffix = fp ? "nb" : "ae";
6073 break;
6074 case LE:
6075 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6076 abort ();
6077 suffix = "le";
6078 break;
6079 case LEU:
6080 if (mode != CCmode)
6081 abort ();
6082 suffix = "be";
6083 break;
6084 case UNORDERED:
6085 suffix = fp ? "u" : "p";
6086 break;
6087 case ORDERED:
6088 suffix = fp ? "nu" : "np";
6089 break;
6090 default:
6091 abort ();
6093 fputs (suffix, file);
6096 /* Print the name of register X to FILE based on its machine mode and number.
6097 If CODE is 'w', pretend the mode is HImode.
6098 If CODE is 'b', pretend the mode is QImode.
6099 If CODE is 'k', pretend the mode is SImode.
6100 If CODE is 'q', pretend the mode is DImode.
6101 If CODE is 'h', pretend the reg is the `high' byte register.
6102 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
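/* For instance, with X being the ax register, code 'b' selects "al",
   'h' selects "ah", 'w' "ax", 'k' "eax", and 'q' "rax" in 64-bit mode,
   each with a '%' prefix in AT&T output.  */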
6104 void
6105 print_reg (rtx x, int code, FILE *file)
6107 if (REGNO (x) == ARG_POINTER_REGNUM
6108 || REGNO (x) == FRAME_POINTER_REGNUM
6109 || REGNO (x) == FLAGS_REG
6110 || REGNO (x) == FPSR_REG)
6111 abort ();
6113 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6114 putc ('%', file);
6116 if (code == 'w' || MMX_REG_P (x))
6117 code = 2;
6118 else if (code == 'b')
6119 code = 1;
6120 else if (code == 'k')
6121 code = 4;
6122 else if (code == 'q')
6123 code = 8;
6124 else if (code == 'y')
6125 code = 3;
6126 else if (code == 'h')
6127 code = 0;
6128 else
6129 code = GET_MODE_SIZE (GET_MODE (x));
6131 /* Irritatingly, the AMD extended registers use a different naming convention
6132 from the normal registers. */
6133 if (REX_INT_REG_P (x))
6135 if (!TARGET_64BIT)
6136 abort ();
6137 switch (code)
6139 case 0:
6140 error ("extended registers have no high halves");
6141 break;
6142 case 1:
6143 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6144 break;
6145 case 2:
6146 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6147 break;
6148 case 4:
6149 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6150 break;
6151 case 8:
6152 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6153 break;
6154 default:
6155 error ("unsupported operand size for extended register");
6156 break;
6158 return;
6160 switch (code)
6162 case 3:
6163 if (STACK_TOP_P (x))
6165 fputs ("st(0)", file);
6166 break;
6168 /* FALLTHRU */
6169 case 8:
6170 case 4:
6171 case 12:
6172 if (! ANY_FP_REG_P (x))
6173 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6174 /* FALLTHRU */
6175 case 16:
6176 case 2:
6177 normal:
6178 fputs (hi_reg_name[REGNO (x)], file);
6179 break;
6180 case 1:
6181 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
6182 goto normal;
6183 fputs (qi_reg_name[REGNO (x)], file);
6184 break;
6185 case 0:
6186 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
6187 goto normal;
6188 fputs (qi_high_reg_name[REGNO (x)], file);
6189 break;
6190 default:
6191 abort ();
6195 /* Locate some local-dynamic symbol still in use by this function
6196 so that we can print its name in some tls_local_dynamic_base
6197 pattern. */
6199 static const char *
6200 get_some_local_dynamic_name (void)
6202 rtx insn;
6204 if (cfun->machine->some_ld_name)
6205 return cfun->machine->some_ld_name;
6207 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6208 if (INSN_P (insn)
6209 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6210 return cfun->machine->some_ld_name;
6212 abort ();
6215 static int
6216 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
6218 rtx x = *px;
6220 if (GET_CODE (x) == SYMBOL_REF
6221 && local_dynamic_symbolic_operand (x, Pmode))
6223 cfun->machine->some_ld_name = XSTR (x, 0);
6224 return 1;
6227 return 0;
6230 /* Meaning of CODE:
6231 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6232 C -- print opcode suffix for set/cmov insn.
6233 c -- like C, but print reversed condition
6234 F,f -- likewise, but for floating-point.
6235 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
6236 otherwise nothing
6237 R -- print the prefix for register names.
6238 z -- print the opcode suffix for the size of the current operand.
6239 * -- print a star (in certain assembler syntax)
6240 A -- print an absolute memory reference.
6241 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6242 s -- print a shift double count, followed by the assembler's argument
6243 delimiter.
6244 b -- print the QImode name of the register for the indicated operand.
6245 %b0 would print %al if operands[0] is reg 0.
6246 w -- likewise, print the HImode name of the register.
6247 k -- likewise, print the SImode name of the register.
6248 q -- likewise, print the DImode name of the register.
6249 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6250 y -- print "st(0)" instead of "st" as a register.
6251 D -- print condition for SSE cmp instruction.
6252 P -- if PIC, print an @PLT suffix.
6253 X -- don't print any sort of PIC '@' suffix for a symbol.
6254 & -- print some in-use local-dynamic symbol name.
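For example, output_387_binary_op below returns templates like
"fadd%z2\t%2" for a memory operand; the 'z' code then becomes 's' for an
SFmode operand and 'l' for a DFmode one, giving "fadds" or "faddl".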
6257 void
6258 print_operand (FILE *file, rtx x, int code)
6260 if (code)
6262 switch (code)
6264 case '*':
6265 if (ASSEMBLER_DIALECT == ASM_ATT)
6266 putc ('*', file);
6267 return;
6269 case '&':
6270 assemble_name (file, get_some_local_dynamic_name ());
6271 return;
6273 case 'A':
6274 if (ASSEMBLER_DIALECT == ASM_ATT)
6275 putc ('*', file);
6276 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6278 /* Intel syntax. For absolute addresses, registers should not
6279 be surrounded by brackets. */
6280 if (GET_CODE (x) != REG)
6282 putc ('[', file);
6283 PRINT_OPERAND (file, x, 0);
6284 putc (']', file);
6285 return;
6288 else
6289 abort ();
6291 PRINT_OPERAND (file, x, 0);
6292 return;
6295 case 'L':
6296 if (ASSEMBLER_DIALECT == ASM_ATT)
6297 putc ('l', file);
6298 return;
6300 case 'W':
6301 if (ASSEMBLER_DIALECT == ASM_ATT)
6302 putc ('w', file);
6303 return;
6305 case 'B':
6306 if (ASSEMBLER_DIALECT == ASM_ATT)
6307 putc ('b', file);
6308 return;
6310 case 'Q':
6311 if (ASSEMBLER_DIALECT == ASM_ATT)
6312 putc ('l', file);
6313 return;
6315 case 'S':
6316 if (ASSEMBLER_DIALECT == ASM_ATT)
6317 putc ('s', file);
6318 return;
6320 case 'T':
6321 if (ASSEMBLER_DIALECT == ASM_ATT)
6322 putc ('t', file);
6323 return;
6325 case 'z':
6326 /* 387 opcodes don't get size suffixes if the operands are
6327 registers. */
6328 if (STACK_REG_P (x))
6329 return;
6331 /* Likewise if using Intel opcodes. */
6332 if (ASSEMBLER_DIALECT == ASM_INTEL)
6333 return;
6335 /* Derive the opcode size suffix from the size of the operand. */
6336 switch (GET_MODE_SIZE (GET_MODE (x)))
6338 case 2:
6339 #ifdef HAVE_GAS_FILDS_FISTS
6340 putc ('s', file);
6341 #endif
6342 return;
6344 case 4:
6345 if (GET_MODE (x) == SFmode)
6347 putc ('s', file);
6348 return;
6350 else
6351 putc ('l', file);
6352 return;
6354 case 12:
6355 case 16:
6356 putc ('t', file);
6357 return;
6359 case 8:
6360 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6362 #ifdef GAS_MNEMONICS
6363 putc ('q', file);
6364 #else
6365 putc ('l', file);
6366 putc ('l', file);
6367 #endif
6369 else
6370 putc ('l', file);
6371 return;
6373 default:
6374 abort ();
6377 case 'b':
6378 case 'w':
6379 case 'k':
6380 case 'q':
6381 case 'h':
6382 case 'y':
6383 case 'X':
6384 case 'P':
6385 break;
6387 case 's':
6388 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6390 PRINT_OPERAND (file, x, 0);
6391 putc (',', file);
6393 return;
6395 case 'D':
6396 /* A little bit of brain damage here. The SSE compare instructions
6397 use completely different names for the comparisons than the
6398 fp conditional moves do. */
6399 switch (GET_CODE (x))
6401 case EQ:
6402 case UNEQ:
6403 fputs ("eq", file);
6404 break;
6405 case LT:
6406 case UNLT:
6407 fputs ("lt", file);
6408 break;
6409 case LE:
6410 case UNLE:
6411 fputs ("le", file);
6412 break;
6413 case UNORDERED:
6414 fputs ("unord", file);
6415 break;
6416 case NE:
6417 case LTGT:
6418 fputs ("neq", file);
6419 break;
6420 case UNGE:
6421 case GE:
6422 fputs ("nlt", file);
6423 break;
6424 case UNGT:
6425 case GT:
6426 fputs ("nle", file);
6427 break;
6428 case ORDERED:
6429 fputs ("ord", file);
6430 break;
6431 default:
6432 abort ();
6433 break;
6435 return;
6436 case 'O':
6437 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6438 if (ASSEMBLER_DIALECT == ASM_ATT)
6440 switch (GET_MODE (x))
6442 case HImode: putc ('w', file); break;
6443 case SImode:
6444 case SFmode: putc ('l', file); break;
6445 case DImode:
6446 case DFmode: putc ('q', file); break;
6447 default: abort ();
6449 putc ('.', file);
6451 #endif
6452 return;
6453 case 'C':
6454 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6455 return;
6456 case 'F':
6457 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6458 if (ASSEMBLER_DIALECT == ASM_ATT)
6459 putc ('.', file);
6460 #endif
6461 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6462 return;
6464 /* Like above, but reverse condition */
6465 case 'c':
6466 /* Check to see if argument to %c is really a constant
6467 and not a condition code which needs to be reversed. */
6468 if (!COMPARISON_P (x))
6470 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6471 return;
6473 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6474 return;
6475 case 'f':
6476 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6477 if (ASSEMBLER_DIALECT == ASM_ATT)
6478 putc ('.', file);
6479 #endif
6480 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6481 return;
6482 case '+':
6484 rtx x;
6486 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6487 return;
6489 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6490 if (x)
6492 int pred_val = INTVAL (XEXP (x, 0));
6494 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6495 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6497 int taken = pred_val > REG_BR_PROB_BASE / 2;
6498 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6500 /* Emit hints only in the case where the default branch prediction
6501 heuristics would fail. */
6502 if (taken != cputaken)
6504 /* We use 3e (DS) prefix for taken branches and
6505 2e (CS) prefix for not taken branches. */
6506 if (taken)
6507 fputs ("ds ; ", file);
6508 else
6509 fputs ("cs ; ", file);
6513 return;
6515 default:
6516 output_operand_lossage ("invalid operand code `%c'", code);
6520 if (GET_CODE (x) == REG)
6521 print_reg (x, code, file);
6523 else if (GET_CODE (x) == MEM)
6525 /* No `byte ptr' prefix for call instructions. */
6526 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6528 const char * size;
6529 switch (GET_MODE_SIZE (GET_MODE (x)))
6531 case 1: size = "BYTE"; break;
6532 case 2: size = "WORD"; break;
6533 case 4: size = "DWORD"; break;
6534 case 8: size = "QWORD"; break;
6535 case 12: size = "XWORD"; break;
6536 case 16: size = "XMMWORD"; break;
6537 default:
6538 abort ();
6541 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6542 if (code == 'b')
6543 size = "BYTE";
6544 else if (code == 'w')
6545 size = "WORD";
6546 else if (code == 'k')
6547 size = "DWORD";
6549 fputs (size, file);
6550 fputs (" PTR ", file);
6553 x = XEXP (x, 0);
6554 /* Avoid (%rip) for call operands. */
6555 if (CONSTANT_ADDRESS_P (x) && code == 'P'
6556 && GET_CODE (x) != CONST_INT)
6557 output_addr_const (file, x);
6558 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6559 output_operand_lossage ("invalid constraints for operand");
6560 else
6561 output_address (x);
6564 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6566 REAL_VALUE_TYPE r;
6567 long l;
6569 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6570 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6572 if (ASSEMBLER_DIALECT == ASM_ATT)
6573 putc ('$', file);
6574 fprintf (file, "0x%08lx", l);
6577 /* These float cases don't actually occur as immediate operands. */
6578 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6580 char dstr[30];
6582 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6583 fprintf (file, "%s", dstr);
6586 else if (GET_CODE (x) == CONST_DOUBLE
6587 && GET_MODE (x) == XFmode)
6589 char dstr[30];
6591 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6592 fprintf (file, "%s", dstr);
6595 else
6597 if (code != 'P')
6599 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6601 if (ASSEMBLER_DIALECT == ASM_ATT)
6602 putc ('$', file);
6604 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6605 || GET_CODE (x) == LABEL_REF)
6607 if (ASSEMBLER_DIALECT == ASM_ATT)
6608 putc ('$', file);
6609 else
6610 fputs ("OFFSET FLAT:", file);
6613 if (GET_CODE (x) == CONST_INT)
6614 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6615 else if (flag_pic)
6616 output_pic_addr_const (file, x, code);
6617 else
6618 output_addr_const (file, x);
6622 /* Print a memory operand whose address is ADDR. */
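/* For example, an address with base %ebx, index %ecx, scale 4 and
   displacement 8 comes out as "8(%ebx,%ecx,4)" in AT&T syntax and,
   roughly, as "[ebx+8+ecx*4]" in Intel syntax.  */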
6624 void
6625 print_operand_address (FILE *file, rtx addr)
6627 struct ix86_address parts;
6628 rtx base, index, disp;
6629 int scale;
6631 if (! ix86_decompose_address (addr, &parts))
6632 abort ();
6634 base = parts.base;
6635 index = parts.index;
6636 disp = parts.disp;
6637 scale = parts.scale;
6639 switch (parts.seg)
6641 case SEG_DEFAULT:
6642 break;
6643 case SEG_FS:
6644 case SEG_GS:
6645 if (USER_LABEL_PREFIX[0] == 0)
6646 putc ('%', file);
6647 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
6648 break;
6649 default:
6650 abort ();
6653 if (!base && !index)
6655 /* Displacement only requires special attention. */
6657 if (GET_CODE (disp) == CONST_INT)
6659 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
6661 if (USER_LABEL_PREFIX[0] == 0)
6662 putc ('%', file);
6663 fputs ("ds:", file);
6665 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
6667 else if (flag_pic)
6668 output_pic_addr_const (file, disp, 0);
6669 else
6670 output_addr_const (file, disp);
6672 /* Use the one byte shorter RIP-relative addressing for 64-bit mode. */
6673 if (TARGET_64BIT
6674 && ((GET_CODE (disp) == SYMBOL_REF
6675 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
6676 || GET_CODE (disp) == LABEL_REF
6677 || (GET_CODE (disp) == CONST
6678 && GET_CODE (XEXP (disp, 0)) == PLUS
6679 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
6680 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
6681 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
6682 fputs ("(%rip)", file);
6684 else
6686 if (ASSEMBLER_DIALECT == ASM_ATT)
6688 if (disp)
6690 if (flag_pic)
6691 output_pic_addr_const (file, disp, 0);
6692 else if (GET_CODE (disp) == LABEL_REF)
6693 output_asm_label (disp);
6694 else
6695 output_addr_const (file, disp);
6698 putc ('(', file);
6699 if (base)
6700 print_reg (base, 0, file);
6701 if (index)
6703 putc (',', file);
6704 print_reg (index, 0, file);
6705 if (scale != 1)
6706 fprintf (file, ",%d", scale);
6708 putc (')', file);
6710 else
6712 rtx offset = NULL_RTX;
6714 if (disp)
6716 /* Pull out the offset of a symbol; print any symbol itself. */
6717 if (GET_CODE (disp) == CONST
6718 && GET_CODE (XEXP (disp, 0)) == PLUS
6719 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6721 offset = XEXP (XEXP (disp, 0), 1);
6722 disp = gen_rtx_CONST (VOIDmode,
6723 XEXP (XEXP (disp, 0), 0));
6726 if (flag_pic)
6727 output_pic_addr_const (file, disp, 0);
6728 else if (GET_CODE (disp) == LABEL_REF)
6729 output_asm_label (disp);
6730 else if (GET_CODE (disp) == CONST_INT)
6731 offset = disp;
6732 else
6733 output_addr_const (file, disp);
6736 putc ('[', file);
6737 if (base)
6739 print_reg (base, 0, file);
6740 if (offset)
6742 if (INTVAL (offset) >= 0)
6743 putc ('+', file);
6744 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6747 else if (offset)
6748 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6749 else
6750 putc ('0', file);
6752 if (index)
6754 putc ('+', file);
6755 print_reg (index, 0, file);
6756 if (scale != 1)
6757 fprintf (file, "*%d", scale);
6759 putc (']', file);
6764 bool
6765 output_addr_const_extra (FILE *file, rtx x)
6767 rtx op;
6769 if (GET_CODE (x) != UNSPEC)
6770 return false;
6772 op = XVECEXP (x, 0, 0);
6773 switch (XINT (x, 1))
6775 case UNSPEC_GOTTPOFF:
6776 output_addr_const (file, op);
6777 /* FIXME: This might be @TPOFF in Sun ld. */
6778 fputs ("@GOTTPOFF", file);
6779 break;
6780 case UNSPEC_TPOFF:
6781 output_addr_const (file, op);
6782 fputs ("@TPOFF", file);
6783 break;
6784 case UNSPEC_NTPOFF:
6785 output_addr_const (file, op);
6786 if (TARGET_64BIT)
6787 fputs ("@TPOFF", file);
6788 else
6789 fputs ("@NTPOFF", file);
6790 break;
6791 case UNSPEC_DTPOFF:
6792 output_addr_const (file, op);
6793 fputs ("@DTPOFF", file);
6794 break;
6795 case UNSPEC_GOTNTPOFF:
6796 output_addr_const (file, op);
6797 if (TARGET_64BIT)
6798 fputs ("@GOTTPOFF(%rip)", file);
6799 else
6800 fputs ("@GOTNTPOFF", file);
6801 break;
6802 case UNSPEC_INDNTPOFF:
6803 output_addr_const (file, op);
6804 fputs ("@INDNTPOFF", file);
6805 break;
6807 default:
6808 return false;
6811 return true;
6814 /* Split one or more DImode RTL references into pairs of SImode
6815 references. The RTL can be REG, offsettable MEM, integer constant, or
6816 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6817 split and "num" is its length. lo_half and hi_half are output arrays
6818 that parallel "operands". */
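/* E.g. the DImode constant 0x100000002 splits into a low SImode half of 2
   and a high half of 1, and a DImode MEM becomes two SImode MEMs at
   offsets 0 and 4 (x86 is little-endian, so offset 0 is the low word).  */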
6820 void
6821 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
6823 while (num--)
6825 rtx op = operands[num];
6827 /* simplify_subreg refuses to split volatile memory addresses,
6828 but we still have to handle them. */
6829 if (GET_CODE (op) == MEM)
6831 lo_half[num] = adjust_address (op, SImode, 0);
6832 hi_half[num] = adjust_address (op, SImode, 4);
6834 else
6836 lo_half[num] = simplify_gen_subreg (SImode, op,
6837 GET_MODE (op) == VOIDmode
6838 ? DImode : GET_MODE (op), 0);
6839 hi_half[num] = simplify_gen_subreg (SImode, op,
6840 GET_MODE (op) == VOIDmode
6841 ? DImode : GET_MODE (op), 4);
6845 /* Split one or more TImode RTL references into pairs of DImode
6846 references. The RTL can be REG, offsettable MEM, integer constant, or
6847 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
6848 split and "num" is its length. lo_half and hi_half are output arrays
6849 that parallel "operands". */
6851 void
6852 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
6854 while (num--)
6856 rtx op = operands[num];
6858 /* simplify_subreg refuses to split volatile memory addresses, but we
6859 still have to handle them. */
6860 if (GET_CODE (op) == MEM)
6862 lo_half[num] = adjust_address (op, DImode, 0);
6863 hi_half[num] = adjust_address (op, DImode, 8);
6865 else
6867 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6868 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6873 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6874 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6875 is the expression of the binary operation. The output may either be
6876 emitted here, or returned to the caller, like all output_* functions.
6878 There is no guarantee that the operands are the same mode, as they
6879 might be within FLOAT or FLOAT_EXTEND expressions. */
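/* For example, an SFmode PLUS with SSE registers returns
   "addss\t{%2, %0|%0, %2}", while the plain x87 register case returns
   "fadd" followed by the operand suffix chosen below.  */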
6881 #ifndef SYSV386_COMPAT
6882 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6883 wants to fix the assemblers because that causes incompatibility
6884 with gcc. No-one wants to fix gcc because that causes
6885 incompatibility with assemblers... You can use the option of
6886 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6887 #define SYSV386_COMPAT 1
6888 #endif
6890 const char *
6891 output_387_binary_op (rtx insn, rtx *operands)
6893 static char buf[30];
6894 const char *p;
6895 const char *ssep;
6896 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
6898 #ifdef ENABLE_CHECKING
6899 /* Even if we do not want to check the inputs, this documents the input
6900 constraints, which helps in understanding the following code. */
6901 if (STACK_REG_P (operands[0])
6902 && ((REG_P (operands[1])
6903 && REGNO (operands[0]) == REGNO (operands[1])
6904 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6905 || (REG_P (operands[2])
6906 && REGNO (operands[0]) == REGNO (operands[2])
6907 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6908 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6909 ; /* ok */
6910 else if (!is_sse)
6911 abort ();
6912 #endif
6914 switch (GET_CODE (operands[3]))
6916 case PLUS:
6917 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6918 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6919 p = "fiadd";
6920 else
6921 p = "fadd";
6922 ssep = "add";
6923 break;
6925 case MINUS:
6926 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6927 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6928 p = "fisub";
6929 else
6930 p = "fsub";
6931 ssep = "sub";
6932 break;
6934 case MULT:
6935 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6936 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6937 p = "fimul";
6938 else
6939 p = "fmul";
6940 ssep = "mul";
6941 break;
6943 case DIV:
6944 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6945 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6946 p = "fidiv";
6947 else
6948 p = "fdiv";
6949 ssep = "div";
6950 break;
6952 default:
6953 abort ();
6956 if (is_sse)
6958 strcpy (buf, ssep);
6959 if (GET_MODE (operands[0]) == SFmode)
6960 strcat (buf, "ss\t{%2, %0|%0, %2}");
6961 else
6962 strcat (buf, "sd\t{%2, %0|%0, %2}");
6963 return buf;
6965 strcpy (buf, p);
6967 switch (GET_CODE (operands[3]))
6969 case MULT:
6970 case PLUS:
6971 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6973 rtx temp = operands[2];
6974 operands[2] = operands[1];
6975 operands[1] = temp;
6978 /* We know operands[0] == operands[1]. */
6980 if (GET_CODE (operands[2]) == MEM)
6982 p = "%z2\t%2";
6983 break;
6986 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6988 if (STACK_TOP_P (operands[0]))
6989 /* How is it that we are storing to a dead operand[2]?
6990 Well, presumably operands[1] is dead too. We can't
6991 store the result to st(0) as st(0) gets popped on this
6992 instruction. Instead store to operands[2] (which I
6993 think has to be st(1)). st(1) will be popped later.
6994 gcc <= 2.8.1 didn't have this check and generated
6995 assembly code that the Unixware assembler rejected. */
6996 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6997 else
6998 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6999 break;
7002 if (STACK_TOP_P (operands[0]))
7003 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7004 else
7005 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7006 break;
7008 case MINUS:
7009 case DIV:
7010 if (GET_CODE (operands[1]) == MEM)
7012 p = "r%z1\t%1";
7013 break;
7016 if (GET_CODE (operands[2]) == MEM)
7018 p = "%z2\t%2";
7019 break;
7022 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7024 #if SYSV386_COMPAT
7025 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7026 derived assemblers, confusingly reverse the direction of
7027 the operation for fsub{r} and fdiv{r} when the
7028 destination register is not st(0). The Intel assembler
7029 doesn't have this brain damage. Read !SYSV386_COMPAT to
7030 figure out what the hardware really does. */
7031 if (STACK_TOP_P (operands[0]))
7032 p = "{p\t%0, %2|rp\t%2, %0}";
7033 else
7034 p = "{rp\t%2, %0|p\t%0, %2}";
7035 #else
7036 if (STACK_TOP_P (operands[0]))
7037 /* As above for fmul/fadd, we can't store to st(0). */
7038 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7039 else
7040 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7041 #endif
7042 break;
7045 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7047 #if SYSV386_COMPAT
7048 if (STACK_TOP_P (operands[0]))
7049 p = "{rp\t%0, %1|p\t%1, %0}";
7050 else
7051 p = "{p\t%1, %0|rp\t%0, %1}";
7052 #else
7053 if (STACK_TOP_P (operands[0]))
7054 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7055 else
7056 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7057 #endif
7058 break;
7061 if (STACK_TOP_P (operands[0]))
7063 if (STACK_TOP_P (operands[1]))
7064 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7065 else
7066 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7067 break;
7069 else if (STACK_TOP_P (operands[1]))
7071 #if SYSV386_COMPAT
7072 p = "{\t%1, %0|r\t%0, %1}";
7073 #else
7074 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7075 #endif
7077 else
7079 #if SYSV386_COMPAT
7080 p = "{r\t%2, %0|\t%0, %2}";
7081 #else
7082 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7083 #endif
7085 break;
7087 default:
7088 abort ();
7091 strcat (buf, p);
7092 return buf;
7095 /* Output code to initialize control word copies used by
7096 trunc?f?i patterns. NORMAL is set to the current control word, while
7097 ROUND_DOWN is set to the control word used for truncation (round toward zero). */
7098 void
7099 emit_i387_cw_initialization (rtx normal, rtx round_down)
7101 rtx reg = gen_reg_rtx (HImode);
7103 emit_insn (gen_x86_fnstcw_1 (normal));
7104 emit_move_insn (reg, normal);
7105 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7106 && !TARGET_64BIT)
7107 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7108 else
7109 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7110 emit_move_insn (round_down, reg);
7113 /* Output code for INSN to convert a float to a signed int. OPERANDS
7114 are the insn operands. The output may be [HSD]Imode and the input
7115 operand may be [SDX]Fmode. */
7117 const char *
7118 output_fix_trunc (rtx insn, rtx *operands)
7120 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7121 int dimode_p = GET_MODE (operands[0]) == DImode;
7123 /* Jump through a hoop or two for DImode, since the hardware has no
7124 non-popping instruction. We used to do this a different way, but
7125 that was somewhat fragile and broke with post-reload splitters. */
7126 if (dimode_p && !stack_top_dies)
7127 output_asm_insn ("fld\t%y1", operands);
7129 if (!STACK_TOP_P (operands[1]))
7130 abort ();
7132 if (GET_CODE (operands[0]) != MEM)
7133 abort ();
7135 output_asm_insn ("fldcw\t%3", operands);
7136 if (stack_top_dies || dimode_p)
7137 output_asm_insn ("fistp%z0\t%0", operands);
7138 else
7139 output_asm_insn ("fist%z0\t%0", operands);
7140 output_asm_insn ("fldcw\t%2", operands);
7142 return "";
7145 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7146 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7147 when fucom should be used. */
7149 const char *
7150 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
7152 int stack_top_dies;
7153 rtx cmp_op0 = operands[0];
7154 rtx cmp_op1 = operands[1];
7155 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7157 if (eflags_p == 2)
7159 cmp_op0 = cmp_op1;
7160 cmp_op1 = operands[2];
7162 if (is_sse)
7164 if (GET_MODE (operands[0]) == SFmode)
7165 if (unordered_p)
7166 return "ucomiss\t{%1, %0|%0, %1}";
7167 else
7168 return "comiss\t{%1, %0|%0, %1}";
7169 else
7170 if (unordered_p)
7171 return "ucomisd\t{%1, %0|%0, %1}";
7172 else
7173 return "comisd\t{%1, %0|%0, %1}";
7176 if (! STACK_TOP_P (cmp_op0))
7177 abort ();
7179 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7181 if (STACK_REG_P (cmp_op1)
7182 && stack_top_dies
7183 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7184 && REGNO (cmp_op1) != FIRST_STACK_REG)
7186 /* If the top of the 387 stack dies, and the other operand
7187 is also a stack register that dies, then this must be a
7188 `fcompp' float compare. */
7190 if (eflags_p == 1)
7192 /* There is no double popping fcomi variant. Fortunately,
7193 eflags is immune from the fstp's cc clobbering. */
7194 if (unordered_p)
7195 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7196 else
7197 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7198 return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
7200 else
7202 if (eflags_p == 2)
7204 if (unordered_p)
7205 return "fucompp\n\tfnstsw\t%0";
7206 else
7207 return "fcompp\n\tfnstsw\t%0";
7209 else
7211 if (unordered_p)
7212 return "fucompp";
7213 else
7214 return "fcompp";
7218 else
7220 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7222 static const char * const alt[24] =
7224 "fcom%z1\t%y1",
7225 "fcomp%z1\t%y1",
7226 "fucom%z1\t%y1",
7227 "fucomp%z1\t%y1",
7229 "ficom%z1\t%y1",
7230 "ficomp%z1\t%y1",
7231 NULL,
7232 NULL,
7234 "fcomi\t{%y1, %0|%0, %y1}",
7235 "fcomip\t{%y1, %0|%0, %y1}",
7236 "fucomi\t{%y1, %0|%0, %y1}",
7237 "fucomip\t{%y1, %0|%0, %y1}",
7239 NULL,
7240 NULL,
7241 NULL,
7242 NULL,
7244 "fcom%z2\t%y2\n\tfnstsw\t%0",
7245 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7246 "fucom%z2\t%y2\n\tfnstsw\t%0",
7247 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7249 "ficom%z2\t%y2\n\tfnstsw\t%0",
7250 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7251 NULL,
7252 NULL
7255 int mask;
7256 const char *ret;
7258 mask = eflags_p << 3;
7259 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7260 mask |= unordered_p << 1;
7261 mask |= stack_top_dies;
7263 if (mask >= 24)
7264 abort ();
7265 ret = alt[mask];
7266 if (ret == NULL)
7267 abort ();
7269 return ret;
7273 void
7274 ix86_output_addr_vec_elt (FILE *file, int value)
7276 const char *directive = ASM_LONG;
7278 if (TARGET_64BIT)
7280 #ifdef ASM_QUAD
7281 directive = ASM_QUAD;
7282 #else
7283 abort ();
7284 #endif
7287 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7290 void
7291 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
7293 if (TARGET_64BIT)
7294 fprintf (file, "%s%s%d-%s%d\n",
7295 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7296 else if (HAVE_AS_GOTOFF_IN_DATA)
7297 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7298 #if TARGET_MACHO
7299 else if (TARGET_MACHO)
7301 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
7302 machopic_output_function_base_name (file);
7303 fprintf(file, "\n");
7305 #endif
7306 else
7307 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7308 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7311 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7312 for the target. */
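/* E.g. clearing %eax normally becomes "xorl %eax, %eax", which has a
   shorter encoding than "movl $0, %eax" but clobbers the condition codes,
   hence the explicit CLOBBER of the flags register added below.  */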
7314 void
7315 ix86_expand_clear (rtx dest)
7317 rtx tmp;
7319 /* We play register width games, which are only valid after reload. */
7320 if (!reload_completed)
7321 abort ();
7323 /* Avoid HImode and its attendant prefix byte. */
7324 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7325 dest = gen_rtx_REG (SImode, REGNO (dest));
7327 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7329 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7330 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7332 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7333 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7336 emit_insn (tmp);
7339 /* X is an unchanging MEM. If it is a constant pool reference, return
7340 the constant pool rtx, else NULL. */
7343 maybe_get_pool_constant (rtx x)
7345 x = ix86_delegitimize_address (XEXP (x, 0));
7347 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7348 return get_pool_constant (x);
7350 return NULL_RTX;
7353 void
7354 ix86_expand_move (enum machine_mode mode, rtx operands[])
7356 int strict = (reload_in_progress || reload_completed);
7357 rtx op0, op1;
7358 enum tls_model model;
7360 op0 = operands[0];
7361 op1 = operands[1];
7363 model = GET_CODE (op1) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (op1) : 0;
7364 if (model)
7366 op1 = legitimize_tls_address (op1, model, true);
7367 op1 = force_operand (op1, op0);
7368 if (op1 == op0)
7369 return;
7372 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7374 #if TARGET_MACHO
7375 if (MACHOPIC_PURE)
7377 rtx temp = ((reload_in_progress
7378 || ((op0 && GET_CODE (op0) == REG)
7379 && mode == Pmode))
7380 ? op0 : gen_reg_rtx (Pmode));
7381 op1 = machopic_indirect_data_reference (op1, temp);
7382 op1 = machopic_legitimize_pic_address (op1, mode,
7383 temp == op1 ? 0 : temp);
7385 else if (MACHOPIC_INDIRECT)
7386 op1 = machopic_indirect_data_reference (op1, 0);
7387 if (op0 == op1)
7388 return;
7389 #else
7390 if (GET_CODE (op0) == MEM)
7391 op1 = force_reg (Pmode, op1);
7392 else
7393 op1 = legitimize_address (op1, op1, Pmode);
7394 #endif /* TARGET_MACHO */
7396 else
7398 if (GET_CODE (op0) == MEM
7399 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7400 || !push_operand (op0, mode))
7401 && GET_CODE (op1) == MEM)
7402 op1 = force_reg (mode, op1);
7404 if (push_operand (op0, mode)
7405 && ! general_no_elim_operand (op1, mode))
7406 op1 = copy_to_mode_reg (mode, op1);
7408 /* Force large constants in 64-bit compilation into a register
7409 to get them CSEd. */
7410 if (TARGET_64BIT && mode == DImode
7411 && immediate_operand (op1, mode)
7412 && !x86_64_zext_immediate_operand (op1, VOIDmode)
7413 && !register_operand (op0, mode)
7414 && optimize && !reload_completed && !reload_in_progress)
7415 op1 = copy_to_mode_reg (mode, op1);
7417 if (FLOAT_MODE_P (mode))
7419 /* If we are loading a floating point constant to a register,
7420 force the value to memory now, since we'll get better code
7421 out of the back end. */
7423 if (strict)
7425 else if (GET_CODE (op1) == CONST_DOUBLE)
7427 op1 = validize_mem (force_const_mem (mode, op1));
7428 if (!register_operand (op0, mode))
7430 rtx temp = gen_reg_rtx (mode);
7431 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
7432 emit_move_insn (op0, temp);
7433 return;
7439 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
7442 void
7443 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
7445 /* Force constants other than zero into memory. We do not know how
7446 the instructions used to build constants modify the upper 64 bits
7447 of the register; once we have that information we may be able
7448 to handle some of them more efficiently. */
7449 if ((reload_in_progress | reload_completed) == 0
7450 && register_operand (operands[0], mode)
7451 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
7452 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
7454 /* Make operand1 a register if it isn't already. */
7455 if (!no_new_pseudos
7456 && !register_operand (operands[0], mode)
7457 && !register_operand (operands[1], mode))
7459 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
7460 emit_move_insn (operands[0], temp);
7461 return;
7464 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
7467 /* Attempt to expand a binary operator. Make the expansion closer to the
7468 actual machine than just general_operand, which would allow 3 separate
7469 memory references (one output, two inputs) in a single insn. */
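/* E.g. when expanding an SImode add whose two source operands are both
   MEMs, one of them is forced into a register below, and a memory
   destination that matches neither source is replaced by a pseudo and
   copied back at the end.  */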
7471 void
7472 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
7473 rtx operands[])
7475 int matching_memory;
7476 rtx src1, src2, dst, op, clob;
7478 dst = operands[0];
7479 src1 = operands[1];
7480 src2 = operands[2];
7482 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7483 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7484 && (rtx_equal_p (dst, src2)
7485 || immediate_operand (src1, mode)))
7487 rtx temp = src1;
7488 src1 = src2;
7489 src2 = temp;
7492 /* If the destination is memory, and we do not have matching source
7493 operands, do things in registers. */
7494 matching_memory = 0;
7495 if (GET_CODE (dst) == MEM)
7497 if (rtx_equal_p (dst, src1))
7498 matching_memory = 1;
7499 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7500 && rtx_equal_p (dst, src2))
7501 matching_memory = 2;
7502 else
7503 dst = gen_reg_rtx (mode);
7506 /* Both source operands cannot be in memory. */
7507 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7509 if (matching_memory != 2)
7510 src2 = force_reg (mode, src2);
7511 else
7512 src1 = force_reg (mode, src1);
7515 /* If the operation is not commutable, source 1 cannot be a constant
7516 or non-matching memory. */
7517 if ((CONSTANT_P (src1)
7518 || (!matching_memory && GET_CODE (src1) == MEM))
7519 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
7520 src1 = force_reg (mode, src1);
7522 /* If optimizing, copy to regs to improve CSE */
7523 if (optimize && ! no_new_pseudos)
7525 if (GET_CODE (dst) == MEM)
7526 dst = gen_reg_rtx (mode);
7527 if (GET_CODE (src1) == MEM)
7528 src1 = force_reg (mode, src1);
7529 if (GET_CODE (src2) == MEM)
7530 src2 = force_reg (mode, src2);
7533 /* Emit the instruction. */
7535 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7536 if (reload_in_progress)
7538 /* Reload doesn't know about the flags register, and doesn't know that
7539 it doesn't want to clobber it. We can only do this with PLUS. */
7540 if (code != PLUS)
7541 abort ();
7542 emit_insn (op);
7544 else
7546 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7547 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7550 /* Fix up the destination if needed. */
7551 if (dst != operands[0])
7552 emit_move_insn (operands[0], dst);
7555 /* Return TRUE or FALSE depending on whether the binary operator meets the
7556 appropriate constraints. */
7559 ix86_binary_operator_ok (enum rtx_code code,
7560 enum machine_mode mode ATTRIBUTE_UNUSED,
7561 rtx operands[3])
7563 /* Both source operands cannot be in memory. */
7564 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7565 return 0;
7566 /* If the operation is not commutable, source 1 cannot be a constant. */
7567 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
7568 return 0;
7569 /* If the destination is memory, we must have a matching source operand. */
7570 if (GET_CODE (operands[0]) == MEM
7571 && ! (rtx_equal_p (operands[0], operands[1])
7572 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7573 && rtx_equal_p (operands[0], operands[2]))))
7574 return 0;
7575 /* If the operation is not commutable and the source 1 is memory, we must
7576 have a matching destination. */
7577 if (GET_CODE (operands[1]) == MEM
7578 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
7579 && ! rtx_equal_p (operands[0], operands[1]))
7580 return 0;
7581 return 1;
7584 /* Attempt to expand a unary operator. Make the expansion closer to the
7585 actual machine than just general_operand, which would allow 2 separate
7586 memory references (one output, one input) in a single insn. */
7588 void
7589 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
7590 rtx operands[])
7592 int matching_memory;
7593 rtx src, dst, op, clob;
7595 dst = operands[0];
7596 src = operands[1];
7598 /* If the destination is memory, and we do not have matching source
7599 operands, do things in registers. */
7600 matching_memory = 0;
7601 if (GET_CODE (dst) == MEM)
7603 if (rtx_equal_p (dst, src))
7604 matching_memory = 1;
7605 else
7606 dst = gen_reg_rtx (mode);
7609 /* When source operand is memory, destination must match. */
7610 if (!matching_memory && GET_CODE (src) == MEM)
7611 src = force_reg (mode, src);
7613 /* If optimizing, copy to regs to improve CSE */
7614 if (optimize && ! no_new_pseudos)
7616 if (GET_CODE (dst) == MEM)
7617 dst = gen_reg_rtx (mode);
7618 if (GET_CODE (src) == MEM)
7619 src = force_reg (mode, src);
7622 /* Emit the instruction. */
7624 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7625 if (reload_in_progress || code == NOT)
7627 /* Reload doesn't know about the flags register, and doesn't know that
7628 it doesn't want to clobber it. */
7629 if (code != NOT)
7630 abort ();
7631 emit_insn (op);
7633 else
7635 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7636 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7639 /* Fix up the destination if needed. */
7640 if (dst != operands[0])
7641 emit_move_insn (operands[0], dst);
7644 /* Return TRUE or FALSE depending on whether the unary operator meets the
7645 appropriate constraints. */
7648 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
7649 enum machine_mode mode ATTRIBUTE_UNUSED,
7650 rtx operands[2] ATTRIBUTE_UNUSED)
7652 /* If one of operands is memory, source and destination must match. */
7653 if ((GET_CODE (operands[0]) == MEM
7654 || GET_CODE (operands[1]) == MEM)
7655 && ! rtx_equal_p (operands[0], operands[1]))
7656 return FALSE;
7657 return TRUE;
7660 /* Return TRUE or FALSE depending on whether the first SET in INSN
7661 has source and destination with matching CC modes, and that the
7662 CC mode is at least as constrained as REQ_MODE. */
7665 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
7667 rtx set;
7668 enum machine_mode set_mode;
7670 set = PATTERN (insn);
7671 if (GET_CODE (set) == PARALLEL)
7672 set = XVECEXP (set, 0, 0);
7673 if (GET_CODE (set) != SET)
7674 abort ();
7675 if (GET_CODE (SET_SRC (set)) != COMPARE)
7676 abort ();
7678 set_mode = GET_MODE (SET_DEST (set));
7679 switch (set_mode)
7681 case CCNOmode:
7682 if (req_mode != CCNOmode
7683 && (req_mode != CCmode
7684 || XEXP (SET_SRC (set), 1) != const0_rtx))
7685 return 0;
7686 break;
7687 case CCmode:
7688 if (req_mode == CCGCmode)
7689 return 0;
7690 /* FALLTHRU */
7691 case CCGCmode:
7692 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7693 return 0;
7694 /* FALLTHRU */
7695 case CCGOCmode:
7696 if (req_mode == CCZmode)
7697 return 0;
7698 /* FALLTHRU */
7699 case CCZmode:
7700 break;
7702 default:
7703 abort ();
7706 return (GET_MODE (SET_SRC (set)) == set_mode);
7709 /* Generate insn patterns to do an integer compare of OPERANDS. */
7711 static rtx
7712 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
7714 enum machine_mode cmpmode;
7715 rtx tmp, flags;
7717 cmpmode = SELECT_CC_MODE (code, op0, op1);
7718 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7720 /* This is very simple, but making the interface the same as in the
7721 FP case makes the rest of the code easier. */
7722 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7723 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7725 /* Return the test that should be put into the flags user, i.e.
7726 the bcc, scc, or cmov instruction. */
7727 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
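/* As an illustration (the exact flags mode depends on SELECT_CC_MODE): for an
   SImode test (ltu op0 op1) the code above emits roughly
     (set (reg:CC FLAGS_REG) (compare:CC op0 op1))
   and hands back (ltu (reg:CC FLAGS_REG) (const_int 0)), which the caller
   then wraps into a jump, setcc or cmov pattern.  */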
7730 /* Figure out whether to use ordered or unordered fp comparisons.
7731 Return the appropriate mode to use. */
7733 enum machine_mode
7734 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
7736 /* ??? In order to make all comparisons reversible, we do all comparisons
7737 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7738 all forms of trapping and nontrapping comparisons, we can make inequality
7739 comparisons trapping again, since it results in better code when using
7740 FCOM based compares. */
7741 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
7744 enum machine_mode
7745 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
7747 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7748 return ix86_fp_compare_mode (code);
7749 switch (code)
7751 /* Only zero flag is needed. */
7752 case EQ: /* ZF=0 */
7753 case NE: /* ZF!=0 */
7754 return CCZmode;
7755 /* Codes needing carry flag. */
7756 case GEU: /* CF=0 */
7757 case GTU: /* CF=0 & ZF=0 */
7758 case LTU: /* CF=1 */
7759 case LEU: /* CF=1 | ZF=1 */
7760 return CCmode;
7761 /* Codes possibly doable only with sign flag when
7762 comparing against zero. */
7763 case GE: /* SF=OF or SF=0 */
7764 case LT: /* SF<>OF or SF=1 */
7765 if (op1 == const0_rtx)
7766 return CCGOCmode;
7767 else
7768 /* For other cases Carry flag is not required. */
7769 return CCGCmode;
7770 /* Codes doable only with the sign flag when comparing
7771 against zero, but we lack a jump instruction for that,
7772 so we need to use relational tests against the overflow
7773 flag, which thus needs to be zero. */
7774 case GT: /* ZF=0 & SF=OF */
7775 case LE: /* ZF=1 | SF<>OF */
7776 if (op1 == const0_rtx)
7777 return CCNOmode;
7778 else
7779 return CCGCmode;
7780 /* The strcmp pattern does (use flags), and combine may ask us for the
7781 proper mode. */
7782 case USE:
7783 return CCmode;
7784 default:
7785 abort ();
7789 /* Return the fixed registers used for condition codes. */
7791 static bool
7792 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
7794 *p1 = FLAGS_REG;
7795 *p2 = FPSR_REG;
7796 return true;
7799 /* If two condition code modes are compatible, return a condition code
7800 mode which is compatible with both. Otherwise, return
7801 VOIDmode. */
7803 static enum machine_mode
7804 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
7806 if (m1 == m2)
7807 return m1;
7809 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
7810 return VOIDmode;
7812 if ((m1 == CCGCmode && m2 == CCGOCmode)
7813 || (m1 == CCGOCmode && m2 == CCGCmode))
7814 return CCGCmode;
7816 switch (m1)
7818 default:
7819 abort ();
7821 case CCmode:
7822 case CCGCmode:
7823 case CCGOCmode:
7824 case CCNOmode:
7825 case CCZmode:
7826 switch (m2)
7828 default:
7829 return VOIDmode;
7831 case CCmode:
7832 case CCGCmode:
7833 case CCGOCmode:
7834 case CCNOmode:
7835 case CCZmode:
7836 return CCmode;
7839 case CCFPmode:
7840 case CCFPUmode:
7841 /* These are only compatible with themselves, which we already
7842 checked above. */
7843 return VOIDmode;
7847 /* Return true if we should use an FCOMI instruction for this fp comparison. */
7850 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
7852 enum rtx_code swapped_code = swap_condition (code);
7853 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7854 || (ix86_fp_comparison_cost (swapped_code)
7855 == ix86_fp_comparison_fcomi_cost (swapped_code)));
7858 /* Swap, force into registers, or otherwise massage the two operands
7859 to a fp comparison. The operands are updated in place; the new
7860 comparison code is returned. */
7862 static enum rtx_code
7863 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
7865 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7866 rtx op0 = *pop0, op1 = *pop1;
7867 enum machine_mode op_mode = GET_MODE (op0);
7868 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
7870 /* All of the unordered compare instructions only work on registers.
7871 The same is true of the XFmode compare instructions. The same is
7872 true of the fcomi compare instructions. */
7874 if (!is_sse
7875 && (fpcmp_mode == CCFPUmode
7876 || op_mode == XFmode
7877 || ix86_use_fcomi_compare (code)))
7879 op0 = force_reg (op_mode, op0);
7880 op1 = force_reg (op_mode, op1);
7882 else
7884 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7885 things around if they appear profitable, otherwise force op0
7886 into a register. */
7888 if (standard_80387_constant_p (op0) == 0
7889 || (GET_CODE (op0) == MEM
7890 && ! (standard_80387_constant_p (op1) == 0
7891 || GET_CODE (op1) == MEM)))
7893 rtx tmp;
7894 tmp = op0, op0 = op1, op1 = tmp;
7895 code = swap_condition (code);
7898 if (GET_CODE (op0) != REG)
7899 op0 = force_reg (op_mode, op0);
7901 if (CONSTANT_P (op1))
7903 if (standard_80387_constant_p (op1))
7904 op1 = force_reg (op_mode, op1);
7905 else
7906 op1 = validize_mem (force_const_mem (op_mode, op1));
7910 /* Try to rearrange the comparison to make it cheaper. */
7911 if (ix86_fp_comparison_cost (code)
7912 > ix86_fp_comparison_cost (swap_condition (code))
7913 && (GET_CODE (op1) == REG || !no_new_pseudos))
7915 rtx tmp;
7916 tmp = op0, op0 = op1, op1 = tmp;
7917 code = swap_condition (code);
7918 if (GET_CODE (op0) != REG)
7919 op0 = force_reg (op_mode, op0);
7922 *pop0 = op0;
7923 *pop1 = op1;
7924 return code;
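/* For instance, under TARGET_IEEE_FP an LE test needs an extra check for
   unordered operands while GE does not, so the cost comparison above will
   normally rewrite (a <= b) as (b >= a) when a swap is permitted.  */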
7927 /* Convert comparison codes we use to represent FP comparison to integer
7928 code that will result in proper branch. Return UNKNOWN if no such code
7929 is available. */
7931 enum rtx_code
7932 ix86_fp_compare_code_to_integer (enum rtx_code code)
7934 switch (code)
7936 case GT:
7937 return GTU;
7938 case GE:
7939 return GEU;
7940 case ORDERED:
7941 case UNORDERED:
7942 return code;
7943 break;
7944 case UNEQ:
7945 return EQ;
7946 break;
7947 case UNLT:
7948 return LTU;
7949 break;
7950 case UNLE:
7951 return LEU;
7952 break;
7953 case LTGT:
7954 return NE;
7955 break;
7956 default:
7957 return UNKNOWN;
7961 /* Split comparison code CODE into comparisons we can do using branch
7962 instructions. BYPASS_CODE is comparison code for branch that will
7963 branch around FIRST_CODE and SECOND_CODE. If one of the branches
7964 is not required, its value is set to NIL.
7965 We never require more than two branches. */
7967 void
7968 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
7969 enum rtx_code *first_code,
7970 enum rtx_code *second_code)
7972 *first_code = code;
7973 *bypass_code = NIL;
7974 *second_code = NIL;
7976 /* The fcomi comparison sets flags as follows:
7978    cmp   ZF  PF  CF
7979    >     0   0   0
7980    <     0   0   1
7981    =     1   0   0
7982    un    1   1   1  */
7984 switch (code)
7986 case GT: /* GTU - CF=0 & ZF=0 */
7987 case GE: /* GEU - CF=0 */
7988 case ORDERED: /* PF=0 */
7989 case UNORDERED: /* PF=1 */
7990 case UNEQ: /* EQ - ZF=1 */
7991 case UNLT: /* LTU - CF=1 */
7992 case UNLE: /* LEU - CF=1 | ZF=1 */
7993 case LTGT: /* EQ - ZF=0 */
7994 break;
7995 case LT: /* LTU - CF=1 - fails on unordered */
7996 *first_code = UNLT;
7997 *bypass_code = UNORDERED;
7998 break;
7999 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8000 *first_code = UNLE;
8001 *bypass_code = UNORDERED;
8002 break;
8003 case EQ: /* EQ - ZF=1 - fails on unordered */
8004 *first_code = UNEQ;
8005 *bypass_code = UNORDERED;
8006 break;
8007 case NE: /* NE - ZF=0 - fails on unordered */
8008 *first_code = LTGT;
8009 *second_code = UNORDERED;
8010 break;
8011 case UNGE: /* GEU - CF=0 - fails on unordered */
8012 *first_code = GE;
8013 *second_code = UNORDERED;
8014 break;
8015 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8016 *first_code = GT;
8017 *second_code = UNORDERED;
8018 break;
8019 default:
8020 abort ();
8022 if (!TARGET_IEEE_FP)
8024 *second_code = NIL;
8025 *bypass_code = NIL;
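/* For example, with TARGET_IEEE_FP an LT test becomes FIRST_CODE = UNLT
   with BYPASS_CODE = UNORDERED: a jump on PF skips over the real branch so
   that unordered operands fall through.  An NE test instead becomes
   FIRST_CODE = LTGT plus SECOND_CODE = UNORDERED, i.e. two branches that
   both go to the target.  */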
8029 /* Return cost of a comparison done using fcom + arithmetic operations on AX.
8030 All following functions use the number of instructions as their cost metric.
8031 In the future this should be tweaked to compute bytes for optimize_size and
8032 take into account performance of various instructions on various CPUs. */
8033 static int
8034 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8036 if (!TARGET_IEEE_FP)
8037 return 4;
8038 /* The cost of code output by ix86_expand_fp_compare. */
8039 switch (code)
8041 case UNLE:
8042 case UNLT:
8043 case LTGT:
8044 case GT:
8045 case GE:
8046 case UNORDERED:
8047 case ORDERED:
8048 case UNEQ:
8049 return 4;
8050 break;
8051 case LT:
8052 case NE:
8053 case EQ:
8054 case UNGE:
8055 return 5;
8056 break;
8057 case LE:
8058 case UNGT:
8059 return 6;
8060 break;
8061 default:
8062 abort ();
8066 /* Return cost of comparison done using fcomi operation.
8067 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8068 static int
8069 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
8071 enum rtx_code bypass_code, first_code, second_code;
8072 /* Return arbitrarily high cost when instruction is not supported - this
8073 prevents gcc from using it. */
8074 if (!TARGET_CMOVE)
8075 return 1024;
8076 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8077 return (bypass_code != NIL || second_code != NIL) + 2;
8080 /* Return cost of comparison done using sahf operation.
8081 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8082 static int
8083 ix86_fp_comparison_sahf_cost (enum rtx_code code)
8085 enum rtx_code bypass_code, first_code, second_code;
8086 /* Return arbitrarily high cost when the instruction is not preferred - this
8087 keeps gcc from using it. */
8088 if (!TARGET_USE_SAHF && !optimize_size)
8089 return 1024;
8090 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8091 return (bypass_code != NIL || second_code != NIL) + 3;
8094 /* Compute cost of the comparison done using any method.
8095 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8096 static int
8097 ix86_fp_comparison_cost (enum rtx_code code)
8099 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8100 int min;
8102 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8103 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8105 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8106 if (min > sahf_cost)
8107 min = sahf_cost;
8108 if (min > fcomi_cost)
8109 min = fcomi_cost;
8110 return min;
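/* A small worked example: for EQ with TARGET_IEEE_FP the arithmetic
   sequence costs 5, the sahf variant 3 + 1 and the fcomi variant 2 + 1
   (the extra 1 being the unordered bypass branch), so with TARGET_CMOVE
   the fcomi form wins.  */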
8113 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8115 static rtx
8116 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
8117 rtx *second_test, rtx *bypass_test)
8119 enum machine_mode fpcmp_mode, intcmp_mode;
8120 rtx tmp, tmp2;
8121 int cost = ix86_fp_comparison_cost (code);
8122 enum rtx_code bypass_code, first_code, second_code;
8124 fpcmp_mode = ix86_fp_compare_mode (code);
8125 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8127 if (second_test)
8128 *second_test = NULL_RTX;
8129 if (bypass_test)
8130 *bypass_test = NULL_RTX;
8132 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8134 /* Do fcomi/sahf based test when profitable. */
8135 if ((bypass_code == NIL || bypass_test)
8136 && (second_code == NIL || second_test)
8137 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8139 if (TARGET_CMOVE)
8141 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8142 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8143 tmp);
8144 emit_insn (tmp);
8146 else
8148 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8149 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8150 if (!scratch)
8151 scratch = gen_reg_rtx (HImode);
8152 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8153 emit_insn (gen_x86_sahf_1 (scratch));
8156 /* The FP codes work out to act like unsigned. */
8157 intcmp_mode = fpcmp_mode;
8158 code = first_code;
8159 if (bypass_code != NIL)
8160 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8161 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8162 const0_rtx);
8163 if (second_code != NIL)
8164 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8165 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8166 const0_rtx);
8168 else
8170 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8171 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8172 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8173 if (!scratch)
8174 scratch = gen_reg_rtx (HImode);
8175 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8177 /* In the unordered case, we have to check C2 for NaN's, which
8178 doesn't happen to work out to anything nice combination-wise.
8179 So do some bit twiddling on the value we've got in AH to come
8180 up with an appropriate set of condition codes. */
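/* After the fnstsw above, the high byte of SCRATCH holds FPU status word
   bits 8-15, so C0 shows up as 0x01, C2 as 0x04 and C3 as 0x40; the masks
   0x45 and 0x44 therefore test C3|C2|C0 and C3|C2.  These bits match the
   ZF/PF/CF columns of the fcomi table further up.  */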
8182 intcmp_mode = CCNOmode;
8183 switch (code)
8185 case GT:
8186 case UNGT:
8187 if (code == GT || !TARGET_IEEE_FP)
8189 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8190 code = EQ;
8192 else
8194 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8195 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8196 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8197 intcmp_mode = CCmode;
8198 code = GEU;
8200 break;
8201 case LT:
8202 case UNLT:
8203 if (code == LT && TARGET_IEEE_FP)
8205 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8206 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8207 intcmp_mode = CCmode;
8208 code = EQ;
8210 else
8212 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8213 code = NE;
8215 break;
8216 case GE:
8217 case UNGE:
8218 if (code == GE || !TARGET_IEEE_FP)
8220 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8221 code = EQ;
8223 else
8225 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8226 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8227 GEN_INT (0x01)));
8228 code = NE;
8230 break;
8231 case LE:
8232 case UNLE:
8233 if (code == LE && TARGET_IEEE_FP)
8235 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8236 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8237 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8238 intcmp_mode = CCmode;
8239 code = LTU;
8241 else
8243 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8244 code = NE;
8246 break;
8247 case EQ:
8248 case UNEQ:
8249 if (code == EQ && TARGET_IEEE_FP)
8251 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8252 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8253 intcmp_mode = CCmode;
8254 code = EQ;
8256 else
8258 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8259 code = NE;
8260 break;
8262 break;
8263 case NE:
8264 case LTGT:
8265 if (code == NE && TARGET_IEEE_FP)
8267 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8268 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8269 GEN_INT (0x40)));
8270 code = NE;
8272 else
8274 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8275 code = EQ;
8277 break;
8279 case UNORDERED:
8280 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8281 code = NE;
8282 break;
8283 case ORDERED:
8284 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8285 code = EQ;
8286 break;
8288 default:
8289 abort ();
8293 /* Return the test that should be put into the flags user, i.e.
8294 the bcc, scc, or cmov instruction. */
8295 return gen_rtx_fmt_ee (code, VOIDmode,
8296 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8297 const0_rtx);
8301 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
8303 rtx op0, op1, ret;
8304 op0 = ix86_compare_op0;
8305 op1 = ix86_compare_op1;
8307 if (second_test)
8308 *second_test = NULL_RTX;
8309 if (bypass_test)
8310 *bypass_test = NULL_RTX;
8312 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8313 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8314 second_test, bypass_test);
8315 else
8316 ret = ix86_expand_int_compare (code, op0, op1);
8318 return ret;
8321 /* Return true if the CODE will result in nontrivial jump sequence. */
8322 bool
8323 ix86_fp_jump_nontrivial_p (enum rtx_code code)
8325 enum rtx_code bypass_code, first_code, second_code;
8326 if (!TARGET_CMOVE)
8327 return true;
8328 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8329 return bypass_code != NIL || second_code != NIL;
8332 void
8333 ix86_expand_branch (enum rtx_code code, rtx label)
8335 rtx tmp;
8337 switch (GET_MODE (ix86_compare_op0))
8339 case QImode:
8340 case HImode:
8341 case SImode:
8342 simple:
8343 tmp = ix86_expand_compare (code, NULL, NULL);
8344 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8345 gen_rtx_LABEL_REF (VOIDmode, label),
8346 pc_rtx);
8347 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8348 return;
8350 case SFmode:
8351 case DFmode:
8352 case XFmode:
8354 rtvec vec;
8355 int use_fcomi;
8356 enum rtx_code bypass_code, first_code, second_code;
8358 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8359 &ix86_compare_op1);
8361 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8363 /* Check whether we will use the natural sequence with one jump. If
8364 so, we can expand jump early. Otherwise delay expansion by
8365 creating compound insn to not confuse optimizers. */
8366 if (bypass_code == NIL && second_code == NIL
8367 && TARGET_CMOVE)
8369 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8370 gen_rtx_LABEL_REF (VOIDmode, label),
8371 pc_rtx, NULL_RTX);
8373 else
8375 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8376 ix86_compare_op0, ix86_compare_op1);
8377 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8378 gen_rtx_LABEL_REF (VOIDmode, label),
8379 pc_rtx);
8380 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8382 use_fcomi = ix86_use_fcomi_compare (code);
8383 vec = rtvec_alloc (3 + !use_fcomi);
8384 RTVEC_ELT (vec, 0) = tmp;
8385 RTVEC_ELT (vec, 1)
8386 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8387 RTVEC_ELT (vec, 2)
8388 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8389 if (! use_fcomi)
8390 RTVEC_ELT (vec, 3)
8391 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8393 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8395 return;
8398 case DImode:
8399 if (TARGET_64BIT)
8400 goto simple;
8401 /* Expand DImode branch into multiple compare+branch. */
8403 rtx lo[2], hi[2], label2;
8404 enum rtx_code code1, code2, code3;
8406 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8408 tmp = ix86_compare_op0;
8409 ix86_compare_op0 = ix86_compare_op1;
8410 ix86_compare_op1 = tmp;
8411 code = swap_condition (code);
8413 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8414 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8416 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8417 avoid two branches. This costs one extra insn, so disable when
8418 optimizing for size. */
8420 if ((code == EQ || code == NE)
8421 && (!optimize_size
8422 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8424 rtx xor0, xor1;
8426 xor1 = hi[0];
8427 if (hi[1] != const0_rtx)
8428 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8429 NULL_RTX, 0, OPTAB_WIDEN);
8431 xor0 = lo[0];
8432 if (lo[1] != const0_rtx)
8433 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8434 NULL_RTX, 0, OPTAB_WIDEN);
8436 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8437 NULL_RTX, 0, OPTAB_WIDEN);
8439 ix86_compare_op0 = tmp;
8440 ix86_compare_op1 = const0_rtx;
8441 ix86_expand_branch (code, label);
8442 return;
8445 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8446 op1 is a constant and the low word is zero, then we can just
8447 examine the high word. */
8449 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8450 switch (code)
8452 case LT: case LTU: case GE: case GEU:
8453 ix86_compare_op0 = hi[0];
8454 ix86_compare_op1 = hi[1];
8455 ix86_expand_branch (code, label);
8456 return;
8457 default:
8458 break;
8461 /* Otherwise, we need two or three jumps. */
8463 label2 = gen_label_rtx ();
8465 code1 = code;
8466 code2 = swap_condition (code);
8467 code3 = unsigned_condition (code);
8469 switch (code)
8471 case LT: case GT: case LTU: case GTU:
8472 break;
8474 case LE: code1 = LT; code2 = GT; break;
8475 case GE: code1 = GT; code2 = LT; break;
8476 case LEU: code1 = LTU; code2 = GTU; break;
8477 case GEU: code1 = GTU; code2 = LTU; break;
8479 case EQ: code1 = NIL; code2 = NE; break;
8480 case NE: code2 = NIL; break;
8482 default:
8483 abort ();
8487 * a < b =>
8488 * if (hi(a) < hi(b)) goto true;
8489 * if (hi(a) > hi(b)) goto false;
8490 * if (lo(a) < lo(b)) goto true;
8491 * false:
8494 ix86_compare_op0 = hi[0];
8495 ix86_compare_op1 = hi[1];
8497 if (code1 != NIL)
8498 ix86_expand_branch (code1, label);
8499 if (code2 != NIL)
8500 ix86_expand_branch (code2, label2);
8502 ix86_compare_op0 = lo[0];
8503 ix86_compare_op1 = lo[1];
8504 ix86_expand_branch (code3, label);
8506 if (code2 != NIL)
8507 emit_label (label2);
8508 return;
8511 default:
8512 abort ();
8516 /* Split branch based on floating point condition. */
8517 void
8518 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
8519 rtx target1, rtx target2, rtx tmp)
8521 rtx second, bypass;
8522 rtx label = NULL_RTX;
8523 rtx condition;
8524 int bypass_probability = -1, second_probability = -1, probability = -1;
8525 rtx i;
8527 if (target2 != pc_rtx)
8529 rtx tmp = target2;
8530 code = reverse_condition_maybe_unordered (code);
8531 target2 = target1;
8532 target1 = tmp;
8535 condition = ix86_expand_fp_compare (code, op1, op2,
8536 tmp, &second, &bypass);
8538 if (split_branch_probability >= 0)
8540 /* Distribute the probabilities across the jumps.
8541 Assume that BYPASS and SECOND always test
8542 for UNORDERED. */
8543 probability = split_branch_probability;
8545 /* A value of 1 is low enough that the probability does not
8546 need to be updated. Later we may run some experiments and see
8547 if unordered values are more frequent in practice. */
8548 if (bypass)
8549 bypass_probability = 1;
8550 if (second)
8551 second_probability = 1;
8553 if (bypass != NULL_RTX)
8555 label = gen_label_rtx ();
8556 i = emit_jump_insn (gen_rtx_SET
8557 (VOIDmode, pc_rtx,
8558 gen_rtx_IF_THEN_ELSE (VOIDmode,
8559 bypass,
8560 gen_rtx_LABEL_REF (VOIDmode,
8561 label),
8562 pc_rtx)));
8563 if (bypass_probability >= 0)
8564 REG_NOTES (i)
8565 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8566 GEN_INT (bypass_probability),
8567 REG_NOTES (i));
8569 i = emit_jump_insn (gen_rtx_SET
8570 (VOIDmode, pc_rtx,
8571 gen_rtx_IF_THEN_ELSE (VOIDmode,
8572 condition, target1, target2)));
8573 if (probability >= 0)
8574 REG_NOTES (i)
8575 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8576 GEN_INT (probability),
8577 REG_NOTES (i));
8578 if (second != NULL_RTX)
8580 i = emit_jump_insn (gen_rtx_SET
8581 (VOIDmode, pc_rtx,
8582 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8583 target2)));
8584 if (second_probability >= 0)
8585 REG_NOTES (i)
8586 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8587 GEN_INT (second_probability),
8588 REG_NOTES (i));
8590 if (label != NULL_RTX)
8591 emit_label (label);
8595 ix86_expand_setcc (enum rtx_code code, rtx dest)
8597 rtx ret, tmp, tmpreg, equiv;
8598 rtx second_test, bypass_test;
8600 if (GET_MODE (ix86_compare_op0) == DImode
8601 && !TARGET_64BIT)
8602 return 0; /* FAIL */
8604 if (GET_MODE (dest) != QImode)
8605 abort ();
8607 ret = ix86_expand_compare (code, &second_test, &bypass_test);
8608 PUT_MODE (ret, QImode);
8610 tmp = dest;
8611 tmpreg = dest;
8613 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
8614 if (bypass_test || second_test)
8616 rtx test = second_test;
8617 int bypass = 0;
8618 rtx tmp2 = gen_reg_rtx (QImode);
8619 if (bypass_test)
8621 if (second_test)
8622 abort ();
8623 test = bypass_test;
8624 bypass = 1;
8625 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8627 PUT_MODE (test, QImode);
8628 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8630 if (bypass)
8631 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8632 else
8633 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8636 /* Attach a REG_EQUAL note describing the comparison result. */
8637 equiv = simplify_gen_relational (code, QImode,
8638 GET_MODE (ix86_compare_op0),
8639 ix86_compare_op0, ix86_compare_op1);
8640 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
8642 return 1; /* DONE */
8645 /* Expand comparison setting or clearing carry flag. Return true when
8646 successful and set pop for the operation. */
8647 static bool
8648 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
8650 enum machine_mode mode =
8651 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
8653 /* Do not handle DImode compares that go through a special path. Also we can't
8654 deal with FP compares yet. It would be possible to add this. */
8655 if ((mode == DImode && !TARGET_64BIT))
8656 return false;
8657 if (FLOAT_MODE_P (mode))
8659 rtx second_test = NULL, bypass_test = NULL;
8660 rtx compare_op, compare_seq;
8662 /* Shortcut: the following common codes never translate into carry flag compares. */
8663 if (code == EQ || code == NE || code == UNEQ || code == LTGT
8664 || code == ORDERED || code == UNORDERED)
8665 return false;
8667 /* These comparisons require zero flag; swap operands so they won't. */
8668 if ((code == GT || code == UNLE || code == LE || code == UNGT)
8669 && !TARGET_IEEE_FP)
8671 rtx tmp = op0;
8672 op0 = op1;
8673 op1 = tmp;
8674 code = swap_condition (code);
8677 /* Try to expand the comparison and verify that we end up with a carry flag
8678 based comparison. This fails to be true only when we decide to expand the
8679 comparison using arithmetic, which is not a common scenario. */
8680 start_sequence ();
8681 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8682 &second_test, &bypass_test);
8683 compare_seq = get_insns ();
8684 end_sequence ();
8686 if (second_test || bypass_test)
8687 return false;
8688 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
8689 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
8690 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
8691 else
8692 code = GET_CODE (compare_op);
8693 if (code != LTU && code != GEU)
8694 return false;
8695 emit_insn (compare_seq);
8696 *pop = compare_op;
8697 return true;
8699 if (!INTEGRAL_MODE_P (mode))
8700 return false;
8701 switch (code)
8703 case LTU:
8704 case GEU:
8705 break;
8707 /* Convert a==0 into (unsigned)a<1. */
8708 case EQ:
8709 case NE:
8710 if (op1 != const0_rtx)
8711 return false;
8712 op1 = const1_rtx;
8713 code = (code == EQ ? LTU : GEU);
8714 break;
8716 /* Convert a>b into b<a or a>=b+1. */
8717 case GTU:
8718 case LEU:
8719 if (GET_CODE (op1) == CONST_INT)
8721 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
8722 /* Bail out on overflow. We still can swap operands but that
8723 would force loading of the constant into register. */
8724 if (op1 == const0_rtx
8725 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
8726 return false;
8727 code = (code == GTU ? GEU : LTU);
8729 else
8731 rtx tmp = op1;
8732 op1 = op0;
8733 op0 = tmp;
8734 code = (code == GTU ? LTU : GEU);
8736 break;
8738 /* Convert a>=0 into (unsigned)a<0x80000000. */
8739 case LT:
8740 case GE:
8741 if (mode == DImode || op1 != const0_rtx)
8742 return false;
8743 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
8744 code = (code == LT ? GEU : LTU);
8745 break;
8746 case LE:
8747 case GT:
8748 if (mode == DImode || op1 != constm1_rtx)
8749 return false;
8750 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
8751 code = (code == LE ? GEU : LTU);
8752 break;
8754 default:
8755 return false;
8757 /* Swapping operands may cause constant to appear as first operand. */
8758 if (!nonimmediate_operand (op0, VOIDmode))
8760 if (no_new_pseudos)
8761 return false;
8762 op0 = force_reg (mode, op0);
8764 ix86_compare_op0 = op0;
8765 ix86_compare_op1 = op1;
8766 *pop = ix86_expand_compare (code, NULL, NULL);
8767 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
8768 abort ();
8769 return true;
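/* For example, the test (x == 0) is rewritten above as the unsigned test
   (x < 1): the compare then sets the carry flag exactly when x is zero,
   and the callers can consume that flag directly with adc or sbb.  */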
8773 ix86_expand_int_movcc (rtx operands[])
8775 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8776 rtx compare_seq, compare_op;
8777 rtx second_test, bypass_test;
8778 enum machine_mode mode = GET_MODE (operands[0]);
8779 bool sign_bit_compare_p = false;
8781 start_sequence ();
8782 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8783 compare_seq = get_insns ();
8784 end_sequence ();
8786 compare_code = GET_CODE (compare_op);
8788 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
8789 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
8790 sign_bit_compare_p = true;
8792 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8793 HImode insns, we'd be swallowed in word prefix ops. */
8795 if ((mode != HImode || TARGET_FAST_PREFIX)
8796 && (mode != DImode || TARGET_64BIT)
8797 && GET_CODE (operands[2]) == CONST_INT
8798 && GET_CODE (operands[3]) == CONST_INT)
8800 rtx out = operands[0];
8801 HOST_WIDE_INT ct = INTVAL (operands[2]);
8802 HOST_WIDE_INT cf = INTVAL (operands[3]);
8803 HOST_WIDE_INT diff;
8805 diff = ct - cf;
8806 /* Sign bit compares are better done using shifts than by using
8807 sbb. */
8808 if (sign_bit_compare_p
8809 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
8810 ix86_compare_op1, &compare_op))
8812 /* Detect overlap between destination and compare sources. */
8813 rtx tmp = out;
8815 if (!sign_bit_compare_p)
8817 bool fpcmp = false;
8819 compare_code = GET_CODE (compare_op);
8821 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
8822 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
8824 fpcmp = true;
8825 compare_code = ix86_fp_compare_code_to_integer (compare_code);
8828 /* To simplify rest of code, restrict to the GEU case. */
8829 if (compare_code == LTU)
8831 HOST_WIDE_INT tmp = ct;
8832 ct = cf;
8833 cf = tmp;
8834 compare_code = reverse_condition (compare_code);
8835 code = reverse_condition (code);
8837 else
8839 if (fpcmp)
8840 PUT_CODE (compare_op,
8841 reverse_condition_maybe_unordered
8842 (GET_CODE (compare_op)));
8843 else
8844 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
8846 diff = ct - cf;
8848 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
8849 || reg_overlap_mentioned_p (out, ix86_compare_op1))
8850 tmp = gen_reg_rtx (mode);
8852 if (mode == DImode)
8853 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
8854 else
8855 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
8857 else
8859 if (code == GT || code == GE)
8860 code = reverse_condition (code);
8861 else
8863 HOST_WIDE_INT tmp = ct;
8864 ct = cf;
8865 cf = tmp;
8866 diff = ct - cf;
8868 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
8869 ix86_compare_op1, VOIDmode, 0, -1);
8872 if (diff == 1)
8875 * cmpl op0,op1
8876 * sbbl dest,dest
8877 * [addl dest, ct]
8879 * Size 5 - 8.
8881 if (ct)
8882 tmp = expand_simple_binop (mode, PLUS,
8883 tmp, GEN_INT (ct),
8884 copy_rtx (tmp), 1, OPTAB_DIRECT);
8886 else if (cf == -1)
8889 * cmpl op0,op1
8890 * sbbl dest,dest
8891 * orl $ct, dest
8893 * Size 8.
8895 tmp = expand_simple_binop (mode, IOR,
8896 tmp, GEN_INT (ct),
8897 copy_rtx (tmp), 1, OPTAB_DIRECT);
8899 else if (diff == -1 && ct)
8902 * cmpl op0,op1
8903 * sbbl dest,dest
8904 * notl dest
8905 * [addl dest, cf]
8907 * Size 8 - 11.
8909 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
8910 if (cf)
8911 tmp = expand_simple_binop (mode, PLUS,
8912 copy_rtx (tmp), GEN_INT (cf),
8913 copy_rtx (tmp), 1, OPTAB_DIRECT);
8915 else
8918 * cmpl op0,op1
8919 * sbbl dest,dest
8920 * [notl dest]
8921 * andl cf - ct, dest
8922 * [addl dest, ct]
8924 * Size 8 - 11.
8927 if (cf == 0)
8929 cf = ct;
8930 ct = 0;
8931 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
8934 tmp = expand_simple_binop (mode, AND,
8935 copy_rtx (tmp),
8936 gen_int_mode (cf - ct, mode),
8937 copy_rtx (tmp), 1, OPTAB_DIRECT);
8938 if (ct)
8939 tmp = expand_simple_binop (mode, PLUS,
8940 copy_rtx (tmp), GEN_INT (ct),
8941 copy_rtx (tmp), 1, OPTAB_DIRECT);
8944 if (!rtx_equal_p (tmp, out))
8945 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
8947 return 1; /* DONE */
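/* As a concrete instance of the sbb sequences above, (a < b ? -1 : 0)
   with unsigned operands needs no branch at all: the compare sets the
   carry flag and a single sbb of the destination with itself produces
   the -1/0 result directly.  */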
8950 if (diff < 0)
8952 HOST_WIDE_INT tmp;
8953 tmp = ct, ct = cf, cf = tmp;
8954 diff = -diff;
8955 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8957 /* We may be reversing unordered compare to normal compare, which
8958 is not valid in general (we may convert non-trapping condition
8959 to trapping one), however on i386 we currently emit all
8960 comparisons unordered. */
8961 compare_code = reverse_condition_maybe_unordered (compare_code);
8962 code = reverse_condition_maybe_unordered (code);
8964 else
8966 compare_code = reverse_condition (compare_code);
8967 code = reverse_condition (code);
8971 compare_code = NIL;
8972 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
8973 && GET_CODE (ix86_compare_op1) == CONST_INT)
8975 if (ix86_compare_op1 == const0_rtx
8976 && (code == LT || code == GE))
8977 compare_code = code;
8978 else if (ix86_compare_op1 == constm1_rtx)
8980 if (code == LE)
8981 compare_code = LT;
8982 else if (code == GT)
8983 compare_code = GE;
8987 /* Optimize dest = (op0 < 0) ? -1 : cf. */
8988 if (compare_code != NIL
8989 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
8990 && (cf == -1 || ct == -1))
8992 /* If lea code below could be used, only optimize
8993 if it results in a 2 insn sequence. */
8995 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
8996 || diff == 3 || diff == 5 || diff == 9)
8997 || (compare_code == LT && ct == -1)
8998 || (compare_code == GE && cf == -1))
9001 * notl op1 (if necessary)
9002 * sarl $31, op1
9003 * orl cf, op1
9005 if (ct != -1)
9007 cf = ct;
9008 ct = -1;
9009 code = reverse_condition (code);
9012 out = emit_store_flag (out, code, ix86_compare_op0,
9013 ix86_compare_op1, VOIDmode, 0, -1);
9015 out = expand_simple_binop (mode, IOR,
9016 out, GEN_INT (cf),
9017 out, 1, OPTAB_DIRECT);
9018 if (out != operands[0])
9019 emit_move_insn (operands[0], out);
9021 return 1; /* DONE */
9026 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9027 || diff == 3 || diff == 5 || diff == 9)
9028 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9029 && (mode != DImode
9030 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
9033 * xorl dest,dest
9034 * cmpl op1,op2
9035 * setcc dest
9036 * lea cf(dest*(ct-cf)),dest
9038 * Size 14.
9040 * This also catches the degenerate setcc-only case.
9043 rtx tmp;
9044 int nops;
9046 out = emit_store_flag (out, code, ix86_compare_op0,
9047 ix86_compare_op1, VOIDmode, 0, 1);
9049 nops = 0;
9050 /* On x86_64 the lea instruction operates on Pmode, so we need
9051 to get the arithmetic done in the proper mode to match. */
9052 if (diff == 1)
9053 tmp = copy_rtx (out);
9054 else
9056 rtx out1;
9057 out1 = copy_rtx (out);
9058 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9059 nops++;
9060 if (diff & 1)
9062 tmp = gen_rtx_PLUS (mode, tmp, out1);
9063 nops++;
9066 if (cf != 0)
9068 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9069 nops++;
9071 if (!rtx_equal_p (tmp, out))
9073 if (nops == 1)
9074 out = force_operand (tmp, copy_rtx (out));
9075 else
9076 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9078 if (!rtx_equal_p (out, operands[0]))
9079 emit_move_insn (operands[0], copy_rtx (out));
9081 return 1; /* DONE */
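/* Example for the lea form above: with ct = 5 and cf = 1 (diff = 4) the
   0/1 setcc result is mapped onto 1/5 by a single lea computing
   dest = dest * 4 + 1.  */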
9085 * General case:              Jumpful:
9086 *   xorl dest,dest              cmpl op1, op2
9087 *   cmpl op1, op2               movl ct, dest
9088 *   setcc dest                  jcc 1f
9089 *   decl dest                   movl cf, dest
9090 *   andl (cf-ct),dest           1:
9091 *   addl ct,dest
9093 * Size 20.                    Size 14.
9095 * This is reasonably steep, but branch mispredict costs are
9096 * high on modern cpus, so consider failing only if optimizing
9097 * for space.
9100 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9101 && BRANCH_COST >= 2)
9103 if (cf == 0)
9105 cf = ct;
9106 ct = 0;
9107 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9108 /* We may be reversing unordered compare to normal compare,
9109 which is not valid in general (we may convert non-trapping
9110 condition to trapping one), however on i386 we currently
9111 emit all comparisons unordered. */
9112 code = reverse_condition_maybe_unordered (code);
9113 else
9115 code = reverse_condition (code);
9116 if (compare_code != NIL)
9117 compare_code = reverse_condition (compare_code);
9121 if (compare_code != NIL)
9123 /* notl op1 (if needed)
9124 sarl $31, op1
9125 andl (cf-ct), op1
9126 addl ct, op1
9128 For x < 0 (resp. x <= -1) there will be no notl,
9129 so if possible swap the constants to get rid of the
9130 complement.
9131 True/false will be -1/0 while code below (store flag
9132 followed by decrement) is 0/-1, so the constants need
9133 to be exchanged once more. */
9135 if (compare_code == GE || !cf)
9137 code = reverse_condition (code);
9138 compare_code = LT;
9140 else
9142 HOST_WIDE_INT tmp = cf;
9143 cf = ct;
9144 ct = tmp;
9147 out = emit_store_flag (out, code, ix86_compare_op0,
9148 ix86_compare_op1, VOIDmode, 0, -1);
9150 else
9152 out = emit_store_flag (out, code, ix86_compare_op0,
9153 ix86_compare_op1, VOIDmode, 0, 1);
9155 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9156 copy_rtx (out), 1, OPTAB_DIRECT);
9159 out = expand_simple_binop (mode, AND, copy_rtx (out),
9160 gen_int_mode (cf - ct, mode),
9161 copy_rtx (out), 1, OPTAB_DIRECT);
9162 if (ct)
9163 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9164 copy_rtx (out), 1, OPTAB_DIRECT);
9165 if (!rtx_equal_p (out, operands[0]))
9166 emit_move_insn (operands[0], copy_rtx (out));
9168 return 1; /* DONE */
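/* Worked example of the sequence above, assuming no cmov: for a signed
   (x > y ? 20 : 5), setcc gives 1 when the comparison holds; the
   decrement turns that into 0 (or -1 when it does not hold); anding with
   cf - ct = -15 gives 0 or -15; adding ct = 20 then yields 20 or 5.  */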
9172 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9174 /* Try a few things more with specific constants and a variable. */
9176 optab op;
9177 rtx var, orig_out, out, tmp;
9179 if (BRANCH_COST <= 2)
9180 return 0; /* FAIL */
9182 /* If one of the two operands is an interesting constant, load a
9183 constant with the above and mask it in with a logical operation. */
9185 if (GET_CODE (operands[2]) == CONST_INT)
9187 var = operands[3];
9188 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
9189 operands[3] = constm1_rtx, op = and_optab;
9190 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
9191 operands[3] = const0_rtx, op = ior_optab;
9192 else
9193 return 0; /* FAIL */
9195 else if (GET_CODE (operands[3]) == CONST_INT)
9197 var = operands[2];
9198 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
9199 operands[2] = constm1_rtx, op = and_optab;
9200 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
9201 operands[2] = const0_rtx, op = ior_optab;
9202 else
9203 return 0; /* FAIL */
9205 else
9206 return 0; /* FAIL */
9208 orig_out = operands[0];
9209 tmp = gen_reg_rtx (mode);
9210 operands[0] = tmp;
9212 /* Recurse to get the constant loaded. */
9213 if (ix86_expand_int_movcc (operands) == 0)
9214 return 0; /* FAIL */
9216 /* Mask in the interesting variable. */
9217 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9218 OPTAB_WIDEN);
9219 if (!rtx_equal_p (out, orig_out))
9220 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
9222 return 1; /* DONE */
9226 * For comparison with above,
9228 * movl cf,dest
9229 * movl ct,tmp
9230 * cmpl op1,op2
9231 * cmovcc tmp,dest
9233 * Size 15.
9236 if (! nonimmediate_operand (operands[2], mode))
9237 operands[2] = force_reg (mode, operands[2]);
9238 if (! nonimmediate_operand (operands[3], mode))
9239 operands[3] = force_reg (mode, operands[3]);
9241 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9243 rtx tmp = gen_reg_rtx (mode);
9244 emit_move_insn (tmp, operands[3]);
9245 operands[3] = tmp;
9247 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9249 rtx tmp = gen_reg_rtx (mode);
9250 emit_move_insn (tmp, operands[2]);
9251 operands[2] = tmp;
9254 if (! register_operand (operands[2], VOIDmode)
9255 && (mode == QImode
9256 || ! register_operand (operands[3], VOIDmode)))
9257 operands[2] = force_reg (mode, operands[2]);
9259 if (mode == QImode
9260 && ! register_operand (operands[3], VOIDmode))
9261 operands[3] = force_reg (mode, operands[3]);
9263 emit_insn (compare_seq);
9264 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9265 gen_rtx_IF_THEN_ELSE (mode,
9266 compare_op, operands[2],
9267 operands[3])));
9268 if (bypass_test)
9269 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9270 gen_rtx_IF_THEN_ELSE (mode,
9271 bypass_test,
9272 copy_rtx (operands[3]),
9273 copy_rtx (operands[0]))));
9274 if (second_test)
9275 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9276 gen_rtx_IF_THEN_ELSE (mode,
9277 second_test,
9278 copy_rtx (operands[2]),
9279 copy_rtx (operands[0]))));
9281 return 1; /* DONE */
9285 ix86_expand_fp_movcc (rtx operands[])
9287 enum rtx_code code;
9288 rtx tmp;
9289 rtx compare_op, second_test, bypass_test;
9291 /* For SF/DFmode conditional moves based on comparisons
9292 in same mode, we may want to use SSE min/max instructions. */
9293 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9294 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9295 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9296 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
9297 && (!TARGET_IEEE_FP
9298 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9299 /* We may be called from the post-reload splitter. */
9300 && (!REG_P (operands[0])
9301 || SSE_REG_P (operands[0])
9302 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9304 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9305 code = GET_CODE (operands[1]);
9307 /* See if we have (cross) match between comparison operands and
9308 conditional move operands. */
9309 if (rtx_equal_p (operands[2], op1))
9311 rtx tmp = op0;
9312 op0 = op1;
9313 op1 = tmp;
9314 code = reverse_condition_maybe_unordered (code);
9316 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9318 /* Check for min operation. */
9319 if (code == LT || code == UNLE)
9321 if (code == UNLE)
9323 rtx tmp = op0;
9324 op0 = op1;
9325 op1 = tmp;
9327 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9328 if (memory_operand (op0, VOIDmode))
9329 op0 = force_reg (GET_MODE (operands[0]), op0);
9330 if (GET_MODE (operands[0]) == SFmode)
9331 emit_insn (gen_minsf3 (operands[0], op0, op1));
9332 else
9333 emit_insn (gen_mindf3 (operands[0], op0, op1));
9334 return 1;
9336 /* Check for max operation. */
9337 if (code == GT || code == UNGE)
9339 if (code == UNGE)
9341 rtx tmp = op0;
9342 op0 = op1;
9343 op1 = tmp;
9345 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9346 if (memory_operand (op0, VOIDmode))
9347 op0 = force_reg (GET_MODE (operands[0]), op0);
9348 if (GET_MODE (operands[0]) == SFmode)
9349 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9350 else
9351 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9352 return 1;
9355 /* Manage condition to be sse_comparison_operator. In case we are
9356 in non-ieee mode, try to canonicalize the destination operand
9357 to be first in the comparison - this helps reload to avoid extra
9358 moves. */
9359 if (!sse_comparison_operator (operands[1], VOIDmode)
9360 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9362 rtx tmp = ix86_compare_op0;
9363 ix86_compare_op0 = ix86_compare_op1;
9364 ix86_compare_op1 = tmp;
9365 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9366 VOIDmode, ix86_compare_op0,
9367 ix86_compare_op1);
9369 /* Similarly try to manage result to be first operand of conditional
9370 move. We also don't support the NE comparison on SSE, so try to
9371 avoid it. */
9372 if ((rtx_equal_p (operands[0], operands[3])
9373 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9374 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9376 rtx tmp = operands[2];
9377 operands[2] = operands[3];
9378 operands[3] = tmp;
9379 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9380 (GET_CODE (operands[1])),
9381 VOIDmode, ix86_compare_op0,
9382 ix86_compare_op1);
9384 if (GET_MODE (operands[0]) == SFmode)
9385 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9386 operands[2], operands[3],
9387 ix86_compare_op0, ix86_compare_op1));
9388 else
9389 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9390 operands[2], operands[3],
9391 ix86_compare_op0, ix86_compare_op1));
9392 return 1;
9395 /* The floating point conditional move instructions don't directly
9396 support conditions resulting from a signed integer comparison. */
9398 code = GET_CODE (operands[1]);
9399 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9401 /* The floating point conditional move instructions don't directly
9402 support signed integer comparisons. */
9404 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9406 if (second_test != NULL || bypass_test != NULL)
9407 abort ();
9408 tmp = gen_reg_rtx (QImode);
9409 ix86_expand_setcc (code, tmp);
9410 code = NE;
9411 ix86_compare_op0 = tmp;
9412 ix86_compare_op1 = const0_rtx;
9413 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9415 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9417 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9418 emit_move_insn (tmp, operands[3]);
9419 operands[3] = tmp;
9421 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9423 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9424 emit_move_insn (tmp, operands[2]);
9425 operands[2] = tmp;
9428 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9429 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9430 compare_op,
9431 operands[2],
9432 operands[3])));
9433 if (bypass_test)
9434 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9435 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9436 bypass_test,
9437 operands[3],
9438 operands[0])));
9439 if (second_test)
9440 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9441 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9442 second_test,
9443 operands[2],
9444 operands[0])));
9446 return 1;
9449 /* Expand conditional increment or decrement using adc/sbb instructions.
9450 The default case using setcc followed by the conditional move can be
9451 done by generic code. */
9453 ix86_expand_int_addcc (rtx operands[])
9455 enum rtx_code code = GET_CODE (operands[1]);
9456 rtx compare_op;
9457 rtx val = const0_rtx;
9458 bool fpcmp = false;
9459 enum machine_mode mode = GET_MODE (operands[0]);
9461 if (operands[3] != const1_rtx
9462 && operands[3] != constm1_rtx)
9463 return 0;
9464 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9465 ix86_compare_op1, &compare_op))
9466 return 0;
9467 code = GET_CODE (compare_op);
9469 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9470 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9472 fpcmp = true;
9473 code = ix86_fp_compare_code_to_integer (code);
9476 if (code != LTU)
9478 val = constm1_rtx;
9479 if (fpcmp)
9480 PUT_CODE (compare_op,
9481 reverse_condition_maybe_unordered
9482 (GET_CODE (compare_op)));
9483 else
9484 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9486 PUT_MODE (compare_op, mode);
9488 /* Construct either adc or sbb insn. */
9489 if ((code == LTU) == (operands[3] == constm1_rtx))
9491 switch (GET_MODE (operands[0]))
9493 case QImode:
9494 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
9495 break;
9496 case HImode:
9497 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
9498 break;
9499 case SImode:
9500 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
9501 break;
9502 case DImode:
9503 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
9504 break;
9505 default:
9506 abort ();
9509 else
9511 switch (GET_MODE (operands[0]))
9513 case QImode:
9514 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
9515 break;
9516 case HImode:
9517 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
9518 break;
9519 case SImode:
9520 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
9521 break;
9522 case DImode:
9523 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
9524 break;
9525 default:
9526 abort ();
9529 return 1; /* DONE */
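/* For instance, (x = y + (a < b)) with unsigned operands expands to a
   compare that leaves the carry flag set when a < b, followed by an
   add-with-carry of zero, i.e. x = y + 0 + CF; the constm1 and sbb
   variants cover the decrement and reversed-condition cases.  */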
9533 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9534 works for floating point parameters and non-offsettable memories.
9535 For pushes, it returns just stack offsets; the values will be saved
9536 in the right order. At most three parts are generated. */
9538 static int
9539 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
9541 int size;
9543 if (!TARGET_64BIT)
9544 size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
9545 else
9546 size = (GET_MODE_SIZE (mode) + 4) / 8;
9548 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9549 abort ();
9550 if (size < 2 || size > 3)
9551 abort ();
9553 /* Optimize constant pool reference to immediates. This is used by fp
9554 moves, which force all constants to memory to allow combining. */
9555 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
9557 rtx tmp = maybe_get_pool_constant (operand);
9558 if (tmp)
9559 operand = tmp;
9562 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9564 /* The only non-offsettable memories we handle are pushes. */
9565 if (! push_operand (operand, VOIDmode))
9566 abort ();
9568 operand = copy_rtx (operand);
9569 PUT_MODE (operand, Pmode);
9570 parts[0] = parts[1] = parts[2] = operand;
9572 else if (!TARGET_64BIT)
9574 if (mode == DImode)
9575 split_di (&operand, 1, &parts[0], &parts[1]);
9576 else
9578 if (REG_P (operand))
9580 if (!reload_completed)
9581 abort ();
9582 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9583 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9584 if (size == 3)
9585 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9587 else if (offsettable_memref_p (operand))
9589 operand = adjust_address (operand, SImode, 0);
9590 parts[0] = operand;
9591 parts[1] = adjust_address (operand, SImode, 4);
9592 if (size == 3)
9593 parts[2] = adjust_address (operand, SImode, 8);
9595 else if (GET_CODE (operand) == CONST_DOUBLE)
9597 REAL_VALUE_TYPE r;
9598 long l[4];
9600 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9601 switch (mode)
9603 case XFmode:
9604 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9605 parts[2] = gen_int_mode (l[2], SImode);
9606 break;
9607 case DFmode:
9608 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9609 break;
9610 default:
9611 abort ();
9613 parts[1] = gen_int_mode (l[1], SImode);
9614 parts[0] = gen_int_mode (l[0], SImode);
9616 else
9617 abort ();
9620 else
9622 if (mode == TImode)
9623 split_ti (&operand, 1, &parts[0], &parts[1]);
9624 if (mode == XFmode || mode == TFmode)
9626 enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
9627 if (REG_P (operand))
9629 if (!reload_completed)
9630 abort ();
9631 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9632 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
9634 else if (offsettable_memref_p (operand))
9636 operand = adjust_address (operand, DImode, 0);
9637 parts[0] = operand;
9638 parts[1] = adjust_address (operand, upper_mode, 8);
9640 else if (GET_CODE (operand) == CONST_DOUBLE)
9642 REAL_VALUE_TYPE r;
9643 long l[3];
9645 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9646 real_to_target (l, &r, mode);
9647 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9648 if (HOST_BITS_PER_WIDE_INT >= 64)
9649 parts[0]
9650 = gen_int_mode
9651 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
9652 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
9653 DImode);
9654 else
9655 parts[0] = immed_double_const (l[0], l[1], DImode);
9656 if (upper_mode == SImode)
9657 parts[1] = gen_int_mode (l[2], SImode);
9658 else if (HOST_BITS_PER_WIDE_INT >= 64)
9659 parts[1]
9660 = gen_int_mode
9661 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
9662 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
9663 DImode);
9664 else
9665 parts[1] = immed_double_const (l[2], l[3], DImode);
9667 else
9668 abort ();
9672 return size;
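/* For example, on !TARGET_64BIT a DImode or DFmode operand is returned as
   two SImode parts and an XFmode operand as three, with a CONST_DOUBLE
   broken into SImode immediates via the REAL_VALUE_* macros above.  */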
9675 /* Emit insns to perform a move or push of DI, DF, and XF values.
9676 All required insns are emitted. Operands 2-4 contain the destination
9677 values in the correct order; operands 5-7 contain the corresponding
9678 source values. */
9680 void
9681 ix86_split_long_move (rtx operands[])
9683 rtx part[2][3];
9684 int nparts;
9685 int push = 0;
9686 int collisions = 0;
9687 enum machine_mode mode = GET_MODE (operands[0]);
9689 /* The DFmode expanders may ask us to move a double.
9690 For a 64-bit target this is a single move. By hiding that fact
9691 here we simplify the i386.md splitters. */
9692 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9694 /* Optimize constant pool reference to immediates. This is used by
9695 fp moves, which force all constants to memory to allow combining. */
9697 if (GET_CODE (operands[1]) == MEM
9698 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9699 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9700 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9701 if (push_operand (operands[0], VOIDmode))
9703 operands[0] = copy_rtx (operands[0]);
9704 PUT_MODE (operands[0], Pmode);
9706 else
9707 operands[0] = gen_lowpart (DImode, operands[0]);
9708 operands[1] = gen_lowpart (DImode, operands[1]);
9709 emit_move_insn (operands[0], operands[1]);
9710 return;
9713 /* The only non-offsettable memory we handle is push. */
9714 if (push_operand (operands[0], VOIDmode))
9715 push = 1;
9716 else if (GET_CODE (operands[0]) == MEM
9717 && ! offsettable_memref_p (operands[0]))
9718 abort ();
9720 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9721 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
9723 /* When emitting a push, take care with source operands on the stack. */
9724 if (push && GET_CODE (operands[1]) == MEM
9725 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9727 if (nparts == 3)
9728 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9729 XEXP (part[1][2], 0));
9730 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9731 XEXP (part[1][1], 0));
9734 /* We need to do the copy in the right order in case an address register
9735 of the source overlaps the destination. */
9736 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9738 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9739 collisions++;
9740 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9741 collisions++;
9742 if (nparts == 3
9743 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9744 collisions++;
9746 /* Collision in the middle part can be handled by reordering. */
9747 if (collisions == 1 && nparts == 3
9748 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9750 rtx tmp;
9751 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9752 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9755 /* If there are more collisions, we can't handle it by reordering.
9756 Do an lea to the last part and use only one colliding move. */
9757 else if (collisions > 1)
9759 rtx base;
9761 collisions = 1;
9763 base = part[0][nparts - 1];
9765 /* Handle the case when the last part isn't valid for lea.
9766 Happens in 64-bit mode storing the 12-byte XFmode. */
9767 if (GET_MODE (base) != Pmode)
9768 base = gen_rtx_REG (Pmode, REGNO (base));
9770 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
9771 part[1][0] = replace_equiv_address (part[1][0], base);
9772 part[1][1] = replace_equiv_address (part[1][1],
9773 plus_constant (base, UNITS_PER_WORD));
9774 if (nparts == 3)
9775 part[1][2] = replace_equiv_address (part[1][2],
9776 plus_constant (base, 8));
9780 if (push)
9782 if (!TARGET_64BIT)
9784 if (nparts == 3)
9786 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
9787 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
9788 emit_move_insn (part[0][2], part[1][2]);
9791 else
9793 /* In 64-bit mode we don't have a 32-bit push available. In case this is
9794 a register, it is OK - we will just use the larger counterpart. We also
9795 retype memory - this comes from an attempt to avoid the REX prefix on
9796 moves of the second half of a TFmode value. */
9797 if (GET_MODE (part[1][1]) == SImode)
9799 if (GET_CODE (part[1][1]) == MEM)
9800 part[1][1] = adjust_address (part[1][1], DImode, 0);
9801 else if (REG_P (part[1][1]))
9802 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9803 else
9804 abort ();
9805 if (GET_MODE (part[1][0]) == SImode)
9806 part[1][0] = part[1][1];
9809 emit_move_insn (part[0][1], part[1][1]);
9810 emit_move_insn (part[0][0], part[1][0]);
9811 return;
9814 /* Choose the correct order so as not to overwrite the source before it is copied. */
9815 if ((REG_P (part[0][0])
9816 && REG_P (part[1][1])
9817 && (REGNO (part[0][0]) == REGNO (part[1][1])
9818 || (nparts == 3
9819 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9820 || (collisions > 0
9821 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
9823 if (nparts == 3)
9825 operands[2] = part[0][2];
9826 operands[3] = part[0][1];
9827 operands[4] = part[0][0];
9828 operands[5] = part[1][2];
9829 operands[6] = part[1][1];
9830 operands[7] = part[1][0];
9832 else
9834 operands[2] = part[0][1];
9835 operands[3] = part[0][0];
9836 operands[5] = part[1][1];
9837 operands[6] = part[1][0];
9840 else
9842 if (nparts == 3)
9844 operands[2] = part[0][0];
9845 operands[3] = part[0][1];
9846 operands[4] = part[0][2];
9847 operands[5] = part[1][0];
9848 operands[6] = part[1][1];
9849 operands[7] = part[1][2];
9851 else
9853 operands[2] = part[0][0];
9854 operands[3] = part[0][1];
9855 operands[5] = part[1][0];
9856 operands[6] = part[1][1];
9859 emit_move_insn (operands[2], operands[5]);
9860 emit_move_insn (operands[3], operands[6]);
9861 if (nparts == 3)
9862 emit_move_insn (operands[4], operands[7]);
9864 return;
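/* Split a DImode left shift by operands[2] into SImode operations on the
   low and high halves.  For a constant count of 32 or more the old low
   half becomes the high half (shifted further by count - 32) and the low
   half is zeroed; smaller constant counts use an shld/shl pair.  For a
   variable count the shld/shl pair only honours the low five bits of the
   count, so the result is then fixed up for counts of 32..63 with a
   conditional move (SCRATCH, when given, may be clobbered) or with a
   branchy adjustment pattern otherwise. */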
9867 void
9868 ix86_split_ashldi (rtx *operands, rtx scratch)
9870 rtx low[2], high[2];
9871 int count;
9873 if (GET_CODE (operands[2]) == CONST_INT)
9875 split_di (operands, 2, low, high);
9876 count = INTVAL (operands[2]) & 63;
9878 if (count >= 32)
9880 emit_move_insn (high[0], low[1]);
9881 emit_move_insn (low[0], const0_rtx);
9883 if (count > 32)
9884 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
9886 else
9888 if (!rtx_equal_p (operands[0], operands[1]))
9889 emit_move_insn (operands[0], operands[1]);
9890 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
9891 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
9894 else
9896 if (!rtx_equal_p (operands[0], operands[1]))
9897 emit_move_insn (operands[0], operands[1]);
9899 split_di (operands, 1, low, high);
9901 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
9902 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
9904 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9906 if (! no_new_pseudos)
9907 scratch = force_reg (SImode, const0_rtx);
9908 else
9909 emit_move_insn (scratch, const0_rtx);
9911 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
9912 scratch));
9914 else
9915 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
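/* Split a DImode arithmetic right shift by operands[2] into SImode
   operations, analogously to ix86_split_ashldi: constant counts are
   handled directly (the sign is produced by shifting the high half right
   by 31), while variable counts use an shrd/sar pair followed by the
   32..63 fixup. */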
9919 void
9920 ix86_split_ashrdi (rtx *operands, rtx scratch)
9922 rtx low[2], high[2];
9923 int count;
9925 if (GET_CODE (operands[2]) == CONST_INT)
9927 split_di (operands, 2, low, high);
9928 count = INTVAL (operands[2]) & 63;
9930 if (count == 63)
9932 emit_move_insn (high[0], high[1]);
9933 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
9934 emit_move_insn (low[0], high[0]);
9937 else if (count >= 32)
9939 emit_move_insn (low[0], high[1]);
9941 if (! reload_completed)
9942 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
9943 else
9945 emit_move_insn (high[0], low[0]);
9946 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
9949 if (count > 32)
9950 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
9952 else
9954 if (!rtx_equal_p (operands[0], operands[1]))
9955 emit_move_insn (operands[0], operands[1]);
9956 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9957 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
9960 else
9962 if (!rtx_equal_p (operands[0], operands[1]))
9963 emit_move_insn (operands[0], operands[1]);
9965 split_di (operands, 1, low, high);
9967 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9968 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
9970 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9972 if (! no_new_pseudos)
9973 scratch = gen_reg_rtx (SImode);
9974 emit_move_insn (scratch, high[0]);
9975 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
9976 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9977 scratch));
9979 else
9980 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
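/* Split a DImode logical right shift by operands[2] into SImode
   operations; the same scheme as above, except that the vacated high
   half is filled with zeros rather than with copies of the sign bit. */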
9984 void
9985 ix86_split_lshrdi (rtx *operands, rtx scratch)
9987 rtx low[2], high[2];
9988 int count;
9990 if (GET_CODE (operands[2]) == CONST_INT)
9992 split_di (operands, 2, low, high);
9993 count = INTVAL (operands[2]) & 63;
9995 if (count >= 32)
9997 emit_move_insn (low[0], high[1]);
9998 emit_move_insn (high[0], const0_rtx);
10000 if (count > 32)
10001 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10003 else
10005 if (!rtx_equal_p (operands[0], operands[1]))
10006 emit_move_insn (operands[0], operands[1]);
10007 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10008 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10011 else
10013 if (!rtx_equal_p (operands[0], operands[1]))
10014 emit_move_insn (operands[0], operands[1]);
10016 split_di (operands, 1, low, high);
10018 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10019 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10021 /* Heh. By reversing the arguments, we can reuse this pattern. */
10022 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10024 if (! no_new_pseudos)
10025 scratch = force_reg (SImode, const0_rtx);
10026 else
10027 emit_move_insn (scratch, const0_rtx);
10029 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10030 scratch));
10032 else
10033 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10037 /* Helper function for the string operations below. Test whether the bits
10038 of VARIABLE selected by VALUE are clear; if so, jump to the returned label. */
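/* The expanders below use it along the lines of

       label = ix86_expand_aligntest (destreg, 1);
       ... copy or clear a single byte and adjust the counter ...
       emit_label (label);

   so the single-byte fixup is skipped whenever DESTREG already has the
   tested bit clear. */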
10039 static rtx
10040 ix86_expand_aligntest (rtx variable, int value)
10042 rtx label = gen_label_rtx ();
10043 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10044 if (GET_MODE (variable) == DImode)
10045 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10046 else
10047 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10048 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10049 1, label);
10050 return label;
10053 /* Decrease COUNTREG by VALUE. */
10054 static void
10055 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
10057 if (GET_MODE (countreg) == DImode)
10058 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10059 else
10060 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10063 /* Zero-extend EXP, which may be in SImode, into a Pmode register. */
10065 ix86_zero_extend_to_Pmode (rtx exp)
10067 rtx r;
10068 if (GET_MODE (exp) == VOIDmode)
10069 return force_reg (Pmode, exp);
10070 if (GET_MODE (exp) == Pmode)
10071 return copy_to_mode_reg (Pmode, exp);
10072 r = gen_reg_rtx (Pmode);
10073 emit_insn (gen_zero_extendsidi2 (r, exp));
10074 return r;
10077 /* Expand string move (memcpy) operation. Use i386 string operations when
10078 profitable. expand_clrmem contains similar code. */
10080 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
10082 rtx srcreg, destreg, countreg, srcexp, destexp;
10083 enum machine_mode counter_mode;
10084 HOST_WIDE_INT align = 0;
10085 unsigned HOST_WIDE_INT count = 0;
10087 if (GET_CODE (align_exp) == CONST_INT)
10088 align = INTVAL (align_exp);
10090 /* Can't use any of this if the user has appropriated esi or edi. */
10091 if (global_regs[4] || global_regs[5])
10092 return 0;
10094 /* This simple hack avoids all inlining code and simplifies code below. */
10095 if (!TARGET_ALIGN_STRINGOPS)
10096 align = 64;
10098 if (GET_CODE (count_exp) == CONST_INT)
10100 count = INTVAL (count_exp);
10101 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10102 return 0;
10105 /* Figure out the proper mode for the counter. For 32 bits it is always SImode,
10106 for 64 bits use SImode when possible, otherwise DImode.
10107 Set count to the number of bytes copied when known at compile time. */
10108 if (!TARGET_64BIT
10109 || GET_MODE (count_exp) == SImode
10110 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
10111 counter_mode = SImode;
10112 else
10113 counter_mode = DImode;
10115 if (counter_mode != SImode && counter_mode != DImode)
10116 abort ();
10118 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10119 if (destreg != XEXP (dst, 0))
10120 dst = replace_equiv_address_nv (dst, destreg);
10121 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10122 if (srcreg != XEXP (src, 0))
10123 src = replace_equiv_address_nv (src, srcreg);
10125 /* When optimizing for size, emit a simple rep ; movsb instruction for
10126 counts not divisible by 4. */
10128 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10130 emit_insn (gen_cld ());
10131 countreg = ix86_zero_extend_to_Pmode (count_exp);
10132 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
10133 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
10134 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
10135 destexp, srcexp));
10138 /* For constant aligned (or small unaligned) copies use rep movsl
10139 followed by code copying the rest. For PentiumPro ensure 8 byte
10140 alignment to allow rep movsl acceleration. */
10142 else if (count != 0
10143 && (align >= 8
10144 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10145 || optimize_size || count < (unsigned int) 64))
10147 unsigned HOST_WIDE_INT offset = 0;
10148 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10149 rtx srcmem, dstmem;
10151 emit_insn (gen_cld ());
10152 if (count & ~(size - 1))
10154 countreg = copy_to_mode_reg (counter_mode,
10155 GEN_INT ((count >> (size == 4 ? 2 : 3))
10156 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10157 countreg = ix86_zero_extend_to_Pmode (countreg);
10159 destexp = gen_rtx_ASHIFT (Pmode, countreg,
10160 GEN_INT (size == 4 ? 2 : 3));
10161 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
10162 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10164 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
10165 countreg, destexp, srcexp));
10166 offset = count & ~(size - 1);
10168 if (size == 8 && (count & 0x04))
10170 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
10171 offset);
10172 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
10173 offset);
10174 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10175 offset += 4;
10177 if (count & 0x02)
10179 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
10180 offset);
10181 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
10182 offset);
10183 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10184 offset += 2;
10186 if (count & 0x01)
10188 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
10189 offset);
10190 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
10191 offset);
10192 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10195 /* The generic code based on the glibc implementation:
10196 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10197 allowing accelerated copying there)
10198 - copy the data using rep movsl
10199 - copy the rest. */
10200 else
10202 rtx countreg2;
10203 rtx label = NULL;
10204 rtx srcmem, dstmem;
10205 int desired_alignment = (TARGET_PENTIUMPRO
10206 && (count == 0 || count >= (unsigned int) 260)
10207 ? 8 : UNITS_PER_WORD);
10208 /* Get rid of MEM_OFFSETs, they won't be accurate. */
10209 dst = change_address (dst, BLKmode, destreg);
10210 src = change_address (src, BLKmode, srcreg);
10212 /* In case we don't know anything about the alignment, default to the
10213 library version, since it is usually equally fast and results in
10214 shorter code.
10216 Also emit the library call when we know that the count is large and the
10217 call overhead will not be important. */
10218 if (!TARGET_INLINE_ALL_STRINGOPS
10219 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10220 return 0;
10222 if (TARGET_SINGLE_STRINGOP)
10223 emit_insn (gen_cld ());
10225 countreg2 = gen_reg_rtx (Pmode);
10226 countreg = copy_to_mode_reg (counter_mode, count_exp);
10228 /* We don't use loops to align destination and to copy parts smaller
10229 than 4 bytes, because gcc is able to optimize such code better (in
10230 the case the destination or the count really is aligned, gcc is often
10231 able to predict the branches) and also it is friendlier to the
10232 hardware branch prediction.
10234 Using loops is beneficial for the generic case, because we can
10235 handle small counts using the loops. Many CPUs (such as Athlon)
10236 have large REP prefix setup costs.
10238 This is quite costly. Maybe we can revisit this decision later or
10239 add some customizability to this code. */
10241 if (count == 0 && align < desired_alignment)
10243 label = gen_label_rtx ();
10244 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10245 LEU, 0, counter_mode, 1, label);
10247 if (align <= 1)
10249 rtx label = ix86_expand_aligntest (destreg, 1);
10250 srcmem = change_address (src, QImode, srcreg);
10251 dstmem = change_address (dst, QImode, destreg);
10252 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10253 ix86_adjust_counter (countreg, 1);
10254 emit_label (label);
10255 LABEL_NUSES (label) = 1;
10257 if (align <= 2)
10259 rtx label = ix86_expand_aligntest (destreg, 2);
10260 srcmem = change_address (src, HImode, srcreg);
10261 dstmem = change_address (dst, HImode, destreg);
10262 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10263 ix86_adjust_counter (countreg, 2);
10264 emit_label (label);
10265 LABEL_NUSES (label) = 1;
10267 if (align <= 4 && desired_alignment > 4)
10269 rtx label = ix86_expand_aligntest (destreg, 4);
10270 srcmem = change_address (src, SImode, srcreg);
10271 dstmem = change_address (dst, SImode, destreg);
10272 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10273 ix86_adjust_counter (countreg, 4);
10274 emit_label (label);
10275 LABEL_NUSES (label) = 1;
10278 if (label && desired_alignment > 4 && !TARGET_64BIT)
10280 emit_label (label);
10281 LABEL_NUSES (label) = 1;
10282 label = NULL_RTX;
10284 if (!TARGET_SINGLE_STRINGOP)
10285 emit_insn (gen_cld ());
10286 if (TARGET_64BIT)
10288 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10289 GEN_INT (3)));
10290 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
10292 else
10294 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
10295 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
10297 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
10298 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10299 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
10300 countreg2, destexp, srcexp));
10302 if (label)
10304 emit_label (label);
10305 LABEL_NUSES (label) = 1;
10307 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10309 srcmem = change_address (src, SImode, srcreg);
10310 dstmem = change_address (dst, SImode, destreg);
10311 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10313 if ((align <= 4 || count == 0) && TARGET_64BIT)
10315 rtx label = ix86_expand_aligntest (countreg, 4);
10316 srcmem = change_address (src, SImode, srcreg);
10317 dstmem = change_address (dst, SImode, destreg);
10318 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10319 emit_label (label);
10320 LABEL_NUSES (label) = 1;
10322 if (align > 2 && count != 0 && (count & 2))
10324 srcmem = change_address (src, HImode, srcreg);
10325 dstmem = change_address (dst, HImode, destreg);
10326 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10328 if (align <= 2 || count == 0)
10330 rtx label = ix86_expand_aligntest (countreg, 2);
10331 srcmem = change_address (src, HImode, srcreg);
10332 dstmem = change_address (dst, HImode, destreg);
10333 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10334 emit_label (label);
10335 LABEL_NUSES (label) = 1;
10337 if (align > 1 && count != 0 && (count & 1))
10339 srcmem = change_address (src, QImode, srcreg);
10340 dstmem = change_address (dst, QImode, destreg);
10341 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10343 if (align <= 1 || count == 0)
10345 rtx label = ix86_expand_aligntest (countreg, 1);
10346 srcmem = change_address (src, QImode, srcreg);
10347 dstmem = change_address (dst, QImode, destreg);
10348 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10349 emit_label (label);
10350 LABEL_NUSES (label) = 1;
10354 return 1;
10357 /* Expand string clear operation (bzero). Use i386 string operations when
10358 profitable. expand_movmem contains similar code. */
10360 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
10362 rtx destreg, zeroreg, countreg, destexp;
10363 enum machine_mode counter_mode;
10364 HOST_WIDE_INT align = 0;
10365 unsigned HOST_WIDE_INT count = 0;
10367 if (GET_CODE (align_exp) == CONST_INT)
10368 align = INTVAL (align_exp);
10370 /* Can't use any of this if the user has appropriated esi. */
10371 if (global_regs[4])
10372 return 0;
10374 /* This simple hack avoids all inlining code and simplifies code below. */
10375 if (!TARGET_ALIGN_STRINGOPS)
10376 align = 32;
10378 if (GET_CODE (count_exp) == CONST_INT)
10380 count = INTVAL (count_exp);
10381 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10382 return 0;
10384 /* Figure out the proper mode for the counter. For 32 bits it is always SImode,
10385 for 64 bits use SImode when possible, otherwise DImode.
10386 Set count to the number of bytes cleared when known at compile time. */
10387 if (!TARGET_64BIT
10388 || GET_MODE (count_exp) == SImode
10389 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
10390 counter_mode = SImode;
10391 else
10392 counter_mode = DImode;
10394 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10395 if (destreg != XEXP (dst, 0))
10396 dst = replace_equiv_address_nv (dst, destreg);
10399 /* When optimizing for size, emit a simple rep ; stosb instruction for
10400 counts not divisible by 4. The movl $N, %ecx; rep; stosb
10401 sequence is 7 bytes long, so if optimizing for size and the count is
10402 small enough that some stosl, stosw and stosb instructions without
10403 rep are shorter, fall back into the next if. */
10405 if ((!optimize || optimize_size)
10406 && (count == 0
10407 || ((count & 0x03)
10408 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
10410 emit_insn (gen_cld ());
10412 countreg = ix86_zero_extend_to_Pmode (count_exp);
10413 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10414 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
10415 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
10417 else if (count != 0
10418 && (align >= 8
10419 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10420 || optimize_size || count < (unsigned int) 64))
10422 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10423 unsigned HOST_WIDE_INT offset = 0;
10425 emit_insn (gen_cld ());
10427 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10428 if (count & ~(size - 1))
10430 unsigned HOST_WIDE_INT repcount;
10431 unsigned int max_nonrep;
10433 repcount = count >> (size == 4 ? 2 : 3);
10434 if (!TARGET_64BIT)
10435 repcount &= 0x3fffffff;
10437 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
10438 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
10439 bytes. In both cases the latter seems to be faster for small
10440 values of N. */
10441 max_nonrep = size == 4 ? 7 : 4;
10442 if (!optimize_size)
10443 switch (ix86_tune)
10445 case PROCESSOR_PENTIUM4:
10446 case PROCESSOR_NOCONA:
10447 max_nonrep = 3;
10448 break;
10449 default:
10450 break;
10453 if (repcount <= max_nonrep)
10454 while (repcount-- > 0)
10456 rtx mem = adjust_automodify_address_nv (dst,
10457 GET_MODE (zeroreg),
10458 destreg, offset);
10459 emit_insn (gen_strset (destreg, mem, zeroreg));
10460 offset += size;
10462 else
10464 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
10465 countreg = ix86_zero_extend_to_Pmode (countreg);
10466 destexp = gen_rtx_ASHIFT (Pmode, countreg,
10467 GEN_INT (size == 4 ? 2 : 3));
10468 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10469 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
10470 destexp));
10471 offset = count & ~(size - 1);
10474 if (size == 8 && (count & 0x04))
10476 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
10477 offset);
10478 emit_insn (gen_strset (destreg, mem,
10479 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10480 offset += 4;
10482 if (count & 0x02)
10484 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
10485 offset);
10486 emit_insn (gen_strset (destreg, mem,
10487 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10488 offset += 2;
10490 if (count & 0x01)
10492 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
10493 offset);
10494 emit_insn (gen_strset (destreg, mem,
10495 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10498 else
10500 rtx countreg2;
10501 rtx label = NULL;
10502 /* Compute desired alignment of the string operation. */
10503 int desired_alignment = (TARGET_PENTIUMPRO
10504 && (count == 0 || count >= (unsigned int) 260)
10505 ? 8 : UNITS_PER_WORD);
10507 /* In case we don't know anything about the alignment, default to the
10508 library version, since it is usually equally fast and results in
10509 shorter code.
10511 Also emit the library call when we know that the count is large and the
10512 call overhead will not be important. */
10513 if (!TARGET_INLINE_ALL_STRINGOPS
10514 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10515 return 0;
10517 if (TARGET_SINGLE_STRINGOP)
10518 emit_insn (gen_cld ());
10520 countreg2 = gen_reg_rtx (Pmode);
10521 countreg = copy_to_mode_reg (counter_mode, count_exp);
10522 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10523 /* Get rid of MEM_OFFSET, it won't be accurate. */
10524 dst = change_address (dst, BLKmode, destreg);
10526 if (count == 0 && align < desired_alignment)
10528 label = gen_label_rtx ();
10529 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10530 LEU, 0, counter_mode, 1, label);
10532 if (align <= 1)
10534 rtx label = ix86_expand_aligntest (destreg, 1);
10535 emit_insn (gen_strset (destreg, dst,
10536 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10537 ix86_adjust_counter (countreg, 1);
10538 emit_label (label);
10539 LABEL_NUSES (label) = 1;
10541 if (align <= 2)
10543 rtx label = ix86_expand_aligntest (destreg, 2);
10544 emit_insn (gen_strset (destreg, dst,
10545 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10546 ix86_adjust_counter (countreg, 2);
10547 emit_label (label);
10548 LABEL_NUSES (label) = 1;
10550 if (align <= 4 && desired_alignment > 4)
10552 rtx label = ix86_expand_aligntest (destreg, 4);
10553 emit_insn (gen_strset (destreg, dst,
10554 (TARGET_64BIT
10555 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10556 : zeroreg)));
10557 ix86_adjust_counter (countreg, 4);
10558 emit_label (label);
10559 LABEL_NUSES (label) = 1;
10562 if (label && desired_alignment > 4 && !TARGET_64BIT)
10564 emit_label (label);
10565 LABEL_NUSES (label) = 1;
10566 label = NULL_RTX;
10569 if (!TARGET_SINGLE_STRINGOP)
10570 emit_insn (gen_cld ());
10571 if (TARGET_64BIT)
10573 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10574 GEN_INT (3)));
10575 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
10577 else
10579 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
10580 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
10582 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10583 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
10585 if (label)
10587 emit_label (label);
10588 LABEL_NUSES (label) = 1;
10591 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10592 emit_insn (gen_strset (destreg, dst,
10593 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10594 if (TARGET_64BIT && (align <= 4 || count == 0))
10596 rtx label = ix86_expand_aligntest (countreg, 4);
10597 emit_insn (gen_strset (destreg, dst,
10598 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10599 emit_label (label);
10600 LABEL_NUSES (label) = 1;
10602 if (align > 2 && count != 0 && (count & 2))
10603 emit_insn (gen_strset (destreg, dst,
10604 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10605 if (align <= 2 || count == 0)
10607 rtx label = ix86_expand_aligntest (countreg, 2);
10608 emit_insn (gen_strset (destreg, dst,
10609 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10610 emit_label (label);
10611 LABEL_NUSES (label) = 1;
10613 if (align > 1 && count != 0 && (count & 1))
10614 emit_insn (gen_strset (destreg, dst,
10615 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10616 if (align <= 1 || count == 0)
10618 rtx label = ix86_expand_aligntest (countreg, 1);
10619 emit_insn (gen_strset (destreg, dst,
10620 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10621 emit_label (label);
10622 LABEL_NUSES (label) = 1;
10625 return 1;
10628 /* Expand strlen. */
10630 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
10632 rtx addr, scratch1, scratch2, scratch3, scratch4;
10634 /* The generic case of the strlen expander is long. Avoid its
10635 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
10637 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10638 && !TARGET_INLINE_ALL_STRINGOPS
10639 && !optimize_size
10640 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10641 return 0;
10643 addr = force_reg (Pmode, XEXP (src, 0));
10644 scratch1 = gen_reg_rtx (Pmode);
10646 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10647 && !optimize_size)
10649 /* Well it seems that some optimizer does not combine a call like
10650 foo(strlen(bar), strlen(bar));
10651 when the move and the subtraction are done here. It does calculate
10652 the length just once when these instructions are done inside of
10653 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10654 often used and I use one fewer register for the lifetime of
10655 output_strlen_unroll() this is better. */
10657 emit_move_insn (out, addr);
10659 ix86_expand_strlensi_unroll_1 (out, src, align);
10661 /* strlensi_unroll_1 returns the address of the zero at the end of
10662 the string, like memchr(), so compute the length by subtracting
10663 the start address. */
10664 if (TARGET_64BIT)
10665 emit_insn (gen_subdi3 (out, out, addr));
10666 else
10667 emit_insn (gen_subsi3 (out, out, addr));
10669 else
10671 rtx unspec;
10672 scratch2 = gen_reg_rtx (Pmode);
10673 scratch3 = gen_reg_rtx (Pmode);
10674 scratch4 = force_reg (Pmode, constm1_rtx);
10676 emit_move_insn (scratch3, addr);
10677 eoschar = force_reg (QImode, eoschar);
10679 emit_insn (gen_cld ());
10680 src = replace_equiv_address_nv (src, scratch3);
10682 /* If .md starts supporting :P, this can be done in .md. */
10683 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
10684 scratch4), UNSPEC_SCAS);
10685 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
10686 if (TARGET_64BIT)
10688 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10689 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10691 else
10693 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10694 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10697 return 1;
10700 /* Expand the appropriate insns for doing strlen if not just doing
10701 repnz; scasb
10703 out = result, initialized with the start address
10704 align_rtx = alignment of the address.
10705 scratch = scratch register, initialized with the start address when
10706 not aligned, otherwise undefined
10708 This is just the body. It needs the initializations mentioned above and
10709 some address computing at the end. These things are done in i386.md. */
10711 static void
10712 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
10714 int align;
10715 rtx tmp;
10716 rtx align_2_label = NULL_RTX;
10717 rtx align_3_label = NULL_RTX;
10718 rtx align_4_label = gen_label_rtx ();
10719 rtx end_0_label = gen_label_rtx ();
10720 rtx mem;
10721 rtx tmpreg = gen_reg_rtx (SImode);
10722 rtx scratch = gen_reg_rtx (SImode);
10723 rtx cmp;
10725 align = 0;
10726 if (GET_CODE (align_rtx) == CONST_INT)
10727 align = INTVAL (align_rtx);
10729 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10731 /* Is there a known alignment and is it less than 4? */
10732 if (align < 4)
10734 rtx scratch1 = gen_reg_rtx (Pmode);
10735 emit_move_insn (scratch1, out);
10736 /* Is there a known alignment and is it not 2? */
10737 if (align != 2)
10739 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10740 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10742 /* Leave just the 3 lower bits. */
10743 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
10744 NULL_RTX, 0, OPTAB_WIDEN);
10746 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10747 Pmode, 1, align_4_label);
10748 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
10749 Pmode, 1, align_2_label);
10750 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
10751 Pmode, 1, align_3_label);
10753 else
10755 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10756 check whether it is aligned to a 4-byte boundary. */
10758 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
10759 NULL_RTX, 0, OPTAB_WIDEN);
10761 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10762 Pmode, 1, align_4_label);
10765 mem = change_address (src, QImode, out);
10767 /* Now compare the bytes. */
10769 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
10770 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
10771 QImode, 1, end_0_label);
10773 /* Increment the address. */
10774 if (TARGET_64BIT)
10775 emit_insn (gen_adddi3 (out, out, const1_rtx));
10776 else
10777 emit_insn (gen_addsi3 (out, out, const1_rtx));
10779 /* Not needed with an alignment of 2 */
10780 if (align != 2)
10782 emit_label (align_2_label);
10784 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10785 end_0_label);
10787 if (TARGET_64BIT)
10788 emit_insn (gen_adddi3 (out, out, const1_rtx));
10789 else
10790 emit_insn (gen_addsi3 (out, out, const1_rtx));
10792 emit_label (align_3_label);
10795 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10796 end_0_label);
10798 if (TARGET_64BIT)
10799 emit_insn (gen_adddi3 (out, out, const1_rtx));
10800 else
10801 emit_insn (gen_addsi3 (out, out, const1_rtx));
10804 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
10805 align this loop: it only makes the program bigger and does not help to
10806 speed it up. */
10807 emit_label (align_4_label);
10809 mem = change_address (src, SImode, out);
10810 emit_move_insn (scratch, mem);
10811 if (TARGET_64BIT)
10812 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
10813 else
10814 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
10816 /* This formula yields a nonzero result iff one of the bytes is zero.
10817 This saves three branches inside the loop and many cycles. */
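/* For instance, with scratch = 0x41420043 (second-lowest byte zero):
       scratch - 0x01010101 = 0x4040ff42
       ~scratch             = 0xbebdffbc
       and of the two       = 0x0000ff00
       and with 0x80808080  = 0x00008000   (nonzero: a zero byte is present)
   whereas for scratch = 0x41424344 the final and is 0. */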
10819 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
10820 emit_insn (gen_one_cmplsi2 (scratch, scratch));
10821 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
10822 emit_insn (gen_andsi3 (tmpreg, tmpreg,
10823 gen_int_mode (0x80808080, SImode)));
10824 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
10825 align_4_label);
10827 if (TARGET_CMOVE)
10829 rtx reg = gen_reg_rtx (SImode);
10830 rtx reg2 = gen_reg_rtx (Pmode);
10831 emit_move_insn (reg, tmpreg);
10832 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
10834 /* If zero is not in the first two bytes, move two bytes forward. */
10835 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10836 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10837 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10838 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
10839 gen_rtx_IF_THEN_ELSE (SImode, tmp,
10840 reg,
10841 tmpreg)));
10842 /* Emit lea manually to avoid clobbering of flags. */
10843 emit_insn (gen_rtx_SET (SImode, reg2,
10844 gen_rtx_PLUS (Pmode, out, const2_rtx)));
10846 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10847 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10848 emit_insn (gen_rtx_SET (VOIDmode, out,
10849 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
10850 reg2,
10851 out)));
10854 else
10856 rtx end_2_label = gen_label_rtx ();
10857 /* Is zero in the first two bytes? */
10859 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10860 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10861 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
10862 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10863 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
10864 pc_rtx);
10865 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10866 JUMP_LABEL (tmp) = end_2_label;
10868 /* Not in the first two. Move two bytes forward. */
10869 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
10870 if (TARGET_64BIT)
10871 emit_insn (gen_adddi3 (out, out, const2_rtx));
10872 else
10873 emit_insn (gen_addsi3 (out, out, const2_rtx));
10875 emit_label (end_2_label);
10879 /* Avoid branch in fixing the byte. */
10880 tmpreg = gen_lowpart (QImode, tmpreg);
10881 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
10882 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
10883 if (TARGET_64BIT)
10884 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
10885 else
10886 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
10888 emit_label (end_0_label);
10891 void
10892 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
10893 rtx callarg2 ATTRIBUTE_UNUSED,
10894 rtx pop, int sibcall)
10896 rtx use = NULL, call;
10898 if (pop == const0_rtx)
10899 pop = NULL;
10900 if (TARGET_64BIT && pop)
10901 abort ();
10903 #if TARGET_MACHO
10904 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
10905 fnaddr = machopic_indirect_call_target (fnaddr);
10906 #else
10907 /* Static functions and indirect calls don't need the pic register. */
10908 if (! TARGET_64BIT && flag_pic
10909 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
10910 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
10911 use_reg (&use, pic_offset_table_rtx);
10913 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
10915 rtx al = gen_rtx_REG (QImode, 0);
10916 emit_move_insn (al, callarg2);
10917 use_reg (&use, al);
10919 #endif /* TARGET_MACHO */
10921 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
10923 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
10924 fnaddr = gen_rtx_MEM (QImode, fnaddr);
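/* For a 64-bit sibcall to a non-constant address, the target has to live
   in a register that is neither used for argument passing nor restored by
   the epilogue; %r11 satisfies both constraints in the x86-64 ABI, hence
   the hard register chosen below. */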
10926 if (sibcall && TARGET_64BIT
10927 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
10929 rtx addr;
10930 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
10931 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
10932 emit_move_insn (fnaddr, addr);
10933 fnaddr = gen_rtx_MEM (QImode, fnaddr);
10936 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
10937 if (retval)
10938 call = gen_rtx_SET (VOIDmode, retval, call);
10939 if (pop)
10941 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
10942 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
10943 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
10946 call = emit_call_insn (call);
10947 if (use)
10948 CALL_INSN_FUNCTION_USAGE (call) = use;
10952 /* Clear stack slot assignments remembered from previous functions.
10953 This is called from INIT_EXPANDERS once before RTL is emitted for each
10954 function. */
10956 static struct machine_function *
10957 ix86_init_machine_status (void)
10959 struct machine_function *f;
10961 f = ggc_alloc_cleared (sizeof (struct machine_function));
10962 f->use_fast_prologue_epilogue_nregs = -1;
10964 return f;
10967 /* Return a MEM corresponding to a stack slot with mode MODE.
10968 Allocate a new slot if necessary.
10970 The RTL for a function can have several slots available: N is
10971 which slot to use. */
10974 assign_386_stack_local (enum machine_mode mode, int n)
10976 struct stack_local_entry *s;
10978 if (n < 0 || n >= MAX_386_STACK_LOCALS)
10979 abort ();
10981 for (s = ix86_stack_locals; s; s = s->next)
10982 if (s->mode == mode && s->n == n)
10983 return s->rtl;
10985 s = (struct stack_local_entry *)
10986 ggc_alloc (sizeof (struct stack_local_entry));
10987 s->n = n;
10988 s->mode = mode;
10989 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
10991 s->next = ix86_stack_locals;
10992 ix86_stack_locals = s;
10993 return s->rtl;
10996 /* Construct the SYMBOL_REF for the tls_get_addr function. */
10998 static GTY(()) rtx ix86_tls_symbol;
11000 ix86_tls_get_addr (void)
11003 if (!ix86_tls_symbol)
11005 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11006 (TARGET_GNU_TLS && !TARGET_64BIT)
11007 ? "___tls_get_addr"
11008 : "__tls_get_addr");
11011 return ix86_tls_symbol;
11014 /* Calculate the length of the memory address in the instruction
11015 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11018 memory_address_length (rtx addr)
11020 struct ix86_address parts;
11021 rtx base, index, disp;
11022 int len;
11024 if (GET_CODE (addr) == PRE_DEC
11025 || GET_CODE (addr) == POST_INC
11026 || GET_CODE (addr) == PRE_MODIFY
11027 || GET_CODE (addr) == POST_MODIFY)
11028 return 0;
11030 if (! ix86_decompose_address (addr, &parts))
11031 abort ();
11033 base = parts.base;
11034 index = parts.index;
11035 disp = parts.disp;
11036 len = 0;
11038 /* Rule of thumb:
11039 - esp as the base always wants an index,
11040 - ebp as the base always wants a displacement. */
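/* In 32-bit terms, for example: (%eax) needs no extra bytes, 16(%eax)
   needs a one-byte displacement, a bare symbol or symbol(,%ebx,4) needs a
   four-byte displacement, and any index (or %esp as the base) costs one
   more byte for the SIB form; those are the cases counted below. */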
11042 /* Register Indirect. */
11043 if (base && !index && !disp)
11045 /* esp (for its index) and ebp (for its displacement) need
11046 the two-byte modrm form. */
11047 if (addr == stack_pointer_rtx
11048 || addr == arg_pointer_rtx
11049 || addr == frame_pointer_rtx
11050 || addr == hard_frame_pointer_rtx)
11051 len = 1;
11054 /* Direct Addressing. */
11055 else if (disp && !base && !index)
11056 len = 4;
11058 else
11060 /* Find the length of the displacement constant. */
11061 if (disp)
11063 if (GET_CODE (disp) == CONST_INT
11064 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11065 && base)
11066 len = 1;
11067 else
11068 len = 4;
11070 /* ebp always wants a displacement. */
11071 else if (base == hard_frame_pointer_rtx)
11072 len = 1;
11074 /* An index requires the two-byte modrm form.... */
11075 if (index
11076 /* ...like esp, which always wants an index. */
11077 || base == stack_pointer_rtx
11078 || base == arg_pointer_rtx
11079 || base == frame_pointer_rtx)
11080 len += 1;
11083 return len;
11086 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
11087 is set, expect the insn to have an 8-bit immediate alternative. */
11089 ix86_attr_length_immediate_default (rtx insn, int shortform)
11091 int len = 0;
11092 int i;
11093 extract_insn_cached (insn);
11094 for (i = recog_data.n_operands - 1; i >= 0; --i)
11095 if (CONSTANT_P (recog_data.operand[i]))
11097 if (len)
11098 abort ();
11099 if (shortform
11100 && GET_CODE (recog_data.operand[i]) == CONST_INT
11101 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11102 len = 1;
11103 else
11105 switch (get_attr_mode (insn))
11107 case MODE_QI:
11108 len+=1;
11109 break;
11110 case MODE_HI:
11111 len+=2;
11112 break;
11113 case MODE_SI:
11114 len+=4;
11115 break;
11116 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11117 case MODE_DI:
11118 len+=4;
11119 break;
11120 default:
11121 fatal_insn ("unknown insn mode", insn);
11125 return len;
11127 /* Compute the default value for the "length_address" attribute. */
11129 ix86_attr_length_address_default (rtx insn)
11131 int i;
11133 if (get_attr_type (insn) == TYPE_LEA)
11135 rtx set = PATTERN (insn);
11136 if (GET_CODE (set) == SET)
11138 else if (GET_CODE (set) == PARALLEL
11139 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11140 set = XVECEXP (set, 0, 0);
11141 else
11143 #ifdef ENABLE_CHECKING
11144 abort ();
11145 #endif
11146 return 0;
11149 return memory_address_length (SET_SRC (set));
11152 extract_insn_cached (insn);
11153 for (i = recog_data.n_operands - 1; i >= 0; --i)
11154 if (GET_CODE (recog_data.operand[i]) == MEM)
11156 return memory_address_length (XEXP (recog_data.operand[i], 0));
11157 break;
11159 return 0;
11162 /* Return the maximum number of instructions a cpu can issue. */
11164 static int
11165 ix86_issue_rate (void)
11167 switch (ix86_tune)
11169 case PROCESSOR_PENTIUM:
11170 case PROCESSOR_K6:
11171 return 2;
11173 case PROCESSOR_PENTIUMPRO:
11174 case PROCESSOR_PENTIUM4:
11175 case PROCESSOR_ATHLON:
11176 case PROCESSOR_K8:
11177 case PROCESSOR_NOCONA:
11178 return 3;
11180 default:
11181 return 1;
11185 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags set
11186 by DEP_INSN and nothing else set by DEP_INSN. */
11188 static int
11189 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11191 rtx set, set2;
11193 /* Simplify the test for uninteresting insns. */
11194 if (insn_type != TYPE_SETCC
11195 && insn_type != TYPE_ICMOV
11196 && insn_type != TYPE_FCMOV
11197 && insn_type != TYPE_IBR)
11198 return 0;
11200 if ((set = single_set (dep_insn)) != 0)
11202 set = SET_DEST (set);
11203 set2 = NULL_RTX;
11205 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11206 && XVECLEN (PATTERN (dep_insn), 0) == 2
11207 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11208 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11210 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11211 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1)); /* the second SET of the PARALLEL */
11213 else
11214 return 0;
11216 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11217 return 0;
11219 /* This test is true if the dependent insn reads the flags but
11220 not any other potentially set register. */
11221 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11222 return 0;
11224 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11225 return 0;
11227 return 1;
11230 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11231 address with operands set by DEP_INSN. */
11233 static int
11234 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11236 rtx addr;
11238 if (insn_type == TYPE_LEA
11239 && TARGET_PENTIUM)
11241 addr = PATTERN (insn);
11242 if (GET_CODE (addr) == SET)
11244 else if (GET_CODE (addr) == PARALLEL
11245 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11246 addr = XVECEXP (addr, 0, 0);
11247 else
11248 abort ();
11249 addr = SET_SRC (addr);
11251 else
11253 int i;
11254 extract_insn_cached (insn);
11255 for (i = recog_data.n_operands - 1; i >= 0; --i)
11256 if (GET_CODE (recog_data.operand[i]) == MEM)
11258 addr = XEXP (recog_data.operand[i], 0);
11259 goto found;
11261 return 0;
11262 found:;
11265 return modified_in_p (addr, dep_insn);
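/* Adjust COST, the scheduler's latency estimate for the dependence LINK
   between DEP_INSN and INSN, according to the processor being tuned for:
   flag producers pair with their consumers on Pentium, address-generation
   interlocks add a cycle there, and the out-of-order cores can hide part
   of a load's latency when the load does not feed an address
   calculation. */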
11268 static int
11269 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
11271 enum attr_type insn_type, dep_insn_type;
11272 enum attr_memory memory;
11273 rtx set, set2;
11274 int dep_insn_code_number;
11276 /* Anti and output dependencies have zero cost on all CPUs. */
11277 if (REG_NOTE_KIND (link) != 0)
11278 return 0;
11280 dep_insn_code_number = recog_memoized (dep_insn);
11282 /* If we can't recognize the insns, we can't really do anything. */
11283 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11284 return cost;
11286 insn_type = get_attr_type (insn);
11287 dep_insn_type = get_attr_type (dep_insn);
11289 switch (ix86_tune)
11291 case PROCESSOR_PENTIUM:
11292 /* Address Generation Interlock adds a cycle of latency. */
11293 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11294 cost += 1;
11296 /* ??? Compares pair with jump/setcc. */
11297 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11298 cost = 0;
11300 /* Floating point stores require value to be ready one cycle earlier. */
11301 if (insn_type == TYPE_FMOV
11302 && get_attr_memory (insn) == MEMORY_STORE
11303 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11304 cost += 1;
11305 break;
11307 case PROCESSOR_PENTIUMPRO:
11308 memory = get_attr_memory (insn);
11310 /* INT->FP conversion is expensive. */
11311 if (get_attr_fp_int_src (dep_insn))
11312 cost += 5;
11314 /* There is one cycle extra latency between an FP op and a store. */
11315 if (insn_type == TYPE_FMOV
11316 && (set = single_set (dep_insn)) != NULL_RTX
11317 && (set2 = single_set (insn)) != NULL_RTX
11318 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11319 && GET_CODE (SET_DEST (set2)) == MEM)
11320 cost += 1;
11322 /* Model the ability of the reorder buffer to hide the latency of a load by
11323 executing it in parallel with the previous instruction when the
11324 previous instruction is not needed to compute the address. */
11325 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11326 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11328 /* Claim that moves take one cycle, as the core can issue one load
11329 at a time and the next load can start a cycle later. */
11330 if (dep_insn_type == TYPE_IMOV
11331 || dep_insn_type == TYPE_FMOV)
11332 cost = 1;
11333 else if (cost > 1)
11334 cost--;
11336 break;
11338 case PROCESSOR_K6:
11339 memory = get_attr_memory (insn);
11341 /* The esp dependency is resolved before the instruction is really
11342 finished. */
11343 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11344 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11345 return 1;
11347 /* INT->FP conversion is expensive. */
11348 if (get_attr_fp_int_src (dep_insn))
11349 cost += 5;
11351 /* Model the ability of the reorder buffer to hide the latency of a load by
11352 executing it in parallel with the previous instruction when the
11353 previous instruction is not needed to compute the address. */
11354 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11355 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11357 /* Claim that moves take one cycle, as the core can issue one load
11358 at a time and the next load can start a cycle later. */
11359 if (dep_insn_type == TYPE_IMOV
11360 || dep_insn_type == TYPE_FMOV)
11361 cost = 1;
11362 else if (cost > 2)
11363 cost -= 2;
11364 else
11365 cost = 1;
11367 break;
11369 case PROCESSOR_ATHLON:
11370 case PROCESSOR_K8:
11371 memory = get_attr_memory (insn);
11373 /* Model the ability of the reorder buffer to hide the latency of a load by
11374 executing it in parallel with the previous instruction when the
11375 previous instruction is not needed to compute the address. */
11376 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11377 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11379 enum attr_unit unit = get_attr_unit (insn);
11380 int loadcost = 3;
11382 /* Because of the difference between the length of integer and
11383 floating unit pipeline preparation stages, the memory operands
11384 for floating point are cheaper.
11386 ??? For Athlon the difference is most probably 2. */
11387 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
11388 loadcost = 3;
11389 else
11390 loadcost = TARGET_ATHLON ? 2 : 0;
11392 if (cost >= loadcost)
11393 cost -= loadcost;
11394 else
11395 cost = 0;
11398 default:
11399 break;
11402 return cost;
11405 /* How many alternative schedules to try. This should be as wide as the
11406 scheduling freedom in the DFA, but no wider. Making this value too
11407 large results in extra work for the scheduler. */
11409 static int
11410 ia32_multipass_dfa_lookahead (void)
11412 if (ix86_tune == PROCESSOR_PENTIUM)
11413 return 2;
11415 if (ix86_tune == PROCESSOR_PENTIUMPRO
11416 || ix86_tune == PROCESSOR_K6)
11417 return 1;
11419 else
11420 return 0;
11424 /* Compute the alignment given to a constant that is being placed in memory.
11425 EXP is the constant and ALIGN is the alignment that the object would
11426 ordinarily have.
11427 The value of this function is used instead of that alignment to align
11428 the object. */
11431 ix86_constant_alignment (tree exp, int align)
11433 if (TREE_CODE (exp) == REAL_CST)
11435 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11436 return 64;
11437 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11438 return 128;
11440 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
11441 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
11442 return BITS_PER_WORD;
11444 return align;
11447 /* Compute the alignment for a static variable.
11448 TYPE is the data type, and ALIGN is the alignment that
11449 the object would ordinarily have. The value of this function is used
11450 instead of that alignment to align the object. */
11453 ix86_data_alignment (tree type, int align)
11455 if (AGGREGATE_TYPE_P (type)
11456 && TYPE_SIZE (type)
11457 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11458 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11459 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11460 return 256;
11462 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
11463 to a 16-byte boundary. */
11464 if (TARGET_64BIT)
11466 if (AGGREGATE_TYPE_P (type)
11467 && TYPE_SIZE (type)
11468 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11469 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11470 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11471 return 128;
11474 if (TREE_CODE (type) == ARRAY_TYPE)
11476 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11477 return 64;
11478 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11479 return 128;
11481 else if (TREE_CODE (type) == COMPLEX_TYPE)
11484 if (TYPE_MODE (type) == DCmode && align < 64)
11485 return 64;
11486 if (TYPE_MODE (type) == XCmode && align < 128)
11487 return 128;
11489 else if ((TREE_CODE (type) == RECORD_TYPE
11490 || TREE_CODE (type) == UNION_TYPE
11491 || TREE_CODE (type) == QUAL_UNION_TYPE)
11492 && TYPE_FIELDS (type))
11494 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11495 return 64;
11496 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11497 return 128;
11499 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11500 || TREE_CODE (type) == INTEGER_TYPE)
11502 if (TYPE_MODE (type) == DFmode && align < 64)
11503 return 64;
11504 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11505 return 128;
11508 return align;
11511 /* Compute the alignment for a local variable.
11512 TYPE is the data type, and ALIGN is the alignment that
11513 the object would ordinarily have. The value of this macro is used
11514 instead of that alignment to align the object. */
11517 ix86_local_alignment (tree type, int align)
11519 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
11520 to a 16-byte boundary. */
11521 if (TARGET_64BIT)
11523 if (AGGREGATE_TYPE_P (type)
11524 && TYPE_SIZE (type)
11525 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11526 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11527 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11528 return 128;
11530 if (TREE_CODE (type) == ARRAY_TYPE)
11532 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11533 return 64;
11534 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11535 return 128;
11537 else if (TREE_CODE (type) == COMPLEX_TYPE)
11539 if (TYPE_MODE (type) == DCmode && align < 64)
11540 return 64;
11541 if (TYPE_MODE (type) == XCmode && align < 128)
11542 return 128;
11544 else if ((TREE_CODE (type) == RECORD_TYPE
11545 || TREE_CODE (type) == UNION_TYPE
11546 || TREE_CODE (type) == QUAL_UNION_TYPE)
11547 && TYPE_FIELDS (type))
11549 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11550 return 64;
11551 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11552 return 128;
11554 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11555 || TREE_CODE (type) == INTEGER_TYPE)
11558 if (TYPE_MODE (type) == DFmode && align < 64)
11559 return 64;
11560 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11561 return 128;
11563 return align;
11566 /* Emit RTL insns to initialize the variable parts of a trampoline.
11567 FNADDR is an RTX for the address of the function's pure code.
11568 CXT is an RTX for the static chain value for the function. */
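/* Schematically, the emitted trampoline is

     32-bit:  b9 <cxt>               movl    $cxt, %ecx
              e9 <fnaddr-tramp-10>   jmp     fnaddr            (10 bytes)

     64-bit:  49 bb <fnaddr>         movabsq $fnaddr, %r11
                                     (or the shorter 41 bb / movl form)
              49 ba <cxt>            movabsq $cxt, %r10
              49 ff e3               jmp     *%r11                       */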
11569 void
11570 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
11572 if (!TARGET_64BIT)
11574 /* Compute offset from the end of the jmp to the target function. */
11575 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11576 plus_constant (tramp, 10),
11577 NULL_RTX, 1, OPTAB_DIRECT);
11578 emit_move_insn (gen_rtx_MEM (QImode, tramp),
11579 gen_int_mode (0xb9, QImode));
11580 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
11581 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
11582 gen_int_mode (0xe9, QImode));
11583 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
11585 else
11587 int offset = 0;
11588 /* Try to load the address using the shorter movl instead of movabs.
11589 We may want to support movq for kernel mode, but the kernel does not use
11590 trampolines at the moment. */
11591 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
11593 fnaddr = copy_to_mode_reg (DImode, fnaddr);
11594 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11595 gen_int_mode (0xbb41, HImode));
11596 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11597 gen_lowpart (SImode, fnaddr));
11598 offset += 6;
11600 else
11602 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11603 gen_int_mode (0xbb49, HImode));
11604 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11605 fnaddr);
11606 offset += 10;
11608 /* Load static chain using movabs to r10. */
11609 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11610 gen_int_mode (0xba49, HImode));
11611 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11612 cxt);
11613 offset += 10;
11614 /* Jump to r11. */
11615 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11616 gen_int_mode (0xff49, HImode));
11617 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
11618 gen_int_mode (0xe3, QImode));
11619 offset += 3;
11620 if (offset > TRAMPOLINE_SIZE)
11621 abort ();
11624 #ifdef ENABLE_EXECUTE_STACK
11625 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
11626 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
11627 #endif
11630 #define def_builtin(MASK, NAME, TYPE, CODE) \
11631 do { \
11632 if ((MASK) & target_flags \
11633 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
11634 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
11635 NULL, NULL_TREE); \
11636 } while (0)
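/* The builtin-init code later in this file registers each builtin roughly as

       def_builtin (MASK_SSE, "__builtin_ia32_addps", ftype, IX86_BUILTIN_ADDPS);

   with ftype a previously built function type node; the MASK argument makes
   the builtin available only when the corresponding ISA flag is enabled
   (and, when MASK_64BIT is included, only on 64-bit targets). */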
11638 struct builtin_description
11640 const unsigned int mask;
11641 const enum insn_code icode;
11642 const char *const name;
11643 const enum ix86_builtins code;
11644 const enum rtx_code comparison;
11645 const unsigned int flag;
11648 static const struct builtin_description bdesc_comi[] =
11650 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
11651 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
11652 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
11653 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
11654 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
11655 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
11656 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
11657 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
11658 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
11659 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
11660 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
11661 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
11662 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
11663 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
11664 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
11665 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
11666 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
11667 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
11668 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
11669 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
11670 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
11671 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
11672 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
11673 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
11674 };
11676 static const struct builtin_description bdesc_2arg[] =
11677 {
11678 /* SSE */
11679 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11680 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11681 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11682 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11683 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11684 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11685 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11686 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11688 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11689 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11690 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11691 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11692 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11693 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11694 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11695 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11696 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11697 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11698 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11699 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11700 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11701 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11702 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
11703 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11704 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11705 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11706 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
11707 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11709 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11710 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11711 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11712 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11714 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
11715 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
11716 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
11717 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
11719 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11720 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11721 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11722 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11723 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
11725 /* MMX */
11726 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11727 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11728 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11729 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
11730 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11731 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11732 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11733 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
11735 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11736 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11737 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11738 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11739 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11740 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11741 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11742 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11744 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11745 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
11746 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
11748 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11749 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11750 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11751 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11753 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11754 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
11756 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11757 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11758 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11759 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11760 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11761 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
11763 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
11764 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
11765 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
11766 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
11768 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
11769 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
11770 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
11771 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
11772 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
11773 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
11775 /* Special. */
11776 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
11777 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
11778 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
11780 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
11781 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
11782 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
11784 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
11785 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
11786 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
11787 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
11788 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
11789 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
11791 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
11792 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
11793 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
11794 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
11795 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
11796 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
11798 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
11799 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
11800 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
11801 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
11803 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
11804 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
11806 /* SSE2 */
11807 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
11808 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
11809 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
11810 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
11811 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
11812 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
11813 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
11814 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
11816 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
11817 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
11818 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
11819 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
11820 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
11821 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
11822 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
11823 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
11824 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
11825 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
11826 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
11827 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
11828 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
11829 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
11830 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
11831 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
11832 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
11833 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
11834 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
11835 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
11837 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
11838 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
11839 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
11840 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
11842 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
11843 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
11844 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
11845 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
11847 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
11848 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
11849 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
11851 /* SSE2 MMX */
11852 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
11853 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
11854 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
11855 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
11856 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
11857 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
11858 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
11859 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
11861 { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
11862 { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
11863 { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
11864 { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
11865 { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
11866 { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
11867 { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
11868 { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
11870 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
11871 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
11873 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
11874 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
11875 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
11876 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
11878 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
11879 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
11881 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
11882 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
11883 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
11884 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
11885 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
11886 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
11888 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
11889 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
11890 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
11891 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
11893 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
11894 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
11895 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
11896 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
11897 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
11898 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
11899 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
11900 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
11902 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
11903 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
11904 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
11906 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
11907 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
11909 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
11910 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
11912 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
11913 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
11914 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
11915 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
11916 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
11917 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
11919 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
11920 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
11921 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
11922 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
11923 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
11924 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
11926 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
11927 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
11928 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
11929 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
11931 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
11933 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
11934 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
11935 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
11936 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
11938 /* SSE3 MMX */
11939 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
11940 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
11941 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
11942 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
11943 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
11944 { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
11945 };
11947 static const struct builtin_description bdesc_1arg[] =
11948 {
11949 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
11950 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
11952 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
11953 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
11954 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
11956 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
11957 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
11958 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
11959 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
11960 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
11961 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
11963 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
11964 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
11965 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
11966 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
11968 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
11970 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
11971 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
11973 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
11974 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
11975 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
11976 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
11977 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
11979 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
11981 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
11982 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
11983 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
11984 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
11986 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
11987 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
11988 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
11990 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
11992 /* SSE3 */
11993 { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
11994 { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
11995 { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
11996 };
11998 void
11999 ix86_init_builtins (void)
12000 {
12001 if (TARGET_MMX)
12002 ix86_init_mmx_sse_builtins ();
12003 }
12005 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12006 is zero. If TARGET_SSE is not set, only the MMX builtins are
12007 defined. */
12008 static void
12009 ix86_init_mmx_sse_builtins (void)
12010 {
12011 const struct builtin_description * d;
12012 size_t i;
12014 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
12015 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
12016 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
12017 tree V2DI_type_node = build_vector_type_for_mode (intDI_type_node, V2DImode);
12018 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
12019 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
12020 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
12021 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
12022 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
12023 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
12025 tree pchar_type_node = build_pointer_type (char_type_node);
12026 tree pcchar_type_node = build_pointer_type (
12027 build_type_variant (char_type_node, 1, 0));
12028 tree pfloat_type_node = build_pointer_type (float_type_node);
12029 tree pcfloat_type_node = build_pointer_type (
12030 build_type_variant (float_type_node, 1, 0));
12031 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12032 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12033 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12035 /* Comparisons. */
12036 tree int_ftype_v4sf_v4sf
12037 = build_function_type_list (integer_type_node,
12038 V4SF_type_node, V4SF_type_node, NULL_TREE);
12039 tree v4si_ftype_v4sf_v4sf
12040 = build_function_type_list (V4SI_type_node,
12041 V4SF_type_node, V4SF_type_node, NULL_TREE);
12042 /* MMX/SSE/integer conversions. */
12043 tree int_ftype_v4sf
12044 = build_function_type_list (integer_type_node,
12045 V4SF_type_node, NULL_TREE);
12046 tree int64_ftype_v4sf
12047 = build_function_type_list (long_long_integer_type_node,
12048 V4SF_type_node, NULL_TREE);
12049 tree int_ftype_v8qi
12050 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12051 tree v4sf_ftype_v4sf_int
12052 = build_function_type_list (V4SF_type_node,
12053 V4SF_type_node, integer_type_node, NULL_TREE);
12054 tree v4sf_ftype_v4sf_int64
12055 = build_function_type_list (V4SF_type_node,
12056 V4SF_type_node, long_long_integer_type_node,
12057 NULL_TREE);
12058 tree v4sf_ftype_v4sf_v2si
12059 = build_function_type_list (V4SF_type_node,
12060 V4SF_type_node, V2SI_type_node, NULL_TREE);
12061 tree int_ftype_v4hi_int
12062 = build_function_type_list (integer_type_node,
12063 V4HI_type_node, integer_type_node, NULL_TREE);
12064 tree v4hi_ftype_v4hi_int_int
12065 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12066 integer_type_node, integer_type_node,
12067 NULL_TREE);
12068 /* Miscellaneous. */
12069 tree v8qi_ftype_v4hi_v4hi
12070 = build_function_type_list (V8QI_type_node,
12071 V4HI_type_node, V4HI_type_node, NULL_TREE);
12072 tree v4hi_ftype_v2si_v2si
12073 = build_function_type_list (V4HI_type_node,
12074 V2SI_type_node, V2SI_type_node, NULL_TREE);
12075 tree v4sf_ftype_v4sf_v4sf_int
12076 = build_function_type_list (V4SF_type_node,
12077 V4SF_type_node, V4SF_type_node,
12078 integer_type_node, NULL_TREE);
12079 tree v2si_ftype_v4hi_v4hi
12080 = build_function_type_list (V2SI_type_node,
12081 V4HI_type_node, V4HI_type_node, NULL_TREE);
12082 tree v4hi_ftype_v4hi_int
12083 = build_function_type_list (V4HI_type_node,
12084 V4HI_type_node, integer_type_node, NULL_TREE);
12085 tree v4hi_ftype_v4hi_di
12086 = build_function_type_list (V4HI_type_node,
12087 V4HI_type_node, long_long_unsigned_type_node,
12088 NULL_TREE);
12089 tree v2si_ftype_v2si_di
12090 = build_function_type_list (V2SI_type_node,
12091 V2SI_type_node, long_long_unsigned_type_node,
12092 NULL_TREE);
12093 tree void_ftype_void
12094 = build_function_type (void_type_node, void_list_node);
12095 tree void_ftype_unsigned
12096 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12097 tree void_ftype_unsigned_unsigned
12098 = build_function_type_list (void_type_node, unsigned_type_node,
12099 unsigned_type_node, NULL_TREE);
12100 tree void_ftype_pcvoid_unsigned_unsigned
12101 = build_function_type_list (void_type_node, const_ptr_type_node,
12102 unsigned_type_node, unsigned_type_node,
12103 NULL_TREE);
12104 tree unsigned_ftype_void
12105 = build_function_type (unsigned_type_node, void_list_node);
12106 tree di_ftype_void
12107 = build_function_type (long_long_unsigned_type_node, void_list_node);
12108 tree v4sf_ftype_void
12109 = build_function_type (V4SF_type_node, void_list_node);
12110 tree v2si_ftype_v4sf
12111 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12112 /* Loads/stores. */
12113 tree void_ftype_v8qi_v8qi_pchar
12114 = build_function_type_list (void_type_node,
12115 V8QI_type_node, V8QI_type_node,
12116 pchar_type_node, NULL_TREE);
12117 tree v4sf_ftype_pcfloat
12118 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
12119 /* @@@ the type is bogus */
12120 tree v4sf_ftype_v4sf_pv2si
12121 = build_function_type_list (V4SF_type_node,
12122 V4SF_type_node, pv2si_type_node, NULL_TREE);
12123 tree void_ftype_pv2si_v4sf
12124 = build_function_type_list (void_type_node,
12125 pv2si_type_node, V4SF_type_node, NULL_TREE);
12126 tree void_ftype_pfloat_v4sf
12127 = build_function_type_list (void_type_node,
12128 pfloat_type_node, V4SF_type_node, NULL_TREE);
12129 tree void_ftype_pdi_di
12130 = build_function_type_list (void_type_node,
12131 pdi_type_node, long_long_unsigned_type_node,
12132 NULL_TREE);
12133 tree void_ftype_pv2di_v2di
12134 = build_function_type_list (void_type_node,
12135 pv2di_type_node, V2DI_type_node, NULL_TREE);
12136 /* Normal vector unops. */
12137 tree v4sf_ftype_v4sf
12138 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12140 /* Normal vector binops. */
12141 tree v4sf_ftype_v4sf_v4sf
12142 = build_function_type_list (V4SF_type_node,
12143 V4SF_type_node, V4SF_type_node, NULL_TREE);
12144 tree v8qi_ftype_v8qi_v8qi
12145 = build_function_type_list (V8QI_type_node,
12146 V8QI_type_node, V8QI_type_node, NULL_TREE);
12147 tree v4hi_ftype_v4hi_v4hi
12148 = build_function_type_list (V4HI_type_node,
12149 V4HI_type_node, V4HI_type_node, NULL_TREE);
12150 tree v2si_ftype_v2si_v2si
12151 = build_function_type_list (V2SI_type_node,
12152 V2SI_type_node, V2SI_type_node, NULL_TREE);
12153 tree di_ftype_di_di
12154 = build_function_type_list (long_long_unsigned_type_node,
12155 long_long_unsigned_type_node,
12156 long_long_unsigned_type_node, NULL_TREE);
12158 tree v2si_ftype_v2sf
12159 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12160 tree v2sf_ftype_v2si
12161 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12162 tree v2si_ftype_v2si
12163 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12164 tree v2sf_ftype_v2sf
12165 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12166 tree v2sf_ftype_v2sf_v2sf
12167 = build_function_type_list (V2SF_type_node,
12168 V2SF_type_node, V2SF_type_node, NULL_TREE);
12169 tree v2si_ftype_v2sf_v2sf
12170 = build_function_type_list (V2SI_type_node,
12171 V2SF_type_node, V2SF_type_node, NULL_TREE);
12172 tree pint_type_node = build_pointer_type (integer_type_node);
12173 tree pcint_type_node = build_pointer_type (
12174 build_type_variant (integer_type_node, 1, 0));
12175 tree pdouble_type_node = build_pointer_type (double_type_node);
12176 tree pcdouble_type_node = build_pointer_type (
12177 build_type_variant (double_type_node, 1, 0));
12178 tree int_ftype_v2df_v2df
12179 = build_function_type_list (integer_type_node,
12180 V2DF_type_node, V2DF_type_node, NULL_TREE);
12182 tree ti_ftype_void
12183 = build_function_type (intTI_type_node, void_list_node);
12184 tree v2di_ftype_void
12185 = build_function_type (V2DI_type_node, void_list_node);
12186 tree ti_ftype_ti_ti
12187 = build_function_type_list (intTI_type_node,
12188 intTI_type_node, intTI_type_node, NULL_TREE);
12189 tree void_ftype_pcvoid
12190 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
12191 tree v2di_ftype_di
12192 = build_function_type_list (V2DI_type_node,
12193 long_long_unsigned_type_node, NULL_TREE);
12194 tree di_ftype_v2di
12195 = build_function_type_list (long_long_unsigned_type_node,
12196 V2DI_type_node, NULL_TREE);
12197 tree v4sf_ftype_v4si
12198 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12199 tree v4si_ftype_v4sf
12200 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12201 tree v2df_ftype_v4si
12202 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12203 tree v4si_ftype_v2df
12204 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12205 tree v2si_ftype_v2df
12206 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12207 tree v4sf_ftype_v2df
12208 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12209 tree v2df_ftype_v2si
12210 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12211 tree v2df_ftype_v4sf
12212 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12213 tree int_ftype_v2df
12214 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12215 tree int64_ftype_v2df
12216 = build_function_type_list (long_long_integer_type_node,
12217 V2DF_type_node, NULL_TREE);
12218 tree v2df_ftype_v2df_int
12219 = build_function_type_list (V2DF_type_node,
12220 V2DF_type_node, integer_type_node, NULL_TREE);
12221 tree v2df_ftype_v2df_int64
12222 = build_function_type_list (V2DF_type_node,
12223 V2DF_type_node, long_long_integer_type_node,
12224 NULL_TREE);
12225 tree v4sf_ftype_v4sf_v2df
12226 = build_function_type_list (V4SF_type_node,
12227 V4SF_type_node, V2DF_type_node, NULL_TREE);
12228 tree v2df_ftype_v2df_v4sf
12229 = build_function_type_list (V2DF_type_node,
12230 V2DF_type_node, V4SF_type_node, NULL_TREE);
12231 tree v2df_ftype_v2df_v2df_int
12232 = build_function_type_list (V2DF_type_node,
12233 V2DF_type_node, V2DF_type_node,
12234 integer_type_node,
12235 NULL_TREE);
12236 tree v2df_ftype_v2df_pv2si
12237 = build_function_type_list (V2DF_type_node,
12238 V2DF_type_node, pv2si_type_node, NULL_TREE);
12239 tree void_ftype_pv2si_v2df
12240 = build_function_type_list (void_type_node,
12241 pv2si_type_node, V2DF_type_node, NULL_TREE);
12242 tree void_ftype_pdouble_v2df
12243 = build_function_type_list (void_type_node,
12244 pdouble_type_node, V2DF_type_node, NULL_TREE);
12245 tree void_ftype_pint_int
12246 = build_function_type_list (void_type_node,
12247 pint_type_node, integer_type_node, NULL_TREE);
12248 tree void_ftype_v16qi_v16qi_pchar
12249 = build_function_type_list (void_type_node,
12250 V16QI_type_node, V16QI_type_node,
12251 pchar_type_node, NULL_TREE);
12252 tree v2df_ftype_pcdouble
12253 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
12254 tree v2df_ftype_v2df_v2df
12255 = build_function_type_list (V2DF_type_node,
12256 V2DF_type_node, V2DF_type_node, NULL_TREE);
12257 tree v16qi_ftype_v16qi_v16qi
12258 = build_function_type_list (V16QI_type_node,
12259 V16QI_type_node, V16QI_type_node, NULL_TREE);
12260 tree v8hi_ftype_v8hi_v8hi
12261 = build_function_type_list (V8HI_type_node,
12262 V8HI_type_node, V8HI_type_node, NULL_TREE);
12263 tree v4si_ftype_v4si_v4si
12264 = build_function_type_list (V4SI_type_node,
12265 V4SI_type_node, V4SI_type_node, NULL_TREE);
12266 tree v2di_ftype_v2di_v2di
12267 = build_function_type_list (V2DI_type_node,
12268 V2DI_type_node, V2DI_type_node, NULL_TREE);
12269 tree v2di_ftype_v2df_v2df
12270 = build_function_type_list (V2DI_type_node,
12271 V2DF_type_node, V2DF_type_node, NULL_TREE);
12272 tree v2df_ftype_v2df
12273 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12274 tree v2df_ftype_double
12275 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12276 tree v2df_ftype_double_double
12277 = build_function_type_list (V2DF_type_node,
12278 double_type_node, double_type_node, NULL_TREE);
12279 tree int_ftype_v8hi_int
12280 = build_function_type_list (integer_type_node,
12281 V8HI_type_node, integer_type_node, NULL_TREE);
12282 tree v8hi_ftype_v8hi_int_int
12283 = build_function_type_list (V8HI_type_node,
12284 V8HI_type_node, integer_type_node,
12285 integer_type_node, NULL_TREE);
12286 tree v2di_ftype_v2di_int
12287 = build_function_type_list (V2DI_type_node,
12288 V2DI_type_node, integer_type_node, NULL_TREE);
12289 tree v4si_ftype_v4si_int
12290 = build_function_type_list (V4SI_type_node,
12291 V4SI_type_node, integer_type_node, NULL_TREE);
12292 tree v8hi_ftype_v8hi_int
12293 = build_function_type_list (V8HI_type_node,
12294 V8HI_type_node, integer_type_node, NULL_TREE);
12295 tree v8hi_ftype_v8hi_v2di
12296 = build_function_type_list (V8HI_type_node,
12297 V8HI_type_node, V2DI_type_node, NULL_TREE);
12298 tree v4si_ftype_v4si_v2di
12299 = build_function_type_list (V4SI_type_node,
12300 V4SI_type_node, V2DI_type_node, NULL_TREE);
12301 tree v4si_ftype_v8hi_v8hi
12302 = build_function_type_list (V4SI_type_node,
12303 V8HI_type_node, V8HI_type_node, NULL_TREE);
12304 tree di_ftype_v8qi_v8qi
12305 = build_function_type_list (long_long_unsigned_type_node,
12306 V8QI_type_node, V8QI_type_node, NULL_TREE);
12307 tree di_ftype_v2si_v2si
12308 = build_function_type_list (long_long_unsigned_type_node,
12309 V2SI_type_node, V2SI_type_node, NULL_TREE);
12310 tree v2di_ftype_v16qi_v16qi
12311 = build_function_type_list (V2DI_type_node,
12312 V16QI_type_node, V16QI_type_node, NULL_TREE);
12313 tree v2di_ftype_v4si_v4si
12314 = build_function_type_list (V2DI_type_node,
12315 V4SI_type_node, V4SI_type_node, NULL_TREE);
12316 tree int_ftype_v16qi
12317 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12318 tree v16qi_ftype_pcchar
12319 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
12320 tree void_ftype_pchar_v16qi
12321 = build_function_type_list (void_type_node,
12322 pchar_type_node, V16QI_type_node, NULL_TREE);
12323 tree v4si_ftype_pcint
12324 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
12325 tree void_ftype_pcint_v4si
12326 = build_function_type_list (void_type_node,
12327 pcint_type_node, V4SI_type_node, NULL_TREE);
12328 tree v2di_ftype_v2di
12329 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
12331 tree float80_type;
12332 tree float128_type;
12334 /* The __float80 type. */
12335 if (TYPE_MODE (long_double_type_node) == XFmode)
12336 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
12337 "__float80");
12338 else
12339 {
12340 /* The __float80 type. */
12341 float80_type = make_node (REAL_TYPE);
12342 TYPE_PRECISION (float80_type) = 80;
12343 layout_type (float80_type);
12344 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
12345 }
12347 float128_type = make_node (REAL_TYPE);
12348 TYPE_PRECISION (float128_type) = 128;
12349 layout_type (float128_type);
12350 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
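/* Once registered, the names are usable as type specifiers in user
   code, for example:

	__float80  x = 1.0;
	__float128 y = 2.0;

   with the 80-bit and 128-bit layouts set up above.  */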
12352 /* Add all builtins that are more or less simple operations on two
12353 operands. */
12354 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12355 {
12356 /* Use one of the operands; the target can have a different mode for
12357 mask-generating compares. */
12358 enum machine_mode mode;
12359 tree type;
12361 if (d->name == 0)
12362 continue;
12363 mode = insn_data[d->icode].operand[1].mode;
12365 switch (mode)
12366 {
12367 case V16QImode:
12368 type = v16qi_ftype_v16qi_v16qi;
12369 break;
12370 case V8HImode:
12371 type = v8hi_ftype_v8hi_v8hi;
12372 break;
12373 case V4SImode:
12374 type = v4si_ftype_v4si_v4si;
12375 break;
12376 case V2DImode:
12377 type = v2di_ftype_v2di_v2di;
12378 break;
12379 case V2DFmode:
12380 type = v2df_ftype_v2df_v2df;
12381 break;
12382 case TImode:
12383 type = ti_ftype_ti_ti;
12384 break;
12385 case V4SFmode:
12386 type = v4sf_ftype_v4sf_v4sf;
12387 break;
12388 case V8QImode:
12389 type = v8qi_ftype_v8qi_v8qi;
12390 break;
12391 case V4HImode:
12392 type = v4hi_ftype_v4hi_v4hi;
12393 break;
12394 case V2SImode:
12395 type = v2si_ftype_v2si_v2si;
12396 break;
12397 case DImode:
12398 type = di_ftype_di_di;
12399 break;
12401 default:
12402 abort ();
12403 }
12405 /* Override for comparisons. */
12406 if (d->icode == CODE_FOR_maskcmpv4sf3
12407 || d->icode == CODE_FOR_maskncmpv4sf3
12408 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12409 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12410 type = v4si_ftype_v4sf_v4sf;
12412 if (d->icode == CODE_FOR_maskcmpv2df3
12413 || d->icode == CODE_FOR_maskncmpv2df3
12414 || d->icode == CODE_FOR_vmmaskcmpv2df3
12415 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12416 type = v2di_ftype_v2df_v2df;
12418 def_builtin (d->mask, d->name, type, d->code);
12419 }
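/* For instance, for the "__builtin_ia32_addps" entry operand 1 of
   CODE_FOR_addv4sf3 has mode V4SFmode, so the loop selects
   v4sf_ftype_v4sf_v4sf and registers the builtin with that signature;
   the mask-generating SSE/SSE2 compares are overridden above to return
   integer vectors instead.  */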
12421 /* Add the remaining MMX insns with somewhat more complicated types. */
12422 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12423 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12424 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12425 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12426 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12428 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12429 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12430 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12432 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12433 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12435 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12436 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12438 /* comi/ucomi insns. */
12439 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12440 if (d->mask == MASK_SSE2)
12441 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12442 else
12443 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12445 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12446 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12447 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12449 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12450 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12451 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12452 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12453 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12454 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
12455 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12456 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
12457 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12458 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12459 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
12461 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12462 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12464 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12466 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
12467 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
12468 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
12469 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12470 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12471 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12473 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12474 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12475 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12476 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12478 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12479 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12480 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12481 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12483 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12485 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12487 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12488 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12489 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12490 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12491 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12492 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
12494 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
12496 /* Original 3DNow! */
12497 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12498 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12499 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12500 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12501 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12502 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12503 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12504 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12505 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12506 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12507 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12508 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12509 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12510 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12511 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12512 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12513 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12514 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12515 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12516 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
12518 /* 3DNow! extension as used in the Athlon CPU. */
12519 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12520 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12521 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12522 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12523 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12524 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12526 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12528 /* SSE2 */
12529 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12530 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12532 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12533 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12534 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
12536 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
12537 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
12538 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
12539 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12540 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12541 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12543 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12544 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12545 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12546 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12548 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
12549 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
12550 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12551 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
12552 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
12554 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12555 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12556 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
12557 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
12559 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12560 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12562 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12564 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
12565 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
12567 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12568 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12569 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12570 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12571 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12573 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12575 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12576 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12577 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
12578 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
12580 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12581 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12582 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12584 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12585 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
12586 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12587 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12589 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12590 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12591 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12592 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
12593 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
12594 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12595 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12597 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
12598 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12599 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
12601 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
12602 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
12603 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
12604 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
12605 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
12606 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
12607 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
12609 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
12611 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
12612 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
12614 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12615 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12616 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12618 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12619 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12620 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12622 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12623 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12625 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
12626 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12627 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12628 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12630 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
12631 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12632 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12633 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12635 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12636 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12638 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
12640 /* Prescott New Instructions. */
12641 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
12642 void_ftype_pcvoid_unsigned_unsigned,
12643 IX86_BUILTIN_MONITOR);
12644 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
12645 void_ftype_unsigned_unsigned,
12646 IX86_BUILTIN_MWAIT);
12647 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
12648 v4sf_ftype_v4sf,
12649 IX86_BUILTIN_MOVSHDUP);
12650 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
12651 v4sf_ftype_v4sf,
12652 IX86_BUILTIN_MOVSLDUP);
12653 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
12654 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
12655 def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
12656 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
12657 def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
12658 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
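/* For illustration only (not part of the original sources): user code
   normally reaches the SSE3 builtins registered above through the
   <pmmintrin.h> intrinsic wrappers; a minimal sketch, assuming the usual
   intrinsic names and compiling with -msse3:

       #include <pmmintrin.h>

       __m128d dup_low (double const *p)
       {
         return _mm_loaddup_pd (p);   // expected to expand via
       }                              // __builtin_ia32_loadddup above
*/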
12661 /* Errors in the source file can cause expand_expr to return const0_rtx
12662 where we expect a vector. To avoid crashing, use one of the vector
12663 clear instructions. */
12664 static rtx
12665 safe_vector_operand (rtx x, enum machine_mode mode)
12667 if (x != const0_rtx)
12668 return x;
12669 x = gen_reg_rtx (mode);
12671 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
12672 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12673 : gen_rtx_SUBREG (DImode, x, 0)));
12674 else
12675 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12676 : gen_rtx_SUBREG (V4SFmode, x, 0),
12677 CONST0_RTX (V4SFmode)));
12678 return x;
12681 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
12683 static rtx
12684 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
12686 rtx pat;
12687 tree arg0 = TREE_VALUE (arglist);
12688 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12689 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12690 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12691 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12692 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12693 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12695 if (VECTOR_MODE_P (mode0))
12696 op0 = safe_vector_operand (op0, mode0);
12697 if (VECTOR_MODE_P (mode1))
12698 op1 = safe_vector_operand (op1, mode1);
12700 if (! target
12701 || GET_MODE (target) != tmode
12702 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12703 target = gen_reg_rtx (tmode);
12705 if (GET_MODE (op1) == SImode && mode1 == TImode)
12707 rtx x = gen_reg_rtx (V4SImode);
12708 emit_insn (gen_sse2_loadd (x, op1));
12709 op1 = gen_lowpart (TImode, x);
12712 /* In case the insn wants input operands in modes different from
12713 the result, abort. */
12714 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
12715 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
12716 abort ();
12718 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12719 op0 = copy_to_mode_reg (mode0, op0);
12720 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12721 op1 = copy_to_mode_reg (mode1, op1);
12723 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12724 yet one of the two must not be a memory. This is normally enforced
12725 by expanders, but we didn't bother to create one here. */
12726 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12727 op0 = copy_to_mode_reg (mode0, op0);
12729 pat = GEN_FCN (icode) (target, op0, op1);
12730 if (! pat)
12731 return 0;
12732 emit_insn (pat);
12733 return target;
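/* For illustration (a sketch, not from the original sources): a typical
   two-operand builtin such as __builtin_ia32_addps reaches this helper
   from ix86_expand_builtin via the bdesc_2arg table, roughly:

       return ix86_expand_binop_builtin (CODE_FOR_addv4sf3, arglist, target);

   The icode named here is only an assumption for the example; the real
   icode/builtin pairing is whatever the builtin_description entry says.  */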
12736 /* Subroutine of ix86_expand_builtin to take care of stores. */
12738 static rtx
12739 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
12741 rtx pat;
12742 tree arg0 = TREE_VALUE (arglist);
12743 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12744 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12745 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12746 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12747 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
12749 if (VECTOR_MODE_P (mode1))
12750 op1 = safe_vector_operand (op1, mode1);
12752 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12753 op1 = copy_to_mode_reg (mode1, op1);
12755 pat = GEN_FCN (icode) (op0, op1);
12756 if (pat)
12757 emit_insn (pat);
12758 return 0;
12761 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
12763 static rtx
12764 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
12765 rtx target, int do_load)
12767 rtx pat;
12768 tree arg0 = TREE_VALUE (arglist);
12769 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12770 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12771 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12773 if (! target
12774 || GET_MODE (target) != tmode
12775 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12776 target = gen_reg_rtx (tmode);
12777 if (do_load)
12778 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12779 else
12781 if (VECTOR_MODE_P (mode0))
12782 op0 = safe_vector_operand (op0, mode0);
12784 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12785 op0 = copy_to_mode_reg (mode0, op0);
12788 pat = GEN_FCN (icode) (target, op0);
12789 if (! pat)
12790 return 0;
12791 emit_insn (pat);
12792 return target;
12795 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12796 sqrtss, rsqrtss, rcpss. */
12798 static rtx
12799 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
12801 rtx pat;
12802 tree arg0 = TREE_VALUE (arglist);
12803 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12804 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12805 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12807 if (! target
12808 || GET_MODE (target) != tmode
12809 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12810 target = gen_reg_rtx (tmode);
12812 if (VECTOR_MODE_P (mode0))
12813 op0 = safe_vector_operand (op0, mode0);
12815 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12816 op0 = copy_to_mode_reg (mode0, op0);
12818 op1 = op0;
12819 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12820 op1 = copy_to_mode_reg (mode0, op1);
12822 pat = GEN_FCN (icode) (target, op0, op1);
12823 if (! pat)
12824 return 0;
12825 emit_insn (pat);
12826 return target;
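/* For illustration (a sketch, not from the original sources): the vm*
   patterns used with this helper (vmsqrtv4sf2, vmrsqrtv4sf2, vmrcpv4sf2)
   merge a scalar result into a copy of the source vector, so the single
   builtin argument is supplied as both op0 and op1 above; e.g.
   __builtin_ia32_sqrtss (x) yields sqrt of element 0 of X with the
   remaining elements copied from X unchanged.  */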
12829 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
12831 static rtx
12832 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
12833 rtx target)
12835 rtx pat;
12836 tree arg0 = TREE_VALUE (arglist);
12837 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12838 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12839 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12840 rtx op2;
12841 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
12842 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
12843 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
12844 enum rtx_code comparison = d->comparison;
12846 if (VECTOR_MODE_P (mode0))
12847 op0 = safe_vector_operand (op0, mode0);
12848 if (VECTOR_MODE_P (mode1))
12849 op1 = safe_vector_operand (op1, mode1);
12851 /* Swap operands if we have a comparison that isn't available in
12852 hardware. */
12853 if (d->flag)
12855 rtx tmp = gen_reg_rtx (mode1);
12856 emit_move_insn (tmp, op1);
12857 op1 = op0;
12858 op0 = tmp;
12861 if (! target
12862 || GET_MODE (target) != tmode
12863 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
12864 target = gen_reg_rtx (tmode);
12866 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
12867 op0 = copy_to_mode_reg (mode0, op0);
12868 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
12869 op1 = copy_to_mode_reg (mode1, op1);
12871 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12872 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
12873 if (! pat)
12874 return 0;
12875 emit_insn (pat);
12876 return target;
12879 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
12881 static rtx
12882 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
12883 rtx target)
12885 rtx pat;
12886 tree arg0 = TREE_VALUE (arglist);
12887 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12888 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12889 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12890 rtx op2;
12891 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
12892 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
12893 enum rtx_code comparison = d->comparison;
12895 if (VECTOR_MODE_P (mode0))
12896 op0 = safe_vector_operand (op0, mode0);
12897 if (VECTOR_MODE_P (mode1))
12898 op1 = safe_vector_operand (op1, mode1);
12900 /* Swap operands if we have a comparison that isn't available in
12901 hardware. */
12902 if (d->flag)
12904 rtx tmp = op1;
12905 op1 = op0;
12906 op0 = tmp;
12909 target = gen_reg_rtx (SImode);
12910 emit_move_insn (target, const0_rtx);
12911 target = gen_rtx_SUBREG (QImode, target, 0);
12913 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
12914 op0 = copy_to_mode_reg (mode0, op0);
12915 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
12916 op1 = copy_to_mode_reg (mode1, op1);
12918 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12919 pat = GEN_FCN (d->icode) (op0, op1);
12920 if (! pat)
12921 return 0;
12922 emit_insn (pat);
12923 emit_insn (gen_rtx_SET (VOIDmode,
12924 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
12925 gen_rtx_fmt_ee (comparison, QImode,
12926 SET_DEST (pat),
12927 const0_rtx)));
12929 return SUBREG_REG (target);
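/* For illustration (a sketch, not from the original sources): for a comi
   builtin such as __builtin_ia32_comieq the code above roughly emits

       (set (reg:SI tmp) (const_int 0))
       ... the comi pattern, setting the FP flags register ...
       (set (strict_low_part (subreg:QI (reg:SI tmp) 0))
            (eq:QI (reg flags) (const_int 0)))

   i.e. a setcc into the low byte of a zeroed SImode register, which is
   then returned as the result.  */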
12932 /* Expand an expression EXP that calls a built-in function,
12933 with result going to TARGET if that's convenient
12934 (and in mode MODE if that's convenient).
12935 SUBTARGET may be used as the target for computing one of EXP's operands.
12936 IGNORE is nonzero if the value is to be ignored. */
12938 rtx
12939 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
12940 enum machine_mode mode ATTRIBUTE_UNUSED,
12941 int ignore ATTRIBUTE_UNUSED)
12943 const struct builtin_description *d;
12944 size_t i;
12945 enum insn_code icode;
12946 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
12947 tree arglist = TREE_OPERAND (exp, 1);
12948 tree arg0, arg1, arg2;
12949 rtx op0, op1, op2, pat;
12950 enum machine_mode tmode, mode0, mode1, mode2;
12951 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12953 switch (fcode)
12955 case IX86_BUILTIN_EMMS:
12956 emit_insn (gen_emms ());
12957 return 0;
12959 case IX86_BUILTIN_SFENCE:
12960 emit_insn (gen_sfence ());
12961 return 0;
12963 case IX86_BUILTIN_PEXTRW:
12964 case IX86_BUILTIN_PEXTRW128:
12965 icode = (fcode == IX86_BUILTIN_PEXTRW
12966 ? CODE_FOR_mmx_pextrw
12967 : CODE_FOR_sse2_pextrw);
12968 arg0 = TREE_VALUE (arglist);
12969 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12970 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12971 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12972 tmode = insn_data[icode].operand[0].mode;
12973 mode0 = insn_data[icode].operand[1].mode;
12974 mode1 = insn_data[icode].operand[2].mode;
12976 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12977 op0 = copy_to_mode_reg (mode0, op0);
12978 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12980 error ("selector must be an integer constant in the range 0..%i",
12981 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
12982 return gen_reg_rtx (tmode);
12984 if (target == 0
12985 || GET_MODE (target) != tmode
12986 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12987 target = gen_reg_rtx (tmode);
12988 pat = GEN_FCN (icode) (target, op0, op1);
12989 if (! pat)
12990 return 0;
12991 emit_insn (pat);
12992 return target;
12994 case IX86_BUILTIN_PINSRW:
12995 case IX86_BUILTIN_PINSRW128:
12996 icode = (fcode == IX86_BUILTIN_PINSRW
12997 ? CODE_FOR_mmx_pinsrw
12998 : CODE_FOR_sse2_pinsrw);
12999 arg0 = TREE_VALUE (arglist);
13000 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13001 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13002 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13003 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13004 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13005 tmode = insn_data[icode].operand[0].mode;
13006 mode0 = insn_data[icode].operand[1].mode;
13007 mode1 = insn_data[icode].operand[2].mode;
13008 mode2 = insn_data[icode].operand[3].mode;
13010 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13011 op0 = copy_to_mode_reg (mode0, op0);
13012 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13013 op1 = copy_to_mode_reg (mode1, op1);
13014 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13016 error ("selector must be an integer constant in the range 0..%i",
13017 fcode == IX86_BUILTIN_PINSRW ? 15:255);
13018 return const0_rtx;
13020 if (target == 0
13021 || GET_MODE (target) != tmode
13022 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13023 target = gen_reg_rtx (tmode);
13024 pat = GEN_FCN (icode) (target, op0, op1, op2);
13025 if (! pat)
13026 return 0;
13027 emit_insn (pat);
13028 return target;
13030 case IX86_BUILTIN_MASKMOVQ:
13031 case IX86_BUILTIN_MASKMOVDQU:
13032 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13033 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13034 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13035 : CODE_FOR_sse2_maskmovdqu));
13036 /* Note the arg order is different from the operand order. */
13037 arg1 = TREE_VALUE (arglist);
13038 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13039 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13040 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13041 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13042 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13043 mode0 = insn_data[icode].operand[0].mode;
13044 mode1 = insn_data[icode].operand[1].mode;
13045 mode2 = insn_data[icode].operand[2].mode;
13047 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13048 op0 = copy_to_mode_reg (mode0, op0);
13049 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13050 op1 = copy_to_mode_reg (mode1, op1);
13051 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13052 op2 = copy_to_mode_reg (mode2, op2);
13053 pat = GEN_FCN (icode) (op0, op1, op2);
13054 if (! pat)
13055 return 0;
13056 emit_insn (pat);
13057 return 0;
13059 case IX86_BUILTIN_SQRTSS:
13060 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13061 case IX86_BUILTIN_RSQRTSS:
13062 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13063 case IX86_BUILTIN_RCPSS:
13064 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13066 case IX86_BUILTIN_LOADAPS:
13067 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13069 case IX86_BUILTIN_LOADUPS:
13070 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13072 case IX86_BUILTIN_STOREAPS:
13073 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13075 case IX86_BUILTIN_STOREUPS:
13076 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13078 case IX86_BUILTIN_LOADSS:
13079 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13081 case IX86_BUILTIN_STORESS:
13082 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13084 case IX86_BUILTIN_LOADHPS:
13085 case IX86_BUILTIN_LOADLPS:
13086 case IX86_BUILTIN_LOADHPD:
13087 case IX86_BUILTIN_LOADLPD:
13088 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13089 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13090 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13091 : CODE_FOR_sse2_movsd);
13092 arg0 = TREE_VALUE (arglist);
13093 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13094 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13095 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13096 tmode = insn_data[icode].operand[0].mode;
13097 mode0 = insn_data[icode].operand[1].mode;
13098 mode1 = insn_data[icode].operand[2].mode;
13100 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13101 op0 = copy_to_mode_reg (mode0, op0);
13102 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13103 if (target == 0
13104 || GET_MODE (target) != tmode
13105 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13106 target = gen_reg_rtx (tmode);
13107 pat = GEN_FCN (icode) (target, op0, op1);
13108 if (! pat)
13109 return 0;
13110 emit_insn (pat);
13111 return target;
13113 case IX86_BUILTIN_STOREHPS:
13114 case IX86_BUILTIN_STORELPS:
13115 case IX86_BUILTIN_STOREHPD:
13116 case IX86_BUILTIN_STORELPD:
13117 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13118 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13119 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13120 : CODE_FOR_sse2_movsd);
13121 arg0 = TREE_VALUE (arglist);
13122 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13123 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13124 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13125 mode0 = insn_data[icode].operand[1].mode;
13126 mode1 = insn_data[icode].operand[2].mode;
13128 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13129 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13130 op1 = copy_to_mode_reg (mode1, op1);
13132 pat = GEN_FCN (icode) (op0, op0, op1);
13133 if (! pat)
13134 return 0;
13135 emit_insn (pat);
13136 return 0;
13138 case IX86_BUILTIN_MOVNTPS:
13139 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13140 case IX86_BUILTIN_MOVNTQ:
13141 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13143 case IX86_BUILTIN_LDMXCSR:
13144 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13145 target = assign_386_stack_local (SImode, 0);
13146 emit_move_insn (target, op0);
13147 emit_insn (gen_ldmxcsr (target));
13148 return 0;
13150 case IX86_BUILTIN_STMXCSR:
13151 target = assign_386_stack_local (SImode, 0);
13152 emit_insn (gen_stmxcsr (target));
13153 return copy_to_mode_reg (SImode, target);
13155 case IX86_BUILTIN_SHUFPS:
13156 case IX86_BUILTIN_SHUFPD:
13157 icode = (fcode == IX86_BUILTIN_SHUFPS
13158 ? CODE_FOR_sse_shufps
13159 : CODE_FOR_sse2_shufpd);
13160 arg0 = TREE_VALUE (arglist);
13161 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13162 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13163 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13164 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13165 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13166 tmode = insn_data[icode].operand[0].mode;
13167 mode0 = insn_data[icode].operand[1].mode;
13168 mode1 = insn_data[icode].operand[2].mode;
13169 mode2 = insn_data[icode].operand[3].mode;
13171 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13172 op0 = copy_to_mode_reg (mode0, op0);
13173 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13174 op1 = copy_to_mode_reg (mode1, op1);
13175 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13177 /* @@@ better error message */
13178 error ("mask must be an immediate");
13179 return gen_reg_rtx (tmode);
13181 if (target == 0
13182 || GET_MODE (target) != tmode
13183 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13184 target = gen_reg_rtx (tmode);
13185 pat = GEN_FCN (icode) (target, op0, op1, op2);
13186 if (! pat)
13187 return 0;
13188 emit_insn (pat);
13189 return target;
13191 case IX86_BUILTIN_PSHUFW:
13192 case IX86_BUILTIN_PSHUFD:
13193 case IX86_BUILTIN_PSHUFHW:
13194 case IX86_BUILTIN_PSHUFLW:
13195 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13196 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13197 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13198 : CODE_FOR_mmx_pshufw);
13199 arg0 = TREE_VALUE (arglist);
13200 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13201 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13202 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13203 tmode = insn_data[icode].operand[0].mode;
13204 mode1 = insn_data[icode].operand[1].mode;
13205 mode2 = insn_data[icode].operand[2].mode;
13207 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13208 op0 = copy_to_mode_reg (mode1, op0);
13209 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13211 /* @@@ better error message */
13212 error ("mask must be an immediate");
13213 return const0_rtx;
13215 if (target == 0
13216 || GET_MODE (target) != tmode
13217 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13218 target = gen_reg_rtx (tmode);
13219 pat = GEN_FCN (icode) (target, op0, op1);
13220 if (! pat)
13221 return 0;
13222 emit_insn (pat);
13223 return target;
13225 case IX86_BUILTIN_PSLLDQI128:
13226 case IX86_BUILTIN_PSRLDQI128:
13227 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
13228 : CODE_FOR_sse2_lshrti3);
13229 arg0 = TREE_VALUE (arglist);
13230 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13231 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13232 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13233 tmode = insn_data[icode].operand[0].mode;
13234 mode1 = insn_data[icode].operand[1].mode;
13235 mode2 = insn_data[icode].operand[2].mode;
13237 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13239 op0 = copy_to_reg (op0);
13240 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
13242 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13244 error ("shift must be an immediate");
13245 return const0_rtx;
13247 target = gen_reg_rtx (V2DImode);
13248 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
13249 if (! pat)
13250 return 0;
13251 emit_insn (pat);
13252 return target;
13254 case IX86_BUILTIN_FEMMS:
13255 emit_insn (gen_femms ());
13256 return NULL_RTX;
13258 case IX86_BUILTIN_PAVGUSB:
13259 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13261 case IX86_BUILTIN_PF2ID:
13262 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13264 case IX86_BUILTIN_PFACC:
13265 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13267 case IX86_BUILTIN_PFADD:
13268 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13270 case IX86_BUILTIN_PFCMPEQ:
13271 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13273 case IX86_BUILTIN_PFCMPGE:
13274 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13276 case IX86_BUILTIN_PFCMPGT:
13277 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13279 case IX86_BUILTIN_PFMAX:
13280 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13282 case IX86_BUILTIN_PFMIN:
13283 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13285 case IX86_BUILTIN_PFMUL:
13286 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13288 case IX86_BUILTIN_PFRCP:
13289 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13291 case IX86_BUILTIN_PFRCPIT1:
13292 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13294 case IX86_BUILTIN_PFRCPIT2:
13295 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13297 case IX86_BUILTIN_PFRSQIT1:
13298 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13300 case IX86_BUILTIN_PFRSQRT:
13301 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13303 case IX86_BUILTIN_PFSUB:
13304 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13306 case IX86_BUILTIN_PFSUBR:
13307 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13309 case IX86_BUILTIN_PI2FD:
13310 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13312 case IX86_BUILTIN_PMULHRW:
13313 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13315 case IX86_BUILTIN_PF2IW:
13316 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13318 case IX86_BUILTIN_PFNACC:
13319 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13321 case IX86_BUILTIN_PFPNACC:
13322 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13324 case IX86_BUILTIN_PI2FW:
13325 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13327 case IX86_BUILTIN_PSWAPDSI:
13328 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13330 case IX86_BUILTIN_PSWAPDSF:
13331 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13333 case IX86_BUILTIN_SSE_ZERO:
13334 target = gen_reg_rtx (V4SFmode);
13335 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
13336 return target;
13338 case IX86_BUILTIN_MMX_ZERO:
13339 target = gen_reg_rtx (DImode);
13340 emit_insn (gen_mmx_clrdi (target));
13341 return target;
13343 case IX86_BUILTIN_CLRTI:
13344 target = gen_reg_rtx (V2DImode);
13345 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
13346 return target;
13349 case IX86_BUILTIN_SQRTSD:
13350 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13351 case IX86_BUILTIN_LOADAPD:
13352 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13353 case IX86_BUILTIN_LOADUPD:
13354 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13356 case IX86_BUILTIN_STOREAPD:
13357 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13358 case IX86_BUILTIN_STOREUPD:
13359 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13361 case IX86_BUILTIN_LOADSD:
13362 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13364 case IX86_BUILTIN_STORESD:
13365 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13367 case IX86_BUILTIN_SETPD1:
13368 target = assign_386_stack_local (DFmode, 0);
13369 arg0 = TREE_VALUE (arglist);
13370 emit_move_insn (adjust_address (target, DFmode, 0),
13371 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13372 op0 = gen_reg_rtx (V2DFmode);
13373 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13374 emit_insn (gen_sse2_shufpd (op0, op0, op0, const0_rtx));
13375 return op0;
13377 case IX86_BUILTIN_SETPD:
13378 target = assign_386_stack_local (V2DFmode, 0);
13379 arg0 = TREE_VALUE (arglist);
13380 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13381 emit_move_insn (adjust_address (target, DFmode, 0),
13382 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13383 emit_move_insn (adjust_address (target, DFmode, 8),
13384 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13385 op0 = gen_reg_rtx (V2DFmode);
13386 emit_insn (gen_sse2_movapd (op0, target));
13387 return op0;
13389 case IX86_BUILTIN_LOADRPD:
13390 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13391 gen_reg_rtx (V2DFmode), 1);
13392 emit_insn (gen_sse2_shufpd (target, target, target, const1_rtx));
13393 return target;
13395 case IX86_BUILTIN_LOADPD1:
13396 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13397 gen_reg_rtx (V2DFmode), 1);
13398 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13399 return target;
13401 case IX86_BUILTIN_STOREPD1:
13402 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13403 case IX86_BUILTIN_STORERPD:
13404 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13406 case IX86_BUILTIN_CLRPD:
13407 target = gen_reg_rtx (V2DFmode);
13408 emit_insn (gen_sse_clrv2df (target));
13409 return target;
13411 case IX86_BUILTIN_MFENCE:
13412 emit_insn (gen_sse2_mfence ());
13413 return 0;
13414 case IX86_BUILTIN_LFENCE:
13415 emit_insn (gen_sse2_lfence ());
13416 return 0;
13418 case IX86_BUILTIN_CLFLUSH:
13419 arg0 = TREE_VALUE (arglist);
13420 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13421 icode = CODE_FOR_sse2_clflush;
13422 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
13423 op0 = copy_to_mode_reg (Pmode, op0);
13425 emit_insn (gen_sse2_clflush (op0));
13426 return 0;
13428 case IX86_BUILTIN_MOVNTPD:
13429 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13430 case IX86_BUILTIN_MOVNTDQ:
13431 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13432 case IX86_BUILTIN_MOVNTI:
13433 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13435 case IX86_BUILTIN_LOADDQA:
13436 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
13437 case IX86_BUILTIN_LOADDQU:
13438 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
13439 case IX86_BUILTIN_LOADD:
13440 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
13442 case IX86_BUILTIN_STOREDQA:
13443 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
13444 case IX86_BUILTIN_STOREDQU:
13445 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
13446 case IX86_BUILTIN_STORED:
13447 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
13449 case IX86_BUILTIN_MONITOR:
13450 arg0 = TREE_VALUE (arglist);
13451 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13452 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13453 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13454 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13455 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13456 if (!REG_P (op0))
13457 op0 = copy_to_mode_reg (SImode, op0);
13458 if (!REG_P (op1))
13459 op1 = copy_to_mode_reg (SImode, op1);
13460 if (!REG_P (op2))
13461 op2 = copy_to_mode_reg (SImode, op2);
13462 emit_insn (gen_monitor (op0, op1, op2));
13463 return 0;
13465 case IX86_BUILTIN_MWAIT:
13466 arg0 = TREE_VALUE (arglist);
13467 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13468 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13469 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13470 if (!REG_P (op0))
13471 op0 = copy_to_mode_reg (SImode, op0);
13472 if (!REG_P (op1))
13473 op1 = copy_to_mode_reg (SImode, op1);
13474 emit_insn (gen_mwait (op0, op1));
13475 return 0;
13477 case IX86_BUILTIN_LOADDDUP:
13478 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
13480 case IX86_BUILTIN_LDDQU:
13481 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
13482 1);
13484 default:
13485 break;
13488 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13489 if (d->code == fcode)
13491 /* Compares are treated specially. */
13492 if (d->icode == CODE_FOR_maskcmpv4sf3
13493 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13494 || d->icode == CODE_FOR_maskncmpv4sf3
13495 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13496 || d->icode == CODE_FOR_maskcmpv2df3
13497 || d->icode == CODE_FOR_vmmaskcmpv2df3
13498 || d->icode == CODE_FOR_maskncmpv2df3
13499 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13500 return ix86_expand_sse_compare (d, arglist, target);
13502 return ix86_expand_binop_builtin (d->icode, arglist, target);
13505 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13506 if (d->code == fcode)
13507 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13509 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13510 if (d->code == fcode)
13511 return ix86_expand_sse_comi (d, arglist, target);
13513 /* @@@ Should really do something sensible here. */
13514 return 0;
13517 /* Store OPERAND to memory after reload is completed.  This means
13518 that we can't easily use assign_stack_local.  */
13519 rtx
13520 ix86_force_to_memory (enum machine_mode mode, rtx operand)
13522 rtx result;
13523 if (!reload_completed)
13524 abort ();
13525 if (TARGET_RED_ZONE)
13527 result = gen_rtx_MEM (mode,
13528 gen_rtx_PLUS (Pmode,
13529 stack_pointer_rtx,
13530 GEN_INT (-RED_ZONE_SIZE)));
13531 emit_move_insn (result, operand);
13533 else if (!TARGET_RED_ZONE && TARGET_64BIT)
13535 switch (mode)
13537 case HImode:
13538 case SImode:
13539 operand = gen_lowpart (DImode, operand);
13540 /* FALLTHRU */
13541 case DImode:
13542 emit_insn (
13543 gen_rtx_SET (VOIDmode,
13544 gen_rtx_MEM (DImode,
13545 gen_rtx_PRE_DEC (DImode,
13546 stack_pointer_rtx)),
13547 operand));
13548 break;
13549 default:
13550 abort ();
13552 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13554 else
13556 switch (mode)
13558 case DImode:
13560 rtx operands[2];
13561 split_di (&operand, 1, operands, operands + 1);
13562 emit_insn (
13563 gen_rtx_SET (VOIDmode,
13564 gen_rtx_MEM (SImode,
13565 gen_rtx_PRE_DEC (Pmode,
13566 stack_pointer_rtx)),
13567 operands[1]));
13568 emit_insn (
13569 gen_rtx_SET (VOIDmode,
13570 gen_rtx_MEM (SImode,
13571 gen_rtx_PRE_DEC (Pmode,
13572 stack_pointer_rtx)),
13573 operands[0]));
13575 break;
13576 case HImode:
13577 /* It is better to store HImodes as SImodes. */
13578 if (!TARGET_PARTIAL_REG_STALL)
13579 operand = gen_lowpart (SImode, operand);
13580 /* FALLTHRU */
13581 case SImode:
13582 emit_insn (
13583 gen_rtx_SET (VOIDmode,
13584 gen_rtx_MEM (GET_MODE (operand),
13585 gen_rtx_PRE_DEC (SImode,
13586 stack_pointer_rtx)),
13587 operand));
13588 break;
13589 default:
13590 abort ();
13592 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13594 return result;
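/* For illustration (a sketch, not from the original sources): with
   TARGET_RED_ZONE the routine above stores OPERAND just below the stack
   pointer and returns a MEM of that slot, conceptually

       (set (mem:DI (plus:DI (reg sp) (const_int -128))) operand)

   where -128 stands in for -RED_ZONE_SIZE; without a red zone the value
   is instead pushed with a pre-decrement of the stack pointer and the
   result is a MEM of the new stack pointer.  */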
13597 /* Free operand from the memory. */
13598 void
13599 ix86_free_from_memory (enum machine_mode mode)
13601 if (!TARGET_RED_ZONE)
13603 int size;
13605 if (mode == DImode || TARGET_64BIT)
13606 size = 8;
13607 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13608 size = 2;
13609 else
13610 size = 4;
13611 /* Use LEA to deallocate stack space.  In peephole2 it will be converted
13612 to a pop or add instruction if registers are available.  */
13613 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13614 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13615 GEN_INT (size))));
13619 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13620 QImode must go into class Q_REGS.
13621 Narrow ALL_REGS to GENERAL_REGS.  This allows movsf and
13622 movdf to do mem-to-mem moves through integer regs.  */
13623 enum reg_class
13624 ix86_preferred_reload_class (rtx x, enum reg_class class)
13626 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
13627 return NO_REGS;
13628 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13630 /* SSE can't load any constant directly yet. */
13631 if (SSE_CLASS_P (class))
13632 return NO_REGS;
13633 /* Floats can load 0 and 1. */
13634 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13636 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13637 if (MAYBE_SSE_CLASS_P (class))
13638 return (reg_class_subset_p (class, GENERAL_REGS)
13639 ? GENERAL_REGS : FLOAT_REGS);
13640 else
13641 return class;
13643 /* General regs can load everything. */
13644 if (reg_class_subset_p (class, GENERAL_REGS))
13645 return GENERAL_REGS;
13646 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13647 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
13648 return NO_REGS;
13650 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
13651 return NO_REGS;
13652 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13653 return Q_REGS;
13654 return class;
13657 /* If we are copying between general and FP registers, we need a memory
13658 location. The same is true for SSE and MMX registers.
13660 The macro can't work reliably when one of the CLASSES is a class containing
13661 registers from multiple units (SSE, MMX, integer).  We avoid this by never
13662 combining those units in a single alternative in the machine description.
13663 Ensure that this constraint holds to avoid unexpected surprises.
13665 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13666 enforce these sanity checks. */
13667 int
13668 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
13669 enum machine_mode mode, int strict)
13671 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
13672 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
13673 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
13674 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
13675 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
13676 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
13678 if (strict)
13679 abort ();
13680 else
13681 return 1;
13683 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
13684 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
13685 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
13686 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
13687 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
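/* Worked example (illustrative assumption, not from the original sources):
   moving a DImode value between an SSE register and a general register on
   a 32-bit target hits the "different unit" test above with a mode that is
   neither SImode nor 64-bit DImode, so the function reports that a
   secondary memory slot is required and the move goes through memory.  */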
13689 /* Return the cost of moving data from a register in class CLASS1 to
13690 one in class CLASS2.
13692 It is not required that the cost always equal 2 when FROM is the same as TO;
13693 on some machines it is expensive to move between registers if they are not
13694 general registers. */
13695 int
13696 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
13697 enum reg_class class2)
13699 /* If we require secondary memory, compute the cost of the store followed
13700 by the load.  To avoid bad register allocation choices, this needs to be
13701 *at least* as high as the symmetric MEMORY_MOVE_COST.  */
13703 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
13705 int cost = 1;
13707 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
13708 MEMORY_MOVE_COST (mode, class1, 1));
13709 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
13710 MEMORY_MOVE_COST (mode, class2, 1));
13712 /* When copying from a general purpose register we may emit multiple
13713 stores followed by a single load, causing a memory size mismatch stall.
13714 Count this as an arbitrarily high cost of 20.  */
13715 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
13716 cost += 20;
13718 /* In the case of FP/MMX moves, the registers actually overlap, and we
13719 have to switch modes in order to treat them differently. */
13720 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
13721 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
13722 cost += 20;
13724 return cost;
13727 /* Moves between SSE/MMX and integer unit are expensive. */
13728 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13729 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
13730 return ix86_cost->mmxsse_to_integer;
13731 if (MAYBE_FLOAT_CLASS_P (class1))
13732 return ix86_cost->fp_move;
13733 if (MAYBE_SSE_CLASS_P (class1))
13734 return ix86_cost->sse_move;
13735 if (MAYBE_MMX_CLASS_P (class1))
13736 return ix86_cost->mmx_move;
13737 return 2;
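/* Worked example (illustrative, using the cost tables abstractly): when the
   secondary-memory path above is taken for a GPR->SSE DImode move on a
   32-bit target, the returned cost is roughly

       1 + MAX (int load, int store) + MAX (SSE load, SSE store) + 20

   the final 20 being the memory size mismatch penalty, since two SImode
   stores feed a single 64-bit load: CLASS_MAX_NREGS (GENERAL_REGS, DImode)
   is 2 versus 1 for the SSE class, which is what triggers that penalty.  */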
13740 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
13741 int
13742 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
13744 /* The flags register, and only the flags register, can hold CCmode values.  */
13745 if (CC_REGNO_P (regno))
13746 return GET_MODE_CLASS (mode) == MODE_CC;
13747 if (GET_MODE_CLASS (mode) == MODE_CC
13748 || GET_MODE_CLASS (mode) == MODE_RANDOM
13749 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
13750 return 0;
13751 if (FP_REGNO_P (regno))
13752 return VALID_FP_MODE_P (mode);
13753 if (SSE_REGNO_P (regno))
13754 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
13755 if (MMX_REGNO_P (regno))
13756 return (TARGET_MMX
13757 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
13758 /* We handle both integers and floats in the general purpose registers.
13759 In the future we should be able to handle vector modes as well.  */
13760 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
13761 return 0;
13762 /* Take care with QImode values - they can be in non-QI regs, but then
13763 they do cause partial register stalls.  */
13764 if (regno < 4 || mode != QImode || TARGET_64BIT)
13765 return 1;
13766 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
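/* For illustration (an assumed example): on a 32-bit target QImode is
   always allowed in the first four integer registers (regno < 4), which
   have byte subregisters; for the remaining integer registers it is only
   accepted during or after reload, or when TARGET_PARTIAL_REG_STALL is
   off, matching the partial register stall concern noted above.  */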
13769 /* Return the cost of moving data of mode M between a
13770 register and memory. A value of 2 is the default; this cost is
13771 relative to those in `REGISTER_MOVE_COST'.
13773 If moving between registers and memory is more expensive than
13774 between two registers, you should define this macro to express the
13775 relative cost.
13777 Also model the increased cost of moving QImode registers in
13778 non-Q_REGS classes.
13779 */
13780 int
13781 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
13783 if (FLOAT_CLASS_P (class))
13785 int index;
13786 switch (mode)
13788 case SFmode:
13789 index = 0;
13790 break;
13791 case DFmode:
13792 index = 1;
13793 break;
13794 case XFmode:
13795 index = 2;
13796 break;
13797 default:
13798 return 100;
13800 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
13802 if (SSE_CLASS_P (class))
13804 int index;
13805 switch (GET_MODE_SIZE (mode))
13807 case 4:
13808 index = 0;
13809 break;
13810 case 8:
13811 index = 1;
13812 break;
13813 case 16:
13814 index = 2;
13815 break;
13816 default:
13817 return 100;
13819 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
13821 if (MMX_CLASS_P (class))
13823 int index;
13824 switch (GET_MODE_SIZE (mode))
13826 case 4:
13827 index = 0;
13828 break;
13829 case 8:
13830 index = 1;
13831 break;
13832 default:
13833 return 100;
13835 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
13837 switch (GET_MODE_SIZE (mode))
13839 case 1:
13840 if (in)
13841 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
13842 : ix86_cost->movzbl_load);
13843 else
13844 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
13845 : ix86_cost->int_store[0] + 4);
13846 break;
13847 case 2:
13848 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
13849 default:
13850 /* Compute the number of 32-bit moves needed.  TFmode is moved as XFmode.  */
13851 if (mode == TFmode)
13852 mode = XFmode;
13853 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
13854 * (((int) GET_MODE_SIZE (mode)
13855 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
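/* Worked example (illustrative, with assumed sizes): an integer-class move
   of TFmode falls into the default case above, is recosted as XFmode, and
   is charged one 32-bit move per word:

       moves = (GET_MODE_SIZE (XFmode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD
             = (12 + 3) / 4 = 3        (32-bit target, 12-byte XFmode)
       cost  = 3 * int_load[2]         (or int_store[2] for a store)

   The 12-byte XFmode size is an assumption; it depends on the long double
   padding selected by the ABI options.  */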
13859 /* Compute a (partial) cost for rtx X. Return true if the complete
13860 cost has been computed, and false if subexpressions should be
13861 scanned. In either case, *TOTAL contains the cost result. */
13863 static bool
13864 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
13866 enum machine_mode mode = GET_MODE (x);
13868 switch (code)
13870 case CONST_INT:
13871 case CONST:
13872 case LABEL_REF:
13873 case SYMBOL_REF:
13874 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
13875 *total = 3;
13876 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
13877 *total = 2;
13878 else if (flag_pic && SYMBOLIC_CONST (x)
13879 && (!TARGET_64BIT
13880 || (GET_CODE (x) != LABEL_REF
13881 && (GET_CODE (x) != SYMBOL_REF
13882 || !SYMBOL_REF_LOCAL_P (x)))))
13883 *total = 1;
13884 else
13885 *total = 0;
13886 return true;
13888 case CONST_DOUBLE:
13889 if (mode == VOIDmode)
13890 *total = 0;
13891 else
13892 switch (standard_80387_constant_p (x))
13894 case 1: /* 0.0 */
13895 *total = 1;
13896 break;
13897 default: /* Other constants */
13898 *total = 2;
13899 break;
13900 case 0:
13901 case -1:
13902 /* Start with (MEM (SYMBOL_REF)), since that's where
13903 it'll probably end up. Add a penalty for size. */
13904 *total = (COSTS_N_INSNS (1)
13905 + (flag_pic != 0 && !TARGET_64BIT)
13906 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
13907 break;
13909 return true;
13911 case ZERO_EXTEND:
13912 /* The zero extension is often completely free on x86_64, so make
13913 it as cheap as possible.  */
13914 if (TARGET_64BIT && mode == DImode
13915 && GET_MODE (XEXP (x, 0)) == SImode)
13916 *total = 1;
13917 else if (TARGET_ZERO_EXTEND_WITH_AND)
13918 *total = COSTS_N_INSNS (ix86_cost->add);
13919 else
13920 *total = COSTS_N_INSNS (ix86_cost->movzx);
13921 return false;
13923 case SIGN_EXTEND:
13924 *total = COSTS_N_INSNS (ix86_cost->movsx);
13925 return false;
13927 case ASHIFT:
13928 if (GET_CODE (XEXP (x, 1)) == CONST_INT
13929 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
13931 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
13932 if (value == 1)
13934 *total = COSTS_N_INSNS (ix86_cost->add);
13935 return false;
13937 if ((value == 2 || value == 3)
13938 && ix86_cost->lea <= ix86_cost->shift_const)
13940 *total = COSTS_N_INSNS (ix86_cost->lea);
13941 return false;
13944 /* FALLTHRU */
13946 case ROTATE:
13947 case ASHIFTRT:
13948 case LSHIFTRT:
13949 case ROTATERT:
13950 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
13952 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
13954 if (INTVAL (XEXP (x, 1)) > 32)
13955 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
13956 else
13957 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
13959 else
13961 if (GET_CODE (XEXP (x, 1)) == AND)
13962 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
13963 else
13964 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
13967 else
13969 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
13970 *total = COSTS_N_INSNS (ix86_cost->shift_const);
13971 else
13972 *total = COSTS_N_INSNS (ix86_cost->shift_var);
13974 return false;
13976 case MULT:
13977 if (FLOAT_MODE_P (mode))
13979 *total = COSTS_N_INSNS (ix86_cost->fmul);
13980 return false;
13982 else
13984 rtx op0 = XEXP (x, 0);
13985 rtx op1 = XEXP (x, 1);
13986 int nbits;
13987 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
13989 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
13990 for (nbits = 0; value != 0; value &= value - 1)
13991 nbits++;
13993 else
13994 /* This is arbitrary. */
13995 nbits = 7;
13997 /* Compute costs correctly for widening multiplication. */
13998 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
13999 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
14000 == GET_MODE_SIZE (mode))
14002 int is_mulwiden = 0;
14003 enum machine_mode inner_mode = GET_MODE (op0);
14005 if (GET_CODE (op0) == GET_CODE (op1))
14006 is_mulwiden = 1, op1 = XEXP (op1, 0);
14007 else if (GET_CODE (op1) == CONST_INT)
14009 if (GET_CODE (op0) == SIGN_EXTEND)
14010 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
14011 == INTVAL (op1);
14012 else
14013 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
14016 if (is_mulwiden)
14017 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
14020 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14021 + nbits * ix86_cost->mult_bit)
14022 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
14024 return true;
14027 case DIV:
14028 case UDIV:
14029 case MOD:
14030 case UMOD:
14031 if (FLOAT_MODE_P (mode))
14032 *total = COSTS_N_INSNS (ix86_cost->fdiv);
14033 else
14034 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
14035 return false;
14037 case PLUS:
14038 if (FLOAT_MODE_P (mode))
14039 *total = COSTS_N_INSNS (ix86_cost->fadd);
14040 else if (GET_MODE_CLASS (mode) == MODE_INT
14041 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
14043 if (GET_CODE (XEXP (x, 0)) == PLUS
14044 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14045 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
14046 && CONSTANT_P (XEXP (x, 1)))
14048 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
14049 if (val == 2 || val == 4 || val == 8)
14051 *total = COSTS_N_INSNS (ix86_cost->lea);
14052 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14053 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
14054 outer_code);
14055 *total += rtx_cost (XEXP (x, 1), outer_code);
14056 return true;
14059 else if (GET_CODE (XEXP (x, 0)) == MULT
14060 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
14062 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
14063 if (val == 2 || val == 4 || val == 8)
14065 *total = COSTS_N_INSNS (ix86_cost->lea);
14066 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14067 *total += rtx_cost (XEXP (x, 1), outer_code);
14068 return true;
14071 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14073 *total = COSTS_N_INSNS (ix86_cost->lea);
14074 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14075 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14076 *total += rtx_cost (XEXP (x, 1), outer_code);
14077 return true;
14080 /* FALLTHRU */
14082 case MINUS:
14083 if (FLOAT_MODE_P (mode))
14085 *total = COSTS_N_INSNS (ix86_cost->fadd);
14086 return false;
14088 /* FALLTHRU */
14090 case AND:
14091 case IOR:
14092 case XOR:
14093 if (!TARGET_64BIT && mode == DImode)
14095 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
14096 + (rtx_cost (XEXP (x, 0), outer_code)
14097 << (GET_MODE (XEXP (x, 0)) != DImode))
14098 + (rtx_cost (XEXP (x, 1), outer_code)
14099 << (GET_MODE (XEXP (x, 1)) != DImode)));
14100 return true;
14102 /* FALLTHRU */
14104 case NEG:
14105 if (FLOAT_MODE_P (mode))
14107 *total = COSTS_N_INSNS (ix86_cost->fchs);
14108 return false;
14110 /* FALLTHRU */
14112 case NOT:
14113 if (!TARGET_64BIT && mode == DImode)
14114 *total = COSTS_N_INSNS (ix86_cost->add * 2);
14115 else
14116 *total = COSTS_N_INSNS (ix86_cost->add);
14117 return false;
14119 case FLOAT_EXTEND:
14120 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
14121 *total = 0;
14122 return false;
14124 case ABS:
14125 if (FLOAT_MODE_P (mode))
14126 *total = COSTS_N_INSNS (ix86_cost->fabs);
14127 return false;
14129 case SQRT:
14130 if (FLOAT_MODE_P (mode))
14131 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
14132 return false;
14134 case UNSPEC:
14135 if (XINT (x, 1) == UNSPEC_TP)
14136 *total = 0;
14137 return false;
14139 default:
14140 return false;
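/* For illustration (a sketch of the arithmetic above, not new code): in the
   MULT case the loop

       for (nbits = 0; value != 0; value &= value - 1)
         nbits++;

   counts the set bits of a constant multiplier; e.g. value = 10 (binary
   1010) clears a low set bit twice, so nbits = 2 and a multiply by 10 is
   charged mult_init[MODE_INDEX (mode)] + 2 * mult_bit.  */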
14144 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
14145 static void
14146 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
14148 init_section ();
14149 fputs ("\tpushl $", asm_out_file);
14150 assemble_name (asm_out_file, XSTR (symbol, 0));
14151 fputc ('\n', asm_out_file);
14153 #endif
14155 #if TARGET_MACHO
14157 static int current_machopic_label_num;
14159 /* Given a symbol name and its associated stub, write out the
14160 definition of the stub. */
14162 void
14163 machopic_output_stub (FILE *file, const char *symb, const char *stub)
14165 unsigned int length;
14166 char *binder_name, *symbol_name, lazy_ptr_name[32];
14167 int label = ++current_machopic_label_num;
14169 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
14170 symb = (*targetm.strip_name_encoding) (symb);
14172 length = strlen (stub);
14173 binder_name = alloca (length + 32);
14174 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
14176 length = strlen (symb);
14177 symbol_name = alloca (length + 32);
14178 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
14180 sprintf (lazy_ptr_name, "L%d$lz", label);
14182 if (MACHOPIC_PURE)
14183 machopic_picsymbol_stub_section ();
14184 else
14185 machopic_symbol_stub_section ();
14187 fprintf (file, "%s:\n", stub);
14188 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14190 if (MACHOPIC_PURE)
14192 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
14193 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
14194 fprintf (file, "\tjmp %%edx\n");
14196 else
14197 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
14199 fprintf (file, "%s:\n", binder_name);
14201 if (MACHOPIC_PURE)
14203 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
14204 fprintf (file, "\tpushl %%eax\n");
14206 else
14207 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
14209 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
14211 machopic_lazy_symbol_ptr_section ();
14212 fprintf (file, "%s:\n", lazy_ptr_name);
14213 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14214 fprintf (file, "\t.long %s\n", binder_name);
14216 #endif /* TARGET_MACHO */
14218 /* Order the registers for register allocator. */
14220 void
14221 x86_order_regs_for_local_alloc (void)
14223 int pos = 0;
14224 int i;
14226 /* First allocate the local general purpose registers. */
14227 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14228 if (GENERAL_REGNO_P (i) && call_used_regs[i])
14229 reg_alloc_order [pos++] = i;
14231 /* Global general purpose registers. */
14232 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14233 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14234 reg_alloc_order [pos++] = i;
14236 /* x87 registers come first in case we are doing FP math
14237 using them. */
14238 if (!TARGET_SSE_MATH)
14239 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14240 reg_alloc_order [pos++] = i;
14242 /* SSE registers. */
14243 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14244 reg_alloc_order [pos++] = i;
14245 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14246 reg_alloc_order [pos++] = i;
14248 /* x87 registers. */
14249 if (TARGET_SSE_MATH)
14250 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14251 reg_alloc_order [pos++] = i;
14253 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14254 reg_alloc_order [pos++] = i;
14256 /* Initialize the rest of the array, as some registers are never
14257 allocated at all.  */
14258 while (pos < FIRST_PSEUDO_REGISTER)
14259 reg_alloc_order [pos++] = 0;
14262 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
14263 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
14264 #endif
14266 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
14267 struct attribute_spec.handler. */
14268 static tree
14269 ix86_handle_struct_attribute (tree *node, tree name,
14270 tree args ATTRIBUTE_UNUSED,
14271 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
14273 tree *type = NULL;
14274 if (DECL_P (*node))
14276 if (TREE_CODE (*node) == TYPE_DECL)
14277 type = &TREE_TYPE (*node);
14279 else
14280 type = node;
14282 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
14283 || TREE_CODE (*type) == UNION_TYPE)))
14285 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
14286 *no_add_attrs = true;
14289 else if ((is_attribute_p ("ms_struct", name)
14290 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
14291 || ((is_attribute_p ("gcc_struct", name)
14292 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
14294 warning ("`%s' incompatible attribute ignored",
14295 IDENTIFIER_POINTER (name));
14296 *no_add_attrs = true;
14299 return NULL_TREE;
14302 static bool
14303 ix86_ms_bitfield_layout_p (tree record_type)
14305 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
14306 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
14307 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
14310 /* Returns an expression indicating where the this parameter is
14311 located on entry to the FUNCTION. */
14313 static rtx
14314 x86_this_parameter (tree function)
14316 tree type = TREE_TYPE (function);
14318 if (TARGET_64BIT)
14320 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
14321 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
14324 if (ix86_function_regparm (type, function) > 0)
14326 tree parm;
14328 parm = TYPE_ARG_TYPES (type);
14329 /* Figure out whether or not the function has a variable number of
14330 arguments. */
14331 for (; parm; parm = TREE_CHAIN (parm))
14332 if (TREE_VALUE (parm) == void_type_node)
14333 break;
14334 /* If not, the this parameter is in the first argument. */
14335 if (parm)
14337 int regno = 0;
14338 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
14339 regno = 2;
14340 return gen_rtx_REG (SImode, regno);
14344 if (aggregate_value_p (TREE_TYPE (type), type))
14345 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
14346 else
14347 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
14350 /* Determine whether x86_output_mi_thunk can succeed. */
14352 static bool
14353 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
14354 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
14355 HOST_WIDE_INT vcall_offset, tree function)
14357 /* 64-bit can handle anything. */
14358 if (TARGET_64BIT)
14359 return true;
14361 /* For 32-bit, everything's fine if we have one free register. */
14362 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
14363 return true;
14365 /* Need a free register for vcall_offset. */
14366 if (vcall_offset)
14367 return false;
14369 /* Need a free register for GOT references. */
14370 if (flag_pic && !(*targetm.binds_local_p) (function))
14371 return false;
14373 /* Otherwise ok. */
14374 return true;
14377 /* Output the assembler code for a thunk function. THUNK_DECL is the
14378 declaration for the thunk function itself, FUNCTION is the decl for
14379 the target function. DELTA is an immediate constant offset to be
14380 added to THIS. If VCALL_OFFSET is nonzero, the word at
14381 *(*this + vcall_offset) should be added to THIS. */
14383 static void
14384 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
14385 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
14386 HOST_WIDE_INT vcall_offset, tree function)
14388 rtx xops[3];
14389 rtx this = x86_this_parameter (function);
14390 rtx this_reg, tmp;
14392 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
14393 pull it in now and let DELTA benefit. */
14394 if (REG_P (this))
14395 this_reg = this;
14396 else if (vcall_offset)
14398 /* Put the this parameter into %eax. */
14399 xops[0] = this;
14400 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
14401 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14403 else
14404 this_reg = NULL_RTX;
14406 /* Adjust the this parameter by a fixed constant. */
14407 if (delta)
14409 xops[0] = GEN_INT (delta);
14410 xops[1] = this_reg ? this_reg : this;
14411 if (TARGET_64BIT)
14413 if (!x86_64_general_operand (xops[0], DImode))
14415 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14416 xops[1] = tmp;
14417 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
14418 xops[0] = tmp;
14419 xops[1] = this;
14421 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14423 else
14424 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14427 /* Adjust the this parameter by a value stored in the vtable. */
14428 if (vcall_offset)
14430 if (TARGET_64BIT)
14431 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14432 else
14434 int tmp_regno = 2 /* ECX */;
14435 if (lookup_attribute ("fastcall",
14436 TYPE_ATTRIBUTES (TREE_TYPE (function))))
14437 tmp_regno = 0 /* EAX */;
14438 tmp = gen_rtx_REG (SImode, tmp_regno);
14441 xops[0] = gen_rtx_MEM (Pmode, this_reg);
14442 xops[1] = tmp;
14443 if (TARGET_64BIT)
14444 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14445 else
14446 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14448 /* Adjust the this parameter. */
14449 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
14450 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
14452 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
14453 xops[0] = GEN_INT (vcall_offset);
14454 xops[1] = tmp2;
14455 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14456 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
14458 xops[1] = this_reg;
14459 if (TARGET_64BIT)
14460 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14461 else
14462 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14465 /* If necessary, drop THIS back to its stack slot. */
14466 if (this_reg && this_reg != this)
14468 xops[0] = this_reg;
14469 xops[1] = this;
14470 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14473 xops[0] = XEXP (DECL_RTL (function), 0);
14474 if (TARGET_64BIT)
14476 if (!flag_pic || (*targetm.binds_local_p) (function))
14477 output_asm_insn ("jmp\t%P0", xops);
14478 else
14480 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
14481 tmp = gen_rtx_CONST (Pmode, tmp);
14482 tmp = gen_rtx_MEM (QImode, tmp);
14483 xops[0] = tmp;
14484 output_asm_insn ("jmp\t%A0", xops);
14487 else
14489 if (!flag_pic || (*targetm.binds_local_p) (function))
14490 output_asm_insn ("jmp\t%P0", xops);
14491 else
14492 #if TARGET_MACHO
14493 if (TARGET_MACHO)
14495 rtx sym_ref = XEXP (DECL_RTL (function), 0);
14496 tmp = (gen_rtx_SYMBOL_REF
14497 (Pmode,
14498 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
14499 tmp = gen_rtx_MEM (QImode, tmp);
14500 xops[0] = tmp;
14501 output_asm_insn ("jmp\t%0", xops);
14503 else
14504 #endif /* TARGET_MACHO */
14506 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14507 output_set_got (tmp);
14509 xops[1] = tmp;
14510 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
14511 output_asm_insn ("jmp\t{*}%1", xops);
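/* Editorial example (not from the original sources): for a 32-bit, non-PIC
   thunk with DELTA == 4, no VCALL_OFFSET, and THIS living in its stack slot,
   the code above emits roughly

       addl $4, 4(%esp)
       jmp  target_function

   whereas a nonzero VCALL_OFFSET first pulls THIS into %eax so the vtable
   pointer can be loaded and the vtable slot added to THIS.  */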
14516 static void
14517 x86_file_start (void)
14519 default_file_start ();
14520 if (X86_FILE_START_VERSION_DIRECTIVE)
14521 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
14522 if (X86_FILE_START_FLTUSED)
14523 fputs ("\t.global\t__fltused\n", asm_out_file);
14524 if (ix86_asm_dialect == ASM_INTEL)
14525 fputs ("\t.intel_syntax\n", asm_out_file);
14528 int
14529 x86_field_alignment (tree field, int computed)
14531 enum machine_mode mode;
14532 tree type = TREE_TYPE (field);
14534 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
14535 return computed;
14536 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
14537 ? get_inner_array_type (type) : type);
14538 if (mode == DFmode || mode == DCmode
14539 || GET_MODE_CLASS (mode) == MODE_INT
14540 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
14541 return MIN (32, computed);
14542 return computed;
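/* Editorial example (not part of the original file): without -malign-double
   on 32-bit targets, the code above caps DFmode and integer fields at 32-bit
   alignment, so in

       struct s { char c; double d; };

   the member d is placed at offset 4 rather than 8, matching the traditional
   ix86 ABI layout.  */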
14545 /* Output assembler code to FILE to increment profiler label # LABELNO
14546 for profiling a function entry. */
14547 void
14548 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
14550 if (TARGET_64BIT)
14551 if (flag_pic)
14553 #ifndef NO_PROFILE_COUNTERS
14554 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
14555 #endif
14556 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
14558 else
14560 #ifndef NO_PROFILE_COUNTERS
14561 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
14562 #endif
14563 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14565 else if (flag_pic)
14567 #ifndef NO_PROFILE_COUNTERS
14568 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
14569 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
14570 #endif
14571 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
14573 else
14575 #ifndef NO_PROFILE_COUNTERS
14576 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
14577 PROFILE_COUNT_REGISTER);
14578 #endif
14579 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14583 /* We don't have exact information about the insn sizes, but we may assume
14584 quite safely that we are informed about all 1 byte insns and memory
14585 address sizes. This is enough to eliminate unnecessary padding in
14586 99% of cases. */
14588 static int
14589 min_insn_size (rtx insn)
14591 int l = 0;
14593 if (!INSN_P (insn) || !active_insn_p (insn))
14594 return 0;
14596 /* Discard alignments we've emitted, and jump instructions. */
14597 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
14598 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
14599 return 0;
14600 if (GET_CODE (insn) == JUMP_INSN
14601 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
14602 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
14603 return 0;
14605 /* Important case - calls are always 5 bytes.
14606 It is common to have many calls in a row. */
14607 if (GET_CODE (insn) == CALL_INSN
14608 && symbolic_reference_mentioned_p (PATTERN (insn))
14609 && !SIBLING_CALL_P (insn))
14610 return 5;
14611 if (get_attr_length (insn) <= 1)
14612 return 1;
14614 /* For normal instructions we may rely on the sizes of addresses
14615 and the presence of a symbol to require 4 bytes of encoding.
14616 This is not the case for jumps, where references are PC relative. */
14617 if (GET_CODE (insn) != JUMP_INSN)
14619 l = get_attr_length_address (insn);
14620 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
14621 l = 4;
14623 if (l)
14624 return 1+l;
14625 else
14626 return 2;
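/* Editorial examples of the estimate above (the assembly mnemonics are
   illustrative assumptions, not taken from the surrounding code):

       call foo        -> 5 bytes (direct call to a symbol)
       push %ebx       -> 1 byte  (length attribute is at most 1)
       movl sym, %eax  -> 5 bytes (1 plus a 4 byte address)
       anything else   -> at least 2 bytes

   The function returns a lower bound; at worst this causes padding that was
   not strictly needed in ix86_avoid_jump_misspredicts below.  */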
14629 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
14630 16 byte window. */
14632 static void
14633 ix86_avoid_jump_misspredicts (void)
14635 rtx insn, start = get_insns ();
14636 int nbytes = 0, njumps = 0;
14637 int isjump = 0;
14639 /* Look for all minimal intervals of instructions containing 4 jumps.
14640 The intervals are bounded by START and INSN. NBYTES is the total
14641 size of instructions in the interval including INSN and not including
14642 START. When NBYTES is smaller than 16 bytes, it is possible
14643 that the end of START and the end of INSN land in the same 16 byte page.
14645 The smallest offset in the page at which INSN can start is the case where
14646 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
14647 We add a p2align to the 16 byte window with maxskip 17 - NBYTES + sizeof (INSN).
14648 */
14649 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14652 nbytes += min_insn_size (insn);
14653 if (dump_file)
14654 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
14655 INSN_UID (insn), min_insn_size (insn));
14656 if ((GET_CODE (insn) == JUMP_INSN
14657 && GET_CODE (PATTERN (insn)) != ADDR_VEC
14658 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
14659 || GET_CODE (insn) == CALL_INSN)
14660 njumps++;
14661 else
14662 continue;
14664 while (njumps > 3)
14666 start = NEXT_INSN (start);
14667 if ((GET_CODE (start) == JUMP_INSN
14668 && GET_CODE (PATTERN (start)) != ADDR_VEC
14669 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
14670 || GET_CODE (start) == CALL_INSN)
14671 njumps--, isjump = 1;
14672 else
14673 isjump = 0;
14674 nbytes -= min_insn_size (start);
14676 if (njumps < 0)
14677 abort ();
14678 if (dump_file)
14679 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
14680 INSN_UID (start), INSN_UID (insn), nbytes);
14682 if (njumps == 3 && isjump && nbytes < 16)
14684 int padsize = 15 - nbytes + min_insn_size (insn);
14686 if (dump_file)
14687 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
14688 INSN_UID (insn), padsize);
14689 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
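/* Editorial worked example of the padding arithmetic above (the byte counts
   are hypothetical): if adding INSN, a 2 byte jump, brings the interval's
   jump count to 4, and after the earliest jump is dropped from the front of
   the interval the remaining NBYTES (including INSN) is 12, then
   padsize = 15 - 12 + 2 = 5, so a p2align of that size is emitted before
   INSN to push it out of the 16 byte window shared with the three
   preceding jumps.  */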
14694 /* AMD Athlon works faster
14695 when RET is not the destination of a conditional jump or directly preceded
14696 by another jump instruction. We avoid the penalty by inserting a NOP just
14697 before the RET instruction in such cases. */
14698 static void
14699 ix86_pad_returns (void)
14701 edge e;
14703 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
14705 basic_block bb = e->src;
14706 rtx ret = BB_END (bb);
14707 rtx prev;
14708 bool replace = false;
14710 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
14711 || !maybe_hot_bb_p (bb))
14712 continue;
14713 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
14714 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
14715 break;
14716 if (prev && GET_CODE (prev) == CODE_LABEL)
14718 edge e;
14719 for (e = bb->pred; e; e = e->pred_next)
14720 if (EDGE_FREQUENCY (e) && e->src->index >= 0
14721 && !(e->flags & EDGE_FALLTHRU))
14722 replace = true;
14724 if (!replace)
14726 prev = prev_active_insn (ret);
14727 if (prev
14728 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
14729 || GET_CODE (prev) == CALL_INSN))
14730 replace = true;
14731 /* Empty functions get a branch mispredict even when the jump destination
14732 is not visible to us. */
14733 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
14734 replace = true;
14736 if (replace)
14738 emit_insn_before (gen_return_internal_long (), ret);
14739 delete_insn (ret);
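/* Editorial note (an illustration, not part of the original file): the
   replacement above turns a sequence such as

       jne  .L2
       ret

   into the longer return form produced by gen_return_internal_long, so that
   the return is no longer directly preceded by (or the direct target of) a
   jump.  The exact encoding of the long form is defined in i386.md.  */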
14744 /* Implement machine specific optimizations. We implement padding of returns
14745 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
14746 static void
14747 ix86_reorg (void)
14749 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
14750 ix86_pad_returns ();
14751 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
14752 ix86_avoid_jump_misspredicts ();
14755 /* Return nonzero when a QImode register that must be represented via a REX
14756 prefix is used. */
14757 bool
14758 x86_extended_QIreg_mentioned_p (rtx insn)
14760 int i;
14761 extract_insn_cached (insn);
14762 for (i = 0; i < recog_data.n_operands; i++)
14763 if (REG_P (recog_data.operand[i])
14764 && REGNO (recog_data.operand[i]) >= 4)
14765 return true;
14766 return false;
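/* Editorial example (the assembly is illustrative): on x86-64, the byte views
   of %rsi, %rdi, %rbp and %rsp (hard regnos 4-7) and of %r8-%r15 can only be
   encoded with a REX prefix, so an insn such as "movb %sil, (%rax)" makes the
   predicate above return true.  */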
14769 /* Return nonzero when P points to a register encoded via a REX prefix.
14770 Called via for_each_rtx. */
14771 static int
14772 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
14774 unsigned int regno;
14775 if (!REG_P (*p))
14776 return 0;
14777 regno = REGNO (*p);
14778 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
14781 /* Return true when INSN mentions a register that must be encoded using a
14782 REX prefix. */
14783 bool
14784 x86_extended_reg_mentioned_p (rtx insn)
14786 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
14789 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
14790 optabs would emit if we didn't have TFmode patterns. */
14792 void
14793 x86_emit_floatuns (rtx operands[2])
14795 rtx neglab, donelab, i0, i1, f0, in, out;
14796 enum machine_mode mode, inmode;
14798 inmode = GET_MODE (operands[1]);
14799 if (inmode != SImode
14800 && inmode != DImode)
14801 abort ();
14803 out = operands[0];
14804 in = force_reg (inmode, operands[1]);
14805 mode = GET_MODE (out);
14806 neglab = gen_label_rtx ();
14807 donelab = gen_label_rtx ();
14808 i1 = gen_reg_rtx (Pmode);
14809 f0 = gen_reg_rtx (mode);
14811 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
14813 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
14814 emit_jump_insn (gen_jump (donelab));
14815 emit_barrier ();
14817 emit_label (neglab);
14819 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
14820 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
14821 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
14822 expand_float (f0, i0, 0);
14823 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
14825 emit_label (donelab);
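/* Editorial sketch (not part of the original file): a C restatement of the
   RTL emitted above for the DImode input case.  Nonnegative inputs use the
   plain signed conversion; negative ones are halved with the low bit folded
   back in (so the final rounding is unaffected), converted as signed, and
   then doubled:

       double floatuns (unsigned long long x)
       {
         if ((long long) x >= 0)
           return (double) x;
         return 2.0 * (double) (long long) ((x >> 1) | (x & 1));
       }
*/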
14828 /* Initialize vector TARGET via VALS. */
14829 void
14830 ix86_expand_vector_init (rtx target, rtx vals)
14832 enum machine_mode mode = GET_MODE (target);
14833 int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
14834 int n_elts = (GET_MODE_SIZE (mode) / elt_size);
14835 int i;
14837 for (i = n_elts - 1; i >= 0; i--)
14838 if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
14839 && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
14840 break;
14842 /* A few special cases first...
14843 ... constants are best loaded from the constant pool. */
14844 if (i < 0)
14846 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
14847 return;
14850 /* ... values where only the first field is non-constant are best loaded
14851 from the pool and overwritten via a move later. */
14852 if (!i)
14854 rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
14855 GET_MODE_INNER (mode), 0);
14857 op = force_reg (mode, op);
14858 XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
14859 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
14860 switch (GET_MODE (target))
14862 case V2DFmode:
14863 emit_insn (gen_sse2_movsd (target, target, op));
14864 break;
14865 case V4SFmode:
14866 emit_insn (gen_sse_movss (target, target, op));
14867 break;
14868 default:
14869 break;
14871 return;
14874 /* And the general case: a sequence of unpack (interleave) operations. */
14875 switch (GET_MODE (target))
14877 case V2DFmode:
14879 rtx vecop0 =
14880 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
14881 rtx vecop1 =
14882 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
14884 vecop0 = force_reg (V2DFmode, vecop0);
14885 vecop1 = force_reg (V2DFmode, vecop1);
14886 emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
14888 break;
14889 case V4SFmode:
14891 rtx vecop0 =
14892 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
14893 rtx vecop1 =
14894 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
14895 rtx vecop2 =
14896 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
14897 rtx vecop3 =
14898 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
14899 rtx tmp1 = gen_reg_rtx (V4SFmode);
14900 rtx tmp2 = gen_reg_rtx (V4SFmode);
14902 vecop0 = force_reg (V4SFmode, vecop0);
14903 vecop1 = force_reg (V4SFmode, vecop1);
14904 vecop2 = force_reg (V4SFmode, vecop2);
14905 vecop3 = force_reg (V4SFmode, vecop3);
14906 emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
14907 emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
14908 emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
14910 break;
14911 default:
14912 abort ();
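/* Editorial worked example (not part of the original file): for the V4SFmode
   case above, with scalar elements a, b, c, d held in lane 0 of
   vecop0..vecop3, the unpcklps sequence interleaves the low lanes:

       tmp1   = unpcklps (vecop1, vecop3)  ->  { b, d, ?, ? }
       tmp2   = unpcklps (vecop0, vecop2)  ->  { a, c, ?, ? }
       target = unpcklps (tmp2,   tmp1)    ->  { a, b, c, d }

   so the final vector holds the elements in their original order.  */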
14916 /* Worker function for TARGET_MD_ASM_CLOBBERS.
14918 We do this in the new i386 backend to maintain source compatibility
14919 with the old cc0-based compiler. */
14921 static tree
14922 ix86_md_asm_clobbers (tree clobbers)
14924 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
14925 clobbers);
14926 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
14927 clobbers);
14928 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
14929 clobbers);
14930 return clobbers;
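/* Editorial usage note (illustrative, not part of the original file): because
   of the worker above, a user-level statement such as

       asm volatile ("cld");

   is processed as if it had also listed "flags", "fpsr" and "dirflag" in its
   clobber list, preserving the behaviour user code relied on with the old
   cc0-based i386 backend.  */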
14933 /* Worker function for REVERSE_CONDITION. */
14935 enum rtx_code
14936 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
14938 return (mode != CCFPmode && mode != CCFPUmode
14939 ? reverse_condition (code)
14940 : reverse_condition_maybe_unordered (code));
14943 /* Output code to perform an x87 FP register move, from OPERANDS[1]
14944 to OPERANDS[0]. */
14946 const char *
14947 output_387_reg_move (rtx insn, rtx *operands)
14949 if (REG_P (operands[1])
14950 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
14952 if (REGNO (operands[0]) == FIRST_STACK_REG
14953 && TARGET_USE_FFREEP)
14954 return "ffreep\t%y0";
14955 return "fstp\t%y0";
14957 if (STACK_TOP_P (operands[0]))
14958 return "fld%z1\t%y1";
14959 return "fst\t%y0";
14962 /* Output code to perform a conditional jump to LABEL, if the C2 flag in the
14963 FP status register is set. */
14965 void
14966 ix86_emit_fp_unordered_jump (rtx label)
14968 rtx reg = gen_reg_rtx (HImode);
14969 rtx temp;
14971 emit_insn (gen_x86_fnstsw_1 (reg));
14973 if (TARGET_USE_SAHF)
14975 emit_insn (gen_x86_sahf_1 (reg));
14977 temp = gen_rtx_REG (CCmode, FLAGS_REG);
14978 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
14980 else
14982 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
14984 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14985 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
14988 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
14989 gen_rtx_LABEL_REF (VOIDmode, label),
14990 pc_rtx);
14991 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
14992 emit_jump_insn (temp);
14995 /* Output code to perform a log1p XFmode calculation. */
14997 void ix86_emit_i387_log1p (rtx op0, rtx op1)
14999 rtx label1 = gen_label_rtx ();
15000 rtx label2 = gen_label_rtx ();
15002 rtx tmp = gen_reg_rtx (XFmode);
15003 rtx tmp2 = gen_reg_rtx (XFmode);
15005 emit_insn (gen_absxf2 (tmp, op1));
15006 emit_insn (gen_cmpxf (tmp,
15007 CONST_DOUBLE_FROM_REAL_VALUE (
15008 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
15009 XFmode)));
15010 emit_jump_insn (gen_bge (label1));
15012 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
15013 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
15014 emit_jump (label2);
15016 emit_label (label1);
15017 emit_move_insn (tmp, CONST1_RTX (XFmode));
15018 emit_insn (gen_addxf3 (tmp, op1, tmp));
15019 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
15020 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
15022 emit_label (label2);
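/* Editorial note on the constant used above (not part of the original file):
   0.29289321881345247... is 1 - sqrt(2)/2, the bound within which the x87
   fyl2xp1 instruction is documented to be accurate.  With tmp2 loaded with
   ln(2) via fldln2, both branches compute

       |op1| <  1 - sqrt(2)/2 :  op0 = ln(2) * log2(1 + op1)    (fyl2xp1)
       |op1| >= 1 - sqrt(2)/2 :  op0 = ln(2) * log2(op1 + 1.0)  (fyl2x)

   which equal log1p (op1); the comparison only selects the more accurate
   instruction for the given range.  */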
15025 #include "gt-i386.h"